Datasets
davanstrien/DeepURLBench
680 downloads
['task_categories:text-classification''license:cc-by-nc-4.0''size_categories:10M<n<100M'
ghbacct/topic-classifier-news-headlines-classification
679 downloads
['size_categories:1K<n<10K''format:parquet''modality:text'
eduagarcia/CrawlPT_dedup
679 downloads
['task_categories:text-generation''language:pt''size_categories:100M<n<1B'
CoIR-Retrieval/stackoverflow-qa-queries-corpus
679 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
HuggingFaceH4/testing_self_instruct_small
678 downloads
['size_categories:n<1K''format:parquet''modality:text'
jablonkagroup/chempile-lift-merged
678 downloads
['size_categories:100M<n<1B''format:parquet''modality:text'
bigcode/github-commits-diff-dedup-pjjs-april
677 downloads
['size_categories:100K<n<1M''format:parquet''modality:text'
vumichien/fake-news-dataset
677 downloads
['size_categories:1K<n<10K''format:parquet''modality:text'
howard-hou/OCR-VQA
676 downloads
['size_categories:100K<n<1M''format:parquet''modality:image'
OALL/Arabic_MMLU
676 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'