Datasets
tau/commonsense_qa
90301 downloads
['task_categories:question-answering''task_ids:open-domain-qa''annotations_creators:crowdsourced'
monology/pile-uncopyrighted
84245 downloads
['license:other''size_categories:1M<n<10M''format:json'
wenknow/reddit_dataset_44
83781 downloads
['task_categories:text-classification''task_categories:token-classification''task_categories:question-answering'
T-NOVA/NO_SCORE
83663 downloads
['size_categories:100M<n<1B''format:csv''modality:text'
amphion/Emilia-Dataset
82509 downloads
['task_categories:text-to-speech''task_categories:automatic-speech-recognition''language:zh'
EleutherAI/lambada_openai
82259 downloads
['task_ids:language-modeling''language_creators:machine-generated''multilinguality:translation'
mlfoundations/MINT-1T-HTML
82205 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
adams-story/datacomp200m
81814 downloads
['size_categories:100M<n<1B''format:parquet''modality:image'
wikimedia/wikipedia
79914 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
HuggingFaceM4/the_cauldron
76296 downloads
['size_categories:1M<n<10M''format:parquet''modality:image'