Datasets
unimelb-nlp/wikiann
18093 downloads
['task_categories:token-classification''task_ids:named-entity-recognition''annotations_creators:machine-generated'
evalplus/mbppplus
17882 downloads
['license:apache-2.0''size_categories:n<1K''format:parquet'
open-r1/Mixture-of-Thoughts
17786 downloads
['task_categories:text-generation''language:en''size_categories:100K<n<1M'
lmms-lab/MMMU
17631 downloads
['size_categories:10K<n<100K''format:parquet''modality:image'
tiiuae/falcon-refinedweb
17426 downloads
['task_categories:text-generation''language:en''license:odc-by'
japanese-asr/whisper_transcriptions.mls.wer_10.0.vectorized
17378 downloads
['size_categories:1M<n<10M''format:parquet''library:datasets'
cimec/lambada
17295 downloads
['task_categories:text2text-generation''annotations_creators:expert-generated''language_creators:found'
HuggingFaceM4/Docmatix
17281 downloads
['task_categories:visual-question-answering''language:en''license:mit'
asahi417/seamless-align-enA-esA.speaker-embedding.xlsr-2b
17099 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
open-thoughts/OpenThoughts-114k
16598 downloads
['license:apache-2.0''size_categories:100K<n<1M''format:parquet'