Datasets
tensorshield/reddit_dataset_157
34740 downloads
['task_categories:text-classification''task_categories:token-classification''task_categories:question-answering'
occiglot/tokenizer-wiki-bench
33634 downloads
['language:af''language:ar''language:bg'
open-r1/DAPO-Math-17k-Processed
33501 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
LLM360/MegaMath
33085 downloads
['task_categories:text-generation''language:en''license:odc-by'
asahi417/seamless-align-enA-viA.speaker-embedding.xlsr-2b
29867 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
bigcode/bigcodebench
29496 downloads
['language_creators:expert-generated''language:code''license:apache-2.0'
nvidia/PhysicalAI-Robotics-Manipulation-Kitchen
27946 downloads
['task_categories:robotics''license:cc-by-4.0''size_categories:100K<n<1M'
MMMU/MMMU
27747 downloads
['task_categories:question-answering''task_categories:visual-question-answering''task_categories:multiple-choice'
CohereLabs/aya_collection_language_split
27717 downloads
['language:ace''language:afr''language:amh'
mlfoundations/dclm-baseline-1.0-parquet
27717 downloads
['language:en''license:cc-by-4.0''size_categories:1B<n<10B'