Datasets
lukebarousse/data_jobs
9991 downloads
['license:apache-2.0''size_categories:100K<n<1M''format:csv'
togethercomputer/RedPajama-Data-1T-Sample
9884 downloads
['task_categories:text-generation''language:en''size_categories:100K<n<1M'
Major-TOM/Core-S2L2A
9884 downloads
['license:cc-by-sa-4.0''size_categories:1M<n<10M''format:parquet'
laion/LAION-Audio-300M
9874 downloads
['license:apache-2.0''size_categories:100M<n<1B''format:webdataset'
facebook/belebele
9849 downloads
['task_categories:question-answering''task_categories:zero-shot-classification''task_categories:text-classification'
rekrek/reasoning-engaging-story
9837 downloads
['task_categories:text-generation''language:en''license:apache-2.0'
longvideobench/LongVideoBench
9821 downloads
['task_categories:multiple-choice''task_categories:visual-question-answering''language:en'
SWE-Gym/SWE-Gym
9809 downloads
['license:mit''size_categories:1K<n<10K''format:parquet'
agibot-world/AgiBotWorld-Alpha
9808 downloads
['task_categories:other''language:en''size_categories:10M<n<100M'
yentinglin/aime_2025
9775 downloads
['size_categories:n<1K''format:parquet''modality:tabular'