Datasets
speechcolab/gigaspeech
12223 downloads
['task_categories:automatic-speech-recognition''task_categories:text-to-speech''task_categories:text-to-audio'
AI4Math/MathVista
12215 downloads
['task_categories:multiple-choice''task_categories:question-answering''task_categories:visual-question-answering'
nvidia/Llama-Nemotron-Post-Training-Dataset
12177 downloads
['license:cc-by-4.0''size_categories:1M<n<10M''format:json'
HuggingFaceTB/smollm-corpus
12172 downloads
['language:en''license:odc-by''size_categories:100M<n<1B'
adams-story/imagenet1k-256-wds
10454 downloads
['task_categories:image-classification''size_categories:100K<n<1M''format:webdataset'
HuggingFaceFW/fineweb-edu-score-2
12078 downloads
['task_categories:text-generation''language:en''license:odc-by'
CohereLabs/aya_collection
12062 downloads
['task_categories:text-classification''task_categories:summarization''task_categories:translation'
SabaPivot/KMMLU-Summarized-Chain_of_Thought
12044 downloads
['language:ko''size_categories:100K<n<1M''format:parquet'
espnet/floras
12015 downloads
['task_categories:automatic-speech-recognition''task_categories:translation''task_categories:summarization'
nvidia/OpenCodeReasoning
11958 downloads
['task_categories:text-generation''license:cc-by-4.0''size_categories:100K<n<1M'