Datasets
ByteDance-Seed/mga-fineweb-edu
2116 downloads
['task_categories:text-generation''language:en''license:odc-by'
philschmid/dolly-15k-oai-style
2115 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
paralym/mint-1t-pdf-gte6-1.3M
2114 downloads
['region:us']
OpenLLM-France/wikipedia
2113 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
ProgramComputer/voxceleb
2111 downloads
['task_categories:automatic-speech-recognition''task_categories:audio-classification''task_categories:image-classification'
karpathy/fineweb-edu-100B-gpt2-token-shards
2111 downloads
['license:odc-by''region:us']
latentcat/animesfw
2110 downloads
['size_categories:1M<n<10M''format:parquet''modality:image'
allenai/olmOCR-mix-0225
2110 downloads
['license:odc-by''size_categories:100K<n<1M''format:parquet'
lmms-lab/OK-VQA
2107 downloads
['size_categories:1K<n<10K''format:parquet''modality:image'
vblagoje/cc_news
2104 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'