Datasets
ByteDance-Seed/mga-fineweb-edu
2116 downloads
['task_categories:text-generation''language:en''license:odc-by'
philschmid/dolly-15k-oai-style
2115 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
OpenLLM-France/wikipedia
2113 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
latentcat/animesfw
2110 downloads
['size_categories:1M<n<10M''format:parquet''modality:image'
allenai/olmOCR-mix-0225
2110 downloads
['license:odc-by''size_categories:100K<n<1M''format:parquet'
lmms-lab/OK-VQA
2107 downloads
['size_categories:1K<n<10K''format:parquet''modality:image'
vblagoje/cc_news
2104 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
PHBJT/cml-tts
2098 downloads
['size_categories:1M<n<10M''format:parquet''modality:audio'
pratyushmaini/llm_dataset_inference
2096 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
gayanin/babylon-noised
2094 downloads
['size_categories:100K<n<1M''format:parquet''modality:text'