Datasets
mlfoundations/MINT-1T-HTML
82205 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
adams-story/datacomp200m
81814 downloads
['size_categories:100M<n<1B''format:parquet''modality:image'
wikimedia/wikipedia
79914 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
banned-historical-archives/banned-historical-archives
79597 downloads
['size_categories:n<1K''format:imagefolder''modality:image'
mlfoundations/MINT-1T-PDF-CC-2023-06
77758 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
HuggingFaceM4/the_cauldron
76296 downloads
['size_categories:1M<n<10M''format:parquet''modality:image'
Skylion007/openwebtext
71692 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
atokforps/latent_worker_early-a2_00
71661 downloads
['region:us']
wyu1/Leopard-Instruct
69938 downloads
['language:en''license:apache-2.0''size_categories:1M<n<10M'
Zyphra/Zyda-2
69654 downloads
['task_categories:text-generation''language:en''license:odc-by'