Datasets
mlfoundations/MINT-1T-PDF-CC-2023-50
96390 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
amphion/Emilia-Dataset
82509 downloads
['task_categories:text-to-speech''task_categories:automatic-speech-recognition''language:zh'
mlfoundations/MINT-1T-PDF-CC-2023-14
43826 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
allenai/s2-naip
43460 downloads
['license:apache-2.0''size_categories:10K<n<100K''format:webdataset'
LanguageBind/Open-Sora-Plan-v1.1.0
41124 downloads
['license:mit''size_categories:100K<n<1M''format:webdataset'
mlfoundations/MINT-1T-PDF-CC-2024-10
39612 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
SparkAudio/voxbox
25120 downloads
['task_categories:text-to-speech''language:zh''language:en'
BLIP3o/BLIP3o-Pretrain-Long-Caption
24154 downloads
['license:apache-2.0''size_categories:10M<n<100M''format:webdataset'
mlfoundations/MINT-1T-PDF-CC-2023-23
23292 downloads
['task_categories:image-to-text''task_categories:text-generation''language:en'
agibot-world/AgiBotWorld-Beta
20683 downloads
['task_categories:other''language:en''size_categories:100M<n<1B'