Datasets
picollect/danbooru
2053 downloads
['language:en''license:other''size_categories:10M<n<100M'
LLMDH/marianne_pdf_5
2051 downloads
['size_categories:100K<n<1M''format:webdataset''modality:text'
speechcolab/gigaspeech2
2046 downloads
['task_categories:automatic-speech-recognition''multilinguality:multilingual''language:th'
diffusion-cot/GenRef-wds
1916 downloads
['task_categories:text-to-image''language:en''license:mit'
JUNJIE99/MegaPairs
1830 downloads
['language:en''license:cc-by-4.0''size_categories:10M<n<100M'
RussRobin/SpatialQA-E
1823 downloads
['language:en''license:cc-by-4.0''size_categories:1M<n<10M'
gdsu/csd_files
1789 downloads
['size_categories:n<1K''format:webdataset''modality:image'
collabora/hi-stt-preprocessed-webdataset
1785 downloads
['license:cc-by-4.0''size_categories:100K<n<1M''format:webdataset'
mkrausio/laions_got_talent_embs_only
1783 downloads
['size_categories:1M<n<10M''format:webdataset''modality:text'
TIGER-Lab/arxiv-latex-5T
1647 downloads
['language:en''license:apache-2.0''size_categories:10M<n<100M'