Datasets
HOIGen/HOIGen-1M
4101 downloads
['task_categories:text-to-video''language:en''license:apache-2.0'
allenai/real-toxicity-prompts
4057 downloads
['multilinguality:monolingual''source_datasets:original''language:en'
akariasai/PopQA
4045 downloads
['size_categories:10K<n<100K''format:csv''modality:tabular'
pickapic-anonymous/pickapic_v1
3980 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
lmms-lab/EgoIT-99K
3959 downloads
['size_categories:100K<n<1M''format:parquet''modality:audio'
wikimedia-community/wikipedia-article-ratings
3905 downloads
['language:en''license:cc0-1.0''size_categories:10M<n<100M'
Zyphra/dclm-dedup
3904 downloads
['task_categories:text-generation''language:en''license:cc'
open-r1/codeforces-cots
3854 downloads
['license:cc-by-4.0''size_categories:100K<n<1M''format:parquet'
Intelligent-Internet/pd12m
3819 downloads
['task_categories:feature-extraction''language:en''license:apache-2.0'
laion/filtered-wit
3794 downloads
['size_categories:1M<n<10M''format:parquet''modality:image'