Datasets
BramVanroy/CommonCrawl-CreativeCommons
3008 downloads
['task_categories:text-generation''task_ids:language-modeling''language:afr'
GAIR/LIMO
3004 downloads
['language:en''license:apache-2.0''size_categories:n<1K'
flaviagiammarino/vqa-rad
3003 downloads
['task_categories:visual-question-answering''language:en''license:cc0-1.0'
snap-stanford/stark
2998 downloads
['task_categories:question-answering''language:en''license:cc-by-4.0'
MaLA-LM/mala-monolingual-dedup
2998 downloads
['license:odc-by''size_categories:100M<n<1B''format:arrow'
BAAI/IndustryCorpus_finance
2995 downloads
['task_categories:text-generation''language:zh''language:en'
tensorshield/reddit_dataset_237
2974 downloads
['task_categories:text-classification''task_categories:token-classification''task_categories:question-answering'
ucinlp/drop
2973 downloads
['task_categories:question-answering''task_categories:text2text-generation''task_ids:extractive-qa'
xlangai/spider
2972 downloads
['task_categories:text2text-generation''annotations_creators:expert-generated''language_creators:expert-generated'
lmms-lab/LMMs-Eval-Lite
2972 downloads
['size_categories:1K<n<10K''format:parquet''modality:image'