Datasets
matchbench/selfkg-dwy100k-dbpwd
3019 downloads
['size_categories:1M<n<10M''modality:tabular''modality:text'
nvidia/HelpSteer2
2945 downloads
['language:en''license:cc-by-4.0''size_categories:10K<n<100K'
mrinaldi/TestiMole
2943 downloads
['task_categories:text-classification''task_categories:text-generation''task_ids:language-modeling'
openfoodfacts/product-database
2931 downloads
['language:en''language:fr''language:de'
bigcode/the-stack-v2
2916 downloads
['task_categories:text-generation''language_creators:crowdsourced''language_creators:expert-generated'
BEE-spoke-data/code_contests_instruct
2908 downloads
['task_categories:text-generation''source_datasets:teven/code_contests''source_datasets:deepmind/code_contests'
etri/ForestPersons
2870 downloads
['task_categories:object-detection''language:en''license:cc-by-nc-sa-4.0'
Cohere/wikipedia-22-12-en-embeddings
2848 downloads
['task_categories:text-retrieval''task_ids:document-retrieval''annotations_creators:expert-generated'
bigcode/the-stack-v2-dedup
2831 downloads
['task_categories:text-generation''language_creators:crowdsourced''language_creators:expert-generated'
opencsg/chinese-fineweb-edu-v2
2820 downloads
['task_categories:text-generation''language:zh''license:apache-2.0'