Datasets
LLM-LAT/harmful-dataset
2940 downloads
['language:en''size_categories:1K<n<10K''format:parquet'
sentence-transformers/parallel-sentences-wikimatrix
2937 downloads
['task_categories:feature-extraction''task_categories:sentence-similarity''language:en'
sentence-transformers/all-nli
2931 downloads
['task_categories:feature-extraction''task_categories:sentence-similarity''multilinguality:monolingual'
openfoodfacts/product-database
2931 downloads
['language:en''language:fr''language:de'
ryoungj/bootstrap-latent-thought-data
2929 downloads
['task_categories:text-generation''license:apache-2.0''size_categories:10M<n<100M'
bigcode/the-stack-v2
2916 downloads
['task_categories:text-generation''language_creators:crowdsourced''language_creators:expert-generated'
hiyouga/math12k
2910 downloads
['task_categories:question-answering''language:en''license:mit'
BEE-spoke-data/code_contests_instruct
2908 downloads
['task_categories:text-generation''source_datasets:teven/code_contests''source_datasets:deepmind/code_contests'
styletts2-community/multilingual-pl-bert
2907 downloads
['language:af''language:an''language:ar'
cvssp/WavCaps
2904 downloads
['language:en''license:cc-by-4.0''size_categories:n<1K'