Datasets
Cohere/wikipedia-22-12-en-embeddings
2848 downloads
['task_categories:text-retrieval''task_ids:document-retrieval''annotations_creators:expert-generated'
togethercomputer/RedPajama-Data-1T
2848 downloads
['task_categories:text-generation''language:en''size_categories:1M<n<10M'
nyu-mll/multi_nli
2846 downloads
['task_categories:text-classification''task_ids:natural-language-inference''task_ids:multi-input-text-classification'
OpenDILabCommunity/LMDrive
2833 downloads
['language:en''license:apache-2.0''size_categories:100K<n<1M'
bigcode/the-stack-v2-dedup
2831 downloads
['task_categories:text-generation''language_creators:crowdsourced''language_creators:expert-generated'
CanCLID/zoengjyutgaai
2829 downloads
['task_categories:automatic-speech-recognition''task_categories:text-to-speech''task_categories:text-generation'
alexandrainst/ftspeech
2825 downloads
['task_categories:automatic-speech-recognition''language:da''license:other'
lansinuote/ChnSentiCorp
2822 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
opencsg/chinese-fineweb-edu-v2
2820 downloads
['task_categories:text-generation''language:zh''license:apache-2.0'
LLMDH/marianne_pdf_10
2814 downloads
['size_categories:100K<n<1M''format:webdataset''modality:text'