Datasets
ptb-text-only/ptb_text_only
16853 downloads
['task_categories:text-generation''task_categories:fill-mask''task_ids:language-modeling'
legacy-datasets/common_voice
16814 downloads
['task_categories:automatic-speech-recognition''annotations_creators:crowdsourced''language_creators:crowdsourced'
onlymain/onlyone
16768 downloads
['region:us']
nvidia/Nemotron-CrossThink
16742 downloads
['task_categories:question-answering''task_categories:text-generation''language:en'
raushan-testing-hf/videos-test
16713 downloads
['license:apache-2.0''region:us']
mteb/biosses-sts
16653 downloads
['task_categories:sentence-similarity''task_ids:semantic-similarity-scoring''annotations_creators:derived'
codeparrot/github-code
16651 downloads
['task_categories:text-generation''task_ids:language-modeling''language_creators:crowdsourced'
open-thoughts/OpenThoughts-114k
16598 downloads
['license:apache-2.0''size_categories:100K<n<1M''format:parquet'
HuggingFaceGECLM/REDDIT_comments
16505 downloads
['task_categories:text-generation''task_ids:dialogue-modeling''task_ids:language-modeling'
ACCA225/Kaggle-Stable-Diffusion
2184 downloads
['region:us']