Datasets
pietrolesci/pile-deduped-pythia-preshuffled
4977 downloads
['task_categories:text-generation''language:en''size_categories:100M<n<1B'
omar-sharif/BAD-Bengali-Aggressive-Text-Dataset
4957 downloads
['size_categories:10K<n<100K''format:csv''modality:tabular'
SimpleStories/SimpleStories
4865 downloads
['task_categories:text-generation''language:en''license:mit'
bigcode/the-stack-dedup
4829 downloads
['task_categories:text-generation''language_creators:crowdsourced''language_creators:expert-generated'
Salesforce/fineweb_deduplicated
4780 downloads
['license:odc-by''size_categories:1B<n<10B''format:parquet'
data-is-better-together/fineweb-c
4708 downloads
['task_categories:text-classification''language:lvs''language:kor'
TencentARC/VPData
4670 downloads
['task_categories:image-to-video''task_categories:text-to-video''language:en'
HuggingFaceGECLM/REDDIT_threaded
4633 downloads
['size_categories:10M<n<100M''format:parquet''modality:tabular'
lerobot/aloha_static_cups_open
4414 downloads
['task_categories:robotics''license:mit''size_categories:10K<n<100K'
WenhaoWang/VidProM
4384 downloads
['task_categories:text-to-video''task_categories:text-to-image''source_datasets:original'