Datasets
ibrahimhamamci/CT-RATE
42418 downloads
['license:cc-by-nc-sa-4.0''size_categories:100K<n<1M''format:csv'
nexa-collaboration/fineweb_filtered
35132 downloads
['size_categories:100M<n<1B''format:parquet''modality:tabular'
asahi417/seamless-align-enA-viA.speaker-embedding.xlsr-2b
29867 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
nvidia/PhysicalAI-Robotics-Manipulation-Kitchen
27946 downloads
['task_categories:robotics''license:cc-by-4.0''size_categories:100K<n<1M'
CohereLabs/aya_collection_language_split
27717 downloads
['language:ace''language:afr''language:amh'
mlfoundations/dclm-baseline-1.0-parquet
27717 downloads
['language:en''license:cc-by-4.0''size_categories:1B<n<10B'
EpicPinkPenguin/procgen
27629 downloads
['task_categories:reinforcement-learning''language:en''license:apache-2.0'
HPLT/HPLT2.0_cleaned
26753 downloads
['task_categories:fill-mask''task_categories:text-generation''task_ids:language-modeling'
asahi417/seamless-align-enA-frA.speaker-embedding.xlsr-2b
25042 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
prince-canuma/fineweb-CC-MAIN-2024-10-6B-en
24731 downloads
['size_categories:1M<n<10M''format:parquet''modality:tabular'