Datasets
asahi417/seamless-align-enA-jaA.speaker-embedding.w2vbert-600m
7049 downloads
['size_categories:100K<n<1M''format:parquet''modality:tabular'
deepmind/code_contests
6987 downloads
['task_categories:translation''annotations_creators:found''language_creators:found'
BAAI/IndustryCorpus2
6688 downloads
['language:en''language:zh''license:apache-2.0'
HuggingFaceTB/smoltalk
6442 downloads
['language:en''size_categories:1M<n<10M''format:parquet'
applied-ai-018/pretraining_v1-omega_books
6379 downloads
['size_categories:100M<n<1B''format:parquet''modality:tabular'
scikit-learn/iris
6372 downloads
['license:cc0-1.0''size_categories:n<1K''format:csv'
chengjunyan1/smollm-12.5-corpus
6359 downloads
['size_categories:100M<n<1B''format:parquet''modality:tabular'
lmms-lab/NExTQA
6194 downloads
['size_categories:10K<n<100K''format:parquet''modality:tabular'
nvidia/ClimbMix
6096 downloads
['task_categories:text-generation''language:en''license:cc-by-nc-4.0'
trl-lib/ultrafeedback_binarized
5952 downloads
['size_categories:10K<n<100K''format:parquet''modality:tabular'