Datasets
Jakh0103/glotlid_processed
2360 downloads
['size_categories:100M<n<1B''format:parquet''modality:text'
cyberagent/crello
2359 downloads
['task_categories:unconditional-image-generation''annotations_creators:no-annotation''language_creators:found'
zeta-alpha-ai/NanoSCIDOCS
2358 downloads
['task_categories:text-retrieval''task_ids:document-retrieval''multilinguality:monolingual'
alexandrainst/nordjylland-news-summarization
2356 downloads
['task_categories:summarization''language:da''license:cc0-1.0'
zeta-alpha-ai/NanoNQ
2342 downloads
['task_categories:text-retrieval''task_ids:document-retrieval''multilinguality:monolingual'
PrimeIntellect/INTELLECT-2-RL-Dataset
2332 downloads
['license:apache-2.0''size_categories:100K<n<1M''format:parquet'
MeissonFlow/park
2283 downloads
['license:apache-2.0''size_categories:1M<n<10M''format:parquet'
ai4bharat/samanantar
2330 downloads
['task_categories:text-generation''task_categories:translation''annotations_creators:no-annotation'
princeton-nlp/QuRatedPajama-260B
2330 downloads
['size_categories:100M<n<1B''format:parquet''modality:tabular'
cc-clean/CC-MAIN-2017-39
2329 downloads
['size_categories:100M<n<1B''format:parquet''modality:text'