Datasets
M3LEO/europe
8648 downloads
['size_categories:1M<n<10M''format:parquet''modality:text'
cboettig/fishbase
8636 downloads
['license:cc-by-nc-4.0''size_categories:10M<n<100M''format:parquet'
DKYoon/SlimPajama-6B
8619 downloads
['task_categories:text-generation''language:en''size_categories:1M<n<10M'
wecover/OPUS_TED2020
8530 downloads
['size_categories:10M<n<100M''format:parquet''modality:text'
ScalingIntelligence/KernelBench
8510 downloads
['size_categories:n<1K''format:parquet''modality:tabular'
OptimalScale/ClimbLab
8453 downloads
['task_categories:text-generation''language:en''license:apache-2.0'
kakaobrain/coyo-700m
8443 downloads
['task_categories:text-to-image''task_categories:image-to-text''task_categories:zero-shot-classification'
OpenAssistant/oasst1
8420 downloads
['language:en''language:es''language:ru'
prince-canuma/fineweb-CC-MAIN-2024-10-1B-en
8190 downloads
['size_categories:1M<n<10M''format:parquet''modality:tabular'
fineinstructions-pretraining/nemotron_synthetic_1T
8174 downloads
['size_categories:1B<n<10B''format:parquet''modality:text'