Datasets
prince-canuma/fineweb-CC-MAIN-2024-10-1B-en
8190 downloads
['size_categories:1M<n<10M''format:parquet''modality:tabular'
fineinstructions-pretraining/nemotron_synthetic_1T
8174 downloads
['size_categories:1B<n<10B''format:parquet''modality:text'
fineinstructions-pretraining/nemotron_wrap_1T
8150 downloads
['size_categories:100M<n<1B''format:parquet''modality:text'
mikeboss/FIP1
8115 downloads
['task_categories:time-series-forecasting''task_categories:feature-extraction''task_categories:tabular-regression'
mlabonne/guanaco-llama2-1k
8097 downloads
['size_categories:1K<n<10K''format:parquet''modality:text'
wmt/wmt14
8063 downloads
['task_categories:translation''annotations_creators:no-annotation''language_creators:found'
nvidia/describe-anything-dataset
8022 downloads
['task_categories:image-to-text''task_categories:video-text-to-text''language:en'
Vchitect/Vchitect_T2V_DataVerse
8014 downloads
['task_categories:text-to-video''license:apache-2.0''size_categories:1M<n<10M'
tokyotech-llm/swallow-code
7999 downloads
['task_categories:text-generation''language:en''language:ja'
Ammad1Ali/Korean-conversational-dataset
7967 downloads
['size_categories:10K<n<100K''format:csv''modality:text'