Datasets
LLM360/CrystalCoderDatasets
4722 downloads
['language:en''license:odc-by''arxiv:2207.14255'
pscotti/mindeyev2
4718 downloads
['size_categories:100K<n<1M''modality:text''region:us']
hysts-bot-data/daily-papers
4716 downloads
['size_categories:1K<n<10K''format:json''modality:text'
AiAF/SCPWiki-Cleaned-PDF-Archives
4497 downloads
['task_categories:text-generation''license:cc0-1.0''region:us']
data-is-better-together/fineweb-c
4708 downloads
['task_categories:text-classification''language:lvs''language:kor'
codeparrot/apps
4704 downloads
['task_categories:text-generation''task_ids:language-modeling''language_creators:crowdsourced'
thuml/UTSD
4687 downloads
['task_categories:time-series-forecasting''license:apache-2.0''size_categories:100K<n<1M'
Shitao/bge-m3-data
4683 downloads
['size_categories:100K<n<1M''modality:text''arxiv:2402.03216'
Genesis-Intelligence/assets
4683 downloads
['license:mit''size_categories:n<1K''modality:3d'
TRoboto/names
4675 downloads
['license:cc-by-4.0''size_categories:10K<n<100K''modality:text'