Datasets
codeparrot/github-code-clean
2807 downloads
['license:apache-2.0''size_categories:10M<n<100M''modality:text'
imomayiz/darija-english
2805 downloads
['task_categories:translation''language:ar''language:en'
Teklia/IAM-line
2801 downloads
['task_categories:image-to-text''language:en''license:mit'
nlphuji/flickr_1k_test_image_text_retrieval
2800 downloads
['size_categories:1K<n<10K''modality:image''modality:text'
ai4privacy/pii-masking-200k
2781 downloads
['task_categories:text-classification''task_categories:token-classification''task_categories:table-question-answering'
yiting/UnsafeBench
2778 downloads
['task_categories:image-classification''size_categories:10K<n<100K''format:parquet'
finiteautomata/news-argentina
2775 downloads
['size_categories:10K<n<100K''format:parquet''modality:text'
mythicinfinity/libritts_r
2769 downloads
['task_categories:text-to-speech''language:en''license:cc-by-4.0'
pixparse/idl-wds
2768 downloads
['task_categories:image-to-text''license:other''size_categories:1M<n<10M'
hatakeyama-llm-team/PMC
2767 downloads
['size_categories:100K<n<1M''format:parquet''modality:text'