diff --git a/src/huggingface_hub/templates/datasetcard_template.md b/src/huggingface_hub/templates/datasetcard_template.md index 6d9281f9d3..f8cb4c80bf 100644 --- a/src/huggingface_hub/templates/datasetcard_template.md +++ b/src/huggingface_hub/templates/datasetcard_template.md @@ -1,103 +1,143 @@ --- -# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1 +# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1 # Doc / guide: https://huggingface.co/docs/hub/datasets-cards {{ card_data }} --- # Dataset Card for {{ pretty_name | default("Dataset Name", true) }} -## Dataset Description + -- **Homepage:** {{ homepage_url | default("", true)}} -- **Repository:** {{ repo_url | default("", true)}} -- **Paper:** {{ paper_url | default("", true)}} -- **Leaderboard:** {{ leaderboard_url | default("", true)}} -- **Point of Contact:** {{ point_of_contact | default("", true)}} +{{ dataset_summary | default("", true) }} -### Dataset Summary +## Dataset Details -{{ dataset_summary | default("[More Information Needed]", true)}} +### Dataset Description -### Supported Tasks and Leaderboards + -{{ supported_tasks_and_leaderboards_section | default("[More Information Needed]", true)}} +{{ dataset_description | default("", true) }} -### Languages +- **Curated by:** {{ curators | default("[More Information Needed]", true)}} +- **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}} +- **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}} +- **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}} +- **License:** {{ license | default("[More Information Needed]", true)}} -{{ languages_section | default("[More Information Needed]", true)}} +### Dataset Sources [optional] -## Dataset Structure + + +- **Repository:** {{ repo | default("[More Information Needed]", true)}} +- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}} +- **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}} + +## Uses -### Data Instances + -{{ data_instances_section | default("[More Information Needed]", true)}} +### Direct Use -### Data Fields + -{{ data_fields_section | default("[More Information Needed]", true)}} +{{ direct_use | default("[More Information Needed]", true)}} -### Data Splits +### Out-of-Scope Use -{{ data_splits_section | default("[More Information Needed]", true)}} + + +{{ out_of_scope_use | default("[More Information Needed]", true)}} + +## Dataset Structure + + + +{{ dataset_structure | default("[More Information Needed]", true)}} ## Dataset Creation ### Curation Rationale + + {{ curation_rationale_section | default("[More Information Needed]", true)}} ### Source Data -#### Initial Data Collection and Normalization + + +#### Data Collection and Processing + + -{{ data_collection_section | default("[More Information Needed]", true)}} +{{ data_collection_and_processing_section | default("[More Information Needed]", true)}} -#### Who are the source language producers? +#### Who are the source data producers? -{{ source_language_producers_section | default("[More Information Needed]", true)}} + -### Annotations +{{ source_data_producers_section | default("[More Information Needed]", true)}} + +### Annotations [optional] + + #### Annotation process + + {{ annotation_process_section | default("[More Information Needed]", true)}} #### Who are the annotators? + + {{ who_are_annotators_section | default("[More Information Needed]", true)}} -### Personal and Sensitive Information +#### Personal and Sensitive Information + + + +{{ personal_and_sensitive_information | default("[More Information Needed]", true)}} + +## Bias, Risks, and Limitations + + + +{{ bias_risks_limitations | default("[More Information Needed]", true)}} + +### Recommendations -{{ personal_and_sensitive_information_section | default("[More Information Needed]", true)}} + -## Considerations for Using the Data +{{ bias_recommendations | default("Users should be made aware of the risks, biases and limitations of the dataset. More information needed for further recommendations.", true)}} -### Social Impact of Dataset +## Citation [optional] -{{ social_impact_section | default("[More Information Needed]", true)}} + -### Discussion of Biases +**BibTeX:** -{{ discussion_of_biases_section | default("[More Information Needed]", true)}} +{{ citation_bibtex | default("[More Information Needed]", true)}} -### Other Known Limitations +**APA:** -{{ known_limitations_section | default("[More Information Needed]", true)}} +{{ citation_apa | default("[More Information Needed]", true)}} -## Additional Information +## Glossary [optional] -### Dataset Curators + -{{ dataset_curators_section | default("[More Information Needed]", true)}} +{{ glossary | default("[More Information Needed]", true)}} -### Licensing Information +## More Information [optional] -{{ licensing_information_section | default("[More Information Needed]", true)}} +{{ more_information | default("[More Information Needed]", true)}} -### Citation Information +## Dataset Card Authors [optional] -{{ citation_information_section | default("[More Information Needed]", true)}} +{{ dataset_card_authors | default("[More Information Needed]", true)}} -### Contributions +## Dataset Card Contact -{{ contributions_section | default("[More Information Needed]", true)}} +{{ dataset_card_contact | default("[More Information Needed]", true)}} \ No newline at end of file diff --git a/src/huggingface_hub/templates/modelcard_template.md b/src/huggingface_hub/templates/modelcard_template.md index ec2d18d427..8c9243fbd6 100644 --- a/src/huggingface_hub/templates/modelcard_template.md +++ b/src/huggingface_hub/templates/modelcard_template.md @@ -19,6 +19,7 @@ {{ model_description | default("", true) }} - **Developed by:** {{ developers | default("[More Information Needed]", true)}} +- **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}} - **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}} - **Model type:** {{ model_type | default("[More Information Needed]", true)}} - **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}} @@ -77,7 +78,7 @@ Use the code below to get started with the model. ### Training Data - + {{ training_data | default("[More Information Needed]", true)}} @@ -108,7 +109,7 @@ Use the code below to get started with the model. #### Testing Data - + {{ testing_data | default("[More Information Needed]", true)}} diff --git a/tests/test_repocard.py b/tests/test_repocard.py index 454d3df3ee..8cef6ba04e 100644 --- a/tests/test_repocard.py +++ b/tests/test_repocard.py @@ -886,10 +886,8 @@ def test_dataset_card_from_default_template_with_template_variables(self): # Here we pass the card data as kwargs as well so template picks up pretty_name. card = DatasetCard.from_template( card_data, - homepage_url="https://huggingface.co", - repo_url="https://github.com/huggingface/huggingface_hub", - paper_url="https://arxiv.org/pdf/1910.03771.pdf", - point_of_contact="https://huggingface.co/nateraw", + repo="https://github.com/huggingface/huggingface_hub", + paper="https://arxiv.org/pdf/1910.03771.pdf", dataset_summary=( "This is a test dataset card to check if the template variables " "in the dataset card template are working." @@ -898,8 +896,8 @@ def test_dataset_card_from_default_template_with_template_variables(self): self.assertTrue(card.text.strip().startswith("# Dataset Card for My Cool Dataset")) self.assertIsInstance(card, DatasetCard) - matches = re.findall(r"Homepage:\*\* https:\/\/huggingface\.co", str(card)) - self.assertEqual(matches[0], "Homepage:** https://huggingface.co") + matches = re.findall(r"Repository:\*\* https://github\.com/huggingface/huggingface_hub", str(card)) + self.assertEqual(matches[0], "Repository:** https://github.com/huggingface/huggingface_hub") @require_jinja def test_dataset_card_from_custom_template(self):