From 7c1e21f1399a66d9146049facc0e23936107e07a Mon Sep 17 00:00:00 2001 From: Mudassar Iqbal Date: Sat, 4 Nov 2023 19:24:25 +0500 Subject: [PATCH] changed extra_urls to json format, added pk-hfad-2 --- .gitignore | 2 +- README.md | 28 ++++++--- extra_urls.json | 60 +++++++++++++++++++ extra_urls.yaml | 22 ------- ...lection_to_label_to_language_to_words.json | 19 ++++++ sign_recordings/recordings_labels.json | 3 + 6 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 extra_urls.json delete mode 100644 extra_urls.yaml diff --git a/.gitignore b/.gitignore index 3de6aa4..e742643 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ sign_recordings/reference_clips/**/*.* sign_recordings/videos/**/*.* text_corpora/raw_corpora/**/*.* -text_corpora/supported_substrings_frequency.json +text_corpora/supported_substrings_frequency*.json # keep these so that directory is visible at least !sign_recordings/features/**/.gitkeep diff --git a/README.md b/README.md index 9e183a0..90ecd60 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # sign-language-datasets -Datasets for sign language translation. [See release files for big data files]() + +Datasets for sign language translation. [See release files for big data files](https://github.com/sign-language-translator/sign-language-datasets/releases) This project includes: + - Reference Clips - - pakistan-hamza_foundation (pk-hfad) [788] + - pakistan-hamza_foundation (pk-hfad) [788+1] - wordless (~10 minutes of a person doing everything except signs) - Word Mapping (which spoken language words map to which sign language clips or sequence of clips) - Videos (Recordings) @@ -13,13 +15,17 @@ This project includes: - Text Corpora - A parallel corpus of spoken language sentences and their corresponding sign language gloss (clip sequence) -# Definitions +## Definitions + - gloss: word sequence corresponding to sign sequence for a given complete spoken language text. -# Explanations +## Explanations + - json ... -# Directory Tree +## Directory Tree + +```text sign-language-datasets ├── sign_recordings │ ├── features @@ -27,6 +33,7 @@ This project includes: │ │ │ ├── reference_clips │ │ ├── pk-hfad-1 [788] + │ │ ├── pk-hfad-2 [1] │ │ └── wordless [1] │ │ │ └── videos @@ -63,9 +70,12 @@ This project includes: ├── supported_substrings_frequency.json └── raw_corpora └── wikipedia.json - -# Bonus -##### new file size command ``` + +## Bonus + +**new file size command** + +```bash git status -u --porcelain | awk '{print $2}' | xargs ls -hl | awk '{print $5 "\t" $9}' -``` \ No newline at end of file +``` diff --git a/extra_urls.json b/extra_urls.json new file mode 100644 index 0000000..e581df9 --- /dev/null +++ b/extra_urls.json @@ -0,0 +1,60 @@ +{ + "datasets": { + "features": [ + { + "description": "mediapipe_face_v2_hand_v1", + "file_to_url": { + "features/mediapipe-face-v2-hand-v1_pk-hfad-1_df00001.jsonl.zip": "https://example.com" + } + } + ], + "reference_clips": [ + { + "description": "wordless", + "file_to_url": { + "videos/wordless_wordless.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/wordless_wordless.mp4" + } + }, + { + "description": "pk-hfad-1", + "file_to_url": { + "videos/pk-hfad-1_1.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_1.mp4", + "videos/pk-hfad-1_10.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10.mp4", + "videos/pk-hfad-1_100.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_100.mp4", + "videos/pk-hfad-1_1000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_1000.mp4", + "videos/pk-hfad-1_10000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10000.mp4", + "videos/pk-hfad-1_100000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_100000.mp4", + "videos/pk-hfad-1_10000000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10000000.mp4", + "videos/pk-hfad-1_4.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_4.mp4", + "videos/pk-hfad-1_5.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_5.mp4", + "videos/pk-hfad-1_6.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_6.mp4", + "videos/pk-hfad-1_7.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_7.mp4", + "videos/pk-hfad-1_8.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_8.mp4", + "videos/pk-hfad-1_9.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_9.mp4", + "videos/pk-hfad-1_a(double-handed-letter).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_a.double-handed-letter.mp4", + "videos/pk-hfad-1_chips(fries).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_chips.fries.mp4", + "videos/pk-hfad-1_f(single-handed-letter).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_f.single-handed-letter.mp4", + "videos/pk-hfad-1_february.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_february.mp4", + "videos/pk-hfad-1_india.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_india.mp4", + "videos/pk-hfad-1_strawberry.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_strawberry.mp4", + "videos/pk-hfad-1_washroom.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_washroom.mp4" + } + }, + { + "description": "pk-hfad-2", + "file_to_url": { + "videos/pk-hfad-2_hour.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-2_hour.mp4" + } + } + ], + "video_recordings": [ + { + "description": "pk-hfad-1", + "file_to_url": { + "videos/pk-hfad-1_df00001_front.zip": "https://example.com" + } + } + ] + }, + "models": {} +} diff --git a/extra_urls.yaml b/extra_urls.yaml deleted file mode 100644 index c48c035..0000000 --- a/extra_urls.yaml +++ /dev/null @@ -1,22 +0,0 @@ -reference_clips: - - name: pk-hfad-1 - files: - - name: videos/pk-hfad-1_1.mp4 - url: https://drive.google.com/uc?export=download&id=122bdHg08J-lhZ7UVyNFM_qREo1SFDGJL - - - name: wordless - files: - - name: videos/wordless_wordless.mp4 - url: https://drive.google.com/uc?export=download&id=13EvV5I5YYcbT6f8eOV_kIF48Is777ZvT - -videos: - - name: pk-hfad-1_df00001 - files: - - name: videos/pk-hfad-1_df00001_1.mp4 - url: https - -features: - - name: mediapipe_face_v2_hand_v1 - files: - - name: features/mediapipe-face-v2-hand-v1_pk-hfad-1_df00001.jsonl - url: http \ No newline at end of file diff --git a/sign_recordings/collection_to_label_to_language_to_words.json b/sign_recordings/collection_to_label_to_language_to_words.json index 8ea3772..d9d387c 100644 --- a/sign_recordings/collection_to_label_to_language_to_words.json +++ b/sign_recordings/collection_to_label_to_language_to_words.json @@ -8543,6 +8543,25 @@ ] } }, + "pk-hfad-2": { + "hour": { + "english": [ + "hour" + ], + "urdu": [ + "گھنٹہ", + "گھنٹے" + ], + "hindi": [ + "घंटा(समय)", + "घंटे" + ], + "roman-urdu": [ + "ghanta", + "ghanty" + ] + } + }, "wordless": { "wordless": { "english": [ diff --git a/sign_recordings/recordings_labels.json b/sign_recordings/recordings_labels.json index 5a0a511..02562c5 100644 --- a/sign_recordings/recordings_labels.json +++ b/sign_recordings/recordings_labels.json @@ -791,5 +791,8 @@ "یقینا", "یہ", "یہاں" + ], + "pk-hfad-2": [ + "hour" ] } \ No newline at end of file