Skip to content

Commit

Permalink
changed extra_urls to json format, added pk-hfad-2
Browse files Browse the repository at this point in the history
  • Loading branch information
mdsrqbl committed Nov 4, 2023
1 parent 1456b64 commit 7c1e21f
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sign_recordings/reference_clips/**/*.*
sign_recordings/videos/**/*.*

text_corpora/raw_corpora/**/*.*
text_corpora/supported_substrings_frequency.json
text_corpora/supported_substrings_frequency*.json

# keep these so that directory is visible at least
!sign_recordings/features/**/.gitkeep
Expand Down
28 changes: 19 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# sign-language-datasets
Datasets for sign language translation. [See release files for big data files]()

Datasets for sign language translation. [See release files for big data files](https://github.com/sign-language-translator/sign-language-datasets/releases)

This project includes:

- Reference Clips
- pakistan-hamza_foundation (pk-hfad) [788]
- pakistan-hamza_foundation (pk-hfad) [788+1]
- wordless (~10 minutes of a person doing everything except signs)
- Word Mapping (which spoken-language words map to which sign-language clips or sequences of clips)
- Videos (Recordings)
Expand All @@ -13,20 +15,25 @@ This project includes:
- Text Corpora
- A parallel corpus of spoken language sentences and their corresponding sign language gloss (clip sequence)

# Definitions
## Definitions

- gloss: the sequence of words that corresponds to the sequence of signs for a given complete spoken-language text.

# Explanations
## Explanations

- json ...

# Directory Tree
## Directory Tree

```text
sign-language-datasets
├── sign_recordings
│ ├── features
│ │ └── landmarks
│ │
│ ├── reference_clips
│ │ ├── pk-hfad-1 [788]
│ │ ├── pk-hfad-2 [1]
│ │ └── wordless [1]
│ │
│ └── videos
Expand Down Expand Up @@ -63,9 +70,12 @@ This project includes:
├── supported_substrings_frequency.json
└── raw_corpora
└── wikipedia.json

# Bonus
##### new file size command
```

## Bonus

**Command to list the sizes of new or modified files**

```bash
git status -u --porcelain | awk '{print $2}' | xargs ls -hl | awk '{print $5 "\t" $9}'
```
```
60 changes: 60 additions & 0 deletions extra_urls.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"datasets": {
"features": [
{
"description": "mediapipe_face_v2_hand_v1",
"file_to_url": {
"features/mediapipe-face-v2-hand-v1_pk-hfad-1_df00001.jsonl.zip": "https://example.com"
}
}
],
"reference_clips": [
{
"description": "wordless",
"file_to_url": {
"videos/wordless_wordless.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/wordless_wordless.mp4"
}
},
{
"description": "pk-hfad-1",
"file_to_url": {
"videos/pk-hfad-1_1.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_1.mp4",
"videos/pk-hfad-1_10.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10.mp4",
"videos/pk-hfad-1_100.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_100.mp4",
"videos/pk-hfad-1_1000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_1000.mp4",
"videos/pk-hfad-1_10000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10000.mp4",
"videos/pk-hfad-1_100000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_100000.mp4",
"videos/pk-hfad-1_10000000.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_10000000.mp4",
"videos/pk-hfad-1_4.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_4.mp4",
"videos/pk-hfad-1_5.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_5.mp4",
"videos/pk-hfad-1_6.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_6.mp4",
"videos/pk-hfad-1_7.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_7.mp4",
"videos/pk-hfad-1_8.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_8.mp4",
"videos/pk-hfad-1_9.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_9.mp4",
"videos/pk-hfad-1_a(double-handed-letter).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_a.double-handed-letter.mp4",
"videos/pk-hfad-1_chips(fries).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_chips.fries.mp4",
"videos/pk-hfad-1_f(single-handed-letter).mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_f.single-handed-letter.mp4",
"videos/pk-hfad-1_february.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_february.mp4",
"videos/pk-hfad-1_india.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_india.mp4",
"videos/pk-hfad-1_strawberry.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_strawberry.mp4",
"videos/pk-hfad-1_washroom.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-1_washroom.mp4"
}
},
{
"description": "pk-hfad-2",
"file_to_url": {
"videos/pk-hfad-2_hour.mp4": "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/pk-hfad-2_hour.mp4"
}
}
],
"video_recordings": [
{
"description": "pk-hfad-1",
"file_to_url": {
"videos/pk-hfad-1_df00001_front.zip": "https://example.com"
}
}
]
},
"models": {}
}
22 changes: 0 additions & 22 deletions extra_urls.yaml

This file was deleted.

19 changes: 19 additions & 0 deletions sign_recordings/collection_to_label_to_language_to_words.json
Original file line number Diff line number Diff line change
Expand Up @@ -8543,6 +8543,25 @@
]
}
},
"pk-hfad-2": {
"hour": {
"english": [
"hour"
],
"urdu": [
"گھنٹہ",
"گھنٹے"
],
"hindi": [
"घंटा(समय)",
"घंटे"
],
"roman-urdu": [
"ghanta",
"ghanty"
]
}
},
"wordless": {
"wordless": {
"english": [
Expand Down
3 changes: 3 additions & 0 deletions sign_recordings/recordings_labels.json
Original file line number Diff line number Diff line change
Expand Up @@ -791,5 +791,8 @@
"یقینا",
"یہ",
"یہاں"
],
"pk-hfad-2": [
"hour"
]
}

0 comments on commit 7c1e21f

Please sign in to comment.