Skip to content

Commit

Permalink
added utilities for working with ucf dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Apr 25, 2024
1 parent b5f0467 commit d3f7111
Show file tree
Hide file tree
Showing 7 changed files with 540 additions and 0 deletions.
40 changes: 40 additions & 0 deletions Ethosight/bin/ucf_extract_true_negatives.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Extract "true negative" frames from the Normal videos of the UCF dataset.
#
# Usage: ucf_extract_true_negatives.sh <video_base_dir> <output_base_dir>
#
# Reads ucf_temporal_annotations.txt from the current directory. Each line is
# whitespace-separated: the first field is a video file name and the second
# its label. For every line labelled "Normal", the video is looked up anywhere
# below <video_base_dir> and every 10th frame is written to <output_base_dir>
# as tn_<video name without extension>_NNN.png.

# Check if correct number of arguments are passed
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <video_base_dir> <output_base_dir>"
  exit 1
fi

# Assign command line arguments to variables
video_base_dir="$1"
output_base_dir="$2"
annotations_file="ucf_temporal_annotations.txt" # Explicitly set the annotations file name

# Ensure output directory exists
mkdir -p "$output_base_dir"

# Read the annotation file.
# `|| [ -n "$line" ]` keeps the last line even when the file has no trailing
# newline.
while IFS= read -r line || [ -n "$line" ]; do
  # Tolerate CRLF line endings.
  line="${line%$'\r'}"

  read -ra fields <<< "$line"
  filename="${fields[0]}"
  label="${fields[1]}"

  # Proceed only if the label is "Normal"
  if [ "$label" != "Normal" ]; then
    continue
  fi

  # Find the file in the directory hierarchy; -quit stops at the first match.
  video_path=$(find "$video_base_dir" -type f -name "$filename" -print -quit)

  if [ -z "$video_path" ]; then
    echo "Warning: File not found - $filename"
    continue
  fi

  # Extract every 10th frame as a true negative sample.
  # -nostdin is essential here: without it ffmpeg reads from the loop's
  # stdin and swallows the remaining lines of the annotations file.
  ffmpeg -nostdin -i "$video_path" -vf "select='not(mod(n,10))'" -vsync vfr "$output_base_dir/tn_${filename%.*}_%03d.png"

done < "$annotations_file"

44 changes: 44 additions & 0 deletions Ethosight/bin/ucf_extract_true_negatives_hard.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Extract "hard" true-negative frames from the anomaly videos of the UCF
# dataset, i.e. frames of an anomaly video that lie outside every annotated
# anomaly range.
#
# Usage: ucf_extract_true_negatives_hard.sh <video_base_dir> <output_base_dir>
#
# Reads ucf_temporal_annotations.txt from the current directory. Each line is
# whitespace-separated:
#   <file name> <label> <start1> <end1> [<start2> <end2>]
# A start/end of -1 means "no event". Lines labelled "Normal" are skipped.
# Every 10th frame outside all annotated ranges is written to
# <output_base_dir> as hard_tn_<video name without extension>_NNN.png.

# Check if correct number of arguments are passed
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <video_base_dir> <output_base_dir>"
  exit 1
fi

# Assign command line arguments to variables
video_base_dir="$1"
output_base_dir="$2"
annotations_file="ucf_temporal_annotations.txt" # Explicitly set the annotations file name

# Ensure output directory exists
mkdir -p "$output_base_dir"

# Frame numbers are interpolated into an ffmpeg filter expression, so only
# plain (optionally negative) integers are accepted.
int_re='^-?[0-9]+$'

# Read the annotation file.
# `|| [ -n "$line" ]` keeps the last line even when the file has no trailing
# newline.
while IFS= read -r line || [ -n "$line" ]; do
  # Tolerate CRLF line endings.
  line="${line%$'\r'}"

  read -ra fields <<< "$line"
  filename="${fields[0]}"
  label="${fields[1]}"

  # Skip blank lines and 'Normal' labeled videos
  if [ -z "$filename" ] || [ "$label" == "Normal" ]; then
    continue
  fi

  # Find the file in the directory hierarchy; -quit stops at the first match.
  video_path=$(find "$video_base_dir" -type f -name "$filename" -print -quit)

  if [ -z "$video_path" ]; then
    echo "Warning: File not found - $filename"
    continue
  fi

  # Build the frame filter: every 10th frame, minus each annotated anomaly
  # range. Both event slots (fields 2/3 and 4/5) are excluded so that frames
  # of a second anomaly are never exported as negatives.
  select_expr="not(mod(n,10))"
  excluded_ranges=0
  ranges_ok=1
  for idx in 2 4; do
    range_start="${fields[$idx]}"
    range_end="${fields[$((idx + 1))]}"

    # Slot absent from this line.
    if [ -z "$range_start" ] && [ -z "$range_end" ]; then
      continue
    fi

    if ! [[ "$range_start" =~ $int_re && "$range_end" =~ $int_re ]]; then
      echo "Warning: Invalid frame range for $filename - '$range_start' '$range_end'" >&2
      ranges_ok=0
      break
    fi

    # -1 marks an unused event slot.
    if [ "$range_start" -eq -1 ] || [ "$range_end" -eq -1 ]; then
      continue
    fi

    select_expr+="*not(between(n,$range_start,$range_end))"
    excluded_ranges=$((excluded_ranges + 1))
  done

  if [ "$ranges_ok" -ne 1 ]; then
    continue
  fi

  # Without a known anomaly range there is no way to tell which frames are
  # safe to use as negatives, so the video is skipped.
  if [ "$excluded_ranges" -eq 0 ]; then
    echo "Warning: No anomaly range annotated for $filename - skipping" >&2
    continue
  fi

  # -nostdin is essential here: without it ffmpeg reads from the loop's
  # stdin and swallows the remaining lines of the annotations file.
  ffmpeg -nostdin -i "$video_path" -vf "select='$select_expr'" -vsync vfr "$output_base_dir/hard_tn_${filename%.*}_%03d.png"

done < "$annotations_file"

49 changes: 49 additions & 0 deletions Ethosight/bin/ucf_extract_true_positives.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash
#
# Extract true-positive frames (frames inside annotated anomaly ranges) from
# the UCF dataset videos.
#
# Usage: ucf_extract_true_positives.sh <video_base_dir> <output_base_dir>
#
# Reads ucf_temporal_annotations.txt from the current directory. Each line is
# whitespace-separated:
#   <file name> <label> <start1> <end1> [<start2> <end2>]
# A start/end of -1 means "no event". For each annotated event, every 10th
# frame between start and end is written to <output_base_dir> as
# <video name without extension>_<start>_to_<end>_NNN.png.

# Check if correct number of arguments are passed
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <video_base_dir> <output_base_dir>"
  exit 1
fi

# Assign command line arguments to variables
video_base_dir="$1"
output_base_dir="$2"

# Ensure output directory exists
mkdir -p "$output_base_dir"

# Frame numbers are interpolated into an ffmpeg filter expression, so only
# plain (optionally negative) integers are accepted.
int_re='^-?[0-9]+$'

# Assuming annotations are in the current directory named 'ucf_temporal_annotations.txt'.
# `|| [ -n "$line" ]` keeps the last line even when the file has no trailing
# newline.
while IFS= read -r line || [ -n "$line" ]; do
  # Tolerate CRLF line endings.
  line="${line%$'\r'}"

  # Read fields separated by whitespace
  read -ra fields <<< "$line"
  filename="${fields[0]}"

  # Find the file in the directory hierarchy; -quit stops at the first match.
  video_path=$(find "$video_base_dir" -type f -name "$filename" -print -quit)

  # Check if file was found
  if [ -z "$video_path" ]; then
    echo "Warning: File not found - $filename"
    continue
  fi

  # Up to two events per line: fields 2/3 hold the first, fields 4/5 the second.
  for idx in 2 4; do
    event_start="${fields[$idx]}"
    event_end="${fields[$((idx + 1))]}"

    # Event slot absent from this line.
    if [ -z "$event_start" ] && [ -z "$event_end" ]; then
      continue
    fi

    if ! [[ "$event_start" =~ $int_re && "$event_end" =~ $int_re ]]; then
      echo "Warning: Invalid frame range for $filename - '$event_start' '$event_end'" >&2
      continue
    fi

    # Extract frames only if neither the start nor the end frame is -1
    if [ "$event_start" -eq -1 ] || [ "$event_end" -eq -1 ]; then
      continue
    fi

    # -nostdin is essential here: without it ffmpeg reads from the loop's
    # stdin and swallows the remaining lines of the annotations file.
    ffmpeg -nostdin -i "$video_path" -vf "select='not(mod(n,10))*between(n,$event_start,$event_end)'" -vsync vfr "$output_base_dir/${filename%.*}_${event_start}_to_${event_end}_%03d.png"
  done

done < "ucf_temporal_annotations.txt"

75 changes: 75 additions & 0 deletions Ethosight/bin/ucf_generate_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash

# Exactly six positional arguments are required; otherwise show the usage
# line and stop.
if [[ $# -ne 6 ]]; then
  echo "Usage: $0 <true_positive_dir> <frame_set_count> <true_positive_count> <true_negative_dir> <true_negative_count> <outputdir>"
  exit 1
fi

# Give the positional parameters descriptive names.
true_positive_dir=$1
frame_set_count=$2
true_positive_count=$3
true_negative_dir=$4
true_negative_count=$5
outputdir=$6

# One destination folder per class, created together with any missing parents.
mkdir -p "$outputdir/true_positives" "$outputdir/true_negatives"

#######################################
# Copy frame sets from a directory of extracted frames into a destination
# directory, taking at most one set per video so the result stays diverse.
#
# Frame files are expected to be named <video>_<index>.png; everything before
# the last underscore identifies the video. Files are visited in random
# order. For each not-yet-seen video that has at least `set_count` frames,
# its first `set_count` frames (in natural index order) are copied. The
# function stops once `total_sets` sets have been copied or the files run out.
#
# Arguments:
#   $1 - source directory (searched recursively)
#   $2 - number of frames per set (positive integer)
#   $3 - total number of sets to copy (non-negative integer)
#   $4 - destination directory (created if missing)
# Returns:
#   0 on success (even if fewer than $3 sets were available),
#   1 if $2 or $3 is not a valid count.
#######################################
function process_directory() {
  local src_dir=$1
  local set_count=$2
  local total_sets=$3
  local dest_dir=$4

  if ! [[ "$set_count" =~ ^[0-9]+$ && "$total_sets" =~ ^[0-9]+$ ]]; then
    echo "process_directory: counts must be non-negative integers (got '$set_count', '$total_sets')" >&2
    return 1
  fi
  # Force base 10 so values such as "08" are not parsed as octal.
  set_count=$((10#$set_count))
  total_sets=$((10#$total_sets))
  if [ "$set_count" -lt 1 ]; then
    echo "process_directory: frame set size must be at least 1" >&2
    return 1
  fi

  # Nothing requested: copy nothing.
  if [ "$total_sets" -eq 0 ]; then
    return 0
  fi

  mkdir -p -- "$dest_dir"

  local -A seen_videos=() # videos already examined (whether or not they contributed)
  local copied_sets=0
  local file base_name video_name frame frame_base
  local -a frames

  # Visit all files in random order. Process substitution keeps the loop in
  # the current shell, and NUL delimiters keep unusual file names intact.
  while IFS= read -r -d '' file; do
    base_name="${file##*/}"
    video_name="${base_name%_*}" # Base video name without the frame index

    # An empty name cannot be used as an associative-array key.
    if [ -z "$video_name" ]; then
      continue
    fi

    # Each video is examined once; it either contributes one set or is too
    # short to contribute at all.
    if [ -n "${seen_videos[$video_name]+x}" ]; then
      continue
    fi
    seen_videos[$video_name]=1

    # Collect this video's frames in natural (version) order, so that e.g.
    # _999 precedes _1000 once the counter outgrows its zero padding.
    frames=()
    while IFS= read -r -d '' frame; do
      frame_base="${frame##*/}"
      # The glob also matches longer names that merely start with this
      # video's name; keep only exact matches.
      if [[ "${frame_base%_*}" == "$video_name" ]]; then
        frames+=("$frame")
      fi
    done < <(find "$src_dir" -type f -name "${video_name}_*.png" -print0 | sort -z -V)

    if [ "${#frames[@]}" -lt "$set_count" ]; then
      continue
    fi

    # Copy the first set of consecutive frames; count it only on success.
    if cp -- "${frames[@]:0:set_count}" "$dest_dir/"; then
      copied_sets=$((copied_sets + 1))
    fi

    # Check if required sets have been reached
    if [ "$copied_sets" -ge "$total_sets" ]; then
      break
    fi
  done < <(find "$src_dir" -type f -print0 | shuf --zero-terminated)

  return 0
}

# Build the positive half of the dataset, then the negative half; both use
# the same frame-set size.
process_directory \
  "$true_positive_dir" \
  "$frame_set_count" \
  "$true_positive_count" \
  "$outputdir/true_positives"
process_directory \
  "$true_negative_dir" \
  "$frame_set_count" \
  "$true_negative_count" \
  "$outputdir/true_negatives"

# Tell the user where the result ended up.
echo "Dataset created in '$outputdir'"
20 changes: 20 additions & 0 deletions Ethosight/bin/ucf_readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
unzipper.sh
will unzip all files in directory

extract_true_positives.sh
will extract true positives based on temporal annotation file

extract_true_negatives.sh
will extract true negatives based on "normal" videos

extract_true_negatives_hard.sh
will extract true negatives based only on the temporal annotation file and does not include normal files

generate_dataset.sh
will create a dataset based on several parameters including frame_set_count, true_positive_count, and true_negative_count

frame_set_count: the number of consecutive frames per sample (consecutive in the dataset, e.g. _10, _20, _30)

true_positive_count, true_negative_count: the total number of frame sets to copy for each class (each video contributes at most one frame set)


Loading

0 comments on commit d3f7111

Please sign in to comment.