From 06ba7f6340bfed85f3e3b1f5ac79e132e49b9721 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Tue, 20 Aug 2019 08:54:13 -0500 Subject: [PATCH 01/11] Getting some R stuff on .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index b54e14c..ec9c713 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,7 @@ ENV/ # Image files **/*.JPG + +# Some R stuff +.Rproj.user +*.Rproj From 26fd36cb36d788628310c239d5d439f73344a983 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Tue, 20 Aug 2019 15:47:27 -0500 Subject: [PATCH 02/11] Create process__predict_example.R --- autofocus/predict/process__predict_example.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 autofocus/predict/process__predict_example.R diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R new file mode 100644 index 0000000..e69de29 From 3128dd868247b18db82604871c74e4986b877359 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Tue, 20 Aug 2019 15:49:46 -0500 Subject: [PATCH 03/11] Some more assumptions added to the top --- autofocus/predict/process__predict_example.R | 217 +++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index e69de29..a7fa215 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -0,0 +1,217 @@ +#Examples of how to make requests against the image classification endpoints +#Note: +# 1. This assumes that the image_classifier_api is running +# (i.e., using docker run -p 8000:8000 gaganden/autofocus_serve) +# 2. It also assumes that the api address is at 127.0.0.1 +# (which should be the case) +# 3. Assumes that your current working directory is +# './GitHub/autofocus/autofocus/predict' +# 4. Assumes that the images you are going to send to autofocus have +# not been preprocessed at all. 
+ +#Library requirements: +# RCurl, jsonlite, dplyr, magick, zip, progress + +library(RCurl) +library(jsonlite) +library(magick) +library(zip) +library(progress) +library(dplyr) + +find_image_files <- function(search_dir, + image_list = c("jpeg","jpg","bmp","png", "JPG")){ + # Utility function to find all recursively find all image files + # starting from a directory + + # Args: + # search_dir(character): the starting directory path from which to search + # image_list(list): a list of acceptable file formats + + # Returns: + # image_files(list): list containing the paths of all image files found. + # Each element in this list is a vector of at least 10 images. This split + # is done so that the images can be zipped and sent to autofocus. + + file_list <- list.files(search_dir, recursive = TRUE, full.names = TRUE) + image_files <- file_list[grep(paste(image_list, collapse = "|"), file_list)] + image_files <- normalizePath(image_files, winslash = "/") + # normalize the path, then split into groups of max 10 image + n_groups <- ceiling(length(image_files) / 10) + image_files <- split(image_files, + sort(rep_len(1:n_groups, length(image_files)))) + + return(image_files) +} + + +process_images <- function(image_files = NULL){ + # Utility function to preprocess images to be sent to autofocus + + # Args: + # image_files(list): the output object from find_images() + + # Returns: + # a list: This list has two elements: + # 1. zip(character): A vector of the temporary zip files to be sent to + # autofocus. + # 2. dict(named character): a key-value pair that links the temporary + # image file to the actual file. The elements in this vector are + # the names of the temporary files while the names are the full + # paths to the file names. 
+ + if(!is(image_files, 'list')) + stop('image_files must be a list.') + + if(any(sapply(image_files, length)>10)) + stop('One of the elements is image_files has > 10 images.') + + dict_list <- vector('list', length = length(image_files)) + zip_vector <- rep(NA, length(image_files)) + + cat(paste('Processing', length(unlist(image_files)), 'images...\n')) + + pb <- progress_bar$new( + format = "Images processed [:bar] :elapsed | eta: :eta", + total = length(unlist(image_files)), + width = 60 + ) + + for(photo_group in seq.int(length(image_files))){ + + file_pattern <- paste0("file_",stringr::str_pad(1:length(image_files[[photo_group]]), + width = 2, pad = "0"),"_") + # make some temporary file names + tmp_name <- tempfile(pattern = file_pattern, + fileext = rep('.jpg', + length(image_files[[photo_group]]))) + # sort them + tmp_name <- sort(tmp_name) + + # dictionary to line up temps to actual photo + dict <- sapply(strsplit(tmp_name, "\\\\|/"), function(x) x[length(x)]) + names(dict) <- image_files[[photo_group]] + + # Read in iamge, crop 198 from the bottom, resize to 512 pixels tall, + # then save as a temporary image. + for(image in seq.int(length(image_files[[photo_group]]))){ + pb$tick() + image_read(image_files[[photo_group]][image]) %>% + image_crop(., paste0(image_info(.)$width, + "x", + image_info(.)$height-198)) %>% + image_resize(., '760x512!') %>% + image_write(., tmp_name[image]) + } + + # zip the temporary files together + tmp_zip <- tempfile(fileext = ".zip") + zipr(tmp_zip, tmp_name) + dict_list[[photo_group]] <- dict + zip_vector[photo_group] <- tmp_zip + if(file.exists(tmp_zip)){ + unlink(tmp_name) + } + } + + # return the dictionary and the name of the zipped file. 
+ return(list(zip = zip_vector, dict = dict_list)) + } + + + + + +post_zips <- function(processed_images = NULL, + uri = "http://localhost:8000/predict_zip"){ + # send the zip files to autofocus + + # Args: + # processed_images(list): the output from process_images() + # uri(character): the location autofocus is running + + #Returns: + # response(tibble): A tibble of guesses for each image supplied to + # autofocus. The columns, save for the last one, have species names + # and represent the likelihood that this species is in the image. + # The last column is the file name of the image. +cat(paste('Posting', length(processed_images$zip), + 'zip file(s) to autofocus...\n')) + +pb <- progress_bar$new( + format = "Files processed [:bar] :elapsed | eta: :eta", + total = length(unlist(processed_images$zip)), + width = 60 +) +# the object that initially contains the autofocus json +response <- vector('list', length(processed_images$zip)) +for(zippy in seq.int(length(processed_images$zip))){ + pb$tick() + # post to autofocus + response[[zippy]] <- fromJSON(postForm(uri, + file = fileUpload(processed_images$zip[zippy]), + .checkParams = FALSE)) + + # get the file names from autofocus + file_names <- strsplit(names(response[[zippy]]), "/") + file_names <- sapply(file_names, function(x) x[length(x)]) + file_names <- strsplit(file_names, "_") + file_names <- as.numeric(sapply(file_names, '[[', 2)) + # and line it up with what we did during image processing + OG_file_names <- names(processed_images$dict[[zippy]])[file_names] + # provide a warning just incase autofocus did not ID a specific image + if(!length(OG_file_names) == length(processed_images$dict[[zippy]]) ){ + warning(paste('Autofocus did not ID all images in zip file number', zippy)) + } + # put the file name into each nested list object + for(image in seq.int(length(response[[zippy]]))){ + response[[zippy]][[image]]$file <- OG_file_names[image] + } +} +# bind the list of lists, then bind the list of tibbles 
+response <- lapply(response, bind_rows) %>% bind_rows +return(response) +} + +most_likely <- function(response_frame = NULL){ + # Utility function that provides the best guess from each classification + + # Args: + # response_frame(tibble): the output from post_zips() + + # Returns: + # A tibble that has three columns: + # 1) file: the file name + # 2) species: the species most likely to be in the image + # 3) probability autofocus's confidence of this classification + + # Find which column has the highest likelihood + best_guess <- apply(response_frame[,-grep('file', colnames(response_frame))], + 1, which.max) + # Grab the highest likelihood + best_prob <- apply(response_frame[,-grep('file', colnames(response_frame))], + 1, max) + # Correspond the highest likelihood to a species name + species_name <- colnames(response_frame)[best_guess] + + # the object to return + to_return <- tibble(file = response_frame$file, + species = species_name, + probability = best_prob) + return(to_return) +} + + + + +# where are the photos located +search_dir <- "./images/" + +all_images <- find_image_files(search_dir) + +processed_images <- process_images(all_images) + +my_ids <- post_zips(processed_images) + +best_ids <- most_likely(my_ids) + From 41dccce7c583b6aab262c78fad946fee70d392cf Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:15:30 -0500 Subject: [PATCH 04/11] made extensions a constant in `find_image_files` --- autofocus/predict/process__predict_example.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index a7fa215..cd0c5d7 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -19,22 +19,24 @@ library(zip) library(progress) library(dplyr) -find_image_files <- function(search_dir, - image_list = c("jpeg","jpg","bmp","png", "JPG")){ +find_image_files <- function(search_dir = 
NULL){ # Utility function to find all recursively find all image files # starting from a directory # Args: # search_dir(character): the starting directory path from which to search - # image_list(list): a list of acceptable file formats # Returns: # image_files(list): list containing the paths of all image files found. # Each element in this list is a vector of at least 10 images. This split # is done so that the images can be zipped and sent to autofocus. + valid_extensions <- c("jpeg", "jpg", "bmp", "png") + valid_extensions <- c(valid_extensions, toupper(valid_extensions)) + file_list <- list.files(search_dir, recursive = TRUE, full.names = TRUE) - image_files <- file_list[grep(paste(image_list, collapse = "|"), file_list)] + image_files <- file_list[grep(paste(valid_extensions, + collapse = "|"), file_list)] image_files <- normalizePath(image_files, winslash = "/") # normalize the path, then split into groups of max 10 image n_groups <- ceiling(length(image_files) / 10) From b3bdbc3a83f30d6bee25518515c904acd9574acb Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:16:29 -0500 Subject: [PATCH 05/11] dropped `= NULL` from `proces_images` argument --- autofocus/predict/process__predict_example.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index cd0c5d7..4911e1e 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -47,7 +47,7 @@ find_image_files <- function(search_dir = NULL){ } -process_images <- function(image_files = NULL){ +process_images <- function(image_files){ # Utility function to preprocess images to be sent to autofocus # Args: From 2fefc0702f52e79f0fae72674fcd6009df93dbd2 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:17:38 -0500 Subject: [PATCH 06/11] Hugged if statements --- autofocus/predict/process__predict_example.R | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index 4911e1e..64d9ef7 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -62,11 +62,13 @@ process_images <- function(image_files){ # the names of the temporary files while the names are the full # paths to the file names. - if(!is(image_files, 'list')) + if(!is(image_files, 'list')){ stop('image_files must be a list.') + } - if(any(sapply(image_files, length)>10)) + if(any(sapply(image_files, length)>10)){ stop('One of the elements is image_files has > 10 images.') + } dict_list <- vector('list', length = length(image_files)) zip_vector <- rep(NA, length(image_files)) From ab199401a111ad08fa26b7510e301a4571e3a2d9 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:22:30 -0500 Subject: [PATCH 07/11] Calling functions via their libraries --- autofocus/predict/process__predict_example.R | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index 64d9ef7..f0b0416 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -75,7 +75,7 @@ process_images <- function(image_files){ cat(paste('Processing', length(unlist(image_files)), 'images...\n')) - pb <- progress_bar$new( + pb <- progress::progress_bar$new( format = "Images processed [:bar] :elapsed | eta: :eta", total = length(unlist(image_files)), width = 60 @@ -100,17 +100,17 @@ process_images <- function(image_files){ # then save as a temporary image. 
for(image in seq.int(length(image_files[[photo_group]]))){ pb$tick() - image_read(image_files[[photo_group]][image]) %>% - image_crop(., paste0(image_info(.)$width, + magick::image_read(image_files[[photo_group]][image]) %>% + magick::image_crop(., paste0(image_info(.)$width, "x", - image_info(.)$height-198)) %>% - image_resize(., '760x512!') %>% - image_write(., tmp_name[image]) + magick::image_info(.)$height-198)) %>% + magick::image_resize(., '760x512!') %>% + magick::image_write(., tmp_name[image]) } # zip the temporary files together tmp_zip <- tempfile(fileext = ".zip") - zipr(tmp_zip, tmp_name) + zip::zipr(tmp_zip, tmp_name) dict_list[[photo_group]] <- dict zip_vector[photo_group] <- tmp_zip if(file.exists(tmp_zip)){ @@ -142,7 +142,7 @@ post_zips <- function(processed_images = NULL, cat(paste('Posting', length(processed_images$zip), 'zip file(s) to autofocus...\n')) -pb <- progress_bar$new( +pb <- progress::progress_bar$new( format = "Files processed [:bar] :elapsed | eta: :eta", total = length(unlist(processed_images$zip)), width = 60 @@ -152,9 +152,9 @@ response <- vector('list', length(processed_images$zip)) for(zippy in seq.int(length(processed_images$zip))){ pb$tick() # post to autofocus - response[[zippy]] <- fromJSON(postForm(uri, - file = fileUpload(processed_images$zip[zippy]), - .checkParams = FALSE)) + response[[zippy]] <- jsonlite::fromJSON(RCurl::postForm(uri, + file = RCurl::fileUpload(processed_images$zip[zippy]), + .checkParams = FALSE)) # get the file names from autofocus file_names <- strsplit(names(response[[zippy]]), "/") @@ -173,7 +173,7 @@ for(zippy in seq.int(length(processed_images$zip))){ } } # bind the list of lists, then bind the list of tibbles -response <- lapply(response, bind_rows) %>% bind_rows +response <- lapply(response, dplyr::bind_rows) %>% dplyr::bind_rows return(response) } @@ -199,7 +199,7 @@ most_likely <- function(response_frame = NULL){ species_name <- colnames(response_frame)[best_guess] # the object to return - 
to_return <- tibble(file = response_frame$file, + to_return <- dplyr::tibble(file = response_frame$file, species = species_name, probability = best_prob) return(to_return) From 05b93f76069a30d0116fb07885dee3d3a33ad169 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:32:10 -0500 Subject: [PATCH 08/11] corrupt image fix, referencing issues --- autofocus/predict/process__predict_example.R | 26 ++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index f0b0416..9e4a0db 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -83,24 +83,36 @@ process_images <- function(image_files){ for(photo_group in seq.int(length(image_files))){ - file_pattern <- paste0("file_",stringr::str_pad(1:length(image_files[[photo_group]]), + # get paths to files + image_file_names <- image_files[[photo_group]] + + # files with 0 kb are corrupt, remove them + file_sizes <- file.size(image_file_names) + if(any(file_sizes == 0)){ + image_file_names <- image_file_names[-which(image_file_names == 0)] + } + + # count number of files + num_files <- length(image_file_names) + + file_pattern <- paste0("file_",stringr::str_pad(1:num_files, width = 2, pad = "0"),"_") # make some temporary file names tmp_name <- tempfile(pattern = file_pattern, - fileext = rep('.jpg', - length(image_files[[photo_group]]))) + fileext = rep('.jpg', num_files)) + # sort them tmp_name <- sort(tmp_name) - # dictionary to line up temps to actual photo + # line up temps to actual photo names dict <- sapply(strsplit(tmp_name, "\\\\|/"), function(x) x[length(x)]) - names(dict) <- image_files[[photo_group]] + names(dict) <- image_file_names # Read in iamge, crop 198 from the bottom, resize to 512 pixels tall, # then save as a temporary image. 
- for(image in seq.int(length(image_files[[photo_group]]))){ + for(image in seq.int(num_files)){ pb$tick() - magick::image_read(image_files[[photo_group]][image]) %>% + magick::image_read(image_file_names[image]) %>% magick::image_crop(., paste0(image_info(.)$width, "x", magick::image_info(.)$height-198)) %>% From 8491404297b6864fb6d9d0e51ae1d4e2dabb9302 Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:38:27 -0500 Subject: [PATCH 09/11] tmp_name to tmp_names --- autofocus/predict/process__predict_example.R | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index 9e4a0db..f3e2f56 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -95,17 +95,18 @@ process_images <- function(image_files){ # count number of files num_files <- length(image_file_names) - file_pattern <- paste0("file_",stringr::str_pad(1:num_files, - width = 2, pad = "0"),"_") + file_pattern <- paste0("file_", + stringr::str_pad(1:num_files, width = 2, pad = "0"), + "_") # make some temporary file names - tmp_name <- tempfile(pattern = file_pattern, - fileext = rep('.jpg', num_files)) + tmp_names <- tempfile(pattern = file_pattern, + fileext = rep('.jpg', num_files)) # sort them - tmp_name <- sort(tmp_name) + tmp_names <- sort(tmp_names) # line up temps to actual photo names - dict <- sapply(strsplit(tmp_name, "\\\\|/"), function(x) x[length(x)]) + dict <- sapply(strsplit(tmp_names, "\\\\|/"), function(x) x[length(x)]) names(dict) <- image_file_names # Read in iamge, crop 198 from the bottom, resize to 512 pixels tall, @@ -117,16 +118,16 @@ process_images <- function(image_files){ "x", magick::image_info(.)$height-198)) %>% magick::image_resize(., '760x512!') %>% - magick::image_write(., tmp_name[image]) + magick::image_write(., tmp_names[image]) } # zip the temporary files together tmp_zip <- 
tempfile(fileext = ".zip") - zip::zipr(tmp_zip, tmp_name) + zip::zipr(tmp_zip, tmp_names) dict_list[[photo_group]] <- dict zip_vector[photo_group] <- tmp_zip if(file.exists(tmp_zip)){ - unlink(tmp_name) + unlink(tmp_names) } } From 551b73c39f3d3316f8d84a59672071b223cffc8a Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:39:34 -0500 Subject: [PATCH 10/11] two lines between funcitions --- autofocus/predict/process__predict_example.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index f3e2f56..00ceecf 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -136,9 +136,6 @@ process_images <- function(image_files){ } - - - post_zips <- function(processed_images = NULL, uri = "http://localhost:8000/predict_zip"){ # send the zip files to autofocus @@ -190,6 +187,7 @@ response <- lapply(response, dplyr::bind_rows) %>% dplyr::bind_rows return(response) } + most_likely <- function(response_frame = NULL){ # Utility function that provides the best guess from each classification @@ -219,8 +217,6 @@ most_likely <- function(response_frame = NULL){ } - - # where are the photos located search_dir <- "./images/" From e8dd85480c7b7e6caa9dd2401ae2252c7d98ac7c Mon Sep 17 00:00:00 2001 From: Mason Fidino Date: Thu, 22 Aug 2019 11:41:53 -0500 Subject: [PATCH 11/11] Slayed `NULL` defaults --- autofocus/predict/process__predict_example.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autofocus/predict/process__predict_example.R b/autofocus/predict/process__predict_example.R index 00ceecf..15eeea2 100644 --- a/autofocus/predict/process__predict_example.R +++ b/autofocus/predict/process__predict_example.R @@ -19,7 +19,7 @@ library(zip) library(progress) library(dplyr) -find_image_files <- function(search_dir = NULL){ +find_image_files <- function(search_dir){ # Utility 
function to find all recursively find all image files # starting from a directory @@ -136,7 +136,7 @@ process_images <- function(image_files){ } -post_zips <- function(processed_images = NULL, +post_zips <- function(processed_images, uri = "http://localhost:8000/predict_zip"){ # send the zip files to autofocus @@ -188,7 +188,7 @@ return(response) } -most_likely <- function(response_frame = NULL){ +most_likely <- function(response_frame){ # Utility function that provides the best guess from each classification # Args: