BayAreaMetro · jhelsel11 · Oct 8, 2018 · Oct 8, 2018 · Oct 9, 2018 · Oct 9, 2018
diff --git a/make-uniform/production/Build Standard Database.Rmd b/make-uniform/production/Build Standard Database.Rmd
@@ -72,9 +72,8 @@ user_list <- data.frame(
 When adding a new operator, the user must: add the path to the survey data in 
 the code block below, e.g., `f_bart_survey_path`
 ```{r file-names}
-me <- Sys.getenv("USERNAME")
 dir_path <- user_list %>%
-  filter(user == me) %>%
+  filter(user == Sys.getenv("USERNAME")) %>%
   .$path
 
 f_spatial_to_be_geocoded_path <- paste0(dir_path, 
@@ -108,19 +107,19 @@ f_vta_survey_path <- paste0(dir_path,
   "VTA/As CSV/VTA_DRAFTFINAL_20171114 NO POUND OR SINGLE QUOTE.csv")
 
 f_output_rdata_path <- paste0(dir_path, 
-  "_data Standardized/survey_standard.RData")
+  "_data Standardized/survey_standard.RDS")
 
 f_output_csv_path <- paste0(dir_path, 
   "_data Standardized/survey_standard.csv")
 
 f_ancillary_output_rdata_path <- paste0(dir_path, 
-  "_data Standardized/ancillary_variables.RData")
+  "_data Standardized/ancillary_variables.RDS")
 
 f_ancillary_output_csv_path <- paste0(dir_path, 
   "_data Standardized/ancillary_variables.csv")
 
 f_output_decom_rdata_path <- paste0(dir_path, 
-  "_data Standardized/decomposition/survey_decomposition.RData")
+  "_data Standardized/decomposition/survey_decomposition.RDS")
 
 f_output_decom_csv_path <- paste0(dir_path, 
   "_data Standardized/decomposition/survey_decomposition.csv")

diff --git a/make-uniform/production/compare_against_previous.R b/make-uniform/production/compare_against_previous.R
@@ -4,8 +4,7 @@ load("~/GitHub/onboard-surveys/Data and Reports/_data Standardized/survey_standa
 previous_df <- survey.standard %>%
   rename(unique_ID = Unique_ID)
 
-load("~/GitHub/onboard-surveys/Data and Reports/_data Standardized/survey_standard.Rdata")
-current_df <- survey_standard
+current_df <- readRDS("~/GitHub/onboard-surveys/Data and Reports/_data Standardized/survey_standard.RDS") # NOTE(review): assumes this is the file written to f_output_rdata_path (now .RDS) in Build Standard Database.Rmd -- confirm
 
 find_differences <- function(anti_outcomes_df, diffed_df) {
 
@@ -47,11 +46,26 @@ find_differences <- function(anti_outcomes_df, diffed_df) {
 
 }
 
-anti_df <- anti_join(previous_df, current_df, by = c("unique_ID"))
-diff_df <- find_differences(anti_df, current_df)
+# compare in both directions; first: rows of previous_df whose unique_ID is absent from current_df
+anti_previous_df <- anti_join(previous_df, current_df, by = c("unique_ID"))
+diff_previous_df <- find_differences(anti_previous_df, current_df)
 
-relevant_df <- diff_df %>%
-  filter(!(previous_outcome == "missing" & current_outcome == "NA"))
+anti_current_df <- anti_join(current_df, previous_df, by = c("unique_ID"))
+diff_current_df <- find_differences(anti_current_df, previous_df)
 
-table(thin_df$var_name)
+# update the Caltrain IDs (drop the first "S" from ID), rebuild unique_ID, and rerun both comparisons
+update_current_df <- current_df %>%
+  mutate(ID = ifelse(str_detect(ID, "S"), str_replace(ID, "S", ""), ID)) %>%
+  mutate(unique_ID = paste(ID, operator, survey_year, sep = "---"))
+
+anti_previous_df <- anti_join(previous_df, update_current_df, by = c("unique_ID"))
+diff_previous_df <- find_differences(anti_previous_df, update_current_df)
+
+anti_current_df <- anti_join(update_current_df, previous_df, by = c("unique_ID"))
+diff_current_df <- find_differences(anti_current_df, previous_df)
+
+# okay they now match
+
+
+