From 7f52582e8737da5146157161fcd15ae27155b1ae Mon Sep 17 00:00:00 2001 From: kthompson25 Date: Tue, 1 Oct 2024 12:39:15 +0200 Subject: [PATCH] Initial commit --- Code/00_BBS Initial Cleaning.R | 387 ++++++++++++++++++++++++++ Code/01_Creating spatial BBS routes.R | 290 +++++++++++++++++++ Code/02_BBS Final Cleaning.R | 117 ++++++++ 3 files changed, 794 insertions(+) create mode 100644 Code/00_BBS Initial Cleaning.R create mode 100644 Code/01_Creating spatial BBS routes.R create mode 100644 Code/02_BBS Final Cleaning.R diff --git a/Code/00_BBS Initial Cleaning.R b/Code/00_BBS Initial Cleaning.R new file mode 100644 index 0000000..444f2ab --- /dev/null +++ b/Code/00_BBS Initial Cleaning.R @@ -0,0 +1,387 @@ +############################################################################### +######## ######## +######## Downloading and Cleaning BBS Data ######## +######## ######## +############################################################################### + +# Author: Kimberly Thompson + +# This code does initial +# cleaning and preprocessing of the North American Breeding Bird +# Survey Dataset including: stop level data, migrants (stop-level format), +# routes, and weather. 
+ +# Main preprocessing step is to add a unique route ID to each dataset, which +# simply combines the state ID with the route ID, so that each dataset can +# talk to one another + +# Website for data download +# https://www.sciencebase.gov/catalog/item/5ea04e9a82cefae35a129d65 + +# state level: 1966-2023, 50 stops along each route aggregated into 10-stop +# bins +# stop level: 1997-2023, data for each of 50 stops recorded + + +# Download includes: +# States - 62 individual files, 1 for each state and province - 1966-2023 +# Stop_Data - 10 files, with 5-9 states represented in each file, data for +# all 50 stops for each route, officially 1997-2023 but some other +# older years sneak in +# Migrants - counts of non-breeding birds, stop data format, data for +# all 50 stops for each route, officially 1997-2023 but some other +# older years sneak in +# Migrant Summary - counts for non-breeding birds, state data format, +# 1966-2023 +# Routes - Route information plus the lat long coordinate of the route's +# starting point +# Species List - Links AOU (4-digit species ID code) with order, family, genus +# and species +# Vehicle Data - vehicle counts and excessive noise ??? no info in metadata +# Weather - route quality information and weather conditions - more info in +# metadata + +# Why do we focus only on stop level data here? +# Because we will match with MODIS data from 2001-2023; therefore the stop level +# data fits our timeline AND in case we decide to incorporate some method for +# detection probability, the stop level data provides the best option for that. 
# Produces datasets (written at the end of this script):
# - BBS_StopLev_01_23_15plus.csv
# - BBS_MigStop_01_23_15plus.csv
# - BBS_Routes.csv
# - BBS_Weather_Quality.csv


########## clean workspace and load required packages ####################

# clean workspace to improve efficiency: #
# rm(list = ls() )
# gc() #releases memory

library(tidyverse) # Data cleaning


###############################################
###                                         ###
###          Stop-Level Data                ###
###            Processing                   ###
###############################################

# Combine data into 1 CSV file, add a unique identifier for each route

# File path - set to Bioviewpoint folder
path <- "I:/mas/04_personal/Kim_T/BioViewPoint"

# Full paths to the per-state stop-level files.
# full.names = TRUE makes every entry an absolute path, so the files can be
# read from any working directory. (The original listed bare file names and
# then setwd(path), so read.csv looked in the project root instead of the
# Stop_Data subfolder and the reads failed.)
stop.list <- list.files(file.path(path, "00_Data/Raw/BBS/Stop_Data"),
                        full.names = TRUE)

# Read every file and combine them in one pass. A preallocated list bound
# once at the end avoids the O(n^2) copying of rbind-ing inside the loop.
stop.contents <- vector("list", length(stop.list))

for (i in seq_along(stop.list)) {

  tmp.file <- read.csv(stop.list[i], header = TRUE)
  stop.contents[[i]] <- tmp.file

  # progress indicator
  print(i)

}

master <- dplyr::bind_rows(stop.contents)

# Column RPID represents 3-digit run protocol ID
# 101: Standard BBS Protocol: 3-minute counts, 1 observer, single run per year
# 102: Standard BBS Protocol, Replicate 1: 3-minute counts, 1 observer
#      (same or different person), second run in year
# 103: Standard BBS Protocol, Replicate 2: 3-minute counts, 1 observer
#      (same or different person), third run in year
# 104: Standard BBS Protocol, Replicate 3: 3-minute counts, 1 observer
#      (same or different person), fourth run in year.
# 203: Experimental Protocol: Double-Observer Independent: 3-minute counts,
#      2 observer method, single run per year, Secondary Observer
# 501: Experimental Protocol: Distance and time-of-detection methods: Three
#      1-minute sample periods; two distance bands (0-50 m, >50-400m)
# 502: Experimental Protocol: Distance and time-of-detection methods, Rep1:
#      Three 1-minute sample periods; two distance bands (0-50 m, >50-400m),
#      second run in year.
# Frequency of each protocol type
master %>%
  count(RPID) %>%
  mutate(prop = prop.table(n))

# We will keep only protocol 101 which represents 99.4% of the data
master <- master %>%
  filter(RPID == 101)

# Define both StateNum and Route as character columns so that trailing zeros
# are retained
master$StateNum <- as.character(master$StateNum)
master$Route <- as.character(master$Route)

# Create a unique route identifier (state ID + route ID) so that the BBS
# datasets can be linked to one another
master$unique_route <- paste(master$StateNum, master$Route, sep = "_")

# Filter data to be only for 2001 - 2023
master <- master %>%
  filter(Year >= 2001)

# How many routes do we have given these initial filters?
length(unique(master$unique_route))
# 4944


###############################################
###                                         ###
###          Stop-Level Data                ###
###      Processing Time Series             ###
###############################################

# To ensure the time series analysis can proceed, we need to see which routes
# do not have sufficient observations within the 2001-2023 period

# Make a vector of unique route numbers
route.num <- unique(master$unique_route)

# Number of distinct survey years per route. A preallocated list that is
# bound once avoids the O(n^2) copying of rbind-ing a data frame inside
# the loop.
year.counts <- vector("list", length(route.num))

for (i in seq_along(route.num)) {

  # find the number of unique years for each unique route
  years <- length(unique(master$Year[master$unique_route == route.num[i]]))

  # One-row data frame for this route
  tmp.df <- data.frame(Route = route.num[i], Total = years)
  year.counts[[i]] <- tmp.df

  print(i)

}

total.years <- dplyr::bind_rows(year.counts)

# Frequency of total years surveyed between 2001-2023
total.years %>%
  count(Total) %>%
  mutate(prop = prop.table(n))

# Retain routes that have been surveyed at least 15 years out of the 23 possible
# *** This threshold could be adjusted later depending on the causal methodology
# we use

# Create a vector of routes to retain
routes_to_retain <- total.years$Route[total.years$Total >= 15]

# Filter the master df according to routes to retain
master <- master %>%
  filter(unique_route %in% routes_to_retain)

# Create a column for the total abundance (summed across each stop)
master$Total.Abund <- rowSums(master[, grep("^Stop", names(master))],
                              na.rm = TRUE)

# Reorder columns: move unique_route (column 58) to sit after the first 4 ID
# columns; Total.Abund (column 59) stays last.
# NOTE(review): positional reorder assumes the raw stop files always yield
# 57 columns before the two added ones - confirm if the BBS release changes.
master <- master[ , c(1:4, 58, 5:57, 59)]

# clean up workspace
# (path is deliberately NOT removed: it is still needed below to read the
# Migrants, Routes, and Weather files - removing it here was a bug)
rm(tmp.df, tmp.file, total.years, i, route.num, stop.list,
   years)


###############################################
###                                         ###
###              Migrants                   ###
###             Processing                  ###
###############################################

# Same steps as for the stop-level data:
# RPID, unique_route, year range, routes-to-retain, Total abundance

migrants <- read.csv(paste(path, "/00_Data/Raw/BBS/Migrants.csv",
                           sep = ""),
                     header = TRUE)

# keep only protocol 101
migrants <- migrants %>%
  filter(RPID == 101)

# Define both StateNum and Route as character columns so that trailing zeros
# are retained
migrants$StateNum <- as.character(migrants$StateNum)
migrants$Route <- as.character(migrants$Route)

# Create a unique route identifier
migrants$unique_route <- paste(migrants$StateNum, migrants$Route, sep = "_")

# Filter data to be only for 2001 - 2023
migrants <- migrants %>%
  filter(Year >= 2001)

# Filter the migrants df according to routes to retain
migrants <- migrants %>%
  filter(unique_route %in% routes_to_retain)

length(unique(migrants$unique_route))
# 1418 routes of the 2605 in the master data have info on migrants

# Create a column for the total abundance (summed across each stop)
migrants$Total.Abund <- rowSums(migrants[, grep("^Stop", names(migrants))],
                                na.rm = TRUE)


###############################################
###                                         ###
###               Routes                    ###
###              Processing                 ###
###############################################

routes <- read.csv(paste(path, "/00_Data/Raw/BBS/Routes.csv",
                         sep = ""),
                   header = TRUE)

# Active: active (1) or discontinued (0)
# RouteTypeID: indicates if a route was established along a roadside or
# on a body of water (1 = Roadside, 2 = water)

# RouteTypeDetailID: Indicates route length and selection criteria
# 1: Location of route randomly established, 50 point counts
# 2: Location of route non-randomly established, 50 point counts
# 3: Location of route non-randomly established, < 50 point counts

# Frequency of DetailID types
routes %>%
  count(RouteTypeDetailID) %>%
  mutate(prop = prop.table(n))

# Remove type 3 which accounts for 0.003% of routes
routes <- routes %>%
  filter(RouteTypeDetailID != 3)

# Store the state and route IDs as character so trailing zeros are retained,
# then build the unique route identifier from them
routes <- routes %>%
  mutate(StateNum = as.character(StateNum),
         Route = as.character(Route),
         unique_route = paste(StateNum, Route, sep = "_"))


###############################################
###                                         ###
###           Weather/Quality               ###
###             Processing                  ###
###############################################

# Same steps as before: RPID, unique_route, year range, routes-to-retain

weather <- read.csv(paste(path, "/00_Data/Raw/BBS/Weather.csv",
                          sep = ""),
                    header = TRUE)

# Build the unique route identifier (IDs as character to keep trailing
# zeros), then keep only protocol 101, the 2001 - 2023 window, and the
# routes retained above
weather <- weather %>%
  mutate(StateNum = as.character(StateNum),
         Route = as.character(Route),
         unique_route = paste(StateNum, Route, sep = "_")) %>%
  filter(RPID == 101,
         Year >= 2001,
         unique_route %in% routes_to_retain)

length(unique(weather$unique_route))
# 2605

# Month: Month surveyed; values range from April - July. We may want to
# filter this further depending on what we acquire for MODIS, or just
# include this in the analysis

# Day: Day surveyed; same as above

# Quality Current ID: indicates if the sampling took place under suitable
# weather conditions and met suitable time, data, and route completion
# criteria
# 1 - Data meet quality control criteria
# 0 - Data do not meet one or more quality control criteria

# Remove samplings with a 0 for quality control
weather <- weather %>%
  filter(QualityCurrentID == 1)

# Runtype: identifies which data were collected consistent with all standard
# BBS criteria
# 1: consistent
# 0: not consistent

# Remove samplings with a 0 for RunType
weather <- weather %>%
  filter(RunType == 1)


###############################################
###                                         ###
###     Last step in Initial Cleaning       ###
###                                         ###
###############################################

# Each dataset has been pared to retain only routes with high quality and
# adherence to protocols

length(unique(master$unique_route))
# 2605

length(unique(routes$unique_route))
# 5805

length(unique(weather$unique_route))
# 2455

# The routes in weather should be used to further pare the master and routes
# dataframes, as well as the migrant dataframe.
final.routes <- unique(weather$unique_route)

# Filter the master df according to final.routes
master <- master %>%
  filter(unique_route %in% final.routes)

# Filter the routes df according to final.routes
routes <- routes %>%
  filter(unique_route %in% final.routes)

# Filter the migrants df according to final.routes
migrants <- migrants %>%
  filter(unique_route %in% final.routes)


# Write the resulting files
write.csv(master, paste(path,
                        "/00_Data/Processed/BBS/1st Cleaning/BBS_StopLev_01_23_15plus.csv",
                        sep = ""), row.names = FALSE)

write.csv(migrants, paste(path,
                          "/00_Data/Processed/BBS/1st Cleaning/BBS_MigStop_01_23_15plus.csv",
                          sep = ""), row.names = FALSE)

write.csv(routes, paste(path,
                        "/00_Data/Processed/BBS/1st Cleaning/BBS_Routes.csv",
                        sep = ""), row.names = FALSE)

write.csv(weather, paste(path,
                         "/00_Data/Processed/BBS/1st Cleaning/BBS_Weather_Quality.csv",
                         sep = ""), row.names = FALSE)
\ No newline at end of file
diff --git a/Code/01_Creating spatial BBS routes.R b/Code/01_Creating spatial BBS routes.R new file mode 100644 index 0000000..1c7a62f --- /dev/null +++ b/Code/01_Creating spatial BBS routes.R @@ -0,0 +1,290 @@
############################################################################################
########                                                                            ########
########              Determining spatial coordinates of BBS routes                 ########
########                                                                            ########
############################################################################################

# Author: Kimberly Thompson

# This code creates a simple features (package sf) dataframe of spatial
# points for each route in the North American Breeding Bird Survey Dataset
# (subsetted to SW as a pilot). The name of the route (as an identifier that
# can be matched with the biodiversity data) is also included.

# Important websites:
# Download shapefile of BBS routes from
# https://www.mbr-pwrc.usgs.gov/bbs/geographic_information/Instructions_trend_route.htm

# This shapefile of routes is from the 2004 release, the USGS no longer
# maintains data on the route paths so we have to work with what we have.

########## clean workspace and load required packages ####################

# clean workspace to improve efficiency: #
# (left commented out for consistency with scripts 00 and 02 - wiping the
# global environment is a destructive side effect for anyone sourcing this
# script)
# rm(list = ls() )
# gc() #releases memory

library(sp)
library(sf)
library(units)

library(ggplot2) # Plotting

library(tidyverse) # Data organization

# Set path to Bioviewpoint folder
path <- "I:/mas/04_personal/Kim_T/BioViewPoint"


###############################################
###                                         ###
###  Some Notes about Coordinate Systems    ###
###                                         ###
###############################################

# Deciding which projection to use
# https://gis.stackexchange.com/questions/104005/choosing-projected-coordinate-system-for-mapping-all-us-states
# Albers Equal Area Conic (Heinrich Albers, 1805): Like Lambert Conformal
# Conic, this is a very popular map projection for the US, Canada and other
# continental/large countries with a primarily E-W extent. Used by the USGS
# for maps showing the conterminous United States (48 states) or large areas
# of the United States. Used for many thematic maps, especially choropleth
# and dot density maps.
# From searchable list of codes at http://www.spatialreference.org
# Could not actually find the epsg code for this reference though
# EPSG code found at this site
# https://guides.library.duke.edu/r-geospatial/CRS
# epsg 5070: USA_Contiguous_Albers_Equal_Area_Conic


###############################################
###                                         ###
###       Load Shapefiles and CSVs          ###
###                                         ###
###############################################


# Define the coordinate system.
# sf::st_crs(5070) replaces the deprecated sp::CRS("+init=epsg:5070") form
# (the "+init=epsg:xxxx" syntax is obsolete under PROJ >= 6); st_set_crs()
# and st_transform() both accept a crs object directly.
albers <- sf::st_crs(5070)

# Load USA and Canada shapefile
usa.shape <- sf::st_read(paste(path, "/00_Data/Raw/USACANAB.shp",
                               sep = ""))

# Define coordinate system as albers:
usa.shape <- sf::st_set_crs(usa.shape, albers)


# Load BBS routes shapefile
route.shapefile <- sf::st_read(paste(path,
                                     "/00_Data/Raw/BBS/GIS/nabbs02_mis_alb.shp",
                                     sep = ""))

# Define coordinate system as albers:
route.shapefile <- sf::st_set_crs(route.shapefile, albers)


# Load the BBS route data (output of 00_BBS Initial Cleaning.R; that script
# writes into the "1st Cleaning" subfolder, which the original path omitted)
bbs_route.data <- read.csv(paste(path,
                                 "/00_Data/Processed/BBS/1st Cleaning/BBS_Routes.csv",
                                 sep = ""),
                           header = TRUE)



###############################################
###                                         ###
###   Convert bbs.routes into sf object     ###
###                                         ###
###                                         ###
###############################################

# Make bbs.route.data an sf object (lat, long in degrees)
bbs_route.data_sf <- sf::st_as_sf(x = bbs_route.data,
                                  coords = c("Longitude", "Latitude"),
                                  crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")

# Convert crs to albers:
bbs_route.data_sf <- sf::st_transform(bbs_route.data_sf, albers)



###############################################
###                                         ###
###   Find which route shapes match the     ###
###          bbs observations               ###
###                                         ###
###############################################

# The spatial data provided in the BBS download gives the starting point of
# each route. We need to match the starting point to the vector of route
# shapes.
# Find the route shape feature that is closest to the bbs observations (point)
closest.route <- st_nearest_feature(bbs_route.data_sf, route.shapefile)

# Check that the order of the vector of indices for the closest linestring
# lines up with the corresponding point
plot(route.shapefile$geometry[closest.route[1]])
plot(bbs_route.data_sf$geometry[1], add = TRUE)


###############################################
###                                         ###
###     Calculate the distance btw the      ###
###  starting point and the closest route   ###
###                                         ###
###############################################

# Find the nearest points on the linestrings
nearest.points <- st_nearest_points(bbs_route.data_sf,
                                    route.shapefile[closest.route, ],
                                    pairwise = TRUE)
# Calculate distances
distances <- st_length(nearest.points)

# Make dataframe from distances and convert to integer
df <- data.frame(distance = distances)
df$distance <- as.integer(df$distance)

# Examine frequencies of distances (in meters)
ggplot(df, aes(x = distance)) +
  geom_histogram(binwidth = 100, fill = "steelblue", color = "black") +
  scale_x_continuous(lim = c(0, 2500)) +
  scale_y_continuous(lim = c(0, 300)) +
  labs(title = "Histogram of Distances",
       x = "Distance",
       y = "Frequency") +
  theme_minimal()

# We need to set a threshold for the distance which we find to be too far to
# confidently say that the starting point of the route matches the linestring
# of the route
# 1 km (1000 meters) seems appropriate

# Add the distances to bbs_route.data_sf
bbs_route.data_sf$dist_to_route <- distances


###############################################
###                                         ###
###       Merging point geometry and        ###
###         linestring geometry             ###
###                                         ###
###############################################

# Reduce the Route shapefile based on the vector of indices - this already
# has it in the right order
updated.route.shapefile <- route.shapefile[closest.route, ]

# Create a dataframe with all the information in both the bbs routes df and
# the updated.route df but with the point geometry only
bbs_route.data_sf.point <- cbind(bbs_route.data_sf, updated.route.shapefile)
bbs_route.data_sf.point$geometry.1 <- NULL

# Create a dataframe with all the information in both the bbs routes df and
# the updated.route df but with the route shape (linestring) geometry only
bbs_route.data_sf.line <- cbind(bbs_route.data_sf, updated.route.shapefile)
bbs_route.data_sf.line$geometry <- NULL

# Rename the remaining (linestring) geometry column by name rather than by
# numeric position - the original hard-coded column 21, which silently
# breaks if the number of attribute columns ever changes
names(bbs_route.data_sf.line)[names(bbs_route.data_sf.line) == "geometry.1"] <-
  "geometry"


###############################################
###                                         ###
###   Verifying the merge by comparing      ###
###      route names and examining          ###
###            distances                    ###
###############################################

# Make a separate dataframe (to aid in visual inspection) with the route
# names

name.comparison <- data.frame(Point.Name = bbs_route.data_sf.point$RouteName,
                              Line.Name = bbs_route.data_sf.point$SRTENAME,
                              Logical = bbs_route.data_sf.point$RouteName ==
                                bbs_route.data_sf.point$SRTENAME,
                              Distance =
                                as.integer(bbs_route.data_sf.point$dist_to_route))

# Examine the frequency of true/falses
name.comparison %>%
  count(Logical) %>%
  mutate(Prop = prop.table(n))

# 152 falses, 2 NAs

# Visual inspection of these instances to see if it is really a mismatch or
# simply a slight difference in how the names were recorded.
# It seems like the distance is a better way to determine the routes to
# retain than matching the names because for some routes it seems the naming
# convention changed either slightly or to a total renaming.
# check if after removing the high distances does this reduce the number of
# falses
name.comparison_reduced <- name.comparison %>%
  filter(Distance <= 1000)

# Examine the frequency of true/falses
name.comparison_reduced %>%
  count(Logical) %>%
  mutate(Prop = prop.table(n))

###############################################
###                                         ###
###    Pare dataframes to observations      ###
###      that fit distance criteria         ###
###                                         ###
###############################################

# Distance threshold: 1 km

bbs_route.data_sf.line <- bbs_route.data_sf.line %>%
  filter(dist_to_route <= set_units(1000, "m"))

bbs_route.data_sf.point <- bbs_route.data_sf.point %>%
  filter(dist_to_route <= set_units(1000, "m"))


###############################################
###                                         ###
###   Final Cleaning: Exclude Canada        ###
###           and Alaska                    ###
###                                         ###
###############################################

# 840 is the country number of the US
# 3 is the state number of Alaska

# (filter() evaluates column names inside the data frame, so the df$ prefix
# on each column is unnecessary)
bbs_route.data_sf.line <- bbs_route.data_sf.line %>%
  filter(CountryNum == 840,
         StateNum != 3)

bbs_route.data_sf.point <- bbs_route.data_sf.point %>%
  filter(CountryNum == 840,
         StateNum != 3)


# examine results
plot(bbs_route.data_sf.line$geometry,
     col = "black")
plot(usa.shape$geometry, add = TRUE)
plot(bbs_route.data_sf.point$geometry,
     col = "red", add = TRUE)


###############################################
###                                         ###
###            Write sf objects             ###
###                                         ###
###                                         ###
###############################################

# Write each sf dataframe as a geojson file.
# NOTE: written into the BBS subfolder of Processed - the original wrote to
# /00_Data/Processed/ directly, but 02_BBS Final Cleaning.R reads these
# files from /00_Data/Processed/BBS/.
st_write(bbs_route.data_sf.point,
         paste(path, "/00_Data/Processed/BBS/BBS_Rtes_Point.geojson",
               sep = ""))
st_write(bbs_route.data_sf.line,
         paste(path, "/00_Data/Processed/BBS/BBS_Rtes_Linestring.geojson",
               sep = ""))


diff --git a/Code/02_BBS Final Cleaning.R b/Code/02_BBS Final Cleaning.R new file mode 100644 index
0000000..fd5ca7c --- /dev/null +++ b/Code/02_BBS Final Cleaning.R @@ -0,0 +1,117 @@
###############################################################################
########                                                               ########
########                 Final Cleaning of BBS Data                    ########
########                                                               ########
###############################################################################

# Author: Kimberly Thompson

# This code harmonizes BBS data including stop-level breeding species counts,
# route data, weather data, and migrant data with the spatial data for the
# routes, which has already undergone final cleaning to represent only
# quality routes (adhering to quality and protocols) in the contiguous USA.


########## clean workspace and load required packages ####################

# clean workspace to improve efficiency: #
# rm(list = ls() )
# gc() #releases memory

library(tidyverse) # Data cleaning
library(sf)


###############################################
###                                         ###
###             Data Loading                ###
###                                         ###
###############################################

# Set path to Bioviewpoint folder (where data resides)
# NOTE(review): this is a machine-specific Google Drive path, unlike the I:/
# path used in scripts 00 and 01 - confirm before running.
path <- "~/Library/CloudStorage/GoogleDrive-mainelobster28@gmail.com/.shortcut-targets-by-id/1HhR4gs3fKXyAXBhQom6t69gAqR6SeKhx/BioViewPoint"

# Load Stop level breeding data
# (file name fixed: 00_BBS Initial Cleaning.R writes
# "BBS_StopLev_01_23_15plus.csv" with a single underscore before "15plus",
# not a double one)
bbs.data <- read.csv(paste(path,
                           "/00_Data/Processed/BBS/1st Cleaning/BBS_StopLev_01_23_15plus.csv",
                           sep = ""),
                     header = TRUE)

# Load Migrant data (same single-underscore fix as above)
migrant.data <- read.csv(paste(path,
                               "/00_Data/Processed/BBS/1st Cleaning/BBS_MigStop_01_23_15plus.csv",
                               sep = ""),
                         header = TRUE)

# Load route data
route.data <- read.csv(paste(path,
                             "/00_Data/Processed/BBS/1st Cleaning/BBS_Routes.csv",
                             sep = ""),
                       header = TRUE)

# Load Weather data
weather.data <- read.csv(paste(path,
                               "/00_Data/Processed/BBS/1st Cleaning/BBS_Weather_Quality.csv",
                               sep = ""),
                         header = TRUE)

# Load the spatial point file (written by 01_Creating spatial BBS routes.R)
spatial.data <- sf::st_read(paste(path,
                                  "/00_Data/Processed/BBS/BBS_Rtes_Point.geojson",
                                  sep = ""))


###############################################
###                                         ###
###           Harmonize the Data            ###
###                                         ###
###############################################

# Dataframes need to match the routes present in the spatial data

# Make a list of routes to retain
routes.to.retain <- unique(spatial.data$unique_route)

# Filter each dataframe to be only these routes
bbs.data <- bbs.data %>%
  filter(unique_route %in% routes.to.retain)

migrant.data <- migrant.data %>%
  filter(unique_route %in% routes.to.retain)

route.data <- route.data %>%
  filter(unique_route %in% routes.to.retain)

weather.data <- weather.data %>%
  filter(unique_route %in% routes.to.retain)

# All dfs now have 1984 unique routes (with the exception of migrants where
# not all routes had migrants)

###############################################
###                                         ###
###       Write the resulting dfs           ###
###                                         ###
###############################################

write.csv(bbs.data, paste(path,
                          "/00_Data/Processed/BBS/Final_Cleaning/BBS_StopLev_01_23_15plus.csv",
                          sep = ""), row.names = FALSE)

write.csv(migrant.data, paste(path,
                              "/00_Data/Processed/BBS/Final_Cleaning/BBS_MigStop_01_23_15plus.csv",
                              sep = ""), row.names = FALSE)

write.csv(route.data, paste(path,
                            "/00_Data/Processed/BBS/Final_Cleaning/BBS_Routes.csv",
                            sep = ""), row.names = FALSE)

write.csv(weather.data, paste(path,
                              "/00_Data/Processed/BBS/Final_Cleaning/BBS_Weather_Quality.csv",
                              sep = ""), row.names = FALSE)