### How to access ESS data via R

Steps:

Request the data from ESS API using GET() from the httr library
Check status of the response. If you're pulling data via multiple iterations then implement either warn_for_status(x) or stop_for_status(x).
If status is OK (200), extract the body of the response as a JSON character string using content() with type "text"
Then convert the JSON character string to an R object using the fromJSON() function from the jsonlite library
Assign index labels to each data pull (refer to data dictionary above)
Save as dataframes or export as .csv files for offline analysis

##### Data Source 1: Sentiments Data. Sample File: https://data.thumbtack.com/v1/sentiments/states

Here is a basic example of how to pull sentiment scores data at the state level. The same approach can be applied to CBSAs.

library(httr) # functions to send request to ESS API
library(jsonlite) # to convert json data type to R object 

# Build the URL of the state-level sentiments endpoint (no query parameters).
ss_url <- modify_url("https://data.thumbtack.com/v1/sentiments/states")

# Send the request; warn (without stopping) if the server reports an HTTP error.
ss_response <- GET(ss_url)
warn_for_status(ss_response)

# Read the body as a JSON string. The encoding is given explicitly so httr
# does not have to guess it.
ss_text <- content(ss_response, as = "text", encoding = "UTF-8")

# Parse the JSON and keep its `data` element, flattening nested data frames.
ss_data <- fromJSON(ss_text, flatten = TRUE)$data

Here is a more complex example of pulling all sentiment scores by demographic variables: gender, age, origin, and employer.

# Demographic filters to query and the number of category codes (1..n) that
# are requested for each of them.
demographics <- c("gender", "age", "origin", "employer")
demographics_levels <- c(2, 6, 5, 3)

# stringsAsFactors = FALSE keeps the names as character on R < 4.0, which the
# lookup by name further down relies on.
demographics_df <- data.frame(
  demographics,
  demographics_levels,
  stringsAsFactors = FALSE
)

# Labels for the category codes of every demographic (label = code).
demographic_codes <- list(
  gender = c(Female = 1, Male = 2),
  age = c(
    `Under 25` = 1, `25-34` = 2, `35-44` = 3,
    `45-54` = 4, `55-64` = 5, `65 or above` = 6
  ),
  origin = c(
    `Hispanic, Latino, or Spanish` = 1, `Asian` = 2, `Other` = 3,
    `White (Caucasian)` = 4, `Black or African American` = 5
  ),
  employer = c(`Non-employer` = 1, `Small Employer` = 2, `Large Employer` = 3)
)

for (h in seq_len(nrow(demographics_df))) {
  dem <- demographics_df$demographics[h]
  n_levels <- demographics_df$demographics_levels[h]

  # One slot per category code; filled in below and bound together once,
  # instead of growing a data frame inside the loop.
  pulls <- vector("list", n_levels)

  for (i in seq_len(n_levels)) {
    s_url <- modify_url(paste0(
      "https://data.thumbtack.com/v1/sentiments/states?",
      dem, "=", i, "&index=1&industry=0"
    ))
    s_response <- GET(s_url)
    warn_for_status(s_response)

    # A failed request has already produced a warning; skip it rather than
    # trying to parse an error body as data.
    if (http_error(s_response)) {
      next
    }

    s_text <- content(s_response, as = "text", encoding = "UTF-8")
    s_data_i <- fromJSON(s_text, flatten = TRUE)$data
    s_data_i$index <- i # remember which category code this pull belongs to
    pulls[[i]] <- s_data_i
  }

  # Skipped pulls are NULL and are dropped by rbind(); if every pull was
  # skipped the result is NULL and there is nothing to export.
  s_data <- do.call(rbind, pulls)
  if (is.null(s_data)) {
    warning(
      "No data retrieved for demographic '", dem, "'; skipping export.",
      call. = FALSE
    )
    next
  }

  # Assign labels to the category codes of the current demographic, stored
  # in a column named after that demographic.
  dem_code <- demographic_codes[[dem]]
  s_data[[dem]] <- names(dem_code)[match(s_data$index, dem_code)]

  write.csv(s_data, paste0("s_", dem, "_data.csv"), row.names = FALSE)
}

##### Data Source 2: Question Scores. Sample File: https://data.thumbtack.com/v1/question-scores
Note that the scores column is of type list.

library(plyr) # to convert list to dataframe

# Request the question-scores endpoint; warn (without stopping) if the server
# reports an HTTP error.
qs_response <- GET("https://data.thumbtack.com/v1/question-scores")
warn_for_status(qs_response)

# Read the body as a JSON string. The encoding is given explicitly so httr
# does not have to guess it.
qs_text <- content(qs_response, as = "text", encoding = "UTF-8")

# Parse the JSON and keep its `data` element, flattening nested data frames.
qs_df <- fromJSON(qs_text, flatten = TRUE)$data

str(qs_df) # check type of each column

Given that the scores column in this data is a list, we want to reshape the data. For each row, we copy that row's id, start_date, and end_date onto the corresponding element of the scores list, so every element carries the information of the row it came from. Then, we convert this updated list to a data frame.

# The list column of scores: one list element per row of qs_df.
s <- qs_df$scores

# Copy the fields of each parent row onto its list element, so the link to
# that row is kept once the list is flattened. seq_along() makes the loop a
# no-op when the list is empty.
for (i in seq_along(s)) {
  s[[i]]$id <- qs_df$id[i]
  s[[i]]$start_date <- qs_df$start_date[i]
  s[[i]]$end_date <- qs_df$end_date[i]
}

s_df <- ldply(s, data.frame) # convert s to dataframe

# Label every row with the index "National"; rep() keeps the assignment valid
# even when s_df has no rows.
s_df$index <- rep("National", nrow(s_df))

##### Data Source 3: Top Problems. Sample File: https://data.thumbtack.com/v1/top-problems
Example of pulling all top problem metrics at the industry level for "Landscape and Lawncare"

# Request the top-problems endpoint with index=1 and industry=1; warn (without
# stopping) if the server reports an HTTP error.
tp_response <- GET("https://data.thumbtack.com/v1/top-problems?index=1&industry=1")
warn_for_status(tp_response)

# Read the body as a JSON string. The encoding is given explicitly so httr
# does not have to guess it.
tp_text <- content(tp_response, as = "text", encoding = "UTF-8")

# Parse the JSON and keep its `data` element, flattening nested data frames.
tp_df <- fromJSON(tp_text, flatten = TRUE)$data

# Record which industry this pull was made for.
tp_df$industry <- "Landscape and Lawncare"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

R Tutorial.md

R Tutorial.md

Files

R Tutorial.md

Latest commit

History

R Tutorial.md

File metadata and controls