-
Notifications
You must be signed in to change notification settings - Fork 4
/
f - table_chem_cell_stats.R
96 lines (80 loc) · 4.59 KB
/
f - table_chem_cell_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
######################################## CALCULATE SUMMARY STATISTICS #######################################
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Purpose: This function calculates summary statistics for each included chemical and saves them as a csv
#
# Inputs: nhanes_subset - dataframe of complete demographics, cells, and chemicals
#
#         subset_chemicals - dataframe of percent measurements above LOD per included chemical
#
# Outputs: summary statistics - csv file containing all summary statistics for chemicals
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
table_chem_cell_stats <- function(nhanes_subset,
                                  subset_chemicals)
{
  # Summarise the measurements of every chemical listed in subset_chemicals and
  # save the resulting table as a csv file.
  #
  # Args:
  #   nhanes_subset    - wide dataframe holding one column per chemical codename
  #   subset_chemicals - dataframe with the columns chemical_codename_use,
  #                      chemical_name, above_percentage_unweighted and
  #                      above_percentage_weighted (one row per chemical)
  #
  # Side effects:
  #   - reads `current_directory`, which is NOT an argument; it must be defined
  #     in an enclosing environment (e.g. the global environment) before calling
  #   - sets the working directory to `current_directory`
  #   - writes "Tables - Table 1/subset_chemical_basic_statistics_new.csv"
  #     underneath `current_directory`
  #
  # Returns: the confirmation message emitted by the final print() call.

  library(tidyverse)

  stopifnot(is.data.frame(nhanes_subset),
            is.data.frame(subset_chemicals))

  # Kept from the original so callers still end up in current_directory. The
  # function never moves anywhere else, so an error further down cannot leave
  # the session stranded in the output folder.
  setwd(current_directory)

  #############################################################################################################
  ############################################# Make Long Datasets ############################################
  #############################################################################################################

  chems <- subset_chemicals$chemical_codename_use

  # Fail early with a readable message instead of an obscure error from drop_na()
  if (length(chems) == 0) {
    stop("subset_chemicals$chemical_codename_use is empty or missing",
         call. = FALSE)
  }

  # make the long dataset - one row per (participant, chemical), not log transformed
  # NOTE(review): gather() is superseded by pivot_longer(); it is kept here because
  # the two differ in how they combine columns of different types - confirm before switching.
  long_chemicals <- gather(data = nhanes_subset,           # wide dataset subsetted to full demog and chems
                           key = chemical_codename_use,    # name of the new column identifying the chemical
                           value = measurement,            # name of the new column holding the measurements
                           all_of(chems),                  # the columns to stack
                           factor_key = TRUE               # keeps the chemicals in column order
                           ) %>%
    drop_na(measurement)                                   # only drop rows whose measurement is missing

  print("long chemicals dataset")
  # str() prints by itself and returns NULL, so it must not be wrapped in print()
  str(long_chemicals)

  #############################################################################################################
  ######################################## Get Statistics on Chemicals ########################################
  #############################################################################################################

  # calculate stats on chemicals; every statistic is rounded to 2 decimal places
  stats <- long_chemicals %>%
    group_by(chemical_codename_use) %>%
    summarise(
      n = n(),                                             # total number of non-missing measurements
      min = round(min(measurement), digits = 2),
      # names = FALSE drops the "25%"/"75%" element names so the columns are plain numerics
      quantile_25 = round(quantile(measurement, probs = 0.25, names = FALSE), digits = 2),
      quantile_75 = round(quantile(measurement, probs = 0.75, names = FALSE), digits = 2),
      max = round(max(measurement), digits = 2),
      mean = round(mean(measurement), digits = 2),
      stdev = round(sd(measurement), digits = 2),
      median = round(median(measurement), digits = 2)) %>%
    arrange(n) %>%
    ungroup()

  # add the chemical name to stats to make it understandable;
  # the row order of the result follows subset_chemicals (left table)
  stats_chems <- left_join(subset_chemicals, stats, by = "chemical_codename_use")

  # reorder the table, drop the remaining columns and round the detection percentages
  stats_chems <- stats_chems %>%
    dplyr::select(chemical_name,
                  chemical_codename_use,
                  n,
                  min,
                  quantile_25,
                  median,
                  quantile_75,
                  max,
                  mean,
                  stdev,
                  above_percentage_unweighted,
                  above_percentage_weighted) %>%
    mutate(above_percentage_unweighted = round(above_percentage_unweighted, digits = 3),
           above_percentage_weighted = round(above_percentage_weighted, digits = 3))

  print("stats_chems shortened")
  print(dim(stats_chems))

  # save the table as a csv using an explicit path instead of changing the working directory
  output_directory <- file.path(current_directory, "Tables - Table 1")
  if (!dir.exists(output_directory)) {
    dir.create(output_directory, recursive = TRUE)
  }
  write.csv(stats_chems,
            file = file.path(output_directory, "subset_chemical_basic_statistics_new.csv"),
            row.names = FALSE)

  print("stats on chemicals saved as csv")
  #############################################################################################################
}