-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_data.R
92 lines (87 loc) · 4.05 KB
/
read_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#' Read a combined SXY table, sanity-check its parts and save them
#'
#' Reads the CSV file `ifname`, splits its columns into the study design (S),
#' the covariates (X) and the species data (Y), prints the outcome of a few
#' sanity checks, and saves S, X and Y (Y as a matrix) to `ofname` with
#' `save()`. Optionally also reads and checks a trait/taxonomy table and a
#' phylogenetic tree; those objects are only checked, they are NOT written to
#' `ofname`.
#'
#' @param ifname Path of the CSV file holding the S, X and Y columns, in that
#'   order.
#' @param ofname Path of the file that `save()` writes S, X and Y to.
#' @param SXY.upper_index Three increasing column indices: the last column of
#'   S, the last column of X and the last column of Y.
#' @param is.TP If `TRUE`, read the trait/taxonomy table from `TP.fname` and
#'   check it against Y.
#' @param is.P If `TRUE`, read a phylogenetic tree from `P.fname` with
#'   `ape::read.tree()` and check its tip labels against Y.
#' @param TP.fname Path of the trait/taxonomy CSV file (species names in the
#'   first column, which is used as row names).
#' @param P.fname Path of the tree file.
#' @return Invisibly, the "Dataset saved in ..." message that is also printed.
check_read_dataset <- function(ifname, ofname, SXY.upper_index = c(5, 10, 18),
                               is.TP = FALSE, is.P = FALSE,
                               TP.fname = "TP.csv", P.fname = "P.tre") {
  # Fail early, with a readable message, on an unusable column split.
  if (!is.numeric(SXY.upper_index) || length(SXY.upper_index) != 3 ||
      anyNA(SXY.upper_index) || SXY.upper_index[1] < 1 ||
      any(diff(SXY.upper_index) < 1)) {
    stop("SXY.upper_index must be three strictly increasing positive column indices",
         call. = FALSE)
  }

  # Data viewers only make sense in an interactive session, and there is
  # nothing to show for an object that is NULL.
  show_table <- function(x, title) {
    if (interactive() && !is.null(x)) {
      View(x, title)
    }
  }

  # READING IN SXY: study design (S) and/or covariates (X) and species data (Y)
  SXY <- read.csv(ifname, stringsAsFactors = TRUE)
  if (ncol(SXY) < SXY.upper_index[3]) {
    stop("'", ifname, "' has ", ncol(SXY), " columns but SXY.upper_index refers to column ",
         SXY.upper_index[3], call. = FALSE)
  }

  # S: study design, including units of study and their possible coordinates
  # X: covariates to be used as predictors
  # Y: species data
  # drop = FALSE keeps each part a data frame even when it has a single column.
  S <- SXY[, 1:SXY.upper_index[1], drop = FALSE]
  X <- SXY[, (SXY.upper_index[1] + 1):SXY.upper_index[2], drop = FALSE]
  Y <- SXY[, (SXY.upper_index[2] + 1):SXY.upper_index[3], drop = FALSE]

  # Check that the data looks as it should!
  show_table(S, "S")
  show_table(X, "X")
  show_table(Y, "Y")

  # Check that community data are numeric (or logical) AND that every
  # non-missing value is finite. If the script writes "Y looks OK", you are ok.
  # The type test is parenthesised so that the finiteness test applies to
  # numeric data as well (&& binds tighter than ||).
  Y_mat <- as.matrix(Y)
  y_type_ok <- is.numeric(Y_mat) || is.logical(Y_mat)
  if (y_type_ok && all(is.finite(Y_mat[!is.na(Y_mat)]))) {
    print("Y looks OK")
  } else {
    print("Y should be numeric and have finite values")
  }

  # Check that the study design data do not have missing values
  # (they are allowed for Y but not S, X, P or Tr)
  if (any(is.na(S))) {
    print("S has NA values - not allowed for")
  } else {
    print("S looks ok")
  }

  # Check that the covariate data do not have missing values
  # (they are allowed for Y but not S, X, P or Tr)
  if (any(is.na(X))) {
    print("X has NA values - not allowed for")
  } else {
    print("X looks ok")
  }

  # READING IN TP: traits (T) and/or phylogenetic information in table format (P)
  if (is.TP) {
    # Read in the species names as rownames, not as a column of the matrix
    TP <- read.csv(TP.fname, stringsAsFactors = TRUE, row.names = 1)

    # Check that the species names in TP are identical to, and in the same
    # order as, those in Y. identical() also copes with differing lengths,
    # where an element-wise == would recycle or warn.
    if (identical(rownames(TP), colnames(Y))) {
      print("species names in TP and SXY match")
    } else {
      print("species names in TP and SXY do not match")
    }

    # Modify the next two lines to split your TP file to components that relate to
    # Tr: species traits (note that T is a reserved word in R and that's why we use Tr)
    # P: phylogenetic information given by taxonomical levels, e.g. order, family, genus, species
    # If you don't have trait data, indicate this by Tr <- NULL.
    # If TP does not have phylogenetic data (because you don't have such data at all, or
    # because it is given in tree format, as in this example), indicate this with P <- NULL
    Tr <- TP[, 1:2, drop = FALSE]
    P <- NULL

    # Check that the data looks as it should!
    show_table(Tr, "Tr")
    show_table(P, "P")

    # Check that the Tr data do not have missing values
    # (they are allowed for Y but not S, X, P or Tr)
    if (any(is.na(Tr))) {
      print("Tr has NA values - not allowed for")
    } else {
      print("Tr looks ok")
    }

    # Check that the phylogenetic/taxonomic data do not have missing values
    # (they are allowed for Y but not S, X, P or Tr)
    if (any(is.na(P))) {
      print("P has NA values - not allowed for")
    } else {
      print("P looks ok")
    }
  }

  # READING IN P: phylogenetic information in tree format (P)
  # we use the ape package for trees, and the tree file must be in a format
  # that ape understands
  if (is.P) {
    # Use ape through its namespace instead of attaching it to the caller's
    # search path from inside a function.
    if (!requireNamespace("ape", quietly = TRUE)) {
      stop("package 'ape' is required to read the phylogenetic tree", call. = FALSE)
    }
    P <- ape::read.tree(P.fname)

    # When you look at P (e.g. write P and press enter),
    # you should see that it is a phylogenetic tree which
    # is rooted and includes branch lengths and tip labels.
    # Check that the species names in P are identical to (but not necessarily
    # in the same order as) those in Y.
    if (identical(sort(P$tip.label), sort(colnames(Y)))) {
      print("species names in P and SXY match")
    } else {
      print("species names in P and SXY do not match")
    }

    # Check that the data looks as it should!
    plot(P, cex = 0.5)
  }

  # Only S, X and Y are stored; Y is stored as a matrix.
  Y <- Y_mat
  save(S, X, Y, file = ofname)
  print(paste("Dataset saved in", ofname))
}