-
Notifications
You must be signed in to change notification settings - Fork 1
/
LocalDeviation.R
107 lines (97 loc) · 4.26 KB
/
LocalDeviation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#This function is used to find the deviation of each heat flow point from its local region of points.
#The regional mean and the regional median are both calculated for each point based on the
#neighborhood criteria. The criteria are the radius size and the maximum number of points.
#The minimum number of points is 3 for the median to be more robust than the mean.
#Fixme: Add in a minimum depth of points for neighborhood.
QsDev = function(Data, # The unprojected dataset
Var, # The variable to be tested for the local region
xName, # Name of the x coordinate in Data in UTM coordinates
yName, # Name of the y coordinate in Data in UTM coordinates
rad, # Radius to search for points (m)
max_pts, # maximum number of points
minDpth=NA, # minimum depth of the neighborhood points
DpthName=NA # name of the field for the well depth
){
#Serial version. Parallel is below.
# # Add a column for the local mean and median
# Data$RegMean = NA
# Data$RegMed = NA
# # Add a column for the number of points in the neighborhood
# Data$RegPts = 0
# # Add a column for the distance to the 3rd point.
# Data$Dist3 = NA
#
# # Find the distance from the point to all other points in the dataset
# for(i in 1:nrow(Data)){
#
# # initializing matrix with one column for index and the other for distance
# dist_vec <- matrix(0,nrow(Data),1)
#
# # calculating weighted Euclidian distance for the points
# dist_vec[,1] <- sqrt((Data[,xName] - Data[i,xName])^2 + (Data[,yName] - Data[i,yName])^2)
# #Remove the point being tested from the dist_vec
# dist_vec = dist_vec[-i,1]
#
# # finding the distance corresponding to the maximum number of points
# dist_cutoff <- sort(dist_vec)[max_pts]
# # Find distance to the third point
# Data$Dist3[i] = sort(dist_vec)[3]
#
# # finding the neighbors within rad. Smaller indices are closer to the point
# if (dist_cutoff <= rad){
# Neighbors <- which(dist_vec <= dist_cutoff)
# }else{
# Neighbors <- which(dist_vec <= rad)
# }
#
# if (length(Neighbors) <= 2){
# #There are too few neighbors within rad
# Data$RegPts[i] = length(Neighbors)
# }else{
# # Adding the number of neighbors to the dataframe
# Data$RegPts[i] = length(Neighbors)
#
# #Calculate the mean and median of these points, and add it to the database
# #Neighbors is in reference to the dataset without i included.
# Data$RegMean[i] = mean(Data[-i,][Neighbors, Var])
# Data$RegMed[i] = median(Data[-i,][Neighbors, Var])
# }
# }
# return(Data)
#Parallel
# Find the distance from the point to all other points in the dataset.
# Compute local median and average deviation.
temp = foreach(i = 1:nrow(Data), .combine = 'rbind') %dopar% {
# initializing matrix with one column for index and the other for distance
dist_vec <- matrix(0,nrow(Data),1)
# calculating weighted Euclidian distance for the points
dist_vec[,1] <- sqrt((Data[,xName] - Data[i,xName])^2 + (Data[,yName] - Data[i,yName])^2)
#Remove the point being tested from the dist_vec
dist_vec = dist_vec[-i,1]
# finding the distance corresponding to the maximum number of points
dist_cutoff <- sort(dist_vec)[max_pts]
# Find distance to the third point
Dist3 = sort(dist_vec)[3]
# finding the neighbors within rad. Smaller indices are closer to the point
if (dist_cutoff <= rad){
Neighbors <- which(dist_vec <= dist_cutoff)
}else{
Neighbors <- which(dist_vec <= rad)
}
if (length(Neighbors) <= 2){
#There are too few neighbors within rad
RegPts = length(Neighbors)
RegMean = NA
RegMed = NA
}else{
# Adding the number of neighbors to the dataframe
RegPts = length(Neighbors)
#Calculate the mean and median of these points, and add it to the database
#Neighbors is in reference to the dataset without i included.
RegMean = mean(Data[-i,][Neighbors, Var])
RegMed = median(Data[-i,][Neighbors, Var])
}
data.frame(RegMean = RegMean, RegMed = RegMed, RegPts = RegPts, Dist3 = Dist3)
}
return(cbind(Data, temp))
}