-
Notifications
You must be signed in to change notification settings - Fork 1
/
Death_to_Kappa.py
175 lines (150 loc) · 6.51 KB
/
Death_to_Kappa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def ctab(a, b, aggregate = False):
    """
    Generate a confusion matrix from a classified and a reference image.

    Rows are indexed by the classified labels, columns by the reference
    labels; the label set is the union of the values found in both inputs.

    Args:
        a: n-D NumPy array derived from the classified image raster
        b: n-D NumPy array derived from the reference image raster
        aggregate: if True, merge related class codes before counting
            (codes 221-229 -> 220, 321 -> 333, 324 -> 310)

    Returns:
        result (2-D NumPy array): confusion matrix of pixel counts,
        shape (n_labels, n_labels).
    """
    import numpy as np
    # flatten() returns copies, so the aggregation below never mutates
    # the caller's arrays
    a = a.flatten().astype(int)
    b = b.flatten().astype(int)
    if aggregate:
        for arr in (a, b):
            arr[(arr > 220) & (arr < 230)] = 220
            arr[arr == 321] = 333
            arr[arr == 324] = 310
    # sorted union of all labels present in either image
    name = np.union1d(a, b)
    n = len(name)
    result = np.zeros((n, n))
    for c in range(n):
        # hoist the row mask: pixels classified as label name[c]
        classified_as_c = (a == name[c])
        for d in range(n):
            # cell (c, d): pixels classified as name[c] AND referenced as name[d]
            result[c, d] = np.count_nonzero(classified_as_c & (b == name[d]))
    return result
def kstat(classified, reference, perCategory = False, aggregate = False, mask_array = None):
    """
    Calculate accuracy statistics based on a confusion matrix.

    Indices for accuracy assessment of image classifications taken from:
    Pontius, R.G., Jr., and Millones, M. (2011): Death to Kappa: Birth of
    Quantity Disagreement and Allocation Disagreement for Accuracy Assessment.
    International Journal of Remote Sensing, 32: 4407-4429.

    Args:
        classified: path to your GTIFF containing the classified image
        reference: path to your GTIFF containing the reference image or
            rasterized training data
        perCategory: if True and there are more than two categories, also
            report the statistics per category (each category vs. the rest)
        aggregate: forwarded to ctab() to merge related class codes
        mask_array: optional array multiplied onto both images so that zero
            entries mask pixels out; None or an empty list disables masking
            (default changed from a mutable `[]` — empty list still accepted)

    Returns:
        resultDF (pandas DataFrame): overall accuracy (PA), Kappa index (K),
        Kappa of location (Kloc), Kappa of histogram (Khisto), chance
        agreement (CA), quantity agreement (QA), allocation agreement (AA),
        allocation disagreement (AD), quantity disagreement (QD).

    Raises:
        ValueError: if classified and reference rasters differ in extent.
    """
    import numpy as np
    import pandas as pd
    from osgeo import gdal

    ##################################################
    # Function: calculate kappa indices and disagreements for one matrix
    def calculateKappa(ct):
        """Return (PA*100, K, Kloc, Khisto, CA, QA, AA, AD, QD) for matrix ct."""
        ct = ct / np.sum(ct)  # convert counts to proportions of pixels
        cmax = len(ct)        # number of categories
        # observed fraction of agreement (matrix diagonal)
        PA = float(np.trace(ct))
        rows = np.sum(ct, axis=1)
        cols = np.sum(ct, axis=0)
        # expected agreement by chance, given the observed marginals
        PE = float(np.sum(rows * cols))
        # maximum agreement attainable with the observed marginals
        PMax = float(np.sum(np.minimum(rows, cols)))
        K = (PA - PE) / (1 - PE)         # Kappa index
        Kloc = (PA - PE) / (PMax - PE)   # Kappa of location
        Khisto = (PMax - PE) / (1 - PE)  # Kappa of histogram
        CA = 100 * min(1 / cmax, PA, PE)  # chance agreement
        # quantity agreement is only defined when chance agreement saturates
        if min(1 / cmax, PE, PA) == 1 / cmax:
            QA = 100 * min(PE - 1 / cmax, PA - 1 / cmax)
        else:
            QA = 0
        AA = 100 * max(PA - PE, 0)  # allocation agreement
        AD = 100 * (PMax - PA)      # allocation disagreement
        QD = 100 * (1 - PMax)       # quantity disagreement
        return (PA * 100, K, Kloc, Khisto, CA, QA, AA, AD, QD)

    #########################################################################
    # read tifs as arrays and check dimensions/extent
    ds_cl = gdal.Open(classified)
    ds_ref = gdal.Open(reference)
    if ds_cl.RasterXSize != ds_ref.RasterXSize or ds_cl.RasterYSize != ds_ref.RasterYSize:
        raise ValueError('Warning: Classified Image and Reference Image do not have the same Extent!')
    a = ds_cl.GetRasterBand(1).ReadAsArray()
    b = ds_ref.GetRasterBand(1).ReadAsArray()
    # optional masking; explicit None/len check replaces the ambiguous
    # `mask_array != []` comparison (undefined for numpy arrays)
    if mask_array is not None and len(mask_array) > 0:
        a = np.multiply(a, mask_array)
        b = np.multiply(b, mask_array)
    # drop pixels without reference data (reference value 0)
    if 0 in np.unique(b):
        b_nonzero = np.nonzero(b)
        a = a[b_nonzero]
        b = b[b_nonzero]
    ###########################################################################
    # generate the confusion matrix once; per-category 2x2 tables below are
    # built from fresh scalars, so ct is never mutated (the original code
    # wrote through a view of ct and corrupted it between iterations)
    ct = ctab(a, b, aggregate)
    result = []
    if len(ct) <= 2 or perCategory == False:
        result.append(calculateKappa(ct))
        if perCategory == True:
            print('Warning: Kappa per category requires at least 3 categories')
    if perCategory == True and len(ct[0]) > 2:
        ncat = len(ct[0])
        total = ct.sum()
        for ca in range(ncat + 1):
            if ca == ncat:
                # last row: statistics for the full matrix ("Overall")
                result.append(calculateKappa(ct))
            else:
                # collapse to a 2x2 table: category `ca` vs. all others.
                # BUGFIX: the original summed only ct[0:-1, ca] for the
                # column, silently dropping the last row.
                tp = ct[ca, ca]
                fn = ct[ca, :].sum() - tp  # classified as ca, reference disagrees
                fp = ct[:, ca].sum() - tp  # reference is ca, classification disagrees
                tn = total - tp - fn - fp  # neither image has category ca
                result.append(calculateKappa(np.array([[tp, fn], [fp, tn]])))
    ################################################################
    # arrange results
    columns = ['PA', 'K', 'Kloc', 'Khisto', 'CA', 'QA', 'AA', 'AD', 'QD']
    if len(result) > 1:
        name = list(np.union1d(a, b))
        name.extend(['Overall'])
        resultDF = pd.DataFrame(result, columns=columns, index=name)
    else:
        resultDF = pd.DataFrame(result, columns=columns, index=['Overall'])
    return resultDF