# RB_fcn_part3.py
import numpy as np
import pandas as pd
import geopandas
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from shapely.geometry import Polygon
import tqdm
from functools import reduce
import multiprocess as mp  # dill-based fork of multiprocessing; can pickle the locally defined worker used below
import matplotlib.gridspec as gridspec
#%% Select_class_ReefCloud_pts
def Select_class_ReefCloud_pts(RB_centroid_csv, RC_csv, label_file, polygon_file, label_polygon_seg, label_polygon_csv):
#access and read files
    RB_centroid_csv = pd.read_csv(RB_centroid_csv)
df = pd.read_csv(RC_csv)
label = pd.read_csv(label_file)
label['label_set'] = label['label_set'].replace({'AC _Bran_OTH': 'AC_Bran_OTH'})
    #drop water and unidentified classes
    df_UNid = df[df['pred_desc'].isin(['Un_ID', 'WA'])].index
    df.drop(df_UNid, inplace=True)
df['pred_desc']= df['pred_desc'].replace({'G_STY ': 'G_STY'})
df_all_sub_04 = df[df['pred_score']>=0.4] #filter 0.40 pred_score
    # append Morphology classes
df_classNum = df_all_sub_04.merge(label[['label_set', 'Morphology','ID_number','Morpho_number']], how='left', left_on='pred_desc', right_on='label_set')
    #for Hexagrid segments, drop Soft_Coral / Branching_non_acropora morphologies and all Acropora classes except ACS and ACX
Hexagrid_filter = df_classNum.loc[((df_classNum.SAM_centroid.str.contains('Hexa')) &
((df_classNum['Morphology'].isin(['Soft_Coral','Branching_non_acropora']))|
(df_classNum['pred_desc'].isin(['ACO_MIL','ACO_TEN', 'ACO_OTH', 'ACD', 'AC_Bran_OTH', 'ACT']))))]
    df_classNum.drop(Hexagrid_filter.index, inplace=True)
# pre_filter morphology
pred = 'Morphology'
df = df_classNum
    #per mask, find the most frequent (mode) Morphology
most_occuring_morpho = df.groupby(['SAM_centroid'])[pred].agg(pd.Series.mode)
most_occuring_morpho_df = most_occuring_morpho.to_frame().reset_index()
df_drop_morpho = pd.merge(df, most_occuring_morpho_df,
how='inner',
on='SAM_centroid')
    #keep rows whose Morphology matches (one of) the per-mask mode(s); mode returns an array on ties
    df_t = df_drop_morpho.loc[df_drop_morpho.apply(lambda row: row.Morphology_x in row.Morphology_y, axis=1)]
df_t = df_t.drop(columns = 'Morphology_y')
df_t = df_t.rename(columns ={'Morphology_x':'Morphology'})
# get label value with weighted formula
pred = 'pred_desc' #'Morphology'
df = df_t
    #row with the highest prediction score per mask (kept for its metadata)
    max_pred_row = df.loc[df.groupby(["SAM_centroid"])["pred_score"].idxmax()]
    #per (mask, class): occurrence count, mean and max prediction score, and weighted score = count * mean
    occur = df.groupby(['SAM_centroid', pred]).size()
    mean = df.groupby(['SAM_centroid', pred])['pred_score'].mean()
    max_pred = df.groupby(['SAM_centroid', pred])['pred_score'].max()
    fctn = (occur * mean)
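    # e.g. (illustrative values) a mask with three 'ACS' points averaging 0.6 scores
    # ACS at 3 * 0.6 = 1.8, beating a single point of another class at 0.9
    # (1 * 0.9 = 0.9), so the weighted score rewards both frequency and confidence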
    #assemble the per-(mask, class) statistics into one dataframe
    df_result = pd.DataFrame({'n_occur': occur, 'pred_score_mean': mean,
                              'Weighted_score': fctn, 'max_pred': max_pred})
Weighted_class_result = df_result[df_result['Weighted_score'] == df_result.groupby(level=0)['Weighted_score'].transform(max)]
Weighted_class_result=Weighted_class_result.reset_index(level=[pred])
    #attach the max-score row's metadata to each mask's winning (highest weighted score) class
    df_merged = Weighted_class_result.merge(max_pred_row, on='SAM_centroid')
df_merged.reset_index(inplace=True)
# Export file
RB_SHP = geopandas.read_file(polygon_file)
df_merged = df_merged.merge(RB_SHP, left_on='SAM_centroid', right_on = "segment_un")
gdf = GeoDataFrame(df_merged, geometry='geometry')
    gdf.to_file(label_polygon_seg)
    gdf.to_csv(label_polygon_csv)
return gdf
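# A minimal usage sketch for Select_class_ReefCloud_pts; every path below is a
# hypothetical placeholder, not a file shipped with this script. The ReefCloud
# CSV is assumed to carry 'SAM_centroid', 'pred_desc' and 'pred_score' columns,
# matching the code above.
# gdf_labels = Select_class_ReefCloud_pts(
#     RB_centroid_csv='RB_centroids.csv',           # hypothetical input
#     RC_csv='ReefCloud_predictions.csv',           # hypothetical input
#     label_file='label_set.csv',                   # hypothetical input
#     polygon_file='SAM_segments.shp',              # hypothetical input
#     label_polygon_seg='labelled_segments.shp',    # hypothetical output
#     label_polygon_csv='labelled_segments.csv')    # hypothetical output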
def format_percent_cover(label_polygon_seg, label_file):
label = pd.read_csv(label_file)
label['label_set'] = label['label_set'].replace({'AC _Bran_OTH': 'AC_Bran_OTH'})
    RB_shp_df = geopandas.read_file(label_polygon_seg)
RB_shp_df['pred_desc_']= RB_shp_df.pred_desc_.replace({'G_STY ': 'G_STY'})
RB_shp_df_classes = RB_shp_df.groupby(['pred_desc_'])['area'].sum()
    RB_shp_df_classes_result = pd.DataFrame({
        'SAM_10X4_Total_area': RB_shp_df_classes,
        'SAM_10X4_Percent_cover': RB_shp_df_classes / RB_shp_df_classes.sum() * 100})
RB_shp_df_classes_result_group = RB_shp_df_classes_result.copy()
RB_shp_df_classes_result_group["class"] = RB_shp_df_classes_result_group.index
RB_shp_df_classes_result_group = RB_shp_df_classes_result_group.reset_index()
RB_shp_df_classes_result_group =RB_shp_df_classes_result_group.drop(['pred_desc_'], axis= 1)
RB_shp_df_classes_result_group = RB_shp_df_classes_result_group.rename(columns={"SAM_10X4_Total_area": "Total_area", "SAM_10X4_Percent_cover": "Percent_cover"})
label = label.dropna()
RB_shp_df_classes_result_group_morpho = label.merge(RB_shp_df_classes_result_group, left_on='label_set', right_on='class', how='outer')
RB_shp_df_classes_result_group_morpho = RB_shp_df_classes_result_group_morpho.dropna(subset = ['class'])
RB_shp_df_classes_result_group_morpho['treatment']='RB'
    #build a class -> hex colour mapping from the 'RGB' column ("r,g,b" strings)
    hex_colors = []
    for i in RB_shp_df_classes_result_group_morpho['RGB']:
        chunks = i.split(',')
        hex_colors.append("#" + rgb_to_hex(int(chunks[0]), int(chunks[1]), int(chunks[2])))
    RB_shp_df_classes_result_group_morpho['hex'] = hex_colors
    color_list = dict(zip(RB_shp_df_classes_result_group_morpho['label_set'].unique(),
                          RB_shp_df_classes_result_group_morpho['hex'].unique()))
label = label.drop(label[label.Morphology.isin([0])].index)
label = label.dropna()
    hex_colors = []
    for i in label['RGB']:
        chunks = i.split(',')
        hex_colors.append("#" + rgb_to_hex(int(chunks[0]), int(chunks[1]), int(chunks[2])))
    label['hex'] = hex_colors
    #register each class name as a matplotlib named colour so plots can colour bars by class name
    enmax_palette = label['hex']
    color_codes_wanted = label['label_set']
    cdict = dict(zip(color_codes_wanted, [mcolors.to_rgba(c) for c in enmax_palette]))
    mcolors.get_named_colors_mapping().update(cdict)  # mutates matplotlib's global mapping in place
    palette_RB = cdict  # .update() returns None, so return the dict itself as the palette
return RB_shp_df_classes_result_group_morpho, palette_RB
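# A hedged usage sketch chaining the two functions above; the shapefile path is
# the hypothetical output of the Select_class_ReefCloud_pts example.
# cover_df, palette = format_percent_cover('labelled_segments.shp', 'label_set.csv')
# print(cover_df[['class', 'Percent_cover']])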
def stack_catplot(x, y, cat, stack, data, palette, out_fig, order):
    #stacked, grouped barplot on a broken y-axis; 'palette' is accepted for API
    #symmetry but colours are resolved through the named colours registered above
    f = plt.figure(figsize=(6, 4), dpi=600)
gs = gridspec.GridSpec(2,1, height_ratios=[1, 3])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
# pivot the data based on categories and stacks
df = data.pivot_table(values=y, index=[cat, x], columns=stack,
dropna=False, aggfunc='sum').fillna(0)
    df['Morph'] = df.index.get_level_values(cat)
df['Morph'] = pd.Categorical(df['Morph'], order)
df = df.sort_values('Morph')
morph_class = df.Morph
df=df.drop(columns = 'Morph')
total_list = df.sum(axis=1).round(2)
total_list = list(map(str, total_list))
total_list = list(map(lambda x: '<0.01' if x == '0.0' else x, total_list))
ncat = data[cat].nunique()
nx = data[x].nunique()
nstack = data[stack].nunique()
range_x = np.arange(nx)
width = 0.8 / ncat # width of each bar
for i, c in enumerate(data[cat].unique()):
        # iterate over categories, one group of bars per category
# calculate the location of each bar
loc_x = (0.5 + i - ncat / 2) * width + range_x
bottom = 0
        for j, s in enumerate(data[stack].unique()):
            # iterate over stacked classes
            # obtain the height of each stack segment of a bar
            height = df.loc[c][s].values
height = df.loc[c][s].values
            # plot the bar; the class name s resolves to a matplotlib named colour registered above
            ax.bar(x=loc_x, height=height, bottom=bottom, width=width, zorder=10, color=s)
ax2.bar(x=loc_x, height=height, bottom=bottom, width=width, zorder=10, color=s)
# change the bottom attribute to achieve a stacked barplot
bottom += height
ax.set_ylim(20, 100) # outliers only
ax2.set_ylim(0, 20) # most of the data
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
    ax.xaxis.set_visible(False)
    d = .015  # size of the diagonal 'cut' markers drawn between the two axes
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((-d, +d), (-d, +d), **kwargs) # top-left diagonal
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs) # top-right diagonal
kwargs.update(transform=ax2.transAxes) # switch to the bottom axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs) # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs) # bottom-right diagonal
    # x tick labels: one morphology label per bar
    ax2.set_xticks(range_x)
    ax2.xaxis.tick_bottom()
    ax2.set_xticklabels(morph_class, rotation=90)
    ax2.set_ylabel('Percent cover')
    # annotate each bar with its (rounded) total on whichever axis shows the bar top
    ax.bar_label(ax.containers[-1], labels=total_list, padding=3, fontsize=7)
    ax2.bar_label(ax2.containers[-1], labels=total_list, padding=3, fontsize=7)
f.savefig(out_fig, bbox_inches='tight')
return f
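# A hedged usage sketch for stack_catplot; 'morpho_order' is a hypothetical
# ordering list, and column names follow the frame returned by
# format_percent_cover above.
# cover_df, palette = format_percent_cover('labelled_segments.shp', 'label_set.csv')
# morpho_order = list(cover_df['Morphology'].dropna().unique())
# stack_catplot(x='treatment', y='Percent_cover', cat='Morphology', stack='class',
#               data=cover_df, palette=palette, out_fig='percent_cover.png',
#               order=morpho_order)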
def rgb_to_hex(r, g, b):
    #format three 0-255 channel values as an upper-case six-digit hex string (without '#')
    return ('{:02X}' * 3).format(r, g, b)
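# e.g. rgb_to_hex(255, 128, 0) returns 'FF8000'; callers above prepend the '#'.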
def ColonyLevel_Segments(label_polygon_seg, ColonyLevelSegments_shp):
    #apply morphology-dependent buffers, then merge touching segments of the same class
    gdf = geopandas.read_file(label_polygon_seg)
    #filter out Hexagrid segments (except a few classes) and very small Acropora segments
    gdf_NH = gdf.loc[~((gdf.SAM_centro.str.contains('Hexa')) & (~gdf['pred_desc_'].isin(['ACS', 'ACX', 'CB_OTH_C'])))
                     | ((gdf['Morphology'].isin(['Acropora']) & (gdf['area'] <= 0.0006)))]
    gdf_buff = gdf_NH.loc[(gdf_NH['area'] <= 0.0025) | (~gdf_NH['Morphology'].isin(['Acropora', 'Branching_non_acropora', 'Soft_Coral']))].copy()
    gdf_buff['geometry'] = gdf_buff.geometry.buffer(0.0004)
gdf_Non_buff = gdf_NH[~gdf_NH.index.isin(gdf_buff.index)]
gdf_buff_non_buff = geopandas.GeoDataFrame(pd.concat([gdf_buff, gdf_Non_buff]))
gdf_buff_non_buff['SAM_centro_ID']= gdf_buff_non_buff.SAM_centro
#merge based on class
unique_ID = gdf_buff_non_buff['pred_desc_'].unique()
colony_gdf = []
for i in unique_ID:
class_shp = gdf_buff_non_buff[gdf_buff_non_buff['pred_desc_'] ==i]
        class_shp_uu = class_shp.unary_union  # dissolve touching/overlapping polygons of this class
cuu = geopandas.GeoSeries(class_shp_uu)
df2 = cuu.explode(ignore_index=True)
df3= GeoDataFrame(geopandas.GeoSeries(df2))
df3['Colony_ID']= i
colony_gdf.append(df3)
colony_gdf_c = GeoDataFrame(pd.concat(colony_gdf, ignore_index=True) )
colony_gdf_c.rename(columns={ colony_gdf_c.columns[0]: "geometry" }, inplace = True)
colony_gdf_c = colony_gdf_c.set_geometry("geometry")
colony_gdf_c['area']=colony_gdf_c.area
#fill holes
colony_gdf_c['rings'] = colony_gdf_c['geometry'].apply(lambda geom: [i for i in geom.interiors])
colony_gdf_c['to_fill'] = colony_gdf_c['rings'].apply(lambda rings: [Polygon(ring) for ring in rings if Polygon(ring).area < 0.001])
    colony_gdf_c_toFill = colony_gdf_c[(colony_gdf_c['to_fill'].apply(len) != 0)].copy()
    def process_row(chunk):  # union each row's geometry with its small interior-ring polygons
        from functools import reduce
        return chunk.apply(lambda row: reduce(lambda geom1, geom2: geom1.union(geom2), [row['geometry']] + row['to_fill']), axis=1)
    num_processes = mp.cpu_count()  # number of parallel processes
    chunk_size = max(1, len(colony_gdf_c_toFill) // num_processes)  # guard against a zero chunk size
    chunks = [colony_gdf_c_toFill[i:i+chunk_size] for i in range(0, len(colony_gdf_c_toFill), chunk_size)]  # split the DataFrame into chunks
pool = mp.Pool(processes=num_processes) # Create a multiprocessing Pool
results = pool.map(process_row, chunks) # Apply the function to each chunk in parallel
# Close the pool to release resources
pool.close()
pool.join()
# Concatenate the results
final_result = pd.concat(results)
    colony_gdf_c_toFill['newgeom'] = final_result
    colony_gdf_c_NotToFill = colony_gdf_c[(colony_gdf_c['to_fill'].apply(len) == 0)].copy()
    colony_gdf_c_NotToFill['newgeom'] = colony_gdf_c_NotToFill['geometry']
    colony_gdf_c_merged = pd.concat([colony_gdf_c_NotToFill, colony_gdf_c_toFill], sort=True)
    colony_gdf_c_merged.drop(['rings', 'to_fill', 'geometry'], axis=1, inplace=True)
    colony_gdf_c_merged = colony_gdf_c_merged.rename(columns={'newgeom': 'geometry'})
    colony_gdf_c_merged = colony_gdf_c_merged.set_geometry('geometry')  # re-activate the geometry column after the rename
    colony_gdf_c_merged['area'] = colony_gdf_c_merged.area
    colony_gdf_c_merged.to_file(ColonyLevelSegments_shp)
    return colony_gdf_c_merged
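# A minimal usage sketch with hypothetical paths: merge the labelled SAM
# segments produced by Select_class_ReefCloud_pts into colony-level polygons.
# colonies = ColonyLevel_Segments('labelled_segments.shp', 'colony_segments.shp')
# print(colonies.groupby('Colony_ID')['area'].sum())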