-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflux_plotter.py
328 lines (274 loc) · 11.5 KB
/
flux_plotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# Flux Plotter - the central script for the data analysis
# Generates and imposes cuts on data, makes plots, and runs MCMC analysis
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
from scipy.optimize import minimize
import emcee
from scipy import stats
import corner
#arcsec/pix conversion - make sure to change this throughout the code
bit_a1689_scale = .140843
bit_cosmos_scale = .140732
# initializing values and methods
data = np.loadtxt('a1689/catalogs/band1_x_hscg_catalog.csv', delimiter = ',', skiprows=1) # change the path for different datasets
psf = np.loadtxt("a1689/flux_plots/psf_cut.txt") # this pulls from the PSF-like form's file
flux_med = np.median(data[:,12])
flux_mean = np.mean(data[:,12])
# curve fitting to generate the PSF-generated curve to eliminate increasing brightness effects (see report)
def psf_form(x, a,b):
return a*np.power(x,b)
def psf_func(x, a, b, c):
return a*psf_form(b*x+c, popt[0], popt[1])
def model(x,a,b,c,d,e):
return a*(b**(c*(x+d)))+ e
# collecting objects that are likely to be influenced by this phenomenon
pairs = [] # uses peak flux
for obj in data:
if (obj[12]>flux_med and bit_a1689_scale*obj[24]<.4):
pairs.append([bit_a1689_scale*obj[24],obj[12]])
pairs = np.array(pairs)
# different model fits to try to approximate this curve on this dataset
popt,pcov = curve_fit(psf_form, psf[:,0],psf[:,1], maxfev=100000) # runs from the imported data
pop, pcov = curve_fit(psf_func, pairs[:,0], pairs[:,1], p0 = [1, 1, 1], bounds = ([0,.0001, .001], [100, 100, 100]), maxfev=100000) # distorts the imported curve to fit data
mod, mvar = curve_fit(model, pairs[:,0], pairs[:,1], p0 = [1, 5, 1, 1,0], bounds = ([0,.0001, .001, -100,-10], [100, 100, 100, 100,100]), maxfev=1000000) # doesn't use imported data, simply tries to fit
# plots of data with cuts drawn
plt.scatter(bit_cosmos_scale*data[:,24], data[:,12], color = 'blue', label = 'Non-curve-fitting data')
plt.scatter(pairs[:,0], pairs[:,1], color = 'black', label = 'Curve-fitting data')
plt.scatter(bit_cosmos_scale*data[:,24], model(bit_cosmos_scale*data[:,24], mod[0], mod[1], mod[2], mod[3], mod[4]), color = 'orange', label = 'Exp fit cut')
plt.scatter(bit_cosmos_scale*data[:,24], psf_func(bit_cosmos_scale*data[:,24], pop[0], pop[1], pop[2]), color = 'red', label = "Jones fit cut")
plt.axhline(y=flux_med, color = 'green', linestyle = '--', label = 'Median: ' + str(flux_med))
plt.axhline(y=flux_mean, color = 'purple', linestyle = '--', label = 'Mean: ' + str(flux_mean))
plt.xlabel('Angular Size (arcseconds)')
plt.ylabel('Peak Flux')
plt.ylim(0,10)
plt.title('SuperBIT: Size vs. Peak Flux with cuts')
plt.legend()
#plt.savefig('cosmosk/flux_plots/kron/cuts/cuts_band1_hscg.png')
plt.show()
plt.clf()
# subjects data to cuts
bit_flux= []
hsc_flux = []
coords = []
ang = []
bit_back = []
errs = []
manual_min_bit = 0.005 # 0.005 for trying to eliminate very dim objects
manual_max_bit = .07 # .07 for trying to get same regime for cosmos and a1689, otherwise can be higher
manual_max_hsc = 15000 # unnecessary, but helpful to have parameter in case
# data must pass these cuts and be below the fitted curve from above
for obj in data:
if obj[4] > manual_min_bit and obj[4] < manual_max_bit and obj[5]< manual_max_hsc and obj[12] < model(bit_a1689_scale*obj[24], mod[0], mod[1], mod[2], mod[3],mod[4]) and obj[12]<flux_mean and obj[4]/obj[8]>10: #if no saturation and falls under constructed psf curve (and obj[8] < flux_mean)
bit_flux.append(obj[4])
hsc_flux.append(obj[5])
coords.append([obj[20],obj[22]])
ang.append(bit_a1689_scale*obj[24])
errs.append([obj[6],obj[7]])
bit_back.append(obj[8])
bit_flux = np.array(bit_flux)
hsc_flux = np.array(hsc_flux)
coords = np.array(coords)
ang = np.array(ang)
bit_back = np.array(bit_back)
errs = np.array(errs)
#outlier exclusion - binary vals for every data point, then removes 0's
binary = np.ones(coords[:,0].size)
outlier_list = np.loadtxt('a1689/flux_plots/outlier_files/c1g_outliers.csv', delimiter=',') # created from inspecting with DS9
for i in range(coords[:,0].size):
alpha = coords[i,0]
delta = coords[i,1]
for j in range(outlier_list[:,0].size):
if alpha == outlier_list[j,0] and delta == outlier_list[j,1]:
binary[i]=0
b_=[]
h_=[]
a_=[]
bb_=[]
er_=[]
co_=[]
# slicing
for i in range(binary.size):
if binary[i] == 1:
b_.append(bit_flux[i])
h_.append(hsc_flux[i])
a_.append(ang[i])
co_.append(coords[i])
bb_.append(bit_back[i])
er_.append(errs[i])
bit_flux = np.array(b_)
hsc_flux = np.array(h_)
ang = np.array(a_)
bit_back = np.array(bb_)
errs = np.array(er_)
coords = np.array(co_)
ratios = (hsc_flux/bit_flux)
# makes region with all data that will be considered for analysis
with open('a1689/region_overlays/MCMC_data/2r_MCMC.reg', 'w') as f:
f.write('icrs\n')
for i in range(bit_flux.size):
f.write('point(' + str(coords[i,0]) +',' +str(coords[i,1]) + ')\n')
# S/N plots
plt.hist(bit_flux/bit_back, density = True, bins = 20)
plt.xlabel('S/N (Kron Flux above Background/Background)')
plt.ylabel('Frequency')
#plt.xlim(0,10000)
plt.title('SuperBIT Signal to Noise on Selection')
plt.figtext(1.0,0.2, pd.DataFrame(bit_flux/bit_back).describe().to_string())
#plt.savefig('cosmosk/flux_plots/kron/sig_and_ang/sig_noise_band2_hscr.png', bbox_inches='tight')
plt.show()
plt.clf()
# ang plots
plt.hist(ang, density = True, bins = 20)
plt.xlabel('Angular Size - BIT Semi-Major Axis (arcseconds)')
plt.ylabel('Frequency')
plt.title('Angular Size of BIT Objects')
plt.figtext(1.0,0.2, pd.DataFrame(ang).describe().to_string())
#plt.savefig('cosmosk/flux_plots/kron/sig_and_ang/ang_band2_hscr.png', bbox_inches = 'tight')
plt.show()
plt.clf()
# plot of processed data
plt.scatter(ang, bit_flux)
plt.xlabel('Angular Size (arcseconds)')
plt.ylabel('Kron Flux')
plt.title('SuperBIT Kron Flux v. Angular Size (cut)')
#plt.savefig('cosmosk/flux_plots/kron/flux_v_size/flux_v_size_band2_hscr.png')
plt.show()
plt.clf()
'''
# AB mag conversion for HSC, denominator is MAGZERO which may change, 27.516 is mean f-value (see report)
#for i in range(hsc_flux.size):
# hsc_flux[i] = -2.5*np.log10(27.516953*hsc_flux[i]/63095734448.01944) #HSC mag conversion
'''
# percent data lying in interval around median
med = np.median(ratios)
margin = 1950
count = 0
for entry in ratios:
if entry < med + margin and entry > med - margin:
count = count + 1
print("Approximately " + str(count/ratios.size) + '% of the data lies within +- ' + str(margin) ' of the median (' + str(med) + ')')
# region generator for ratio percentile outliers
low = np.percentile(ratios, 10)
high = np.percentile(ratios, 90)
with open('region_overlays/ratio_outliers/band0_x_hscg_outliers.reg', 'w') as f:
f.write('icrs\n')
for obj in data:
if obj[0] < manual_cut and obj[8] < flux_mean and obj[8] < model(bit_cosmos_scale*obj[20], mod[0], mod[1], mod[2], mod[3],mod[4]): #if no saturation and falls under constructed psf curve
if obj[1]/obj[0] > high or obj[1]/obj[0]<low:
f.write('point(' + str(obj[16]) +',' +str(obj[18]) + ')\n')
# ratios histogram
plt.hist(ratios, bins = 15, density = True)
#plt.xscale('log')
plt.title('kron_flux_hscr/kron_flux_bitband2')
plt.xlabel('Kron Flux Ratio (non-magnitude)')
plt.ylabel('Frequency')
plt.figtext(1.0,0.2, pd.DataFrame(ratios).describe().to_string())
#plt.savefig('cosmosk/flux_plots/kron/ratios/ratio_band2_hscr.png', bbox_inches='tight')
plt.show()
plt.clf()
#Markov Chain Monte Carlo Analysis
# log-likelihood for 2-d uncertainty orthogonal regression model
def log_likelihood(params):
m,b = params
theta = np.arctan(m)
total = 0
for i in range(bit_flux.size):
x = bit_flux[i]
y = hsc_flux[i]
x_var = errs[i,0]**2
y_var = errs[i,1]**2
total = total - ((-x*np.sin(theta)+(y-b) *np.cos(theta))**2/(x_var*(np.sin(theta)**2)+y_var*(np.cos(theta)**2)))
return total/2
# priors (make sure to change!)
def log_prior(params):
m,b = params
if m > 6000 and m < 10000 and b > -100 and b < 100:
return 0.0
else:
return -np.inf
def log_probability(params):
lp = log_prior(params)
if not np.isfinite(lp):
return -np.inf
else:
return lp + log_likelihood(params)
# initialising parameters and starting values
ndim = 2
nwalkers = 30
nsteps = 2000
initial_state = 10*np.random.randn(nwalkers,ndim)
# initializing the guesses
m_guess = 8000
for i in range(nwalkers):
initial_state[i,0] = initial_state[i,0] + m_guess
# running MCMC
sampler = emcee.EnsembleSampler(nwalkers,ndim,log_probability, a = .1)
sampler.run_mcmc(initial_state,nsteps,progress=True)
samples = sampler.get_chain()
# time series plots
fig, axes = plt.subplots(2,figsize=(10,7),sharex=True)
labels = ['m','b']
for i in range(ndim):
ax = axes[i]
ax.plot(samples[:,:,i], 'k', alpha=.3)
ax.set_xlim(0,len(samples))
ax.set_ylabel(labels[i])
axes[-1].set_xlabel('Step Number')
#plt.savefig('cosmosk/flux_plots/kron/MCMC/2r_param_steps.png')
plt.show()
# cleaning data, turning to flat data
burn_in = 200
thinning = 30
flat_samples = sampler.get_chain(discard = burn_in, thin = thinning, flat = True)
np.savetxt('cosmosk/flux_plots/kron/MCMC/2rMCMC.csv', flat_samples, delimiter=',')
mean_m, mean_b = np.mean(flat_samples, axis=0) # slope and intercept means
std_m, std_b = np.std(flat_samples, axis=0) # raw slope and intercept variances
# triangle plot
fig = corner.corner(flat_samples, labels = labels, truths=[4233,0])
axes = np.array(fig.axes).reshape((ndim,ndim))
axes[0,1].text(.96, .96,
'm: μ = ' + str(round(mean_m, 2)) + ', σ = ' + str(round(std_m, 2)) + '\nb: μ = ' + str(round(mean_b,2)) + ', σ = ' + str(round(std_b,2)),
transform = axes[0,1].transAxes, horizontalalignment = 'right', verticalalignment = 'top', fontsize=12, bbox=(dict(facecolor='white', alpha=.8))
)
#plt.savefig('cosmosk/flux_plots/kron/MCMC/1g_triang.png')
plt.show()
# z-scores of points wrt line
theta = np.arctan(mean_m)
zs = []
for i in range(bit_flux.size):
d = -np.sin(theta)*bit_flux[i] + np.cos(theta)*hsc_flux[i] - mean_b*np.cos(theta)
cov = (errs[i,0]*np.sin(theta))**2 + (errs[i,1]*np.cos(theta))**2
zs.append(d/np.sqrt(cov))
plt.hist(zs, density = True)
plt.xlabel("Point Z-Score from Best-Fit Line")
plt.ylabel("Frequency")
plt.title("Z-Scores of Data Points from Orthogonal Regression")
#plt.savefig('cosmosk/flux_plots/kron/MCMC/zs_2r.png')
plt.show()
# regression plot
plt.plot(bit_flux,bit_flux*mean_m + mean_b, color = 'blue', label = 'Best fit: hsc = ' + str(round(mean_m, 2)) + "*bit + " + str(round(mean_b,2)))
plt.scatter(bit_flux,hsc_flux)
plt.xlabel('SuperBIT Kron Flux')
plt.ylabel('HSC Kron Flux (non-magnitude)')
plt.title('SuperBIT and HSC Kron Fluxes, MCMC Fit')
plt.legend()
#plt.savefig('cosmosk/flux_plots/kron/compares/compare_band2_hscr.png')
plt.show()
plt.clf()
# residuals plot
plt.scatter(bit_flux, bit_flux*mean_m + mean_b - hsc_flux)
plt.axhline(y = 0, color = 'green', linestyle='--', label = 'Residual = 0')
plt.xlabel('SuperBIT Kron Flux')
plt.ylabel('Residual from BIT->HSC Regression')
plt.title('MCMC Residual Plot of hsc = ' + str(round(mean_m, 2)) + "*bit + " + str(round(mean_b,2)))
plt.legend()
#plt.savefig('cosmosk/flux_plots/kron/compares/residuals_band2_hscr.png')
plt.show()
# high residual region generator
with open('cosmosk/region_overlays/outliers/band1_x_hscg_outliers.reg', 'w') as f:
f.write('icrs\n')
for i in range(bit_flux.size):
if np.abs(zs[i])>4:
f.write('point(' + str(coords[i,0]) +',' +str(coords[i,1]) + ')\n')