-
Notifications
You must be signed in to change notification settings - Fork 0
/
step6_BinReorderReconstruct.py
260 lines (226 loc) · 10.1 KB
/
step6_BinReorderReconstruct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import os
import glob
import tqdm
import yaml
import numpy as np
import copy
import mrcfile
from scipy.spatial.transform import Rotation
from functions.multitetris_python import place, gaussian3d, rotate3d
############# USER #############
tetris_size = (128, 128, 64)
tetrises_pixel_size = 16
tomogram_pixel_size = 8 # output pixel size, in Angstroms, has to be integer for imod to accept it
sirt_iterations = 0 # set to zero if you want only WBP reconstructions
# durning the generations of segmentations (use the same parameters as tetris)
sigma = 2
gray_level_threshold = 100
# Match the output segmentation to the tomogram pixel size
adjust_to_different_size = True
output_size = (256, 256, 128)
############# CODE #############
size_ratio = tetrises_pixel_size/tomogram_pixel_size
z= int(size_ratio*tetris_size[2]) # output tomogram thickness (be careful, this has to be equal to tetris z size at the new pixel size). In other words, z = (tetris_z * tetrisPixelSize / outputPixelSize)
PixelSize = tomogram_pixel_size
simulation_list = glob.glob('parakeet/*')
parent_dir = os.path.abspath(os.curdir)
print(simulation_list)
for simulation in tqdm.tqdm(simulation_list):
os.chdir(simulation)
tilt_series_h5 = 'image.h5'
tilt_series_mrc = 'image.mrc'
if os.path.exists(tilt_series_mrc):
os.remove(tilt_series_mrc)
# export the data to mrc:
os.system('parakeet.export {} -o {}'.format(tilt_series_h5, tilt_series_mrc))
# bin the data with imod
tilt_series_binned = 'tiltseries.mrcs'
if os.path.exists(tilt_series_binned):
os.remove(tilt_series_binned)
os.system('newstack -input {} -output {} -taper 1,1 -bin {} '
'-antialias 6 -imagebinned 1.0'.format(tilt_series_mrc, tilt_series_binned, PixelSize))
# remove the original size tilt series
os.system('rm {}'.format(tilt_series_mrc))
os.chdir(parent_dir)
def createTltfile(name):
presorted = '{}/presorted.tlt'.format(name)
# read the config and get starting and step
config_file = '{}/config.yaml'.format(name)
with open(config_file, 'r') as file:
config = yaml.safe_load(file)
start_angle = int(config['scan']['start_angle'])
step_angle = int(config['scan']['step_angle'])
# create and angles list
angles = []
for i in range(0, -start_angle + step_angle, step_angle):
if i == 0:
angles.append(i)
else:
angles.append(-i)
angles.append(i)
# save as a tlt file
if os.path.exists(presorted):
os.remove(presorted)
np.savetxt(presorted, angles, fmt='%f')
return presorted
def sortTiltSeries(tsfn, tltfn, output):
# Unstacking the tilt series
args = 'e2spt_tiltstacker.py --unstack {} --tltfile {}'.format(tsfn, tltfn)
os.system(args)
tilts = np.loadtxt(tltfn)
framesfn = sorted(list(glob.glob('sptstacker_01/*.mrcs')))
indices = sorted(range(len(tilts)), key=lambda index: tilts[index])
sorted_tltfile = [list(tilts)[index] for index in indices]
sorted_fns = [framesfn[index] for index in indices]
# now creating an imod file:
np.savetxt('sorted.tlt', np.array(sorted_tltfile), fmt="%.2f")
with open('imod_file.txt', 'x') as f:
f.write('{}\n'.format(len(tilts)))
for fn in sorted_fns:
f.write(fn + '\n')
f.write('0\n')
# removing the old link to the unsorted tilt series and creating the sorted tilt series stack
command = 'newstack -fileinlist {} -output {}'.format('imod_file.txt', output)
os.system(command)
simulation_list = glob.glob('parakeet/*')
parent_dir = os.path.abspath(os.curdir)
print(simulation_list)
# sort tilt series
for simulation in tqdm.tqdm(simulation_list):
presorted = createTltfile(simulation)
os.chdir(simulation)
tsfn = 'tiltseries.mrcs'
output = 'sorted.mrcs'
# if the file exists, delete it
if os.path.exists('imod_file.txt'):
os.system('rm -rf {} {}'.format('imod_file.txt', 'sptstacker*'))
sortTiltSeries(tsfn, os.path.basename(presorted), output)
os.chdir(parent_dir)
# # reconstruct
for simulation in tqdm.tqdm(simulation_list):
os.chdir(simulation)
tsfn = 'sorted.mrcs'
reconstruction = 'sorted.rec'
tomogram = 'tomogram.mrc'
tltfile = 'sorted.tlt'
args = 'tilt -InputProjections {} -OutputFile {} -TILTFILE {} -THICKNESS {}' \
' -FalloffIsTrueSigma 1 -RADIAL 0.5,0.5 -SHIFT 0.0,0.0 -OFFSET 0.0,0.0 -MODE 1 -PERPENDICULAR' \
' -AdjustOrigin -UseGPU 0 -ActionIfGPUFails 2,2'.format(tsfn, reconstruction, tltfile, z)
if sirt_iterations != 0:
args += ' -FakeSIRTiterations {}'.format(sirt_iterations)
os.system(args)
args = 'trimvol -rx {} {}'.format(reconstruction, tomogram)
os.system(args)
os.remove(reconstruction)
os.chdir(parent_dir)
simulation_list = glob.glob('parakeet/*')
templates_list = [os.path.basename(template)[:-4] for template in glob.glob('templates/*.pdb')]
tetrises_path = 'tetrises/'
output_path = 'results/'
output_tomograms = 'results/tomograms/'
output_coordinates = 'results/coordinates/'
output_segmentations = 'results/segmentations/'
parent_dir = os.path.abspath(os.curdir)
if os.path.exists(output_path):
print('rename or remove your previous results directory')
exit(1)
else:
os.mkdir(output_path)
os.mkdir(output_tomograms)
os.mkdir(output_coordinates)
os.mkdir(output_segmentations)
# for every tomogram, copy the tomogram and all templates coordinates to the outpath
for simulation in tqdm.tqdm(simulation_list):
# print(simulation)
simulation_id = os.path.basename(simulation)
tetris = '{}{}'.format(tetrises_path, simulation_id)
# print(tetris)
tomogram_path = '{}/tomogram.mrc'.format(simulation)
# copy the tomogram path to the results:
tomogram_output = '{}{}.mrc'.format(output_tomograms,simulation_id)
os.system('cp {} {}'.format(tomogram_path, tomogram_output))
# get the coordinates ground truth
all_coordinates_files = glob.glob('{}/coordinates/*.txt'.format(tetris))
filenames = [os.path.basename(file)[:-4] for file in all_coordinates_files]
templates_coordinates_files = []
for file in all_coordinates_files:
if os.path.basename(file)[:-4] in templates_list:
templates_coordinates_files.append(file)
# now loop over the template coordinates and combine them in a single file
coordinates = None
for file in templates_coordinates_files:
new_coordinates = np.loadtxt(file, skiprows=1, delimiter=',')
if coordinates is None:
coordinates = copy.deepcopy(new_coordinates)
else:
coordinates = np.row_stack((coordinates,new_coordinates))
# adjust the coordiantes to the right pixel size
coordinates *= size_ratio
# save the coordinates
coordinates_output = '{}{}.txt'.format(output_coordinates, simulation_id)
np.savetxt(coordinates_output, coordinates, fmt='%f')
def resize(data, x, y, z):
"""Resize the data.
@param data: input data.
@param x: resized dimension x.
@param y: resized dimension y.
@param z: resized dimension z.
@return: resized data.
"""
s = data.shape
from scipy import mgrid, array
from scipy.ndimage import map_coordinates
grid = mgrid[0:s[0] - 1:x * 1j, 0:s[1] - 1:y * 1j, 0:s[2] - 1:z * 1j]
d = map_coordinates(data, grid, order=0)
return d
simulation_list = glob.glob('parakeet/*')
templates_list = [os.path.basename(template)[:-4] for template in glob.glob('templates/*.pdb')]
tetrises_path = 'tetrises/'
output_path = 'segmentations/'
# for every tomogram, copy the tomogram and all templates coordinates to the outpath
for simulation in tqdm.tqdm(simulation_list):
# print(simulation)
simulation_id = os.path.basename(simulation)
tetris = '{}{}'.format(tetrises_path, simulation_id)
# print(tetris)
# copy the tomogram path to the results:
# get the coordinates ground truth
all_coordinates_files = glob.glob('{}/coordinates/*.txt'.format(tetris))
all_angles_files = glob.glob('{}/angles/*.txt'.format(tetris))
coordinates_filenames = [os.path.basename(file)[:-4] for file in all_coordinates_files]
angles_filenames = [os.path.basename(file)[:-4] for file in all_angles_files]
output_volume = '{}{}.mrc'.format(output_segmentations, simulation_id)
big_volume = np.zeros(tetris_size, dtype=np.float32)
for coordinates_file, angles_file in zip(all_coordinates_files, all_angles_files):
basename = os.path.basename(coordinates_file)[:-4]
if basename in templates_list:
# print(coordinates_file)
# print(angles_file)
# print('volumes/templates/{}.mrc'.format(basename))
mol = 'volumes/templates/{}.mrc'.format(basename)
# read the molecule array
molecule = []
with mrcfile.open(mol) as mrc:
# molecule = mrc.data
# mrcfile reads the axis as zyx, we need xyz, so we transpose
molecule = np.transpose(mrc.data)
# reading the coordinates and angles files
coordinates = np.atleast_2d(np.loadtxt(coordinates_file, skiprows=1, delimiter=',', dtype=int))
angles = np.atleast_2d(np.loadtxt(angles_file, skiprows=1, delimiter=','))
for rotation, position in zip(angles, coordinates):
rotation_matrix = Rotation.from_euler(angles=rotation, seq='ZYZ', degrees=True).inv().as_matrix()
rotated_molecule = rotate3d(molecule, rotation_matrix)
# Apply Gaussian filter to the molecule then binarize it
rotated_binary = gaussian3d(rotated_molecule, sigma)
rotated_binary = (rotated_binary > gray_level_threshold).astype(np.int8)
place(big_volume, rotated_molecule, position)
# adjust the sampling rate
if adjust_to_different_size:
x, y, z = output_size
big_volume = resize(big_volume, x, y, z)
big_volume = (big_volume > gray_level_threshold).astype(np.int8)
# save the output volume
with mrcfile.new(output_volume) as mrc:
# transposing the data to save mrc file in the right way
# mrc.set_data(-outVolume)
mrc.set_data(np.transpose(big_volume))