Skip to content

Commit

Permalink
Add speedup changes. Mostly focused on transforms.
Browse files Browse the repository at this point in the history
  • Loading branch information
Zack Singer committed Aug 12, 2023
1 parent f7e2abd commit 59f9c26
Show file tree
Hide file tree
Showing 17 changed files with 813 additions and 372 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/container_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
cd /github/workspace/

# Install dependencies
yum install -y wget git centos-release-scl
yum install -y wget git centos-release-scl eigen3-devel

# Install xsimd headers
git clone https://github.com/xtensor-stack/xsimd.git
mkdir -p /usr/include/xsimd
cp -r xsimd/include/xsimd/* /usr/include/xsimd/

# Need to install packages that depend on centos-release-scl on a different line.
# This will use gcc==10, which is the same as what manylinux2014 uses.
Expand All @@ -23,6 +28,7 @@ ${HOME}/miniconda3/bin/conda activate hexrd

# Install conda build and create output directory
${HOME}/miniconda3/bin/conda install --override-channels -c conda-forge conda-build -y
${HOME}/miniconda3/bin/conda install -c conda-forge pybind11 -y
mkdir output

# Build the package
Expand Down
74 changes: 73 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ name: test

on:
push:
branches: [ master ]
branches: [ master, refactor ]
pull_request:
branches: [ master ]

env:
EIGEN3_INCLUDE_DIR: /eigen3/eigen-3.4.0

jobs:
pytest:
name: ${{ matrix.config.name }}
Expand Down Expand Up @@ -60,6 +63,75 @@ jobs:
pip install fabio==0.12
if: ${{ matrix.config.os == 'macos-latest'}}

- name: Install Eigen on Linux
run: |
sudo apt-get update
sudo apt-get install -y libeigen3-dev
if: runner.os == 'Linux'

- name: Install Eigen on MacOS
run: |
brew update
brew install eigen
if: runner.os == 'macOS'

- name: Install Eigen on Windows
run: |
Invoke-WebRequest -Uri https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip -o eigen.zip
Expand-Archive -Path eigen.zip -DestinationPath C:\eigen3
cp -r C:\eigen3\eigen-3.4.0 D:/a/hexrd/hexrd/eigen
if: runner.os == 'Windows'
shell: pwsh

- name: Install xsimd on Linux
run: |
sudo apt-get update
sudo apt-get install -y cmake g++ wget unzip
wget -O xsimd.tar.gz https://github.com/xtensor-stack/xsimd/archive/refs/tags/7.4.10.tar.gz
tar -xf xsimd.tar.gz
cd xsimd-7.4.10
mkdir build
cd build
cmake ..
make -j$(nproc)
sudo make install
if: runner.os == 'Linux'

- name: Install xsimd on macOS
run: |
brew install cmake wget
wget -O xsimd.tar.gz https://github.com/xtensor-stack/xsimd/archive/refs/tags/7.4.10.tar.gz
tar -xf xsimd.tar.gz
cd xsimd-7.4.10
mkdir build
cd build
cmake ..
make -j$(sysctl -n hw.physicalcpu)
sudo make install
if: runner.os == 'macOS'

- name: Install xsimd on Windows
run: |
# Download and build xsimd from source
Invoke-WebRequest -Uri https://github.com/xtensor-stack/xsimd/archive/refs/tags/7.4.10.tar.gz -o xsimd.tar.gz
tar -xf xsimd.tar.gz
mv xsimd-7.4.10 xsimd
cd xsimd
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=/xsimd ..
cmake --build .
cmake --install .
echo "XSIMD_INCLUDE_DIR=/xsimd/include" >> $GITHUB_ENV
if: runner.os == 'Windows'
shell: pwsh

- name: Set up environment variables for Eigen3 and xsimd
run: |
echo "EIGEN3_INCLUDE_DIR=$(pwd)/eigen" >> $GITHUB_ENV
echo "XSIMD_INCLUDE_DIR=$(pwd)/xsimd/include" >> $GITHUB_ENV
if: runner.os == 'Windows'

- name: Install HEXRD
run: |
pip install .
Expand Down
116 changes: 24 additions & 92 deletions hexrd/distortion/ge_41rt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,52 +9,22 @@
from .distortionabc import DistortionABC
from .registry import _RegisterDistortionClass
from .utils import newton
from hexrd.extensions import inverse_distortion

RHO_MAX = 204.8 # max radius in mm for ge detector

if USE_NUMBA:
@numba.njit(nogil=True, cache=True)
def _ge_41rt_inverse_distortion(out, in_, rhoMax, params):
maxiter = 100
prec = cnst.epsf

p0, p1, p2, p3, p4, p5 = params[0:6]
rxi = 1.0/rhoMax
for el in range(len(in_)):
xi, yi = in_[el, 0:2]
ri = np.sqrt(xi*xi + yi*yi)
if ri < cnst.sqrt_epsf:
ri_inv = 0.0
else:
ri_inv = 1.0/ri
sinni = yi*ri_inv
cosni = xi*ri_inv
ro = ri
cos2ni = cosni*cosni - sinni*sinni
sin2ni = 2*sinni*cosni
cos4ni = cos2ni*cos2ni - sin2ni*sin2ni
# newton solver iteration
for i in range(maxiter):
ratio = ri*rxi
fx = (p0*ratio**p3*cos2ni +
p1*ratio**p4*cos4ni +
p2*ratio**p5 + 1)*ri - ro # f(x)
fxp = (p0*ratio**p3*cos2ni*(p3+1) +
p1*ratio**p4*cos4ni*(p4+1) +
p2*ratio**p5*(p5+1) + 1) # f'(x)

delta = fx/fxp
ri = ri - delta
# convergence check for newton
if np.abs(delta) <= prec*np.abs(ri):
break

xi = ri*cosni
yi = ri*sinni
out[el, 0] = xi
out[el, 1] = yi

return out
@numba.njit(nogil=True, cache=True, fastmath=True)
def _ge_41rt_inverse_distortion(inputs, rhoMax, params):
radii = np.hypot(inputs[:, 0], inputs[:, 1])
inverted_radii = np.reciprocal(radii)
cosines = inputs[:, 0]*inverted_radii
cosine_double_angles = 2*np.square(cosines)-1
cosine_quadruple_angles = 2*np.square(cosine_double_angles)-1
sqrt_p_is = rhoMax / np.sqrt(-(params[0]*cosine_double_angles+params[1]*cosine_quadruple_angles+params[2]))
solutions = (2/np.sqrt(3))*sqrt_p_is*np.cos(np.arccos(((-3*np.sqrt(3)/2)*radii/sqrt_p_is))/3+4*np.pi/3)

return solutions[:, None] * inputs * inverted_radii[:, None]

@numba.njit(nogil=True, cache=True)
def _ge_41rt_distortion(out, in_, rhoMax, params):
Expand Down Expand Up @@ -87,53 +57,16 @@ def _ge_41rt_distortion(out, in_, rhoMax, params):
return out
else:
# non-numba versions for the direct and inverse distortion
def _ge_41rt_inverse_distortion(out, in_, rhoMax, params):
maxiter = 100
prec = cnst.epsf

p0, p1, p2, p3, p4, p5 = params[0:6]
rxi = 1.0/rhoMax

xi, yi = in_[:, 0], in_[:, 1]
ri = np.sqrt(xi*xi + yi*yi)
# !!! adding fix TypeError when processings list of coords
zfix = []
if np.any(ri) < cnst.sqrt_epsf:
zfix = ri < cnst.sqrt_epsf
ri[zfix] = 1.0
ri_inv = 1.0/ri
ri_inv[zfix] = 0.

sinni = yi*ri_inv
cosni = xi*ri_inv
ro = ri
cos2ni = cosni*cosni - sinni*sinni
sin2ni = 2*sinni*cosni
cos4ni = cos2ni*cos2ni - sin2ni*sin2ni

# newton solver iteration
#
# FIXME: looks like we hae a problem here,
# should iterate over single coord pairs?
for i in range(maxiter):
ratio = ri*rxi
fx = (p0*ratio**p3*cos2ni +
p1*ratio**p4*cos4ni +
p2*ratio**p5 + 1)*ri - ro # f(x)
fxp = (p0*ratio**p3*cos2ni*(p3+1) +
p1*ratio**p4*cos4ni*(p4+1) +
p2*ratio**p5*(p5+1) + 1) # f'(x)

delta = fx/fxp
ri = ri - delta

# convergence check for newton
if np.max(np.abs(delta/ri)) <= prec:
break

out[:, 0] = ri*cosni
out[:, 1] = ri*sinni

def _ge_41rt_inverse_distortion(out, inputs, rhoMax, params):
radii = np.hypot(inputs[:, 0], inputs[:, 1])
inverted_radii = np.reciprocal(radii)
cosines = inputs[:, 0]*inverted_radii
cosine_double_angles = 2*cosines*cosines-1
cosine_quadruple_angles = 2*cosine_double_angles*cosine_double_angles-1
sqrt_p_is = np.sqrt(-(rhoMax*rhoMax) / (params[0]*cosine_double_angles+params[1]*cosine_quadruple_angles+params[2]))
solutions = 2*(sqrt_p_is/np.sqrt(3))*np.cos(np.arccos((np.sqrt(3)/sqrt_p_is)*(-3*radii)/2)/3+4*np.pi/3)

out[:, 0], out[:, 1] = solutions*inputs[:, 0]*inverted_radii, solutions*inputs[:, 1]*inverted_radii
return out

def _ge_41rt_distortion(out, in_, rhoMax, params):
Expand Down Expand Up @@ -222,8 +155,7 @@ def apply_inverse(self, xy_in):
return xy_in
else:
xy_in = np.asarray(xy_in, dtype=float)
xy_out = np.empty_like(xy_in)
_ge_41rt_inverse_distortion(
xy_out, xy_in, float(RHO_MAX), np.asarray(self.params)
xy_out = inverse_distortion.ge_41rt_inverse_distortion(
xy_in, float(RHO_MAX), np.asarray(self.params[:3])
)
return xy_out
34 changes: 11 additions & 23 deletions hexrd/fitting/grains.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,7 @@ def objFuncFitGrain(gFit, gFull, gFlag,
calc_omes_dict = dict.fromkeys(instrument.detectors, [])
calc_xy_dict = dict.fromkeys(instrument.detectors)
meas_xyo_all = []
det_keys_ordered = []
for det_key, panel in instrument.detectors.items():
det_keys_ordered.append(det_key)

rMat_d, tVec_d, chi, tVec_s = extract_detector_transformation(
instrument.detector_parameters[det_key])

Expand All @@ -202,10 +199,7 @@ def objFuncFitGrain(gFit, gFull, gFlag,
# WARNING: hkls and derived vectors below must be columnwise;
# strictly necessary??? change affected APIs instead?
# <JVB 2017-03-26>
hkls = np.atleast_2d(
np.vstack([x[2] for x in results])
).T

hkls = np.atleast_2d(np.vstack([x[2] for x in results])).T
meas_xyo = np.atleast_2d(
np.vstack([np.r_[x[7], x[6][-1]] for x in results])
)
Expand All @@ -218,7 +212,6 @@ def objFuncFitGrain(gFit, gFull, gFlag,
meas_omes = meas_xyo[:, 2]
xy_unwarped = panel.distortion.apply(meas_xyo[:, :2])
meas_xyo = np.vstack([xy_unwarped.T, meas_omes]).T
pass

# append to meas_omes
meas_xyo_all.append(meas_xyo)
Expand All @@ -229,34 +222,29 @@ def objFuncFitGrain(gFit, gFull, gFlag,
# 3. rotate back into CRYSTAL frame and normalize to unit magnitude
# IDEA: make a function for this sequence of operations with option for
# choosing ouput frame (i.e. CRYSTAL vs SAMPLE vs LAB)
gVec_c = np.dot(bMat, hkls)
gVec_s = np.dot(vMat_s, np.dot(rMat_c, gVec_c))
gHat_c = mutil.unitVector(np.dot(rMat_c.T, gVec_s))
gHat_c = mutil.unitVector(rMat_c.T @ vMat_s @ (rMat_c @ (bMat @ hkls)))

# !!!: check that this operates on UNWARPED xy
match_omes, calc_omes = matchOmegas(
_, calc_omes = matchOmegas(
meas_xyo, hkls, chi, rMat_c, bMat, wavelength,
vInv=vInv_s, beamVec=bVec, etaVec=eVec,
omePeriod=omePeriod)

# append to omes dict
calc_omes_dict[det_key] = calc_omes

# TODO: try Numba implementations
rMat_s = xfcapi.make_sample_rmat(chi, calc_omes)
calc_xy = xfcapi.gvec_to_xy(gHat_c.T,
rMat_d, rMat_s, rMat_c,
tVec_d, tVec_s, tVec_c,
beam_vec=bVec)
calc_xy = xfcapi.gvec_to_xy_from_angles(chi, calc_omes, gHat_c.T,
rMat_d, rMat_c,
tVec_d, tVec_s, tVec_c.squeeze(),
beam_vec=bVec)

# append to xy dict
calc_xy_dict[det_key] = calc_xy
pass

# stack results to concatenated arrays
calc_omes_all = np.hstack([calc_omes_dict[k] for k in det_keys_ordered])
calc_omes_all = np.hstack([calc_omes_dict[k] for k in instrument.detectors.keys()])
tmp = []
for k in det_keys_ordered:
for k in instrument.detectors.keys():
if calc_xy_dict[k] is not None:
tmp.append(calc_xy_dict[k])
calc_xy_all = np.vstack(tmp)
Expand All @@ -274,8 +262,8 @@ def objFuncFitGrain(gFit, gFull, gFlag,
if return_value_flag in [None, 1]:
retval = np.hstack([calc_xy_all, calc_omes_all.reshape(npts, 1)])
else:
rd = dict.fromkeys(det_keys_ordered)
for det_key in det_keys_ordered:
rd = dict.fromkeys(instrument.detectors.keys())
for det_key in instrument.detectors.keys():
rd[det_key] = {'calc_xy': calc_xy_dict[det_key],
'calc_omes': calc_omes_dict[det_key]}
retval = rd
Expand Down
23 changes: 8 additions & 15 deletions hexrd/gridutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,31 +62,24 @@ def cellIndices(edges, points_1d):
must be mapped to the same branch cut, and
abs(edges[0] - edges[-1]) = 2*pi
"""
ztol = sqrt_epsf

assert len(edges) >= 2, "must have at least 2 edges"

points_1d = np.r_[points_1d].flatten()
delta = float(edges[1] - edges[0])
delta = edges[1] - edges[0]

if delta > 0:
on_last_rhs = points_1d >= edges[-1] - ztol
points_1d[on_last_rhs] = points_1d[on_last_rhs] - ztol
points_1d[points_1d >= edges[-1] - sqrt_epsf] -= sqrt_epsf
idx = np.floor((points_1d - edges[0]) / delta)
elif delta < 0:
on_last_rhs = points_1d <= edges[-1] + ztol
points_1d[on_last_rhs] = points_1d[on_last_rhs] + ztol
points_1d[points_1d <= edges[-1] + sqrt_epsf] += sqrt_epsf
idx = np.ceil((points_1d - edges[0]) / delta) - 1
else:
raise RuntimeError("edges array gives delta of 0")

# # mark points outside range with NaN
# off_lo = idx < 0
# off_hi = idx >= len(edges) - 1
# if np.any(off_lo):
# idx[off_lo] = np.nan
# if np.any(off_hi):
# idx[off_hi] = np.nan
try:
return np.array(idx, dtype=int)
except Exception as e:
print(e)
pass
return np.array(idx, dtype=int)


Expand Down
2 changes: 1 addition & 1 deletion hexrd/imageseries/load/framecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def shape(self):
return self._shape

def __getitem__(self, key):
return self._framelist[key].toarray()
return self._framelist[key]

def __iter__(self):
return ImageSeriesIterator(self)
Expand Down
Loading

0 comments on commit 59f9c26

Please sign in to comment.