Squashed commit of implementing upsampling/downsampling kernels
daurer committed May 6, 2022
1 parent fc3c293 commit 3f756b5
Showing 28 changed files with 1,636 additions and 277 deletions.
45 changes: 38 additions & 7 deletions archive/engines/projectional_pycuda_streams.py
@@ -388,6 +388,11 @@ def engine_prepare(self):
prep.err_exit_gpu = gpuarray.to_gpu(prep.err_exit)
if self.do_position_refinement:
prep.error_state_gpu = gpuarray.empty_like(prep.err_fourier_gpu)
kern = self.kernels[prep.label]
if kern.resample:
kern.aux_tmp1_gpu = gpuarray.to_gpu(kern.aux_tmp1)
kern.aux_tmp2_gpu = gpuarray.to_gpu(kern.aux_tmp2)

ma = self.ma.S[dID].data.astype(np.float32)
prep.ma = cuda.pagelocked_empty(ma.shape, ma.dtype, order="C", mem_flags=4)
prep.ma[:] = ma
@@ -505,13 +510,20 @@ def engine_iterate(self, num=1):
pbound = self.pbound_scan[prep.label]
aux = kern.aux
PROP = kern.PROP

# set streams
queue = streamdata.queue
FUK.queue = queue
AWK.queue = queue
POK.queue = queue
PROP.queue = queue
resample = kern.resample
if resample:
ABS_SQR = kern.ABS_SQR
RSMP = kern.RSMP
RSMP.queue = queue
aux_tmp1 = kern.aux_tmp1_gpu
aux_tmp2 = kern.aux_tmp2_gpu

# get addresses and auxiliary array
addr = prep.addr_gpu
@@ -546,7 +558,12 @@
t1 = time.time()
AWK.build_aux_no_ex(aux, addr, ob, pr)
PROP.fw(aux, aux)
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.log_likelihood(aux_tmp2, addr, mag, ma, err_phot, aux_is_intensity=True)
else:
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
self.benchmark.F_LLerror += time.time() - t1

## prep + forward FFT
@@ -561,19 +578,33 @@

## Deviation from measured data
t1 = time.time()
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.fourier_error(aux_tmp2, addr, mag, ma, ma_sum, aux_is_intensity=True)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux_tmp2, addr, mag, ma, err_fourier, pbound, mult=False)
RSMP.resample(aux_tmp1, aux_tmp2)
# aux *= aux_tmp1, but executed on the stream
aux._elwise_multiply(aux_tmp1, aux, stream=queue)
else:
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
self.benchmark.C_Fourier_update += time.time() - t1
streamdata.record_done_ma(dID)

## Backward FFT
t1 = time.time()
PROP.bw(aux, aux)
## apply changes
#AWK.build_exit(aux, addr, ob, pr, ex, alpha=self.p.alpha)
AWK.make_exit(aux, addr, ob, pr, ex, c_a=self._b, c_po=self._a, c_e=-(self._a + self._b))
FUK.exit_error(aux, addr)
if resample:
ABS_SQR(aux, aux_tmp1, stream=queue)
RSMP.resample(aux_tmp2, aux_tmp1)
FUK.exit_error(aux_tmp2, addr, aux_is_intensity=True)
else:
FUK.exit_error(aux, addr)
FUK.error_reduce(addr, err_exit)
self.benchmark.E_Build_exit += time.time() - t1

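The resampling path above follows one pattern for the Fourier magnitude constraint with a binned detector: take |aux|^2 on the fine simulation grid (ABS_SQR), integrate it onto detector-sized pixels (RSMP), evaluate the error and the magnitude renormalisation there (fmag_all_update with mult=False returns the per-pixel factor instead of applying it), then upsample the factor and multiply it into the complex aux wave. A minimal NumPy sketch of the idea, not part of the commit, with a plain sqrt ratio standing in for the actual fmag factor (which also involves the mask, pbound and the kernel's own normalisation):

import numpy as np

r = 2                                                     # assumed binning factor
aux = (np.random.rand(8, 8) + 1j * np.random.rand(8, 8)).astype(np.complex64)
I_meas = np.random.rand(4, 4).astype(np.float32)          # measured (binned) intensity

# |aux|**2 on the fine grid, integrated over r x r blocks onto detector pixels
I_fine = np.abs(aux) ** 2
I_det = I_fine.reshape(4, r, 4, r).sum(axis=(1, 3))

# per-detector-pixel renormalisation (stand-in for fmag_all_update(..., mult=False))
fm = np.sqrt(I_meas / (I_det + 1e-12))

# replicate the factor back onto the fine grid and apply it to the complex wave
aux *= fm.repeat(r, axis=0).repeat(r, axis=1)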
24 changes: 24 additions & 0 deletions ptypy/accelerate/base/array_utils.py
@@ -3,6 +3,7 @@
'''
import numpy as np
from scipy import ndimage as ndi
from ptypy import utils as u


def dot(A, B, acc_dtype=np.float64):
@@ -148,3 +149,26 @@ def crop_pad_2d_simple(A, B):
b1, b2 = B.shape[-2:]
offset = [0, a1 // 2 - b1 // 2, a2 // 2 - b2 // 2]
fill3D(A, B, offset)

def resample(A, B):
"""
Resamples the last two dimensions of B onto the shape of A and places the result in A.
The ratio between the shapes needs to be a power of 2 along the last two dimensions.
upsampling (A larger than B): nearest neighbour interpolation
downsampling (B larger than A): integrate over neighbouring regions
"""
assert A.ndim > 2, "Arrays must have more than 2 dimensions"
assert B.ndim > 2, "Arrays must have more than 2 dimensions"
assert A.shape[:-2] == B.shape[:-2], "Arrays must have the same shape except along the last 2 dimensions"
assert A.shape[-2] == A.shape[-1], "Last two dimensions must be of equal length"
assert B.shape[-2] == B.shape[-1], "Last two dimensions must be of equal length"
# same sampling, no need to call this function
assert A.shape != B.shape, "A and B have the same shape, no need to do any resampling"
# upsampling
if A.shape[-1] > B.shape[-1]:
resample = A.shape[-1] // B.shape[-1]
A[:] = u.repeat_2d(B, resample) / (resample**2)
# downsampling
elif A.shape[-1] < B.shape[-1]:
resample = B.shape[-1] // A.shape[-1]
A[:] = u.rebin_2d(B, resample) * (resample**2)
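A minimal NumPy sketch of the behaviour described in the docstring, not part of the commit and assuming u.repeat_2d replicates pixels and u.rebin_2d averages blocks (the factors of resample**2 above then make upsampling spread and downsampling integrate the counts):

import numpy as np

def resample_ref(A, B):
    # Nearest-neighbour upsample or block-integrating downsample of the last
    # two axes of B into A; shapes differ by a power of 2 along those axes.
    if A.shape[-1] > B.shape[-1]:
        r = A.shape[-1] // B.shape[-1]
        # repeat each pixel r x r times, divided so the total counts are preserved
        A[:] = B.repeat(r, axis=-2).repeat(r, axis=-1) / r**2
    elif A.shape[-1] < B.shape[-1]:
        r = B.shape[-1] // A.shape[-1]
        n = A.shape[-1]
        # integrate (sum) over r x r blocks
        A[:] = B.reshape(B.shape[:-2] + (n, r, n, r)).sum(axis=(-3, -1))

# example: bin a (2, 8, 8) stack down onto a (2, 4, 4) stack
B = np.arange(2 * 8 * 8, dtype=np.float32).reshape(2, 8, 8)
A = np.zeros((2, 4, 4), dtype=np.float32)
resample_ref(A, B)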
39 changes: 36 additions & 3 deletions ptypy/accelerate/base/engines/ML_serial.py
@@ -23,6 +23,7 @@
from ptypy.engines import register
from ptypy.accelerate.base.kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel
from ptypy.accelerate.base import address_manglers
from ptypy.accelerate.base import array_utils as au


__all__ = ['ML_serial']
@@ -90,6 +91,13 @@ def _setup_kernels(self):
kern.a = np.zeros(ash, dtype=np.complex64)
kern.b = np.zeros(ash, dtype=np.complex64)

# create extra array for resampling (if needed)
kern.resample = scan.resample > 1
if kern.resample:
ish = (ash[0],) + tuple(geo.shape//scan.resample)
kern.aux_tmp1 = np.zeros(ash, dtype=np.float32)
kern.aux_tmp2 = np.zeros(ish, dtype=np.float32)

# setup kernels, one for each SCAN.
kern.GDK = GradientDescentKernel(aux, nmodes)
kern.GDK.allocate()
@@ -382,6 +390,13 @@ def prepare(self):
prep = self.engine.diff_info[d.ID]
prep.weights = (self.Irenorm * self.engine.ma.S[d.ID].data
/ (1. / self.Irenorm + d.data)).astype(d.data.dtype)
prep.intensity = self.engine.di.S[d.ID].data
kern = self.engine.kernels[prep.label]
if kern.resample:
prep.weights2 = np.zeros((prep.weights.shape[0],) + kern.aux_tmp1.shape[-2:], dtype=prep.weights.dtype)
au.resample(prep.weights2, prep.weights)
prep.intensity2 = np.zeros((prep.intensity.shape[0],) + kern.aux_tmp1.shape[-2:], dtype=prep.weights.dtype)
au.resample(prep.intensity2, prep.intensity)

def __del__(self):
"""
@@ -423,7 +438,8 @@ def new_grad(self):

# get addresses and auxiliary array
addr = prep.addr
w = prep.weights
w = prep.weights2
I = prep.intensity2
err_phot = prep.err_phot
fic = prep.float_intens_coeff

@@ -432,6 +448,12 @@
obg = ob_grad.S[oID].data
pr = self.engine.pr.S[pID].data
prg = pr_grad.S[pID].data

# resampling
if kern.resample:
aux_tmp1 = kern.aux_tmp1
aux_tmp2 = kern.aux_tmp2

I = prep.I

# make propagated exit (to buffer)
@@ -440,7 +462,13 @@
# forward prop
aux[:] = FW(aux)

GDK.make_model(aux, addr)
if kern.resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
au.resample(aux_tmp1, aux_tmp2)
GDK.make_model(aux_tmp1, addr, aux_is_intensity=True)
else:
GDK.make_model(aux, addr)

if self.p.floating_intensities:
GDK.floating_intensity(addr, w, I, fic)
@@ -505,14 +533,19 @@ def poly_line_coeffs(self, c_ob_h, c_pr_h):
# get addresses and auxiliary array
addr = prep.addr
w = prep.weights
I = prep.intensity
fic = prep.float_intens_coeff

# local references
ob = self.ob.S[oID].data
ob_h = c_ob_h.S[oID].data
pr = self.pr.S[pID].data
pr_h = c_pr_h.S[pID].data
I = self.di.S[dID].data

# resampling
if kern.resample:
w = prep.weights2
I = prep.intensity2

# make propagated exit (to buffer)
AWK.build_aux_no_ex(f, addr, ob, pr, add=False)
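In the ML path above, model and data are compared on the fine simulation grid: the fine-grid intensity is binned down to detector pixels and spread back before make_model, while prepare() spreads the measured intensity and weights onto the fine grid once. A short NumPy sketch of that round trip, not part of the commit:

import numpy as np

r = 2                                                  # assumed binning factor
I_fine = np.random.rand(8, 8).astype(np.float32)       # |aux|**2 on the fine grid

# integrate over r x r blocks -> intensity per detector pixel
I_det = I_fine.reshape(4, r, 4, r).sum(axis=(1, 3))

# spread back onto the fine grid -> every fine pixel carries its block mean,
# i.e. what an r x r binned detector would attribute to it
I_model = I_det.repeat(r, axis=0).repeat(r, axis=1) / r**2

# the measured intensity and weights receive the same spreading in prepare(),
# so the gradient kernels see model and data on the same grid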
47 changes: 41 additions & 6 deletions ptypy/accelerate/base/engines/projectional_serial.py
@@ -178,8 +178,18 @@ def _setup_kernels(self):
aux = np.zeros(ash, dtype=np.complex64)
kern.aux = aux

# create extra array for resampling (if needed)
kern.resample = scan.resample > 1
if kern.resample:
ish = (ash[0],) + tuple(geo.shape//scan.resample)
kern.aux_tmp1 = np.zeros(ash, dtype=np.float32)
kern.aux_tmp2 = np.zeros(ish, dtype=np.float32)
aux_f = kern.aux_tmp2 # making sure to pass the correct shapes to FUK
else:
aux_f = aux

# setup kernels, one for each SCAN.
kern.FUK = FourierUpdateKernel(aux, nmodes)
kern.FUK = FourierUpdateKernel(aux_f, nmodes)
kern.FUK.allocate()

kern.POK = PoUpdateKernel()
@@ -266,6 +276,12 @@ def engine_iterate(self, num=1):
pbound = self.pbound_scan[prep.label]
aux = kern.aux

# resampling
resample = kern.resample
if resample:
aux_tmp1 = kern.aux_tmp1
aux_tmp2 = kern.aux_tmp2

# local references
ma = prep.ma
ob = self.ob.S[oID].data
@@ -277,7 +293,12 @@
t1 = time.time()
AWK.build_aux_no_ex(aux, addr, ob, pr)
aux[:] = FW(aux)
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.log_likelihood(aux_tmp2, addr, mag, ma, err_phot, aux_is_intensity=True)
else:
FUK.log_likelihood(aux, addr, mag, ma, err_phot)
self.benchmark.F_LLerror += time.time() - t1

## build auxiliary wave
@@ -292,9 +313,18 @@

## Deviation from measured data
t1 = time.time()
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.fourier_error(aux_tmp2, addr, mag, ma, ma_sum, aux_is_intensity=True)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux_tmp2, addr, mag, ma, err_fourier, pbound, mult=False)
au.resample(aux_tmp1, aux_tmp2)
aux *= aux_tmp1
else:
FUK.fourier_error(aux, addr, mag, ma, ma_sum)
FUK.error_reduce(addr, err_fourier)
FUK.fmag_all_update(aux, addr, mag, ma, err_fourier, pbound)
self.benchmark.C_Fourier_update += time.time() - t1

## backward FFT
@@ -305,7 +335,12 @@
## build exit wave
t1 = time.time()
AWK.make_exit(aux, addr, ob, pr, ex, c_a=self._b, c_po=self._a, c_e=-(self._a+self._b))
FUK.exit_error(aux,addr)
if resample:
aux_tmp1 = np.abs(aux)**2
au.resample(aux_tmp2, aux_tmp1)
FUK.exit_error(aux_tmp2, addr, aux_is_intensity=True)
else:
FUK.exit_error(aux,addr)
FUK.error_reduce(addr, err_exit)
self.benchmark.E_Build_exit += time.time() - t1

