diff --git a/topaz/commands/denoise.py b/topaz/commands/denoise.py index 5c81d6c..0434929 100644 --- a/topaz/commands/denoise.py +++ b/topaz/commands/denoise.py @@ -16,7 +16,11 @@ from topaz.utils.data.loader import load_image from topaz.utils.image import downsample import topaz.mrc as mrc -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass name = 'denoise' help = 'denoise micrographs with various denoising algorithms' @@ -265,13 +269,12 @@ def make_hdf5_datasets(path, paired=True, preload=False, holdout=0.1, cutoff=0): def denoise_image(mic, models, lowpass=1, cutoff=0, gaus=None, inv_gaus=None, deconvolve=False , deconv_patch=1, patch_size=-1, padding=0, normalize=False - , use_cuda=False): + , device='cpu'): if lowpass > 1: mic = dn.lowpass(mic, lowpass) mic = torch.from_numpy(mic) - if use_cuda: - mic = mic.cuda() + mic = mic.to(device) # normalize and remove outliers mu = mic.mean() @@ -316,8 +319,8 @@ def main(args): set_num_threads(num_threads) ## set the device - use_cuda = topaz.cuda.set_device(args.device) - print('# using device={} with cuda={}'.format(args.device, use_cuda), file=sys.stderr) + device = topaz.gpu.set_device(args.device) + print('# Using device={} with GPU={}'.format(args.device, device), file=sys.stderr) cutoff = args.pixel_cutoff # pixel truncation limit @@ -393,8 +396,7 @@ def main(args): else: raise Exception('Unknown architecture: ' + arch) - if use_cuda: - model = model.cuda() + model = model.to(device) # train optim = args.optim @@ -417,7 +419,7 @@ def main(args): , criteria=criteria , num_epochs=num_epochs , dataset_val=dataset_val - , use_cuda=use_cuda + , device=device , num_workers=num_workers , shuffle=shuffle ) @@ -428,7 +430,7 @@ def main(args): , criteria=criteria , num_epochs=num_epochs , dataset_val=dataset_val - , use_cuda=use_cuda + , device=device , num_workers=num_workers , shuffle=shuffle ) @@ -446,8 +448,7 @@ def main(args): model.cpu() model.eval() torch.save(model, path) - if use_cuda: - model.cuda() + model.to(device) models = [model] @@ -461,8 +462,7 @@ def main(args): model = dn.load_model(arg) model.eval() - if use_cuda: - model.cuda() + model.to(device) models.append(model) @@ -481,15 +481,13 @@ def main(args): gaus = args.gaussian if gaus > 0: gaus = dn.GaussianDenoise(gaus) - if use_cuda: - gaus.cuda() + gaus.to(device) else: gaus = None inv_gaus = args.inv_gaussian if inv_gaus > 0: inv_gaus = dn.InvGaussianFilter(inv_gaus) - if use_cuda: - inv_gaus.cuda() + inv_gaus.to(device) else: inv_gaus = None deconvolve = args.deconvolve @@ -516,7 +514,7 @@ def main(args): , inv_gaus=inv_gaus, deconvolve=deconvolve , deconv_patch=deconv_patch , patch_size=ps, padding=padding, normalize=normalize - , use_cuda=use_cuda + , device=device ) denoised[i] = mic @@ -538,7 +536,7 @@ def main(args): return # make the output directory if it doesn't exist - if not os.path.exists(args.output): + if args.output and (not os.path.exists(args.output)): os.makedirs(args.output) for path in args.micrographs: @@ -550,7 +548,7 @@ def main(args): , inv_gaus=inv_gaus, deconvolve=deconvolve , deconv_patch=deconv_patch , patch_size=ps, padding=padding, normalize=normalize - , use_cuda=use_cuda + , device=device ) # write the micrograph diff --git a/topaz/commands/denoise3d.py b/topaz/commands/denoise3d.py index b47cb47..8e7077b 100644 --- a/topaz/commands/denoise3d.py +++ b/topaz/commands/denoise3d.py @@ -19,7 +19,11 @@ from topaz.utils.data.loader import load_image from topaz.utils.image import downsample import topaz.mrc as mrc -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass from topaz.denoise import UDenoiseNet3D from topaz.filters import GaussianDenoise @@ -73,19 +77,20 @@ def add_arguments(parser=None): return parser -def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, use_cuda=False): +def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, device='cpu'): c = 0 loss_accum = 0 model.train() +# if 'ipex' in dir(): +# model, optim = ipex.optimize(model, optimizer=optim) for batch_idx , (source,target), in enumerate(iterator): b = source.size(0) loss_mb = 0 - if use_cuda: - source = source.cuda() - target = target.cuda() + source = source.to(device) + target = target.to(device) denoised_source = model(source) loss = cost_func(denoised_source,target) @@ -108,7 +113,7 @@ def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, u return loss_accum -def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, use_cuda=False): +def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, device='cpu'): c = 0 loss_accum = 0 @@ -119,9 +124,8 @@ def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, use_cuda= b = source.size(0) loss_mb = 0 - if use_cuda: - source = source.cuda() - target = target.cuda() + source = source.to(device) + target = target.to(device) denoised_source = model(source) loss = cost_func(denoised_source,target) @@ -408,7 +412,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device # initialize the model print('# initializing model...', file=log) model_base = UDenoiseNet3D(base_width=base_kernel_width) - model,use_cuda,num_devices = set_device(model_base, device) + model,use_device,num_devices = set_device(model_base, device) if cost_func == 'L2': cost_func = nn.MSELoss() @@ -469,7 +473,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device epoch=epoch, num_epochs=num_epochs, N=N_train, - use_cuda=use_cuda) + device=use_device) line = '\t'.join([str(epoch+1), 'train', str(epoch_loss_accum)]) print(line, file=output) @@ -482,7 +486,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device epoch=epoch, num_epochs=num_epochs, N=N_test, - use_cuda=use_cuda) + device=use_device) line = '\t'.join([str(epoch+1), 'test', str(epoch_loss_accum)]) print(line, file=output) @@ -491,8 +495,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device if save_prefix is not None and (epoch+1)%save_interval == 0: model.eval().cpu() save_model(model, epoch+1, save_prefix, digits=digits) - if use_cuda: - model.cuda() + model.to(use_device) print('# training completed!', file=log) @@ -557,17 +560,29 @@ def load_model(path, base_kernel_width=11): def set_device(model, device, log=sys.stderr): # set the device or devices d = device - use_cuda = (d != -1) and torch.cuda.is_available() + use_device = 'cpu' + if d != -1: + if torch.cuda.is_available(): + import torch.cuda as acc + use_device = 'cuda' + elif hasattr(torch,'xpu'): + if torch.xpu.is_available(): + import torch.xpu as acc + use_device = 'xpu' + else: + import torch.cpu as acc + else: + import torch.cpu as acc num_devices = 1 - if use_cuda: - device_count = torch.cuda.device_count() + if use_device != 'cpu': + device_count = acc.device_count() try: if d >= 0: assert d < device_count - torch.cuda.set_device(d) - print('# using CUDA device:', d, file=log) + acc.set_device(d) + print('# using GPU device:', d, file=log) elif d == -2: - print('# using all available CUDA devices:', device_count, file=log) + print('# using all available GPU devices:', device_count, file=log) num_devices = device_count model = nn.DataParallel(model) else: @@ -579,10 +594,9 @@ def set_device(model, device, log=sys.stderr): print('ERROR: Something went wrong with setting the compute device', file=log) sys.exit(2) - if use_cuda: - model.cuda() + model.to(use_device) - return model, use_cuda, num_devices + return model, use_device, num_devices class PatchDataset: @@ -756,7 +770,9 @@ def main(args): model = nn.Sequential(model, GaussianDenoise(gaussian_sigma, dims=3)) model.eval() - model, use_cuda, num_devices = set_device(model, args.device) + model, use_device, num_devices = set_device(model, args.device) +# if 'ipex' in dir(): +# model = ipex.optimize(model) #batch_size = args.batch_size #batch_size *= num_devices @@ -783,4 +799,4 @@ def main(args): if __name__ == '__main__': parser = add_arguments() args = parser.parse_args() - main(args) \ No newline at end of file + main(args) diff --git a/topaz/commands/extract.py b/topaz/commands/extract.py index fdd13ff..0cd34ec 100644 --- a/topaz/commands/extract.py +++ b/topaz/commands/extract.py @@ -18,7 +18,11 @@ from topaz.algorithms import non_maximum_suppression, match_coordinates from topaz.metrics import average_precision import topaz.predict -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass name = 'extract' help = 'extract particles from segmented images or segment and extract in one step with a trained classifier' @@ -187,15 +191,16 @@ def stream_images(paths): def score_images(model, paths, device=-1, batch_size=1): if model is not None and model != 'none': # score each image with the model ## set the device - use_cuda = topaz.cuda.set_device(device) + device = topaz.gpu.set_device(device) ## load the model from topaz.model.factory import load_model model = load_model(model) model.eval() model.fill() - if use_cuda: - model.cuda() - scores = topaz.predict.score_stream(model, stream_images(paths), use_cuda=use_cuda + model.to(device) +# if 'ipex' in dir(): +# model = ipex.optimize(model) + scores = topaz.predict.score_stream(model, stream_images(paths), device=device , batch_size=batch_size) else: # load scores directly scores = stream_images(paths) @@ -311,4 +316,4 @@ def main(args): if __name__ == '__main__': parser = add_arguments() args = parser.parse_args() - main(args) \ No newline at end of file + main(args) diff --git a/topaz/commands/normalize.py b/topaz/commands/normalize.py index 0fd5caa..7ff9fbc 100644 --- a/topaz/commands/normalize.py +++ b/topaz/commands/normalize.py @@ -12,7 +12,11 @@ from topaz.stats import normalize from topaz.utils.data.loader import load_image from topaz.utils.image import downsample, save_image -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass name = 'normalize' help = 'normalize a set of images using the 2-component Gaussian mixture model' @@ -48,7 +52,7 @@ def add_arguments(parser=None): class Normalize: def __init__(self, dest, scale, affine, num_iters, alpha, beta - , sample, metadata, formats, use_cuda): + , sample, metadata, formats, device): self.dest = dest self.scale = scale self.affine = affine @@ -58,7 +62,7 @@ def __init__(self, dest, scale, affine, num_iters, alpha, beta self.sample = sample self.metadata = metadata self.formats = formats - self.use_cuda = use_cuda + self.device = device def __call__(self, path): # load the image @@ -72,7 +76,7 @@ def __call__(self, path): if self.affine: method = 'affine' x,metadata = normalize(x, alpha=self.alpha, beta=self.beta, num_iters=self.num_iters - , method=method, sample=self.sample, use_cuda=self.use_cuda) + , method=method, sample=self.sample, device=self.device) # save the image and the metadata name,_ = os.path.splitext(os.path.basename(path)) @@ -116,9 +120,9 @@ def main(args): from topaz.torch import set_num_threads set_num_threads(num_threads) - # set CUDA device - use_cuda = topaz.cuda.set_device(args.device) - if use_cuda: + # set GPU device + device = topaz.gpu.set_device(args.device) + if device != 'cpu': # when using GPU, turn off multiple processes num_workers = 0 @@ -126,7 +130,7 @@ def main(args): os.makedirs(dest) process = Normalize(dest, scale, affine, num_iters, alpha, beta - , sample, metadata, formats, use_cuda) + , sample, metadata, formats, device) if num_workers > 1: pool = mp.Pool(num_workers) diff --git a/topaz/commands/segment.py b/topaz/commands/segment.py index 66471d3..ff4762e 100644 --- a/topaz/commands/segment.py +++ b/topaz/commands/segment.py @@ -12,7 +12,11 @@ import torch from topaz.utils.data.loader import load_image -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass name = 'segment' help = 'segment images using a trained region classifier' @@ -43,7 +47,7 @@ def main(args): set_num_threads(num_threads) ## set the device - use_cuda = topaz.cuda.set_device(args.device) + device = topaz.gpu.set_device(args.device) ## load the model from topaz.model.factory import load_model @@ -51,8 +55,9 @@ def main(args): model.eval() model.fill() - if use_cuda: - model.cuda() + model.to(device) +# if 'ipex' in dir(): +# model = ipex.optimize(model) ## make output directory if doesn't exist destdir = args.destdir @@ -68,8 +73,7 @@ def main(args): ## process image with the model with torch.no_grad(): X = torch.from_numpy(np.array(image, copy=False)).unsqueeze(0).unsqueeze(0) - if use_cuda: - X = X.cuda() + X = X.to(device) score = model(X).data[0,0].cpu().numpy() im = Image.fromarray(score) @@ -83,4 +87,4 @@ def main(args): if __name__ == '__main__': parser = add_arguments() args = parser.parse_args() - main(args) \ No newline at end of file + main(args) diff --git a/topaz/commands/train.py b/topaz/commands/train.py index 7b83596..1acaabe 100644 --- a/topaz/commands/train.py +++ b/topaz/commands/train.py @@ -19,7 +19,11 @@ from topaz.utils.printing import report from topaz.utils.data.loader import load_images_from_list from topaz.utils.data.coordinates import match_coordinates_to_images -import topaz.cuda +import topaz.gpu +try: + import intel_extension_for_pytorch as ipex +except: + pass name = 'train' help = 'train region classifier from images with labeled coordinates' @@ -508,7 +512,7 @@ def make_data_iterators(train_images, train_targets, test_images, test_targets return train_iterator, test_iterator -def evaluate_model(classifier, criteria, data_iterator, use_cuda=False): +def evaluate_model(classifier, criteria, data_iterator, device='cpu'): from topaz.metrics import average_precision classifier.eval() @@ -523,9 +527,8 @@ def evaluate_model(classifier, criteria, data_iterator, use_cuda=False): for X,Y in data_iterator: Y = Y.view(-1) Y_true.append(Y.numpy()) - if use_cuda: - X = X.cuda() - Y = Y.cuda() + X = X.to(device) + Y = Y.to(device) score = classifier(X).view(-1) @@ -551,12 +554,11 @@ def evaluate_model(classifier, criteria, data_iterator, use_cuda=False): return loss, precision, tpr, fpr, auprc -def fit_epoch(step_method, data_iterator, epoch=1, it=1, use_cuda=False, output=sys.stdout): +def fit_epoch(step_method, data_iterator, epoch=1, it=1, device='cpu', output=sys.stdout): for X,Y in data_iterator: Y = Y.view(-1) - if use_cuda: - X = X.cuda() - Y = Y.cuda() + X = X.to(device) + Y = Y.to(device) metrics = step_method.step(X, Y) line = '\t'.join([str(epoch), str(it), 'train'] + [str(metric) for metric in metrics] + ['-']) print(line, file=output) @@ -566,7 +568,7 @@ def fit_epoch(step_method, data_iterator, epoch=1, it=1, use_cuda=False, output= def fit_epochs(classifier, criteria, step_method, train_iterator, test_iterator, num_epochs - , save_prefix=None, use_cuda=False, output=sys.stdout): + , save_prefix=None, device='cpu', output=sys.stdout): ## fit the model, report train/test stats, save model if required header = step_method.header line = '\t'.join(['epoch', 'iter', 'split'] + header + ['auprc']) @@ -576,13 +578,15 @@ def fit_epochs(classifier, criteria, step_method, train_iterator, test_iterator, for epoch in range(1,num_epochs+1): ## update the model classifier.train() +# if 'ipex' in dir(): +# classifier, step_method.optim = ipex.optimize(classifier, optimizer=step_method.optim) it = fit_epoch(step_method, train_iterator, epoch=epoch, it=it - , use_cuda=use_cuda, output=output) + , device=device, output=output) ## measure validation performance if test_iterator is not None: loss,precision,tpr,fpr,auprc = evaluate_model(classifier, criteria, test_iterator - , use_cuda=use_cuda) + , device=device) line = '\t'.join([str(epoch), str(it), 'test', str(loss)] + ['-']*(len(header)-4) + [str(precision), str(tpr), str(fpr), str(auprc)]) print(line, file=output) output.flush() @@ -594,8 +598,7 @@ def fit_epochs(classifier, criteria, step_method, train_iterator, test_iterator, path = prefix + ('_epoch{:0'+str(digits)+'}.sav').format(epoch) classifier.cpu() torch.save(classifier, path) - if use_cuda: - classifier.cuda() + classifier.to(device) def main(args): @@ -614,20 +617,18 @@ def main(args): ## set the device """ - use_cuda = False if args.device >= 0: - use_cuda = torch.cuda.is_available() - if use_cuda: - torch.cuda.set_device(args.device) + use_gpu = torch.[cuda|xpu].is_available() + if use_gpu: + torch.[cuda|xpu].set_device(args.device) else: - print('WARNING: you specified GPU (device={}) but no GPUs were detected. This may mean there is a mismatch between your system CUDA version and your pytorch CUDA version.'.format(args.device), file=sys.stderr) + print('WARNING: you specified GPU (device={}) but no GPUs were detected. This may mean there is a mismatch between your system GPU and your pytorch GPU version.'.format(args.device), file=sys.stderr) """ - use_cuda = topaz.cuda.set_device(args.device) - report('Using device={} with cuda={}'.format(args.device, use_cuda)) + device = topaz.gpu.set_device(args.device) + report('Using device={} with GPU={}'.format(args.device, device)) - if use_cuda: - classifier.cuda() + classifier.to(device) ## load the data radius = args.radius # number of pixels around coordinates to label as positive @@ -695,7 +696,7 @@ def main(args): #if not os.path.exists(os.path.dirname(save_prefix)): # os.makedirs(os.path.dirname(save_prefix)) fit_epochs(classifier, criteria, trainer, train_iterator, test_iterator, args.num_epochs - , save_prefix=save_prefix, use_cuda=use_cuda, output=output) + , save_prefix=save_prefix, device=device, output=output) report('Done!') diff --git a/topaz/denoise.py b/topaz/denoise.py index ba458b4..4deaf4f 100644 --- a/topaz/denoise.py +++ b/topaz/denoise.py @@ -11,6 +11,10 @@ from topaz.utils.data.loader import load_image from topaz.filters import AffineFilter, AffineDenoise, GaussianDenoise, gaussian_filter, inverse_filter +try: + import intel_extension_for_pytorch as ipex +except: + pass def load_model(name): @@ -100,15 +104,14 @@ def denoise_patches(model, x, patch_size, padding=128): return y -def denoise_stack(model, stack, batch_size=20, use_cuda=False): +def denoise_stack(model, stack, batch_size=20, device='cpu'): denoised = np.zeros_like(stack) with torch.no_grad(): stack = torch.from_numpy(stack).float() for i in range(0, len(stack), batch_size): x = stack[i:i+batch_size] - if use_cuda: - x = x.cuda() + x = x.to(device) mu = x.view(x.size(0), -1).mean(1) std = x.view(x.size(0), -1).std(1) x = (x - mu.unsqueeze(1).unsqueeze(2))/std.unsqueeze(1).unsqueeze(2) @@ -1017,7 +1020,7 @@ def __call__(self, x, y): def eval_noise2noise(model, dataset, criteria, batch_size=10 - , use_cuda=False, num_workers=0): + , device='cpu', num_workers=0): data_iterator = torch.utils.data.DataLoader(dataset, batch_size=batch_size , num_workers=num_workers) @@ -1025,12 +1028,13 @@ def eval_noise2noise(model, dataset, criteria, batch_size=10 loss = 0 model.eval() +# if 'ipex' in dir(): +# model = ipex.optimize(model) with torch.no_grad(): for x1,x2 in data_iterator: - if use_cuda: - x1 = x1.cuda() - x2 = x2.cuda() + x1 = x1.to(device) + x2 = x2.to(device) x1 = x1.unsqueeze(1) y = model(x1).squeeze(1) @@ -1047,7 +1051,7 @@ def eval_noise2noise(model, dataset, criteria, batch_size=10 def train_noise2noise(model, dataset, lr=0.001, optim='adagrad', batch_size=10, num_epochs=100 , criteria=nn.MSELoss(), dataset_val=None - , use_cuda=False, num_workers=0, shuffle=True): + , device='cpu', num_workers=0, shuffle=True): gamma = None if criteria == 'L0': @@ -1072,6 +1076,8 @@ def train_noise2noise(model, dataset, lr=0.001, optim='adagrad', batch_size=10, for epoch in range(1, num_epochs+1): model.train() +# if 'ipex' in dir(): +# model, optim = ipex.optimize(model, optimizer=optim) n = 0 loss_accum = 0 @@ -1081,9 +1087,8 @@ def train_noise2noise(model, dataset, lr=0.001, optim='adagrad', batch_size=10, criteria.gamma = 2 - (epoch-1)*2/num_epochs for x1,x2 in data_iterator: - if use_cuda: - x1 = x1.cuda() - x2 = x2.cuda() + x1 = x1.to(device) + x2 = x2.to(device) x1 = x1.unsqueeze(1) y = model(x1).squeeze(1) @@ -1109,7 +1114,7 @@ def train_noise2noise(model, dataset, lr=0.001, optim='adagrad', batch_size=10, loss_val = eval_noise2noise(model, dataset_val, criteria , batch_size=batch_size , num_workers=num_workers - , use_cuda=use_cuda + , device=device ) yield epoch, loss_accum, loss_val else: @@ -1117,7 +1122,7 @@ def train_noise2noise(model, dataset, lr=0.001, optim='adagrad', batch_size=10, def eval_mask_denoise(model, dataset, criteria, p=0.01 # masking rate - , batch_size=10, use_cuda=False, num_workers=0): + , batch_size=10, device='cpu', num_workers=0): data_iterator = torch.utils.data.DataLoader(dataset, batch_size=batch_size , num_workers=num_workers) @@ -1125,6 +1130,8 @@ def eval_mask_denoise(model, dataset, criteria, p=0.01 # masking rate loss = 0 model.eval() +# if 'ipex' in dir(): +# model = ipex.optimize(model) with torch.no_grad(): for x in data_iterator: @@ -1132,10 +1139,9 @@ def eval_mask_denoise(model, dataset, criteria, p=0.01 # masking rate mask = (torch.rand(x.size()) < p) r = torch.randn(x.size()) - if use_cuda: - x = x.cuda() - mask = mask.cuda() - r = r.cuda() + x = x.to(device) + mask = mask.to(device) + r = r.to(device) # mask out x by replacing from N(0,1) x_ = mask.float()*r + (1-mask.float())*x @@ -1159,7 +1165,7 @@ def eval_mask_denoise(model, dataset, criteria, p=0.01 # masking rate def train_mask_denoise(model, dataset, p=0.01, lr=0.001, optim='adagrad', batch_size=10, num_epochs=100 , criteria=nn.MSELoss(), dataset_val=None - , use_cuda=False, num_workers=0, shuffle=True): + , device='cpu', num_workers=0, shuffle=True): gamma = None if criteria == 'L0': @@ -1184,6 +1190,8 @@ def train_mask_denoise(model, dataset, p=0.01, lr=0.001, optim='adagrad', batch_ for epoch in range(1, num_epochs+1): model.train() +# if 'ipex' in dir(): +# model, optim = ipex.optimize(model, optimizer=optim) n = 0 loss_accum = 0 @@ -1199,10 +1207,9 @@ def train_mask_denoise(model, dataset, p=0.01, lr=0.001, optim='adagrad', batch_ mask = (torch.rand(x.size()) < p) r = torch.randn(x.size()) - if use_cuda: - x = x.cuda() - mask = mask.cuda() - r = r.cuda() + x = x.to(device) + mask = mask.to(device) + r = r.to(device) # mask out x by replacing from N(0,1) x_ = mask.float()*r + (1-mask.float())*x @@ -1233,7 +1240,7 @@ def train_mask_denoise(model, dataset, p=0.01, lr=0.001, optim='adagrad', batch_ loss_val = eval_mask_denoise(model, dataset_val, criteria, p=p , batch_size=batch_size , num_workers=num_workers - , use_cuda=use_cuda + , device=device ) yield epoch, loss_accum, loss_val else: @@ -1266,18 +1273,16 @@ def lowpass(x, factor=1, dims=2): return f -def gaussian(x, sigma=1, scale=5, use_cuda=False, dims=2): +def gaussian(x, sigma=1, scale=5, device='cpu', dims=2): """ Apply Gaussian filter with sigma to image. Truncates the kernel at scale times sigma pixels """ f = GaussianDenoise(sigma, scale=scale, dims=dims) - if use_cuda: - f.cuda() + f.to(device) with torch.no_grad(): x = torch.from_numpy(x).unsqueeze(0).unsqueeze(0) - if use_cuda: - x = x.cuda() + x = x.to(device) y = f(x).squeeze().cpu().numpy() - return y \ No newline at end of file + return y diff --git a/topaz/cuda.py b/topaz/gpu.py similarity index 50% rename from topaz/cuda.py rename to topaz/gpu.py index b9cfd65..72aab2b 100644 --- a/topaz/cuda.py +++ b/topaz/gpu.py @@ -2,6 +2,10 @@ import warnings import torch +try: + import intel_extension_for_pytorch as ipex +except: + pass def _format(message, category, filename, lineno, line=None): w = '{}: {}\n'.format(category.__name__, message) @@ -9,16 +13,26 @@ def _format(message, category, filename, lineno, line=None): warnings.formatwarning = _format -class CudaWarning(UserWarning): +class GpuWarning(UserWarning): pass def set_device(device, error=False, warn=True): - use_cuda = False + use_device = 'cpu' if device >= 0: # try to set GPU when device >= 0 - use_cuda = torch.cuda.is_available() + if torch.cuda.is_available(): + import torch.cuda as acc + use_device = 'cuda' + elif hasattr(torch,'xpu'): + if torch.xpu.is_available(): + import torch.xpu as acc + use_device = 'xpu' + else: + import torch.cpu as acc + else: + import torch.cpu as acc try: - torch.cuda.set_device(device) + acc.set_device(device) except Exception as e: ## setting the device failed if error: @@ -26,7 +40,8 @@ def set_device(device, error=False, warn=True): if warn: # warn the user message = str(e) + '\nFalling back to CPU.' - warnings.warn(message, CudaWarning) + warnings.warn(message, GpuWarning) # fallback to CPU - use_cuda = False - return use_cuda + use_device = 'cpu' + return use_device + diff --git a/topaz/methods.py b/topaz/methods.py index 7e72c91..1f5432f 100644 --- a/topaz/methods.py +++ b/topaz/methods.py @@ -7,6 +7,10 @@ import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable +try: + import intel_extension_for_pytorch as ipex +except: + pass def autoencoder_loss(model, X): X = X.unsqueeze(1) @@ -123,8 +127,7 @@ def step(self, X, Y): ## KL of w from the binomial distribution with pi log_binom = scipy.stats.binom.logpmf(np.arange(0,N+1),N,self.pi) log_binom = torch.from_numpy(log_binom).float() - if q_var.is_cuda: - log_binom = log_binom.cuda() + log_binom = log_binom.to(q_var.device) log_binom = Variable(log_binom) ge_penalty = -torch.sum(log_binom*q_discrete) diff --git a/topaz/predict.py b/topaz/predict.py index 580209f..816bc5a 100644 --- a/topaz/predict.py +++ b/topaz/predict.py @@ -1,7 +1,10 @@ from __future__ import absolute_import, print_function, division import torch - +try: + import intel_extension_for_pytorch as ipex +except: + pass def batches(X, batch_size=1): batch = [] @@ -16,20 +19,19 @@ def batches(X, batch_size=1): yield batch -def score_stream(model, images, use_cuda=False, batch_size=1): +def score_stream(model, images, device='cpu', batch_size=1): with torch.no_grad(): for x in batches(images, batch_size=batch_size): x = x.unsqueeze(1) - if use_cuda: - x = x.cuda() + x = x.to(device) logits = model(x).squeeze(1).cpu().numpy() for i in range(len(logits)): yield logits[i] -def score(model, images, use_cuda=False, batch_size=1): +def score(model, images, device='cpu', batch_size=1): scores = [] - for y in score_stream(model, images, use_cuda=use_cuda, batch_size=batch_size): + for y in score_stream(model, images, device=device, batch_size=batch_size): scores.append(y) return scores diff --git a/topaz/stats.py b/topaz/stats.py index 6299adf..1c482f0 100644 --- a/topaz/stats.py +++ b/topaz/stats.py @@ -4,10 +4,13 @@ import scipy.stats import torch - +try: + import intel_extension_for_pytorch as ipex +except: + pass def normalize(x, alpha=900, beta=1, num_iters=100, sample=1 - , method='gmm', use_cuda=False, verbose=False): + , method='gmm', device='cpu', verbose=False): if method == 'affine': mu = x.mean() std = x.std() @@ -32,7 +35,7 @@ def normalize(x, alpha=900, beta=1, num_iters=100, sample=1 mu, std, pi, logp, mus, stds, pis, logps = norm_fit(x_sample, alpha=alpha, beta=beta , scale=scale - , num_iters=num_iters, use_cuda=use_cuda + , num_iters=num_iters, device=device , verbose=verbose) # normalize the data @@ -57,7 +60,7 @@ def normalize(x, alpha=900, beta=1, num_iters=100, sample=1 def norm_fit(x, alpha=900, beta=1, scale=1 - , num_iters=100, use_cuda=False, verbose=False): + , num_iters=100, device='cpu', verbose=False): # try multiple initializations of pi pis = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98, 1]) @@ -68,8 +71,7 @@ def norm_fit(x, alpha=900, beta=1, scale=1 stds = np.zeros(len(pis)) x = torch.from_numpy(x) - if use_cuda: - x = x.cuda() + x = x.to(device) for i in range(len(pis)): pi = pis[i]