Add Intel PyTorch extension #188

Open · wants to merge 1 commit into base: master
40 changes: 19 additions & 21 deletions topaz/commands/denoise.py
@@ -16,7 +16,11 @@
from topaz.utils.data.loader import load_image
from topaz.utils.image import downsample
import topaz.mrc as mrc
import topaz.cuda
import topaz.gpu
try:
import intel_extension_for_pytorch as ipex
except:
pass

name = 'denoise'
help = 'denoise micrographs with various denoising algorithms'
@@ -265,13 +269,12 @@ def make_hdf5_datasets(path, paired=True, preload=False, holdout=0.1, cutoff=0):

def denoise_image(mic, models, lowpass=1, cutoff=0, gaus=None, inv_gaus=None, deconvolve=False
, deconv_patch=1, patch_size=-1, padding=0, normalize=False
, use_cuda=False):
, device='cpu'):
if lowpass > 1:
mic = dn.lowpass(mic, lowpass)

mic = torch.from_numpy(mic)
if use_cuda:
mic = mic.cuda()
mic = mic.to(device)

# normalize and remove outliers
mu = mic.mean()
@@ -316,8 +319,8 @@ def main(args):
set_num_threads(num_threads)

## set the device
use_cuda = topaz.cuda.set_device(args.device)
print('# using device={} with cuda={}'.format(args.device, use_cuda), file=sys.stderr)
device = topaz.gpu.set_device(args.device)
print('# Using device={} with GPU={}'.format(args.device, device), file=sys.stderr)

cutoff = args.pixel_cutoff # pixel truncation limit

@@ -393,8 +396,7 @@ def main(args):
else:
raise Exception('Unknown architecture: ' + arch)

if use_cuda:
model = model.cuda()
model = model.to(device)

# train
optim = args.optim
@@ -417,7 +419,7 @@
, criteria=criteria
, num_epochs=num_epochs
, dataset_val=dataset_val
, use_cuda=use_cuda
, device=device
, num_workers=num_workers
, shuffle=shuffle
)
@@ -428,7 +430,7 @@
, criteria=criteria
, num_epochs=num_epochs
, dataset_val=dataset_val
, use_cuda=use_cuda
, device=device
, num_workers=num_workers
, shuffle=shuffle
)
@@ -446,8 +448,7 @@ def main(args):
model.cpu()
model.eval()
torch.save(model, path)
if use_cuda:
model.cuda()
model.to(device)

models = [model]

@@ -461,8 +462,7 @@ def main(args):
model = dn.load_model(arg)

model.eval()
if use_cuda:
model.cuda()
model.to(device)

models.append(model)

@@ -481,15 +481,13 @@ def main(args):
gaus = args.gaussian
if gaus > 0:
gaus = dn.GaussianDenoise(gaus)
if use_cuda:
gaus.cuda()
gaus.to(device)
else:
gaus = None
inv_gaus = args.inv_gaussian
if inv_gaus > 0:
inv_gaus = dn.InvGaussianFilter(inv_gaus)
if use_cuda:
inv_gaus.cuda()
inv_gaus.to(device)
else:
inv_gaus = None
deconvolve = args.deconvolve
@@ -516,7 +514,7 @@ def main(args):
, inv_gaus=inv_gaus, deconvolve=deconvolve
, deconv_patch=deconv_patch
, patch_size=ps, padding=padding, normalize=normalize
, use_cuda=use_cuda
, device=device
)
denoised[i] = mic

@@ -538,7 +536,7 @@ def main(args):
return

# make the output directory if it doesn't exist
if not os.path.exists(args.output):
if args.output and (not os.path.exists(args.output)):
os.makedirs(args.output)

for path in args.micrographs:
@@ -550,7 +548,7 @@ def main(args):
, inv_gaus=inv_gaus, deconvolve=deconvolve
, deconv_patch=deconv_patch
, patch_size=ps, padding=padding, normalize=normalize
, use_cuda=use_cuda
, device=device
)

# write the micrograph
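For reference, the recurring change in denoise.py is that the boolean use_cuda flag becomes a device string returned by topaz.gpu.set_device, so models and tensors are moved with a single .to(device) call regardless of backend. The topaz.gpu module itself is not shown in this excerpt; the sketch below only illustrates the pattern and assumes CUDA-then-XPU-then-CPU selection (the helper name pick_device is hypothetical, not the PR's implementation).

    import torch

    def pick_device(requested=-1):
        # Hypothetical stand-in for topaz.gpu.set_device: prefer CUDA, then the
        # Intel XPU backend exposed by intel_extension_for_pytorch, else CPU.
        if requested != -1:
            if torch.cuda.is_available():
                return 'cuda'
            if hasattr(torch, 'xpu') and torch.xpu.is_available():
                return 'xpu'
        return 'cpu'

    device = pick_device(0)
    mic = torch.randn(128, 128).to(device)  # same call works for cpu, cuda, and xpu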
66 changes: 41 additions & 25 deletions topaz/commands/denoise3d.py
@@ -19,7 +19,11 @@
from topaz.utils.data.loader import load_image
from topaz.utils.image import downsample
import topaz.mrc as mrc
import topaz.cuda
import topaz.gpu
try:
import intel_extension_for_pytorch as ipex
except:
pass

from topaz.denoise import UDenoiseNet3D
from topaz.filters import GaussianDenoise
@@ -73,19 +77,20 @@ def add_arguments(parser=None):

return parser

def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, use_cuda=False):
def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, device='cpu'):

c = 0
loss_accum = 0
model.train()
# if 'ipex' in dir():
# model, optim = ipex.optimize(model, optimizer=optim)

for batch_idx , (source,target), in enumerate(iterator):

b = source.size(0)
loss_mb = 0
if use_cuda:
source = source.cuda()
target = target.cuda()
source = source.to(device)
target = target.to(device)

denoised_source = model(source)
loss = cost_func(denoised_source,target)
@@ -108,7 +113,7 @@ def train_epoch(iterator, model, cost_func, optim, epoch=1, num_epochs=1, N=1, u
return loss_accum


def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, use_cuda=False):
def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, device='cpu'):

c = 0
loss_accum = 0
@@ -119,9 +124,8 @@ def eval_model(iterator, model, cost_func, epoch=1, num_epochs=1, N=1, use_cuda=

b = source.size(0)
loss_mb = 0
if use_cuda:
source = source.cuda()
target = target.cuda()
source = source.to(device)
target = target.to(device)

denoised_source = model(source)
loss = cost_func(denoised_source,target)
@@ -408,7 +412,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device
# initialize the model
print('# initializing model...', file=log)
model_base = UDenoiseNet3D(base_width=base_kernel_width)
model,use_cuda,num_devices = set_device(model_base, device)
model,use_device,num_devices = set_device(model_base, device)

if cost_func == 'L2':
cost_func = nn.MSELoss()
@@ -469,7 +473,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device
epoch=epoch,
num_epochs=num_epochs,
N=N_train,
use_cuda=use_cuda)
device=use_device)

line = '\t'.join([str(epoch+1), 'train', str(epoch_loss_accum)])
print(line, file=output)
@@ -482,7 +486,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device
epoch=epoch,
num_epochs=num_epochs,
N=N_test,
use_cuda=use_cuda)
device=use_device)

line = '\t'.join([str(epoch+1), 'test', str(epoch_loss_accum)])
print(line, file=output)
@@ -491,8 +495,7 @@ def train_model(even_path, odd_path, save_prefix, save_interval, device
if save_prefix is not None and (epoch+1)%save_interval == 0:
model.eval().cpu()
save_model(model, epoch+1, save_prefix, digits=digits)
if use_cuda:
model.cuda()
model.to(use_device)

print('# training completed!', file=log)

@@ -557,17 +560,29 @@ def load_model(path, base_kernel_width=11):
def set_device(model, device, log=sys.stderr):
# set the device or devices
d = device
use_cuda = (d != -1) and torch.cuda.is_available()
use_device = 'cpu'
if d != -1:
if torch.cuda.is_available():
import torch.cuda as acc
use_device = 'cuda'
elif hasattr(torch,'xpu'):
if torch.xpu.is_available():
import torch.xpu as acc
use_device = 'xpu'
else:
import torch.cpu as acc
else:
import torch.cpu as acc
num_devices = 1
if use_cuda:
device_count = torch.cuda.device_count()
if use_device != 'cpu':
device_count = acc.device_count()
try:
if d >= 0:
assert d < device_count
torch.cuda.set_device(d)
print('# using CUDA device:', d, file=log)
acc.set_device(d)
print('# using GPU device:', d, file=log)
elif d == -2:
print('# using all available CUDA devices:', device_count, file=log)
print('# using all available GPU devices:', device_count, file=log)
num_devices = device_count
model = nn.DataParallel(model)
else:
@@ -579,10 +594,9 @@ def set_device(model, device, log=sys.stderr):
print('ERROR: Something went wrong with setting the compute device', file=log)
sys.exit(2)

if use_cuda:
model.cuda()
model.to(use_device)

return model, use_cuda, num_devices
return model, use_device, num_devices


class PatchDataset:
@@ -756,7 +770,9 @@ def main(args):
model = nn.Sequential(model, GaussianDenoise(gaussian_sigma, dims=3))
model.eval()

model, use_cuda, num_devices = set_device(model, args.device)
model, use_device, num_devices = set_device(model, args.device)
# if 'ipex' in dir():
# model = ipex.optimize(model)

#batch_size = args.batch_size
#batch_size *= num_devices
@@ -783,4 +799,4 @@
if __name__ == '__main__':
parser = add_arguments()
args = parser.parse_args()
main(args)
main(args)
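The largest piece of new logic in denoise3d.py is the rewritten set_device, which aliases the active backend module (torch.cuda or torch.xpu) so that device_count() and set_device() are called the same way for CUDA and Intel XPU. A condensed sketch of that selection follows, assuming a recent PyTorch plus intel_extension_for_pytorch for the XPU branch; the DataParallel and error-handling paths of the real function are omitted.

    import sys
    import torch

    def select_backend(d, log=sys.stderr):
        # Pick a device string and the matching torch backend module.
        use_device, acc = 'cpu', None
        if d != -1:
            if torch.cuda.is_available():
                use_device, acc = 'cuda', torch.cuda
            elif hasattr(torch, 'xpu') and torch.xpu.is_available():
                use_device, acc = 'xpu', torch.xpu
        if acc is not None and d >= 0:
            assert d < acc.device_count()
            acc.set_device(d)  # backend-agnostic device selection
            print('# using GPU device:', d, file=log)
        return use_device

Because a string rather than a boolean is returned, the callers above can simply write model.to(use_device) instead of branching on use_cuda.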
17 changes: 11 additions & 6 deletions topaz/commands/extract.py
@@ -18,7 +18,11 @@
from topaz.algorithms import non_maximum_suppression, match_coordinates
from topaz.metrics import average_precision
import topaz.predict
import topaz.cuda
import topaz.gpu
try:
import intel_extension_for_pytorch as ipex
except:
pass

name = 'extract'
help = 'extract particles from segmented images or segment and extract in one step with a trained classifier'
@@ -187,15 +191,16 @@ def stream_images(paths):
def score_images(model, paths, device=-1, batch_size=1):
if model is not None and model != 'none': # score each image with the model
## set the device
use_cuda = topaz.cuda.set_device(device)
device = topaz.gpu.set_device(device)
## load the model
from topaz.model.factory import load_model
model = load_model(model)
model.eval()
model.fill()
if use_cuda:
model.cuda()
scores = topaz.predict.score_stream(model, stream_images(paths), use_cuda=use_cuda
model.to(device)
# if 'ipex' in dir():
# model = ipex.optimize(model)
scores = topaz.predict.score_stream(model, stream_images(paths), device=device
, batch_size=batch_size)
else: # load scores directly
scores = stream_images(paths)
@@ -311,4 +316,4 @@ def main(args):
if __name__ == '__main__':
parser = add_arguments()
args = parser.parse_args()
main(args)
main(args)
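extract.py guards the intel_extension_for_pytorch import with try/except and leaves the ipex.optimize(model) call commented out. If that call were enabled, a guarded form along these lines would keep the script working when the extension is not installed; the Conv2d model here is only a stand-in for the loaded classifier, not the PR's code.

    import torch
    import torch.nn as nn

    try:
        import intel_extension_for_pytorch as ipex  # optional Intel CPU/XPU optimizations
    except ImportError:
        ipex = None

    device = 'xpu' if (hasattr(torch, 'xpu') and torch.xpu.is_available()) else 'cpu'
    model = nn.Conv2d(1, 32, kernel_size=7, padding=3)  # stand-in for the real model
    model.eval()
    model = model.to(device)
    if ipex is not None:
        # ipex.optimize returns an optimized copy of the model for inference
        model = ipex.optimize(model)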
20 changes: 12 additions & 8 deletions topaz/commands/normalize.py
@@ -12,7 +12,11 @@
from topaz.stats import normalize
from topaz.utils.data.loader import load_image
from topaz.utils.image import downsample, save_image
import topaz.cuda
import topaz.gpu
try:
import intel_extension_for_pytorch as ipex
except:
pass

name = 'normalize'
help = 'normalize a set of images using the 2-component Gaussian mixture model'
@@ -48,7 +52,7 @@ def add_arguments(parser=None):

class Normalize:
def __init__(self, dest, scale, affine, num_iters, alpha, beta
, sample, metadata, formats, use_cuda):
, sample, metadata, formats, device):
self.dest = dest
self.scale = scale
self.affine = affine
@@ -58,7 +62,7 @@ def __init__(self, dest, scale, affine, num_iters, alpha, beta
self.sample = sample
self.metadata = metadata
self.formats = formats
self.use_cuda = use_cuda
self.device = device

def __call__(self, path):
# load the image
@@ -72,7 +76,7 @@ def __call__(self, path):
if self.affine:
method = 'affine'
x,metadata = normalize(x, alpha=self.alpha, beta=self.beta, num_iters=self.num_iters
, method=method, sample=self.sample, use_cuda=self.use_cuda)
, method=method, sample=self.sample, device=self.device)

# save the image and the metadata
name,_ = os.path.splitext(os.path.basename(path))
@@ -116,17 +120,17 @@ def main(args):
from topaz.torch import set_num_threads
set_num_threads(num_threads)

# set CUDA device
use_cuda = topaz.cuda.set_device(args.device)
if use_cuda:
# set GPU device
device = topaz.gpu.set_device(args.device)
if device != 'cpu':
# when using GPU, turn off multiple processes
num_workers = 0

if not os.path.exists(dest):
os.makedirs(dest)

process = Normalize(dest, scale, affine, num_iters, alpha, beta
, sample, metadata, formats, use_cuda)
, sample, metadata, formats, device)

if num_workers > 1:
pool = mp.Pool(num_workers)
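normalize.py follows the same pattern as the other commands: the device string from topaz.gpu.set_device is stored on the Normalize callable, forwarded to topaz.stats.normalize, and multiprocessing is switched off whenever a GPU backend is selected. A minimal sketch of that dispatch, with a hypothetical process callable standing in for the much larger Normalize class:

    import multiprocessing as mp

    def dispatch(paths, process, device, num_workers):
        # GPU contexts generally cannot be shared with forked workers,
        # so fall back to serial processing on 'cuda' or 'xpu'.
        if device != 'cpu':
            num_workers = 0
        if num_workers > 1:
            with mp.Pool(num_workers) as pool:
                for name in pool.imap_unordered(process, paths):
                    print('# processed:', name)
        else:
            for path in paths:
                print('# processed:', process(path))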