Skip to content

Commit

Permalink
Bugfixes and update parallization
Browse files Browse the repository at this point in the history
* Modify parallelization to use Horace parallel_config
* Add a python patch routine and store diffs wrt 3.6.2
* Add parallelization documentation
* Fix input pars to `disp2sqw_plot` bug #9
* Add auto conv to double of small np.ndarrays #11
* Fix spyder deletion of imports bug #12
* Fix getdoc error string in Spyder/IPython help #12
  • Loading branch information
mducle committed Mar 20, 2022
1 parent 5044949 commit bd4bdf9
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 8 deletions.
58 changes: 58 additions & 0 deletions cmake/patch_horace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import sys, os, re

FUZZ = 10  # Maximum number of lines a hunk may be displaced from the position in its header


def _find_hunk_start(lines, hunk, expected):
    """Return the zero-based index in `lines` where `hunk` applies, or None if not found.

    :param lines: the target file as a list of lines (without line terminators)
    :param hunk: the hunk as a list of lines, `hunk[0]` being the `@@` header remainder
    :param expected: zero-based line index at which the header says the hunk starts
    """
    # The "old side" of a hunk (context and removed lines) is what must be present
    # in the target file; use its first (up to) three lines as the search anchor.
    anchor = [ln[1:] for ln in hunk[1:] if not ln.startswith(('+', '\\'))][:3]
    if not anchor:
        # Pure insertion without context: nothing to search for, trust the header
        return min(max(expected, 0), len(lines))
    first = max(0, expected - FUZZ)
    last = min(len(lines) - len(anchor), expected + FUZZ)
    # Try the stated position first, then move outwards, so that repeated lines
    # (blank lines, `end`, ...) anchor on the occurrence closest to the header.
    for pos in sorted(range(first, last + 1), key=lambda i: abs(i - expected)):
        if lines[pos:pos + len(anchor)] == anchor:
            return pos
    return None


def patch_files(diff_file, base_dir):
    """Patch files in `base_dir` with the unified diff in `diff_file`.

    The diff may contain sections for several files, each introduced by a line
    starting with `diff `. The target of each section is taken from its
    `+++ b/<path>` line and resolved relative to `base_dir`. Target files must
    already exist (they are opened for reading before being rewritten). A file
    is only rewritten once all of its hunks have applied successfully.

    :param diff_file: path to the unified diff file
    :param base_dir: directory that the `b/` paths in the diff are relative to
    :raises ValueError: if a hunk header is malformed, a hunk cannot be located
        within `FUZZ` lines of its stated position, or a context/removed line
        does not match the target file
    """
    with open(diff_file, 'r') as f:
        df = f.read()
    # Only split on `diff ` at the start of a line: hunk lines are always prefixed
    # by ' ', '+', '-' or '\', so file names or content containing "diff" are safe.
    for diffs in re.split(r'^diff ', df, flags=re.MULTILINE):
        if '@@' not in diffs:
            continue
        filename = os.path.join(base_dir, diffs.split('+++ b/')[1].split('\n')[0])
        with open(filename, 'r') as f:
            t0 = f.read().split('\n')
        for p in diffs.split('\n@@')[1:]:
            ll = p.split('\n')
            m = re.match(r'-(\d+),?(\d*) \+(\d+),?(\d*) @@', ll[0].strip())
            if not m:
                raise ValueError(f'Invalid diff hunk header {ll[0]}')
            # Drop the empty strings left by the newline(s) terminating the hunk
            while len(ll) > 1 and not ll[-1]:
                ll.pop()
            # Header line numbers are 1-based; the new-file number is used because
            # `t0` already includes the changes made by earlier hunks.
            l1 = _find_hunk_start(t0, ll, int(m.group(3)) - 1)
            if l1 is None:
                raise ValueError('Invalid Hunk:\n@@ %s' % ("\n".join(ll)))
            for ii in range(1, len(ll)):
                print(f'{ii, l1}: {ll[ii]}')
                line = ll[ii]
                if line.startswith('\\'):
                    # "\ No newline at end of file" marker, not file content
                    continue
                current = t0[l1] if l1 < len(t0) else '<end of file>'
                if len(line) < 1:
                    # Blank context line whose leading space has been stripped
                    l1 += 1
                elif line[0] == '-':
                    if l1 < len(t0) and t0[l1] == line[1:]:
                        t0.pop(l1)
                    else:
                        raise ValueError('Unmatched hunk: %s\n%s' % (current, line))
                elif line[0] == '+':
                    t0.insert(l1, line[1:])
                    l1 += 1
                else:
                    if l1 >= len(t0) or t0[l1] != line[1:]:
                        raise ValueError('Unmatched hunk: %s\n%s' % (current, line))
                    l1 += 1
        with open(filename, 'w') as f:
            f.write('\n'.join(t0))


if __name__ == '__main__':
    # Usage: patch_horace.py <diff_dir> <base_dir>
    # Walks <diff_dir> recursively and applies every `*.diff` file found to the
    # tree rooted at <base_dir>.
    for folder, _subdirs, names in os.walk(sys.argv[1]):
        for name in names:
            if name.endswith('.diff'):
                patch_files(os.path.join(folder, name), sys.argv[2])
28 changes: 28 additions & 0 deletions docs/user_docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,34 @@ we recommend that you install ``pace_neutrons`` in a Python virtual environment
or `conda <https://docs.conda.io>`__.


Parallelization
---------------

You can activate the parallelization framework using:

.. code-block:: python

    from pace_neutrons import Matlab
    m = Matlab()
    m.hpc('on')

or deactivate it with :code:`m.hpc('off')`. You can select different types of parallelisation using:

.. code-block:: python

    m.hpc_config().parallel_cluster = 'parpool'

(The compiled matlab code includes the parallelisation toolbox so :code:`parpool` will work.)

Other options are :code:`'herbert'` (file-based communications) or :code:`'mpiexec_mpi'`.
The last option needs an MPI installation, and a properly configured firewall.
On Windows you can use `MSMPI <https://docs.microsoft.com/en-us/message-passing-interface/microsoft-mpi>`__.
Horace does bundle a version of ``mpiexec`` for Windows but it may be blocked by the firewall in some
cases where the official Microsoft version is not blocked.
(The Matlab version of Horace also has an additional option :code:`'slurm_mpi'` but this only works
on certain versions of the IDAaaS system.)


IDAaaS Installation
-------------------

Expand Down
7 changes: 6 additions & 1 deletion pace_neutrons/DataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from .TypeWrappers import as_matlab, as_numpy
from .FunctionWrapper import pymatpy

# NumPy integer and boolean scalar types. `DataTypes.encode` checks an array's dtype against
# this list and converts small arrays of these types to Matlab doubles.
NPY_INT = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64, np.bool_]

class DataTypes:

def __init__(self, interface, pyMatlab):
Expand Down Expand Up @@ -41,7 +43,10 @@ def encode(self, data):
data = self._unwrap(data)
# If the list is not one of numbers leave it for Matlab to convert to a cell array
elif isinstance(data, np.ndarray):
data = as_matlab(data)
if data.size < 1000 and data.dtype in NPY_INT:
data = self.matlab.double(list(data))
else:
data = as_matlab(data)
elif isinstance(data, Number):
# Case 4)
data = float(data)
Expand Down
41 changes: 38 additions & 3 deletions pace_neutrons/Matlab.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os, sys
import platform
import shutil
from .funcinspect import lhs_info
from pace_neutrons_cli.utils import get_runtime_version, checkPath

Expand All @@ -23,6 +24,21 @@
except ImportError:
pass


def _disable_umr():
# Mark Matlab libraries so Spyder does not reload (*delete*) them
if 'spydercustomize' not in sys.modules:
return
ll = '_internal,_internal.mlarray_utils,_internal.mlarray_sequence,mlarray,' \
'mlexceptions,matlab,matlab_pysdk,matlab_pysdk.runtime.errorhandler,' \
'matlab_pysdk.runtime.futureresult,matlab_pysdk.runtime.deployablefunc,' \
'matlab_pysdk.runtime.deployableworkspace,matlab_pysdk.runtime,' \
'matlab_pysdk.runtime.deployablepackage,matlabruntimeforpython3_8'

if 'matlab_pysdk' not in sys.modules['spydercustomize'].__umr__.namelist:
sys.modules['spydercustomize'].__umr__.namelist += ll.split(',')


# Store the Matlab engine as a module global wrapped inside a class
# When the global ref is deleted (e.g. when Python exits) the __del__ method is called
# Which then gracefully shutsdown Matlab, else we get a segfault.
Expand All @@ -37,6 +53,21 @@ def __init__(self, runtime_version, mlPath):
self._interface = pace.initialize()
print('Interface opened')
self._interface.call('pyhorace_init', [], nargout=0)
if 'SPYDER_ARGS' in os.environ:
_disable_umr()
# Sets the parallel worker to the compiled worker if it exists
if 'worker' not in sys.argv[0]:
is_windows = platform.system() == 'Windows'
worker_path = os.path.join(os.path.dirname(sys.argv[0]), 'worker_v2')
if is_windows:
worker_path = worker_path + '.exe'
if not os.path.exists(worker_path):
# Tries to search for it on the path
worker_path = shutil.which('worker_v2')
if worker_path:
pc = self._interface.call('parallel_config', [], nargout=1)
access = self._interface.call('substruct', ['.', 'worker'])
self._interface.call('subsasgn', [pc, access, worker_path])

def __del__(self):
if self._interface:
Expand Down Expand Up @@ -74,6 +105,10 @@ def __call__(self, *args, **kwargs):
results = self._interface.call_method(self._name, [], m_args, nargout=nargout)
return self._converter.decode(results)

def getdoc(self):
    """Always raise NotImplementedError.

    Defined so that a lookup of `getdoc` on this object fails immediately
    in Python, to avoid an error message being printed in Matlab.
    """
    raise NotImplementedError()


class Matlab(object):
def __init__(self, mlPath=None):
Expand All @@ -84,7 +119,7 @@ def __init__(self, mlPath=None):
:param mlPath: Path to the SDK i.e. '/MATLAB/MATLAB_Runtime/v96' or to the location where matlab is installed
(MATLAB root directory)
"""

self.interface = None
self.pyMatlab = None
self.converter = None
Expand All @@ -97,7 +132,7 @@ def initialize(self, mlPath=None):
:return: None. obj has been filled with initialization pars.
"""
global _global_matlab_ref
if _global_matlab_ref is None:
if _global_matlab_ref is None:
_global_matlab_ref = _MatlabInstance(get_runtime_version(), mlPath)
self.interface = _global_matlab_ref
import matlab as pyMatlab
Expand Down Expand Up @@ -133,7 +168,7 @@ def type(self, obj):

def register_ipython_magics():
try:
import IPython
import IPython
except ImportError:
return None
else:
Expand Down
3 changes: 0 additions & 3 deletions pace_neutrons_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ def main(args=None):
if args.install_mcr:
from pace_neutrons_cli.utils import install_MCR
install_MCR(interactive=False)
# Sets the environment variable for the parallelisation worker
worker_path = os.path.join(os.path.dirname(sys.argv[0]), 'worker_v2')
os.environ['PACE_WORKER'] = worker_path + '.exe' if is_windows else worker_path
# Need to set the Qt library folder first if we're using Spyder,
# or get conflict with bundled Matlab libraries on Linux
force_reload = False
Expand Down
4 changes: 3 additions & 1 deletion pace_neutrons_cli/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ def _parse_control_string(cs):
if isinstance(cs, list):
cs = ' '.join(cs)
if "('" in cs and "')" in cs:
match = re.match("[\w\d]*\('([\w\d\-]*)'\).*", cs)
match = re.match("[\w\d\.\\\/:]*\('([\w\d\-]*)'\).*", cs)
if match:
return match.group(1)
if '/' in cs or '\\' in cs:
cs = ''
return cs

def main(args=None):
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ endif()

add_custom_command(
TARGET compile_ctf PRE_BUILD
COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_MODULE_PATH}/patch_horace.py" "${CMAKE_CURRENT_SOURCE_DIR}/diffs" "${CMAKE_CURRENT_BINARY_DIR}/CTF"
COMMAND ${Matlab_MCC_COMPILER} -W python:pace -d "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pace_neutrons" -v
-a "${CMAKE_CURRENT_BINARY_DIR}/CTF"
-a "${CMAKE_CURRENT_SOURCE_DIR}/matlab_overrides"
Expand Down
12 changes: 12 additions & 0 deletions src/diffs/disp2sqw.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/HORACE-build/Horace/disp2sqw_plot.m b/HORACE-build/Horace/disp2sqw_plot.m
index 43af671..12faa0e 100644
--- a/Horace/disp2sqw_plot.m
+++ b/Horace/disp2sqw_plot.m
@@ -119,6 +119,7 @@ else
end

pars=args{noff+3};
+if ~iscell(pars), pars = {pars}; end

if isnumeric(args{noff+4}) && numel(args{noff+4})==3
ecent=args{noff+4};
Loading

0 comments on commit bd4bdf9

Please sign in to comment.