Skip to content

Commit

Permalink
Feature/imcalc (#7)
Browse files Browse the repository at this point in the history
* demonstration of imcalc issue

* remove final product

* Suggested fix for imcalc issue: a new function that makes a non-crow copy of crow objects in an entire object tree.  The resulting python object tree can be written via to_yaml without any !tags

* Bug fix in JobRankSpec: raise AttributeError in __getattr__ instead of KeyError

* Add an !icalc type which is just a shortcut for !Immediate [ !calc "..." ]

* remove some debugging junk

* add yaml representers for dict_eval and list_eval

* use !icalc in the imcalc test

* adjust worktools.py for resource tuning
  • Loading branch information
JianKuang-Intelsat authored Oct 23, 2019
1 parent f55588a commit c85dfa5
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 11 deletions.
15 changes: 10 additions & 5 deletions crow/config/eval_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ def _result(self,globals,locals):
CALC_CACHE[self]=obj
return eval(obj,c,locals)

class stricalc(strcalc):
    """An immediately-evaluated calc string (the !icalc YAML tag): like
    strcalc, a string run through eval(), but flagged via _is_immediate so
    it is evaluated at load time — a shortcut for !Immediate [ !calc "..." ]."""
    def _is_immediate(self): return True

class strref(str):
"""Represents a reference to a variable within some scope (ie. abc.def[32].ghi)"""
def __repr__(self):
Expand Down Expand Up @@ -530,12 +534,13 @@ def invalidate_cache(obj,key=None,recurse=False):

def evaluate_one(obj,key,val,memo):
    """Evaluate one key of a config container during the immediate pass.

    If ``val`` is flagged immediate (has ``_is_immediate``), reading
    ``obj[key]`` forces its evaluation.  Then, when a ``memo`` set is in
    use, recurse into values that either opt in via
    ``_recurse_evaluate_immediates`` or are plain containers (no
    ``_result``, i.e. not calc-like expressions).

    NOTE(review): this span was reconstructed from a scraped diff that
    interleaved pre- and post-change lines; verify against the repository.
    """
    if hasattr(val,'_is_immediate'):
        _ = obj[key]  # reading the key triggers evaluation of the immediate value
    if memo is not None and (
            hasattr(val,'_recurse_evaluate_immediates') or
            not hasattr(val,'_result') ):
        evaluate_immediates_impl(val,memo)

def evaluate_immediates_impl(obj,memo=None):
if memo is not None:
Expand Down
1 change: 1 addition & 0 deletions crow/config/from_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def constructor(loader,node):

add_yaml_string(u'!expand',expand)
add_yaml_string(u'!calc',calc)
add_yaml_string(u'!icalc',icalc)
add_yaml_string(u'!ref',ref)
add_yaml_string(u'!error',user_error_message)
add_yaml_string(u'!Depend',Depend)
Expand Down
6 changes: 4 additions & 2 deletions crow/config/represent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datetime import timedelta
from copy import deepcopy
from crow.config.exceptions import *
from crow.config.eval_tools import list_eval, dict_eval, multidict, from_config, strcalc, strref
from crow.config.eval_tools import list_eval, dict_eval, multidict, from_config, strcalc, strref, stricalc
from crow.tools import to_timedelta, Clock
from copy import copy
import crow.sysenv
Expand All @@ -18,7 +18,7 @@
'FirstMax', 'LastTrue', 'FirstTrue', 'GenericList',
'GenericDict', 'GenericOrderedDict', 'ShellCommand',
'Immediate', 'ClockMaker', 'JobResourceSpecMaker',
'MergeMapping', 'AppendSequence', 'Select' ]
'MergeMapping', 'AppendSequence', 'Select', 'icalc' ]

########################################################################

Expand All @@ -32,6 +32,7 @@ class Platform(dict_eval): pass
class ShellCommand(dict_eval): pass

class JobResourceSpecMaker(list_eval):
def _recurse_evaluate_immediates(self): return True
def _result(self,globals,locals):
rank_specs=list()
for i in range(len(self)):
Expand Down Expand Up @@ -211,4 +212,5 @@ def _index(self,lst):
return None

class calc(strcalc): pass    # backs the !calc YAML tag: a string eval()'d on demand
class icalc(stricalc): pass  # backs the !icalc YAML tag: shorthand for !Immediate [ !calc "..." ]
class ref(strref): pass      # backs the !ref YAML tag: a reference to a variable in some scope
2 changes: 2 additions & 0 deletions crow/config/to_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def representer(dumper,data):
# Register YAML representers for the sequence-like config types so they can
# be dumped back to YAML.
add_yaml_list_eval(u'!MergeMapping',MergeMapping)
add_yaml_list_eval(u'!AppendSequence',AppendSequence)
add_yaml_list_eval(None,GenericList)  # tag None: dumped as a plain, untagged sequence
add_yaml_list_eval(None,list_eval)    # lets bare list_eval objects be written without a !tag

########################################################################

Expand All @@ -68,6 +69,7 @@ def representer(dumper,data):
yaml.add_representer(cls,representer)

# Register YAML representers for the mapping-like config types.
add_yaml_dict_eval(None,GenericDict)  # tag None: dumped as a plain, untagged mapping
add_yaml_dict_eval(None,dict_eval)    # lets bare dict_eval objects be written without a !tag
add_yaml_dict_eval(u'!Platform',Platform)
add_yaml_dict_eval(u'!Select',Select)
add_yaml_dict_eval(u'!Action',Action)
Expand Down
5 changes: 4 additions & 1 deletion crow/sysenv/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def new_with(self,*args,**kwargs):
return JobRankSpec(**newspec)

# Nicities
def __getattr__(self,key): return self[key]
def __getattr__(self,key):
    """Expose spec entries as attributes, so spec.x works like spec["x"].

    Unknown keys raise AttributeError rather than KeyError, as the
    attribute protocol requires (keeps hasattr() and friends correct).
    """
    if key not in self:
        raise AttributeError(key)
    return self[key]

# Implement Mapping abstract methods:
def __getitem__(self,key): return self.__spec[key]
Expand Down
13 changes: 13 additions & 0 deletions examples/imcalc/_common.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
platform_common: &global_platform_common
Evaluate: False
default_resources: {}

partition_common: &global_partition_common
Evaluate: False
resources: !MergeMapping
- !calc doc.default_resources
- !calc doc.case.get('resources',{})
- !calc doc.resources_sum

default_resources: {}

137 changes: 137 additions & 0 deletions examples/imcalc/_sandbox.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# This file configures the workflow to run on the cray parts of WCOSS

platform: !Platform
<<: *global_platform_common

# Evaluate: this must be "false" to ensure disk space availability logic
# is not run unless this file is for the current platform.
Evaluate: false

# name: the name of this platform; this must match what the underlying
# scripts expect.
name: sandbox

# detect: this is a function that returns true iff the user is on GAEA
# and false otherwise
detect: True

# skip_if_others_present: if this is true, and at least one other
# platform is detected with this flag set to false, then skip this
# platform
skip_if_others_present: true

# public_release_ics: location of input conditions that have been
# prepared for the public release.
public_release_ics: /gpfs/hps3/emc/global/noscrub/emc.glopara/FV3GFS_V1_RELEASE/ICs

# CHGRP_RSTPROD_COMMAND - this specifies the command to use to
# restrict access to NOAA "rstprod" data restriction class.
# This only used for observation processing, data assimilation, and
# data assimilation archiving, which are not in the public release.
CHGRP_RSTPROD_COMMAND: "chgrp rstprod"

# NWPROD - location of the NCEP operational "nwprod" directory, which
# only has meaning on the NCEP WCOSS machines. It is used to get
# the paths to certain programs and scripts.
NWPROD: "/gpfs/hps/nco/ops/nwprod"

# DMPDIR - location of the global dump data. This is used by the observation
# processing scripts, which are not included in the public release.
DMPDIR: !calc doc.user_places.PROJECT_DIR
#"/Users/jiankuang/Documents/Eclipse_workspace/ecfutils_007"
#DMPDIR: !FirstTrue
# - do: "/gpfs/gp1/emc/globaldump"
# when: !calc tools.isdir(do)
# - do: "/gpfs/tp1/emc/globaldump"
# when: !calc tools.isdir(do)
# - otherwise: !error "Cannot find globaldump directory."

# RTMFIX - location of the CRTM fixed data files used by the GSI data
# assimilation. The data assimilation is not included in this public release
# so this path is unused.
RTMFIX: "$CRTM_FIX"

# BASE_SVN - a directory maintained by emc global model developers
# that contains recent versions of source code and executables for
# various subversion repositories. This is used on some platforms
# to find executables for this workflow.
BASE_SVN: "/gpfs/hps3/emc/global/noscrub/emc.glopara/svn"

# BASE_GIT - a directory maintained by emc global model developers
# that contains recent versions of source code and executables for
# various git repositories. This is used on some platforms to find
# executables for this workflow.
BASE_GIT: "/gpfs/hps3/emc/global/noscrub/emc.glopara/git"

# config_base_extras - Additional configuration data to put in the
# config.base file
config_base_extras: "sandbox"
#config_base_extras: |
# if [ -d /gpfs/tp1 ]; then
# export SITE="LUNA"
# elif [ -d /gpfs/gp1 ]; then
# export SITE="SURGE"
# fi

metasched_more: {}

partitions:
Evaluate: false
default_exclusive: !calc doc.platform.partitions.sandbox
default_service: !calc doc.platform.partitions.sandbox
default_shared: !calc doc.platform.partitions.sandbox
default_bigmem: !calc doc.platform.partitions.sandbox
sandbox:
<<: *global_partition_common

# specification - string to specify to the batch system to
# request this partition. Not relevant for WCOSS Cray
specification: null

# shared_accounting_ref - accounting settings for shared jobs
shared_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# service_accounting_ref - accounting settings for service jobs (jobs
# that require tape or network access)
service_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# exclusive_accounting_ref - accounting settings for jobs that require
# exclusive access to a node.
exclusive_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

bigmem_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# Queues to use for each job type. This logic automatically
# switches between development queues on the backup machine and
# development queues on the production machine based on whether the
# /gpfs/hps2/ptmp is writable.
shared_queue: dev
service_queue: dev
exclusive_queue: dev
bigmem_queue: bigmem

# Details about the scheduler on this cluster.
scheduler_settings:
scheduler_name: LSFAlps
parallelism_name: LSFAlps
node_type: generic
physical_cores_per_node: 24
logical_cpus_per_core: 2
hyperthreading_allowed: true
indent_text: " "
memory_per_node: !calc (64*1024)

scheduler: !calc |
tools.get_scheduler(scheduler_settings.scheduler_name, scheduler_settings)
parallelism: !calc |
tools.get_parallelism(scheduler_settings.parallelism_name, scheduler_settings)
nodes: !calc |
tools.node_tool_for(scheduler_settings.node_type, scheduler_settings)
long_term_temp: !calc doc.user_places.PROJECT_DIR
short_term_temp: !calc doc.user_places.PROJECT_DIR
EXP_PARENT_DIR: !calc doc.user_places.PROJECT_DIR
7 changes: 7 additions & 0 deletions examples/imcalc/case.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
case:
resources:
run_job1: !JobRequest
- mpi_ranks: 7
max_ppn: 7
OMP_NUM_THREADS: 4
walltime: !timedelta '01:15:00'
25 changes: 25 additions & 0 deletions examples/imcalc/default_resources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Sample default resource

sample_resource_table:
# ranks ppn wallclock threads MB
job1: [ 200, 20, !timedelta "01:00:00", 12, "3072" ]
job2: [ 12, 12, !timedelta "01:00:00", 1, "3072" ]

default_resources: &default_resources

run_nothing: !JobRequest # Special placeholder for "do nothing"
- memory: "300M"
mpi_ranks: 1
walltime: !timedelta "00:02:00"

run_job1: !JobRequest
- batch_memory: !icalc doc.sample_resource_table.job1[4]
mpi_ranks: !icalc doc.sample_resource_table.job1[0]
walltime: !icalc doc.sample_resource_table.job1[2]
max_ppn: !icalc doc.sample_resource_table.job1[1]

run_job2: !JobRequest
- batch_memory: !icalc doc.sample_resource_table.job2[4]
mpi_ranks: !icalc doc.sample_resource_table.job2[0]
walltime: !icalc doc.sample_resource_table.job2[2]
max_ppn: !icalc doc.sample_resource_table.job2[1]
31 changes: 31 additions & 0 deletions examples/imcalc/imcalc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#! /usr/bin/env python3.6

## Unit test program for crow.config module
#
# Demonstrates the imcalc issue: resource settings merged via !MergeMapping
# should resolve to final values (not raw !calc strings) when written back
# to YAML.

import sys, os, shutil, collections, copy

# Run from examples/imcalc/: make the repository root importable.
sys.path.append(os.getcwd() + '/../../')

import logging
from datetime import timedelta
from crow.config import from_dir, Suite, from_file, to_yaml, evaluate_immediates, from_string
import crow.config.represent

from crow.config.eval_tools import list_eval, dict_eval
from crow.sysenv import JobResourceSpec

# Load the platform description alone and force immediate evaluation of it.
platdoc=from_file('_common.yaml','_sandbox.yaml')
platdoc.platform.Evaluate=True
evaluate_immediates(platdoc.platform)

# Start from the placeholder resources_sum file, then load the full document
# (platform + case + defaults + the placeholder).
shutil.copyfile('resources_sum_sample.yaml','resources_sum.yaml')
doc=from_file('_common.yaml','_sandbox.yaml','case.yaml','default_resources.yaml','resources_sum.yaml')

filename = 'resources_sum.yaml'

# Dump the merged resources back to resources_sum.yaml; the point of the
# exercise is that this dump should contain evaluated values, not !calc tags.
doc.writeme = { 'resources_sum': doc.partition_common.resources }
content = to_yaml({ 'resources_sum': doc.partition_common.resources })
with open(filename,'wt') as fd:
    fd.write(content)

logging.basicConfig(stream=sys.stderr,level=logging.DEBUG)
15 changes: 15 additions & 0 deletions examples/imcalc/readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
This example is the problem statement that I am experiencing:

I am trying to set up some kind of central place where all
information about resource settings (case, user, platform, default)
is merged together.  However, as shown in execution, the
variables of resource_sum are still "!calc" strings instead of
actual values.

In this example, the dump of resource_sum comes from default and case.
run_job1 is overridden by case settings and has a clear
final value, while run_job2 still has the original
!calc string.

It would be much better if final values could be parsed into all
the fields of resource_sum.
3 changes: 3 additions & 0 deletions examples/imcalc/resources_sum_sample.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# place holder for resources panel

resources_sum:
7 changes: 4 additions & 3 deletions utils/worktools.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,9 @@ def setup_case(command_line_arguments):
logger.warning('Using manual mode \n ')
YAML_FILES_TO_COPY={ '../_expdir_main_manual.yaml': '_main.yaml',
'../top.yaml':'top.yaml',
'../resources_sum.yaml':'resources_sum.yaml',
'../schema/task.yaml':'schema.yaml',
'../schema/varnames.yaml':'varnames.yaml',
'../defaults/resources.yaml':'resources.yaml',
'../defaults/default_resources.yaml':'default_resources.yaml',
'../defaults/settings.yaml':'settings.yaml',
'../defaults/places.yaml':'places.yaml',
'../config/base.yaml':'base.yaml',
Expand All @@ -685,7 +684,6 @@ def setup_case(command_line_arguments):

else:
YAML_FILES_TO_COPY={ '../_expdir_main_auto.yaml': '_main.yaml',
'../resources_sum.yaml':'resources_sum.yaml',
'../user.yaml': 'user.yaml' }
YAML_DIRS_TO_COPY={ '../schema':'schema',
'../defaults':'defaults',
Expand Down Expand Up @@ -739,6 +737,9 @@ def setup_case(command_line_arguments):
os.path.abspath('../'),YAML_DIRS_TO_COPY,YAML_FILES_TO_COPY,case_name,experiment_name,platdoc,force,
skip_comrot,force_platform_rewrite)

if '-f' not in options:
shutil.copy('../resources_sum.yaml',EXPDIR+'/resources_sum.yaml')

doc=from_dir(EXPDIR,validation_stage='setup')
suite=Suite(doc.suite)
logger.info('creating a dummy workflow in memory...')
Expand Down

0 comments on commit c85dfa5

Please sign in to comment.