Skip to content

Commit

Permalink
Feature/imcalc (#7)
Browse files Browse the repository at this point in the history
* demonstration of imcalc issue

* remove final product

* Suggested fix for imcalc issue: a new function that makes a non-crow copy of crow objects in an entire object tree.  The resulting python object tree can be written via to_yaml without any !tags

* Bug fix in JobRankSpec: raise AttributeError in __getattr__ instead of KeyError

* Add an !icalc type which is just a shortcut for !Immediate [ !calc "..." ]

* remove some debugging junk

* add yaml representers for dict_eval and list_eval

* use !icalc in the imcalc test

* adjust worktools.py for resource tuning
  • Loading branch information
JianKuang-Intelsat authored Oct 23, 2019
1 parent f55588a commit c85dfa5
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 11 deletions.
15 changes: 10 additions & 5 deletions crow/config/eval_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ def _result(self,globals,locals):
CALC_CACHE[self]=obj
return eval(obj,c,locals)

class stricalc(strcalc):
    """An immediately-evaluated calc string (the !icalc YAML tag): like
    strcalc, a string run through eval(), but flagged via _is_immediate so
    it is evaluated at load time — a shortcut for !Immediate [ !calc "..." ]."""
    def _is_immediate(self): return True

class strref(str):
"""Represents a reference to a variable within some scope (ie. abc.def[32].ghi)"""
def __repr__(self):
Expand Down Expand Up @@ -530,12 +534,13 @@ def invalidate_cache(obj,key=None,recurse=False):

def evaluate_one(obj,key,val,memo):
    """Evaluate one key of a config container during the immediate pass.

    If ``val`` is flagged immediate (has ``_is_immediate``), reading
    ``obj[key]`` forces its evaluation.  Then, when a ``memo`` set is in
    use, recurse into values that either opt in via
    ``_recurse_evaluate_immediates`` or are plain containers (no
    ``_result``, i.e. not calc-like expressions).

    NOTE(review): this span was reconstructed from a scraped diff that
    interleaved pre- and post-change lines; verify against the repository.
    """
    if hasattr(val,'_is_immediate'):
        _ = obj[key]  # reading the key triggers evaluation of the immediate value
    if memo is not None and (
            hasattr(val,'_recurse_evaluate_immediates') or
            not hasattr(val,'_result') ):
        evaluate_immediates_impl(val,memo)

def evaluate_immediates_impl(obj,memo=None):
if memo is not None:
Expand Down
1 change: 1 addition & 0 deletions crow/config/from_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def constructor(loader,node):

add_yaml_string(u'!expand',expand)
add_yaml_string(u'!calc',calc)
add_yaml_string(u'!icalc',icalc)
add_yaml_string(u'!ref',ref)
add_yaml_string(u'!error',user_error_message)
add_yaml_string(u'!Depend',Depend)
Expand Down
6 changes: 4 additions & 2 deletions crow/config/represent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datetime import timedelta
from copy import deepcopy
from crow.config.exceptions import *
from crow.config.eval_tools import list_eval, dict_eval, multidict, from_config, strcalc, strref
from crow.config.eval_tools import list_eval, dict_eval, multidict, from_config, strcalc, strref, stricalc
from crow.tools import to_timedelta, Clock
from copy import copy
import crow.sysenv
Expand All @@ -18,7 +18,7 @@
'FirstMax', 'LastTrue', 'FirstTrue', 'GenericList',
'GenericDict', 'GenericOrderedDict', 'ShellCommand',
'Immediate', 'ClockMaker', 'JobResourceSpecMaker',
'MergeMapping', 'AppendSequence', 'Select' ]
'MergeMapping', 'AppendSequence', 'Select', 'icalc' ]

########################################################################

Expand All @@ -32,6 +32,7 @@ class Platform(dict_eval): pass
class ShellCommand(dict_eval): pass

class JobResourceSpecMaker(list_eval):
def _recurse_evaluate_immediates(self): return True
def _result(self,globals,locals):
rank_specs=list()
for i in range(len(self)):
Expand Down Expand Up @@ -211,4 +212,5 @@ def _index(self,lst):
return None

class calc(strcalc): pass    # backs the !calc YAML tag: a string eval()'d on demand
class icalc(stricalc): pass  # backs the !icalc YAML tag: shorthand for !Immediate [ !calc "..." ]
class ref(strref): pass      # backs the !ref YAML tag: a reference to a variable in some scope
2 changes: 2 additions & 0 deletions crow/config/to_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def representer(dumper,data):
# Register YAML representers for the sequence-like config types so they can
# be dumped back to YAML.
add_yaml_list_eval(u'!MergeMapping',MergeMapping)
add_yaml_list_eval(u'!AppendSequence',AppendSequence)
add_yaml_list_eval(None,GenericList)  # tag None: dumped as a plain, untagged sequence
add_yaml_list_eval(None,list_eval)    # lets bare list_eval objects be written without a !tag

########################################################################

Expand All @@ -68,6 +69,7 @@ def representer(dumper,data):
yaml.add_representer(cls,representer)

# Register YAML representers for the mapping-like config types.
add_yaml_dict_eval(None,GenericDict)  # tag None: dumped as a plain, untagged mapping
add_yaml_dict_eval(None,dict_eval)    # lets bare dict_eval objects be written without a !tag
add_yaml_dict_eval(u'!Platform',Platform)
add_yaml_dict_eval(u'!Select',Select)
add_yaml_dict_eval(u'!Action',Action)
Expand Down
5 changes: 4 additions & 1 deletion crow/sysenv/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def new_with(self,*args,**kwargs):
return JobRankSpec(**newspec)

# Nicities
def __getattr__(self,key): return self[key]
def __getattr__(self,key):
    """Expose spec entries as attributes, so spec.x works like spec["x"].

    Unknown keys raise AttributeError rather than KeyError, as the
    attribute protocol requires (keeps hasattr() and friends correct).
    """
    if key not in self:
        raise AttributeError(key)
    return self[key]

# Implement Mapping abstract methods:
def __getitem__(self,key): return self.__spec[key]
Expand Down
13 changes: 13 additions & 0 deletions examples/imcalc/_common.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
platform_common: &global_platform_common
Evaluate: False
default_resources: {}

partition_common: &global_partition_common
Evaluate: False
resources: !MergeMapping
- !calc doc.default_resources
- !calc doc.case.get('resources',{})
- !calc doc.resources_sum

default_resources: {}

137 changes: 137 additions & 0 deletions examples/imcalc/_sandbox.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# This file configures the workflow to run on the cray parts of WCOSS

platform: !Platform
<<: *global_platform_common

# Evaluate: this must be "false" to ensure disk space availability logic
# is not run unless this file is for the current platform.
Evaluate: false

# name: the name of this platform; this must match what the underlying
# scripts expect.
name: sandbox

# detect: this is a function that returns true iff the user is on GAEA
# and false otherwise
detect: True

# skip_if_others_present: if this is true, and at least one other
# platform is detected with this flag set to false, then skip this
# platform
skip_if_others_present: true

# public_release_ics: location of input conditions that have been
# prepared for the public release.
public_release_ics: /gpfs/hps3/emc/global/noscrub/emc.glopara/FV3GFS_V1_RELEASE/ICs

# CHGRP_RSTPROD_COMMAND - this specifies the command to use to
# restrict access to NOAA "rstprod" data restriction class.
# This only used for observation processing, data assimilation, and
# data assimilation archiving, which are not in the public release.
CHGRP_RSTPROD_COMMAND: "chgrp rstprod"

# NWPROD - location of the NCEP operational "nwprod" directory, which
# only has meaning on the NCEP WCOSS machines. It is used to get
# the paths to certain programs and scripts.
NWPROD: "/gpfs/hps/nco/ops/nwprod"

# DMPDIR - location of the global dump data. This is used by the observation
# processing scripts, which are not included in the public release.
DMPDIR: !calc doc.user_places.PROJECT_DIR
#"/Users/jiankuang/Documents/Eclipse_workspace/ecfutils_007"
#DMPDIR: !FirstTrue
# - do: "/gpfs/gp1/emc/globaldump"
# when: !calc tools.isdir(do)
# - do: "/gpfs/tp1/emc/globaldump"
# when: !calc tools.isdir(do)
# - otherwise: !error "Cannot find globaldump directory."

# RTMFIX - location of the CRTM fixed data files used by the GSI data
# assimilation. The data assimilation is not included in this public release
# so this path is unused.
RTMFIX: "$CRTM_FIX"

# BASE_SVN - a directory maintained by emc global model developers
# that contains recent versions of source code and executables for
# various subversion repositories. This is used on some platforms
# to find executables for this workflow.
BASE_SVN: "/gpfs/hps3/emc/global/noscrub/emc.glopara/svn"

# BASE_GIT - a directory maintained by emc global model developers
# that contains recent versions of source code and executables for
# various git repositories. This is used on some platforms to find
# executables for this workflow.
BASE_GIT: "/gpfs/hps3/emc/global/noscrub/emc.glopara/git"

# config_base_extras - Additional configuration data to put in the
# config.base file
config_base_extras: "sandbox"
#config_base_extras: |
# if [ -d /gpfs/tp1 ]; then
# export SITE="LUNA"
# elif [ -d /gpfs/gp1 ]; then
# export SITE="SURGE"
# fi

metasched_more: {}

partitions:
Evaluate: false
default_exclusive: !calc doc.platform.partitions.sandbox
default_service: !calc doc.platform.partitions.sandbox
default_shared: !calc doc.platform.partitions.sandbox
default_bigmem: !calc doc.platform.partitions.sandbox
sandbox:
<<: *global_partition_common

# specification - string to specify to the batch system to
# request this partition. Not relevant for WCOSS Cray
specification: null

# shared_accounting_ref - accounting settings for shared jobs
shared_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# service_accounting_ref - accounting settings for service jobs (jobs
# that require tape or network access)
service_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# exclusive_accounting_ref - accounting settings for jobs that require
# exclusive access to a node.
exclusive_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

bigmem_accounting_ref:
project: !calc metasched.varref(doc.accounting.cpu_project)

# Queues to use for each job type. This logic automatically
# switches between development queues on the backup machine and
# development queues on the production machine based on whether the
# /gpfs/hps2/ptmp is writable.
shared_queue: dev
service_queue: dev
exclusive_queue: dev
bigmem_queue: bigmem

# Details about the scheduler on this cluster.
scheduler_settings:
scheduler_name: LSFAlps
parallelism_name: LSFAlps
node_type: generic
physical_cores_per_node: 24
logical_cpus_per_core: 2
hyperthreading_allowed: true
indent_text: " "
memory_per_node: !calc (64*1024)

scheduler: !calc |
tools.get_scheduler(scheduler_settings.scheduler_name, scheduler_settings)
parallelism: !calc |
tools.get_parallelism(scheduler_settings.parallelism_name, scheduler_settings)
nodes: !calc |
tools.node_tool_for(scheduler_settings.node_type, scheduler_settings)
long_term_temp: !calc doc.user_places.PROJECT_DIR
short_term_temp: !calc doc.user_places.PROJECT_DIR
EXP_PARENT_DIR: !calc doc.user_places.PROJECT_DIR
7 changes: 7 additions & 0 deletions examples/imcalc/case.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
case:
resources:
run_job1: !JobRequest
- mpi_ranks: 7
max_ppn: 7
OMP_NUM_THREADS: 4
walltime: !timedelta '01:15:00'
25 changes: 25 additions & 0 deletions examples/imcalc/default_resources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Sample default resource

sample_resource_table:
# ranks ppn wallclock threads MB
job1: [ 200, 20, !timedelta "01:00:00", 12, "3072" ]
job2: [ 12, 12, !timedelta "01:00:00", 1, "3072" ]

default_resources: &default_resources

run_nothing: !JobRequest # Special placeholder for "do nothing"
- memory: "300M"
mpi_ranks: 1
walltime: !timedelta "00:02:00"

run_job1: !JobRequest
- batch_memory: !icalc doc.sample_resource_table.job1[4]
mpi_ranks: !icalc doc.sample_resource_table.job1[0]
walltime: !icalc doc.sample_resource_table.job1[2]
max_ppn: !icalc doc.sample_resource_table.job1[1]

run_job2: !JobRequest
- batch_memory: !icalc doc.sample_resource_table.job2[4]
mpi_ranks: !icalc doc.sample_resource_table.job2[0]
walltime: !icalc doc.sample_resource_table.job2[2]
max_ppn: !icalc doc.sample_resource_table.job2[1]
31 changes: 31 additions & 0 deletions examples/imcalc/imcalc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#! /usr/bin/env python3.6

## Unit test program for crow.config module
#
# Demonstrates the imcalc issue: resource settings merged via !MergeMapping
# should resolve to final values (not raw !calc strings) when written back
# to YAML.

import sys, os, shutil, collections, copy

# Run from examples/imcalc/: make the repository root importable.
sys.path.append(os.getcwd() + '/../../')

import logging
from datetime import timedelta
from crow.config import from_dir, Suite, from_file, to_yaml, evaluate_immediates, from_string
import crow.config.represent

from crow.config.eval_tools import list_eval, dict_eval
from crow.sysenv import JobResourceSpec

# Load the platform description alone and force immediate evaluation of it.
platdoc=from_file('_common.yaml','_sandbox.yaml')
platdoc.platform.Evaluate=True
evaluate_immediates(platdoc.platform)

# Start from the placeholder resources_sum file, then load the full document
# (platform + case + defaults + the placeholder).
shutil.copyfile('resources_sum_sample.yaml','resources_sum.yaml')
doc=from_file('_common.yaml','_sandbox.yaml','case.yaml','default_resources.yaml','resources_sum.yaml')

filename = 'resources_sum.yaml'

# Dump the merged resources back to resources_sum.yaml; the point of the
# exercise is that this dump should contain evaluated values, not !calc tags.
doc.writeme = { 'resources_sum': doc.partition_common.resources }
content = to_yaml({ 'resources_sum': doc.partition_common.resources })
with open(filename,'wt') as fd:
    fd.write(content)

logging.basicConfig(stream=sys.stderr,level=logging.DEBUG)
15 changes: 15 additions & 0 deletions examples/imcalc/readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
This example is the problem statement that I am experiencing:

I am trying to set up some kind of central place where all
information about resource settings (case, user, platform, default)
is merged together.  However, as shown in execution, the
variables of resource_sum are still "!calc" strings instead of
actual values.

In this example, the dump of resource_sum comes from default and case.
run_job1 is overridden by case settings and has a clear
final value, while run_job2 still has the original
!calc string.

It would be much better if final values could be parsed into all
the fields of resource_sum.
3 changes: 3 additions & 0 deletions examples/imcalc/resources_sum_sample.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# place holder for resources panel

resources_sum:
7 changes: 4 additions & 3 deletions utils/worktools.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,9 @@ def setup_case(command_line_arguments):
logger.warning('Using manual mode \n ')
YAML_FILES_TO_COPY={ '../_expdir_main_manual.yaml': '_main.yaml',
'../top.yaml':'top.yaml',
'../resources_sum.yaml':'resources_sum.yaml',
'../schema/task.yaml':'schema.yaml',
'../schema/varnames.yaml':'varnames.yaml',
'../defaults/resources.yaml':'resources.yaml',
'../defaults/default_resources.yaml':'default_resources.yaml',
'../defaults/settings.yaml':'settings.yaml',
'../defaults/places.yaml':'places.yaml',
'../config/base.yaml':'base.yaml',
Expand All @@ -685,7 +684,6 @@ def setup_case(command_line_arguments):

else:
YAML_FILES_TO_COPY={ '../_expdir_main_auto.yaml': '_main.yaml',
'../resources_sum.yaml':'resources_sum.yaml',
'../user.yaml': 'user.yaml' }
YAML_DIRS_TO_COPY={ '../schema':'schema',
'../defaults':'defaults',
Expand Down Expand Up @@ -739,6 +737,9 @@ def setup_case(command_line_arguments):
os.path.abspath('../'),YAML_DIRS_TO_COPY,YAML_FILES_TO_COPY,case_name,experiment_name,platdoc,force,
skip_comrot,force_platform_rewrite)

if '-f' not in options:
shutil.copy('../resources_sum.yaml',EXPDIR+'/resources_sum.yaml')

doc=from_dir(EXPDIR,validation_stage='setup')
suite=Suite(doc.suite)
logger.info('creating a dummy workflow in memory...')
Expand Down

0 comments on commit c85dfa5

Please sign in to comment.