From b13fd6c6c8449e475ae7e13448c9ad193a66d88f Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 14 Aug 2024 12:27:27 -0400 Subject: [PATCH 1/2] starting to include realm in cf search --- catalogbuilder/intakebuilder/getinfo.py | 10 +++++++--- catalogbuilder/scripts/gen_intake_gfdl.py | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index 2f94254..94553e0 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -242,7 +242,7 @@ def getInfoFromGlobalAtts(fname,dictInfo,filexra=None): dictInfo["frequency"] = frequency return dictInfo -def getStandardName(list_variable_id): +def getStandardName(list_variable_id,list_realm): ''' Returns dict standard name for the variable in question ''' @@ -251,12 +251,16 @@ def getStandardName(list_variable_id): try: url = "https://raw.githubusercontent.com/NOAA-GFDL/MDTF-diagnostics/b5e7916c203f3ba0b53e9e40fb8dc78ecc2cf5c3/data/gfdl-cmor-tables/gfdl_to_cmip5_vars.csv" df = pd.read_csv(url, sep=",", header=0,index_col=False) + + print(df) except IOError: print("Unable to open file") sys.exit(1) #search for variable and its cf name for variable_id in list_variable_id: - cfname = (df[df['GFDL_varname'] == variable_id]["standard_name"]) + for realm in list_realm: + cfname = df[(df['GFDL_varname'] == variable_id) & (df['modeling_realm'] == realm)]["standard_name"] + #cfname = (df[df['GFDL_varname'] == variable_id]["standard_name"]) #print(cfname,variable_id) list_cfname = cfname.tolist() if(len(list_cfname) == 0): @@ -266,5 +270,5 @@ def getStandardName(list_variable_id): #print(list_cfname) if len(list_cfname) > 0: unique_cf = list(set(list_cfname))[0] - dictCF[variable_id] = unique_cf + dictCF[variable_id,realm] = unique_cf return (dictCF) diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py index 16cdb53..e28a882 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl.py +++ b/catalogbuilder/scripts/gen_intake_gfdl.py @@ -100,8 +100,9 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm= #If we badly need standard name, we use gfdl cmip mapping tables especially when one does not prefer the slow option. Useful for MDTF runs df = pd.read_csv(os.path.abspath(csv_path), sep=",", header=0,index_col=False) list_variable_id = [] - list_variable_id = df["variable_id"].tolist() - dictVarCF = getinfo.getStandardName(list_variable_id) + list_variable_id = df["variable_id"].unique().tolist() + list_realm = df["realm"].unique().tolist() + dictVarCF = getinfo.getStandardName(list_variable_id,list_realm) #print("standard name from look-up table-", dictVarCF) for k, v in dictVarCF.items(): #if(df['variable_id'].eq(k)).any(): From 509aae9cedbf78ce06fdf3dc94a7b87e8597b6ef Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 14 Aug 2024 15:52:46 -0400 Subject: [PATCH 2/2] seems to work now , cf and realm --- catalogbuilder/intakebuilder/getinfo.py | 8 ++++---- catalogbuilder/scripts/gen_intake_gfdl.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index 94553e0..34a5e55 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -251,15 +251,13 @@ def getStandardName(list_variable_id,list_realm): try: url = "https://raw.githubusercontent.com/NOAA-GFDL/MDTF-diagnostics/b5e7916c203f3ba0b53e9e40fb8dc78ecc2cf5c3/data/gfdl-cmor-tables/gfdl_to_cmip5_vars.csv" df = pd.read_csv(url, sep=",", header=0,index_col=False) - - print(df) except IOError: print("Unable to open file") sys.exit(1) #search for variable and its cf name for variable_id in list_variable_id: for realm in list_realm: - cfname = df[(df['GFDL_varname'] == variable_id) & (df['modeling_realm'] == realm)]["standard_name"] + cfname = df[(df['GFDL_varname'] == variable_id) & (realm in df['modeling_realm'])]["standard_name"] #cfname = (df[df['GFDL_varname'] == variable_id]["standard_name"]) #print(cfname,variable_id) list_cfname = cfname.tolist() @@ -270,5 +268,7 @@ def getStandardName(list_variable_id,list_realm): #print(list_cfname) if len(list_cfname) > 0: unique_cf = list(set(list_cfname))[0] - dictCF[variable_id,realm] = unique_cf + varrealm = "{0},{1}".format(variable_id,realm) + dictCF[varrealm] = unique_cf + #print(varrealm,unique_cf) return (dictCF) diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py index e28a882..b8213a2 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl.py +++ b/catalogbuilder/scripts/gen_intake_gfdl.py @@ -105,9 +105,17 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm= dictVarCF = getinfo.getStandardName(list_variable_id,list_realm) #print("standard name from look-up table-", dictVarCF) for k, v in dictVarCF.items(): - #if(df['variable_id'].eq(k)).any(): - df['standard_name'].loc[(df['variable_id'] == k)] = v - #df['standard_name'] = v + try: + var = k.split(",")[0] + except ValueError: + continue + try: + realm = k.split(",")[1] + except ValueError: + continue + if(var is not None) & (realm is not None): + df['standard_name'].loc[(df['variable_id'] == var) & (df['realm'] == realm) ] = v + #df['standard_name'].loc[(df['variable_id'] == k)] = v if(slow == False) & ('standard_name' in headers): if ((df is not None) & (len(df) != 0) ): with open(csv_path, 'w') as csvfile: