Skip to content

Commit

Permalink
Rework regex grammar to disregard unused keywords (#33)
Browse files Browse the repository at this point in the history
* Disregard dfn valid attribute

* Disregard dfn keyword with type record or recarray

* Disregard untagged dfn keyword

* Revert "Disregard dfn valid attribute"

* Fix: do not set default "" value

* Filter tagged section for keywords only

* Add test cases

* Update snapshot of test data
  • Loading branch information
martclanor committed Feb 16, 2025
1 parent b8752db commit 2c4be19
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 8 deletions.
2 changes: 1 addition & 1 deletion syntaxes/mf6.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"patterns": [
{
"name": "entity.name.function.mf6",
"match": "(?i)(?<=^|\\s)(adv_scheme|afrcsv_filerecord|afrcsvfile|alh|all|alphasj|alternative_cell_averaging|alv|angldegx|angle1|angle2|angle3|angrot|area|ath1|ath2|ats6|ats6_filename|ats_filerecord|ats_outer_maximum_fraction|ats_percel|atv|auto_flow_reduce|auto_flow_reduce_csv|aux|auxdepthname|auxiliary|auxiliaryrecord|auxmultname|auxname|auxspeciesname|auxval|backtracking_number|backtracking_reduction_factor|backtracking_residual_limit|backtracking_tolerance|barea|bedk|bedleak|belev|beta|bhead|binary|bndno|bot|botm|bottom|boundname|boundnames|budget|budget_filerecord|budgetcsv|budgetcsv_filerecord|budgetcsvfile|budgetfile|bulk_density|cdelay|cell1d|cell2d|cell_averaging|cell_fraction|cellid|cellid1|cellid2|cellidm|cellidm1|cellidm2|cellidn|cellidsj|central_in_space|cfact|cg_ske_cr|cg_theta|chunk_face|chunk_time|chunk_x|chunk_y|chunk_z|cim|cim_filerecord|cimfile|cimprintrecord|cl1|cl12|cl2|claktype|cnd_xt3d_off|cnd_xt3d_rhs|coarse_compaction_filename|coarsestrain_filename|columns|compaction|compaction_coarse|compaction_coarse_filerecord|compaction_elastic|compaction_elastic_filerecord|compaction_filename|compaction_inelastic|compaction_inelastic_filerecord|compaction_interbed|compaction_interbed_filerecord|complexity|compression_indices|conc|concentration|concentration_filerecord|concentrationfile|concentrationprintrecord|concfile|cond|condeqn|connectiondata|connlen|connwidth|continue|continuous|couttype|cprior|crhoref|cross_section|cross_sectionrecord|crosssectiondata|crosssections|csv_inner_output|csv_inner_output_filerecord|csv_outer_output|csv_outer_output_filerecord|csv_output|csv_output_filerecord|csvfile|cviscref|cvoptions|decay|decay_solid|decay_sorbed|decay_water|deflate|delc|delr|denseref|density|density_filerecord|density_solid|density_water|densityfile|depth|dev_efh_formulation|dev_exit_solve_method|dev_forceternary|dev_interfacemodel_on|dev_log_mpi|dev_no_newton|dev_oldstorageformulation|dev_omega|dev_storage_weight|dev_swr_conductance|dewatered|diffc|digits|disable_storage_change_integration|distcoef|diversion|diversionrecord|diversions|divflow|drape|drhodc|dry_tracking_method|dsp_xt3d_off|dsp_xt3d_rhs|dt0|dtadj|dtfailadj|dtmax|dtmin|dviscdc|effective_stress_lag|elastic_compaction_filename|elev|eps|evaporation|exchangedata|exchanges|exgfile|exgmnamea|exgmnameb|exgtype|exit_solve_tolerance|explicit|export_array_ascii|export_array_netcdf|ext-inflow|extdp|extend_tracking|extwc|fdc|filein|fileinput|fileout|finf|first|first_order_decay|fixed_cell|fixed_conductance|flow_correction|flow_imbalance_correction|flow_package_auxiliary_name|flow_package_name|flowing_well|flowing_wellrecord|flowing_wells|flowtype|fname|format|formatrecord|fraction|frequency|fthk|ftype|fwcond|fwelev|fwrlen|gnc6|gnc6_filename|gnc_filerecord|gncdata|group_num|gwfmodelname1|gwfmodelname2|h0|ha|head|head_based|head_filerecord|head_limit|headfile|headprintrecord|heat_capacity_solid|heat_capacity_water|height|hhformulation_rhs|hk_skin|hpc6|hpc6_filename|hpc_filerecord|hroot|hwva|hydchr|iac|ic|icell1d|icell2d|icelltype|icon|iconn|iconr|iconvert|icsubno|icvert|id|id1|id2|idcxs|idomain|idv|ievt|ifno|ihc|ihdwet|inelastic_compaction_filename|infiltration|inflow|initial_preconsolidation_head|initialstage|initialstages|inner_csvfile|inner_dvclose|inner_hclose|inner_maximum|inner_rclose|interbed_compaction_filename|interbedstrain_filename|interpolation_method|interpolation_method_single|interpolation_methodrecord|interpolation_methodrecord_single|invert|iper|iperats|irch|irhospec|irptno|istopzone|iv|ivertcon|iviscspec|iwetit|izone|ja|k|k22|k22overk|k33|k33overk|ktf|kts|ktw|kv|lakein|lakeno|lakeout|lakeperioddata|laksetting|landflag|last|latent_heat_vaporization|latitude|length|length_conversion|length_units|linear_acceleration|linear_gwet|list|local_z|longitude|man|manfraction|manning|manningsn|maw_flow_reduce_csv|mawno|mawsetting|maxats|maxbound|maxerrors|maxhfb|maximum_depth_change|maximum_iterations|maximum_picard_iterations|maximum_stage_change|maxmvr|maxpackages|maxrate|maxsig0|memory_print_option|method|methods|mfname|mfrcsv_filerecord|mfrcsvfile|minrate|mixed|mname|mname1|mname2|modelname|modelnames|models|modflow6_attr_off|mover|mrank|mtype|mve6|mve6_filename|mve_filerecord|mvr6|mvr6_filename|mvr_filerecord|mvrtype|mvt6|mvt6_filename|mvt_filerecord|mweperioddata|mwesetting|mwtperioddata|mwtsetting|mxiter|name|names|nc_filerecord|nc_mesh2d_filerecord|nc_structured_filerecord|ncf6|ncf6_filename|ncf_filerecord|ncmesh2dfile|ncol|ncon|ncpl|ncstructfile|ncvert|ndelaycells|ndv|netcdf|netcdf_filename|netcdf_mesh2d|netcdf_structured|newton|newtonoptions|nexg|ngwfnodes|ninterbeds|nja|nlakeconn|nlakes|nlay|nmawwells|no_ptc|no_ptc_option|no_ptcrecord|no_well_storage|nocheck|nodes|nogrb|noutlets|nper|npoints|nreaches|nreleasepts|nreleasetimes|nrhospecies|nrow|nsections|nseg|nstp|ntables|ntracktimes|ntrailwaves|numalphaj|number|number_orthogonalizations|numgnc|nuzfcells|nvert|nviscspecies|nwavesets|nxspoints|obs6|obs6_filename|obs_filerecord|obs_output_file_name|obsname|obstype|ocsetting|outer_csvfile|outer_dvclose|outer_hclose|outer_maximum|outer_rclosebnd|outletno|outlets|output|package_convergence|package_convergence_filename|package_convergence_filerecord|packagedata|packages|partitions|pcs0|perched|perioddata|perlen|pet|petm|petm0|pname|pname1|pname2|porosity|precipitation|preconditioner_drop_tolerance|preconditioner_levels|print|print_concentration|print_flows|print_format|print_head|print_input|print_option|print_stage|print_table|print_temperature|printrecord|profile_option|pump_elevation|pxdp|q|qoutflow|qoutflow_filerecord|qoutflowfile|qoutflowprintrecord|radius|radius_skin|rainfall|rate|rate_scaling|rate_scalingrecord|rbot|rbth|rbthcnd|rclose_option|rcloserecord|reachperioddata|reachsetting|readasarrays|recharge|relaxation_factor|release_time_frequency|release_time_tolerance|releasesetting|releasetimes|reordering_method|retfactor|rewet|rewet_record|rgrd|rhk|rhs|rlen|rnb|rno|rootact|rough|rtp|rtype|runoff|rwid|sarea|save|save_flows|save_saturation|save_specific_discharge|save_velocity|saverecord|scaling_length|scaling_method|scheme|scrn_bot|scrn_top|senerrate|sfac|sfacrecord|sfacrecord_single|sfacs|sfacval|sfrsetting|sgm|sgs|shuffle|shut_off|shutdown_kappa|shutdown_theta|shutoffrecord|sig0|simulate_et|simulate_gwseep|slnfname|slnmnames|slntype|slope|smassrate|solutiongroup|sorbate|sorbate_filerecord|sorbatefile|sorption|sources|sp2|spc6_filename|spcsetting|specified_initial_delay_head|specified_initial_interbed_state|specified_initial_preconsolidation_stress|square_gwet|srctype|ss|ss_confined_only|sse_cr|ssv_cc|stage|stage_filerecord|stagefile|start_date_time|status|steady-state|steps|stop_at_weak_sink|stoptime|stoptraveltime|storage|storagecoefficient|strain_csv_coarse|strain_csv_interbed|straincg_filerecord|stress_period_data|strt|surf_rate_specified|surface|surfdep|sy|tab6|tab6_filename|table|tables|tas6|tas6_filename|tas_array|tas_filerecord|tdis6|telev|temp|temperature|temperature_filerecord|temperature_species_name|temperaturefile|temperatureprintrecord|tempfile|thermal_a2|thermal_a3|thermal_a4|thermal_formulation|theta|thick_frac|thickstrt|thti|thtr|thts|time|time_conversion|time_from_model_start|time_series_name|time_series_namerecord|time_series_names|time_units|timeseries|top|track|track_exit|track_filerecord|track_release|track_terminate|track_timestep|track_usertime|track_weaksink|trackcsv|trackcsv_filerecord|trackcsvfile|trackfile|tracktimes|transient|ts6|ts6_filename|ts_array|ts_filerecord|ts_time|tsmult|tvk6|tvk6_filename|tvk_filerecord|tvksetting|tvs6|tvs6_filename|tvs_filerecord|tvssetting|under_relaxation|under_relaxation_gamma|under_relaxation_kappa|under_relaxation_momentum|under_relaxation_theta|unit_conversion|unsat_etae|unsat_etwc|update_material_properties|upstream_fraction|ustrf|uzeperioddata|uzesetting|uzet|uzfno|uztperioddata|uztsetting|value|variablecv|vertical_offset_tolerance|vertices|viscosity|viscosity_filerecord|viscosityfile|viscref|vks|volfrac|volume|water_content|wc_filerecord|wcfile|well_head|wetdry|wetfct|width|withdrawal|wkt|xc|xfraction|xorigin|xrpt|xt3d|xt3d_off|xt3d_rhs|xt3doptions|xv|yc|yorigin|yrpt|yv|zdisplacement|zdisplacement_filename|zdisplacement_filerecord|zero_order_decay|zero_order_decay_solid|zero_order_decay_water|zetaim|zrpt)(?=\\s|$)"
"match": "(?i)(?<=^|\\s)(adv_scheme|alh|all|alternative_cell_averaging|alv|angldegx|angle1|angle2|angle3|angrot|area|ath1|ath2|ats6|ats_outer_maximum_fraction|ats_percel|atv|auto_flow_reduce|auto_flow_reduce_csv|aux|auxdepthname|auxiliary|auxmultname|backtracking_number|backtracking_reduction_factor|backtracking_residual_limit|backtracking_tolerance|bedk|beta|binary|bot|botm|bottom|boundnames|budget|budgetcsv|bulk_density|cell1d|cell2d|cell_averaging|cell_fraction|central_in_space|cg_ske_cr|cg_theta|chunk_face|chunk_time|chunk_x|chunk_y|chunk_z|cim|cl12|cnd_xt3d_off|cnd_xt3d_rhs|columns|compaction|compaction_coarse|compaction_elastic|compaction_inelastic|compaction_interbed|complexity|compression_indices|concentration|connectiondata|continue|continuous|cross_section|crosssectiondata|crosssections|csv_inner_output|csv_outer_output|csv_output|cvoptions|decay|decay_solid|decay_sorbed|decay_water|deflate|delc|delr|denseref|density|density_solid|density_water|depth|dev_efh_formulation|dev_exit_solve_method|dev_forceternary|dev_interfacemodel_on|dev_log_mpi|dev_no_newton|dev_oldstorageformulation|dev_omega|dev_storage_weight|dev_swr_conductance|dewatered|diffc|digits|disable_storage_change_integration|distcoef|diversion|diversions|drape|dry_tracking_method|dsp_xt3d_off|dsp_xt3d_rhs|effective_stress_lag|evaporation|exchangedata|exchanges|exit_solve_tolerance|explicit|export_array_ascii|export_array_netcdf|ext-inflow|extend_tracking|filein|fileinput|fileout|first|first_order_decay|fixed_cell|fixed_conductance|flow_correction|flow_imbalance_correction|flow_package_auxiliary_name|flow_package_name|flowing_well|flowing_wells|fraction|frequency|gnc6|gncdata|gwfmodelname1|gwfmodelname2|head|head_based|head_limit|heat_capacity_solid|heat_capacity_water|hhformulation_rhs|hpc6|hwva|iac|icelltype|iconvert|idcxs|idomain|ievt|ihc|ihdwet|infiltration|inflow|initial_preconsolidation_head|initialstages|inner_dvclose|inner_hclose|inner_maximum|inner_rclose|invert|irch|istopzone|iwetit|izone|ja|k|k22|k22overk|k33|k33overk|kts|ktw|lakeperioddata|last|latent_heat_vaporization|latitude|length|length_conversion|length_units|linear_acceleration|linear_gwet|list|local_z|longitude|manning|manningsn|maw_flow_reduce_csv|maxats|maxbound|maxerrors|maxhfb|maximum_depth_change|maximum_iterations|maximum_picard_iterations|maximum_stage_change|maxmvr|maxpackages|maxsig0|memory_print_option|method|methods|mixed|modelnames|models|modflow6_attr_off|mover|mve6|mvr6|mvt6|mweperioddata|mwtperioddata|mxiter|name|names|ncf6|ncol|ncpl|ndelaycells|netcdf|netcdf_mesh2d|netcdf_structured|newton|newtonoptions|nexg|ninterbeds|nja|nlakes|nlay|nmawwells|no_ptc|no_well_storage|nocheck|nodes|nogrb|noutlets|nper|npoints|nreaches|nreleasepts|nreleasetimes|nrhospecies|nrow|nsections|nseg|ntables|ntracktimes|ntrailwaves|numalphaj|number_orthogonalizations|numgnc|nuzfcells|nvert|nviscspecies|nwavesets|obs6|outer_dvclose|outer_hclose|outer_maximum|outer_rclosebnd|outlets|output|package_convergence|packagedata|packages|partitions|perched|perioddata|porosity|preconditioner_drop_tolerance|preconditioner_levels|print|print_concentration|print_flows|print_format|print_head|print_input|print_option|print_stage|print_table|print_temperature|profile_option|qoutflow|rainfall|rate|rate_scaling|reachperioddata|readasarrays|recharge|relaxation_factor|release_time_frequency|release_time_tolerance|releasetimes|reordering_method|retfactor|rewet|rhs|rough|runoff|save|save_flows|save_saturation|save_specific_discharge|save_velocity|scaling_method|scheme|sfac|sfacs|sgm|sgs|shuffle|shut_off|shutdown_kappa|shutdown_theta|simulate_et|simulate_gwseep|slope|solutiongroup|sorbate|sorption|sources|sp2|specified_initial_delay_head|specified_initial_interbed_state|specified_initial_preconsolidation_stress|square_gwet|ss|ss_confined_only|stage|start_date_time|status|steady-state|steps|stop_at_weak_sink|stoptime|stoptraveltime|storage|storagecoefficient|strain_csv_coarse|strain_csv_interbed|stress_period_data|strt|surf_rate_specified|surface|surfdep|sy|tab6|table|tables|tas6|tdis6|temperature|temperature_species_name|thermal_a2|thermal_a3|thermal_a4|thermal_formulation|thickstrt|time_conversion|time_units|timeseries|top|track|track_exit|track_release|track_terminate|track_timestep|track_usertime|track_weaksink|trackcsv|tracktimes|transient|ts6|tvk6|tvs6|under_relaxation|under_relaxation_gamma|under_relaxation_kappa|under_relaxation_momentum|under_relaxation_theta|unit_conversion|unsat_etae|unsat_etwc|update_material_properties|upstream_fraction|uzeperioddata|uzet|uztperioddata|variablecv|vertical_offset_tolerance|vertices|viscosity|viscref|volfrac|water_content|well_head|wetdry|wetfct|width|withdrawal|wkt|xorigin|xt3d|xt3d_off|xt3d_rhs|xt3doptions|yorigin|zdisplacement|zero_order_decay|zero_order_decay_solid|zero_order_decay_water|zetaim)(?=\\s|$)"
}
]
},
Expand Down
5 changes: 5 additions & 0 deletions syntaxes/samples/scratch.nam
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ drop
.drop.
drop.
drop.

BEGIN time
METHODS linear linear
IMS6
EMS6
18 changes: 16 additions & 2 deletions syntaxes/samples/scratch.nam.snap
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
> gwf6 sdfdsf#modeltype namefile modelname
#^^ source.mf6
# ^^^^ source.mf6 entity.name.function.mf6
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ source.mf6
# ^^^^^^^^^ source.mf6 entity.name.function.mf6
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ source.mf6
> gwf6 sdfdsf #modeltype namefile modelname
#^^ source.mf6
# ^^^^ source.mf6 entity.name.function.mf6
Expand Down Expand Up @@ -118,4 +117,19 @@
#^^^^^^ source.mf6
>drop.
#^^^^^^ source.mf6
>
>BEGIN time
#^^^^^ source.mf6 keyword.control.mf6
# ^ source.mf6
# ^^^^ source.mf6 entity.name.type.mf6
>METHODS linear linear
#^^^^^^^ source.mf6 entity.name.function.mf6
# ^ source.mf6
# ^^^^^^ source.mf6 entity.name.function.mf6
# ^ source.mf6
# ^^^^^^ source.mf6 entity.name.function.mf6
>IMS6
#^^^^ source.mf6 entity.name.function.mf6
>EMS6
#^^^^ source.mf6 entity.name.function.mf6
>
29 changes: 24 additions & 5 deletions utils/generate_from_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ class Line:
object."""

key: str
value: Optional[str] = None
value: Optional[str | bool] = None

@classmethod
def from_file(cls, data: str) -> "Line":
if "tagged" in data:
return cls("tagged", "true" in data)
return cls(*data.split(maxsplit=1))


Expand All @@ -49,20 +51,24 @@ class Section:
block: str
data_type: Optional[str] = None
valid: Optional[tuple[str, ...]] = None
tagged: bool = True

@classmethod
def from_file(cls, data: str) -> "Section":
lines = (
Line.from_file(line)
for line in data.split("\n")
if any(line.startswith(s) for s in {"block", "name", "type", "valid"})
if any(
line.startswith(s) for s in {"block", "name", "type", "valid", "tagged"}
)
)
line_dict: dict[str, str] = {line.key: (line.value or "") for line in lines}
line_dict: dict[str, str] = {line.key: line.value for line in lines}
return cls(
block=line_dict.get("block", ""),
keyword=line_dict.get("name", ""),
data_type=line_dict.get("type", None),
valid=None if (x := line_dict.get("valid")) is None else tuple(x.split()),
tagged=line_dict.get("tagged", True),
)


Expand All @@ -83,15 +89,28 @@ def data(self) -> tuple[str, ...]:

@property
def sections(self) -> tuple[Section, ...]:
return tuple(Section.from_file(x) for x in self.data if x.startswith("block"))
sections = []
for data in self.data:
if not data.startswith("block"):
continue
section = Section.from_file(data)
if "record" in section.keyword or "recarray" in section.keyword:
continue
sections.append(section)
return tuple(sections)

@property
def blocks(self) -> set[str]:
return {p.block for p in self.sections}

@property
def keywords(self) -> set[str]:
return {p.keyword for p in self.sections}
keywords = set()
for section in self.sections:
if not section.tagged:
continue
keywords.add(section.keyword)
return keywords

@property
def valids(self) -> set[tuple[str, ...]]:
Expand Down

0 comments on commit 2c4be19

Please sign in to comment.