Skip to content

Commit

Permalink
Merge pull request #221 from podaac/release/2.7.0
Browse files Browse the repository at this point in the history
Release/2.7.0
  • Loading branch information
jamesfwood authored Dec 5, 2023
2 parents 8fab689 + a91d4cc commit a23e7e9
Show file tree
Hide file tree
Showing 13 changed files with 1,159 additions and 1,716 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ jobs:
runs-on: ubuntu-latest
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: '3.10'
- name: Install Poetry
uses: abatilo/[email protected]
with:
Expand Down Expand Up @@ -124,7 +124,7 @@ jobs:
-Dsonar.projectVersion=${{ env.software_version }}
-Dsonar.python.version=3.8,3.9,3.10
- name: Run Snyk as a blocking step
uses: snyk/actions/python-3.8@master
uses: snyk/actions/python-3.10@master
env:
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
with:
Expand All @@ -135,7 +135,7 @@ jobs:
--severity-threshold=high
--fail-on=all
- name: Run Snyk on Python
uses: snyk/actions/python-3.8@master
uses: snyk/actions/python-3.10@master
env:
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
with:
Expand Down Expand Up @@ -218,7 +218,7 @@ jobs:
- name: Build Python Artifact
run: |
poetry build
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v3
with:
name: python-artifact
path: dist/*
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/release-created.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ jobs:
${{ startsWith(github.ref, 'refs/heads/release/') }}
steps:
# Checks-out the develop branch
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
ref: 'refs/heads/develop'
- uses: actions/setup-python@v2
- uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: '3.10'
- name: Install Poetry
uses: abatilo/[email protected]
with:
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security


## [2.7.0]
### Added
### Changed
- [pull/216](https://github.com/podaac/l2ss-py/pull/216): Updated all python to 3.10 and updated all dependency versions
### Deprecated
### Removed
### Fixed
- [issue/209](https://github.com/podaac/l2ss-py/issues/209): Update code so dims are created if they don't already exist.
- Update way we modify shapefile from 180 to 360 lon lats.
- [issue/205](https://github.com/podaac/l2ss-py/issues/205): Add coding capability for when groups have same dimension names but different values. Xarray rename dims is utilized
- [issue/220](https://github.com/podaac/l2ss-py/issues/220): Check that the time variables being found haven't already been found. Add time_vars as an extra argument in compute_time_variable_name
### Security

## [2.6.0]
### Added
### Changed
Expand Down
2 changes: 1 addition & 1 deletion cmr/cmr.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# such information to foreign countries or providing access to foreign
# persons.

FROM python:3.8-slim
FROM python:3.10-slim

## Create a new user
RUN adduser --quiet --disabled-password --shell /bin/sh --home /home/dockeruser --gecos "" --uid 300 dockeruser
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# such information to foreign countries or providing access to foreign
# persons.

FROM python:3.9-slim
FROM python:3.10-slim

RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get upgrade -y \
Expand Down
35 changes: 35 additions & 0 deletions podaac/subsetter/dimension_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,38 @@ def sync_dims_inplace(original_dataset: xr.Dataset, new_dataset: xr.Dataset) ->
for new_dim in new_variable_dims:
if new_dim not in original_variable_dims:
new_dataset[variable_name] = new_dataset[variable_name].isel({new_dim: 0})


def recreate_pixcore_dimensions(datasets: list):
    """
    Ensure dimension names are unique across a list of datasets.

    After subsetting, two datasets may share a dimension name while the
    dimension sizes differ. Any dimension whose size conflicts with the
    size first recorded for that name is renamed to ``<name>_<index>``
    (``index`` being the dataset's position in the list) using xarray's
    ``rename_dims``.

    Parameters
    ----------
    datasets : list
        List of xarray datasets to reconcile. The list is modified in
        place: conflicting entries are replaced with renamed copies.

    Returns
    -------
    list
        The same list object, with conflicting datasets renamed.
    """
    seen_sizes = {}
    for index, dataset in enumerate(datasets):
        rename_map = {}
        for dim_name, dim_size in dataset.dims.items():
            if dim_name not in seen_sizes:
                # First occurrence wins; later conflicts rename themselves.
                seen_sizes[dim_name] = dim_size
            elif seen_sizes[dim_name] != dim_size:
                # Same name, different size -> give this dataset its own name.
                rename_map[dim_name] = dim_name + '_' + str(index)

        if rename_map:
            # xarray rename_dims takes a dict of old names to new names.
            datasets[index] = dataset.rename_dims(rename_map)

    return datasets
11 changes: 7 additions & 4 deletions podaac/subsetter/group_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
"""

base_dataset = nc.Dataset(output_file, mode='w')

for dataset in datasets:
group_lst = []
for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group
Expand All @@ -123,7 +122,8 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
for dim_name in list(dataset.dims.keys()):
new_dim_name = dim_name.split(GROUP_DELIM)[-1]
dim_group = _get_nested_group(base_dataset, dim_name)
dim_group.createDimension(new_dim_name, dataset.dims[dim_name])
if new_dim_name not in dim_group.dimensions:
dim_group.createDimension(new_dim_name, dataset.dims[dim_name])

# Rename variables
_rename_variables(dataset, base_dataset, start_date, time_vars)
Expand Down Expand Up @@ -180,7 +180,8 @@ def _rename_variables(dataset: xr.Dataset, base_dataset: nc.Dataset, start_date,
comp_args = {"zlib": True, "complevel": 1}

var_data = variable.data
if variable.dtype == object:

if variable.dtype in [object, '|S27']:
comp_args = {"zlib": False, "complevel": 1}
var_group.createVariable(new_var_name, 'S4', var_dims, fill_value=fill_value, **comp_args)
var_data = np.array(variable.data)
Expand All @@ -189,14 +190,16 @@ def _rename_variables(dataset: xr.Dataset, base_dataset: nc.Dataset, start_date,
elif variable.dtype in ['|S1', '|S2']:
var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value)
else:
if np.issubdtype(variable.dtype, np.unicode_):
comp_args["zlib"] = False
var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value, **comp_args)

# Copy attributes
var_group.variables[new_var_name].setncatts(var_attrs)

# Copy data
var_group.variables[new_var_name].set_auto_maskandscale(False)
if variable.dtype in ['|S1', '|S2']:
if variable.dtype in ['|S1', '|S2', '|S27'] or np.issubdtype(variable.dtype, np.unicode_):
var_group.variables[new_var_name][:] = variable.values
else:
var_group.variables[new_var_name][:] = var_data
Expand Down
83 changes: 63 additions & 20 deletions podaac/subsetter/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import pandas as pd
import xarray as xr
import xarray.coding.times
from shapely.geometry import Point
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import transform

from podaac.subsetter import gpm_cleanup as gc
Expand Down Expand Up @@ -487,13 +487,14 @@ def get_spatial_bounds(dataset: xr.Dataset, lat_var_names: str, lon_var_names: s
return np.array([[min_lon, max_lon], [min_lat, max_lat]])


def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str:
def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_time_vars: list) -> str:
"""
Try to determine the name of the 'time' variable. This is done as
follows:
- The variable name contains 'time'
- The variable dimensions match the dimensions of the given lat var
- The variable that hasn't already been found
Parameters
----------
Expand All @@ -512,7 +513,6 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str
ValueError
If the time variable could not be determined
"""

time_vars = find_matching_coords(dataset, ['time'])
if time_vars:
# There should only be one time var match (this is called once
Expand All @@ -523,26 +523,26 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str
time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))

for var_name in time_vars:
if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name
for var_name in list(dataset.data_vars.keys()):
if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name

# first check if any variables are named 'time'
for var_name in list(dataset.data_vars.keys()):
var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
if len(dataset[var_name].squeeze().dims) == 0:
continue
if ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
return var_name

# then check if any variables have 'time' in the string if the above loop doesn't return anything
for var_name in list(dataset.data_vars.keys()):
var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
if len(dataset[var_name].squeeze().dims) == 0:
continue
if 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
if var_name not in total_time_vars and 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
return var_name

raise ValueError('Unable to determine time variable')
Expand All @@ -559,6 +559,53 @@ def compute_utc_name(dataset: xr.Dataset) -> Union[str, None]:
return None


def translate_longitude(geometry):
    """
    Translate the longitude values of a Shapely geometry from the
    range [-180, 180) to [0, 360).

    Parameters
    ----------
    geometry : shapely.geometry.base.BaseGeometry
        The input shape geometry to be translated

    Returns
    -------
    geometry
        The translated shape geometry
    """

    def _shift(lon):
        # Move a single longitude into the [0, 360) range.
        return (lon + 360) % 360

    def _shift_ring(coords):
        # Translate every vertex of a coordinate sequence, preserving an
        # optional z value. Unrecognized tuple widths pass through as-is.
        width = len(coords[0])
        if width == 2:
            return [(_shift(x), y) for x, y in coords]
        if width == 3:
            return [(_shift(x), y, z) for x, y, z in coords]
        return coords

    def _shift_polygon(polygon):
        shell = _shift_ring(polygon.exterior.coords)
        holes = [_shift_ring(hole.coords) for hole in polygon.interiors]
        return Polygon(shell, holes)

    if isinstance(geometry, Point):
        # NOTE(review): a z value on the point is dropped here — confirm
        # callers only pass 2D points.
        return Point(_shift(geometry.x), geometry.y)
    if isinstance(geometry, Polygon):
        return _shift_polygon(geometry)
    if isinstance(geometry, MultiPolygon):
        # Translate each member polygon recursively.
        return MultiPolygon([translate_longitude(part) for part in geometry.geoms])
    # Other geometry types are returned untouched.
    return geometry


def get_time_epoch_var(dataset: xr.Dataset, time_var_name: str) -> str:
"""
Get the name of the epoch time var. This is only needed in the case
Expand Down Expand Up @@ -915,8 +962,8 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches
total_list.extend(group_vars)
if diffs == -1:
return datasets

return datasets
dim_cleaned_datasets = dc.recreate_pixcore_dimensions(datasets)
return dim_cleaned_datasets


def subset_with_shapefile(dataset: xr.Dataset,
Expand Down Expand Up @@ -959,12 +1006,7 @@ def subset_with_shapefile(dataset: xr.Dataset,
# assumption that the shapefile is -180,180.
if is_360(dataset[lon_var_name], lon_scale, lon_offset):
# Transform
def convert_180_to_360(lon, lat):
return tuple(map(lambda value: value + 360 if value < 0 else value, lon)), lat

geometries = [transform(convert_180_to_360, geometry) for geometry in
shapefile_df.geometry]
shapefile_df.geometry = geometries
shapefile_df.geometry = shapefile_df['geometry'].apply(translate_longitude)

# Mask and scale shapefile
def scale(lon, lat):
Expand Down Expand Up @@ -1018,11 +1060,12 @@ def get_coordinate_variable_names(dataset: xr.Dataset,
if not lat_var_names or not lon_var_names:
lat_var_names, lon_var_names = compute_coordinate_variable_names(dataset)
if not time_var_names:
time_var_names = [
compute_time_variable_name(
dataset, dataset[lat_var_name]
) for lat_var_name in lat_var_names
]
time_var_names = []
for lat_var_name in lat_var_names:
time_var_names.append(compute_time_variable_name(dataset,
dataset[lat_var_name],
time_var_names))

time_var_names.append(compute_utc_name(dataset))
time_var_names = [x for x in time_var_names if x is not None] # remove Nones and any duplicates

Expand Down
Loading

0 comments on commit a23e7e9

Please sign in to comment.