Skip to content

Commit

Permalink
Merge pull request #71 from umr-lops/bugfix_corrupted_input
Browse files Browse the repository at this point in the history
Bugfix corrupted input
  • Loading branch information
agrouaze authored Mar 11, 2024
2 parents 2292e1b + c688d9c commit 6387024
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 34 deletions.
16 changes: 8 additions & 8 deletions cdsodatacli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,10 @@ def add_missing_cdse_hash_ids_in_listing(listing_path):
-------
"""
res = pd.DataFrame({"id": [], "safename": []})
df_raw = pd.read_csv(listing_path, names=["safenames"])
df_raw = df_raw[df_raw['safenames'].str.contains('.SAFE')]
list_safe_a = df_raw["safenames"].values

delta = datetime.timedelta(seconds=1)
gdf = gpd.GeoDataFrame(
{
Expand All @@ -535,13 +536,12 @@ def add_missing_cdse_hash_ids_in_listing(listing_path):
}
)
sea_min_pct = 0
collected_data_norm = fetch_data(gdf, min_sea_percent=sea_min_pct)
if collected_data_norm is None:
res = pd.DataFrame({"id": [], "safename": []})
else:
res = collected_data_norm[["Id", "Name"]]
res.rename(columns={"Name": "safename"},inplace=True)
res.rename(columns={"Id": "id"},inplace=True)
if len(gdf['geometry'])>0:
collected_data_norm = fetch_data(gdf, min_sea_percent=sea_min_pct)
if not collected_data_norm is None:
res = collected_data_norm[["Id", "Name"]]
res.rename(columns={"Name": "safename"},inplace=True)
res.rename(columns={"Id": "id"},inplace=True)
return res


Expand Down
19 changes: 14 additions & 5 deletions cdsodatacli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,26 @@ def WhichArchiveDir(safe):
Args:
safe (str): safe base name
"""

firstdate = safe[17:25]
logging.debug('safe: %s',safe)
if 'S1' in safe:
firstdate = safe[17:32]
elif 'S2' in safe:
firstdate = safe[11:26]
year = firstdate[0:4]
doy = str(
datetime.datetime.strptime(firstdate, "%Y%m%d").timetuple().tm_yday
).zfill(3)
# try:
# doy = str(
# datetime.datetime.strptime(firstdate, "%Y%m%d").timetuple().tm_yday
# ).zfill(3)
doy = datetime.datetime.strptime(firstdate, "%Y%m%dT%H%M%S").strftime('%j')
sat = safe.split("_")[0]
if sat == "S1A":
satdir = "sentinel-1a"
elif sat == "S1B":
satdir = "sentinel-1b"
elif sat =='S2B':
satdir = 'sentinel-2b'
elif sat =='S2A':
satdir = 'sentinel-2a'
else:
satdir = ""
logging.error("%s is not a good satellite name", sat)
Expand Down
19 changes: 11 additions & 8 deletions tests_metiers/download_multithread_multiuser.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,15 @@
if not os.path.exists(outputdir):
logging.debug("mkdir on %s", outputdir)
os.makedirs(outputdir, 0o0775)
dfout = download_list_product_multithread_v2(
list_id=inputdf["id"].values,
list_safename=inputdf["safename"].values,
outputdir=outputdir,
hideProgressBar=False,
account_group=logins_group,
check_on_disk=args.forcedownload == False,
)
if len(inputdf['id'])>0:
dfout = download_list_product_multithread_v2(
list_id=inputdf["id"].values,
list_safename=inputdf["safename"].values,
outputdir=outputdir,
hideProgressBar=False,
account_group=logins_group,
check_on_disk=args.forcedownload == False,
)
else:
logging.info('empty listing to treat')
logging.info("end of function")
201 changes: 188 additions & 13 deletions tests_metiers/inventories_CDS_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def map_footprints(geometry_request, collected_data_norm, title):
cpt["unary_union_not_sufficient"] += 1
pass
# print('unaryunion passe pas')
elif uu.geom_type=='Polygon':
elif uu.geom_type == "Polygon":
plt.plot(*uu.exterior.xy, "b--", lw=0.7, alpha=0.8)
else:
print('strange geometry',uu)
print("strange geometry", uu)
# plt.title('Sentinel-1 IW SLC available products %s since 2014'%(len(collected_data_norm['Name'])),fontsize=22)
plt.title(title)
plt.show()
Expand Down Expand Up @@ -138,8 +138,8 @@ def number_product_per_month(collected_data_norm, title):
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=18)
plt.xticks(fontsize=18,rotation=45)
plt.xlim(ix[0],ix[-1])
plt.xticks(fontsize=18, rotation=45)
plt.xlim(ix[0], ix[-1])
plt.ylabel("Number of IW SLC products available\nstacked histogram", fontsize=17)
plt.show()

Expand All @@ -163,7 +163,7 @@ def number_of_product_per_climato_month(collected_data_norm, title):
markeredgecolor="k",
lw=0.7,
)
plt.legend(fontsize=15,bbox_to_anchor=(1,1))
plt.legend(fontsize=15, bbox_to_anchor=(1, 1))
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=18)
Expand Down Expand Up @@ -238,7 +238,7 @@ def number_of_product_per_year_asc_desc(collected_data_norm, title):
cummul_grp += grp["Name"].fillna(0)
# print('cummul_grp',cummul_grp)
cptu += 1
plt.legend(fontsize=12, bbox_to_anchor=(1,1))
plt.legend(fontsize=12, bbox_to_anchor=(1, 1))
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=12)
Expand All @@ -255,17 +255,25 @@ def add_volumetry_column(collected_data_norm):
"""
vols = []
for kk in collected_data_norm["Name"]:
if "EW" in kk or "WV" in kk:
raise Exception("mode no configured")
if "1SDV" in kk or "1SDH" in kk:
vols.append(7.8/1000.)
if "EW" in kk and "OCN" in kk:
vols.append(37.0 / 1000.0)
elif "EW" in kk and "SLC" in kk:
vols.append(3.7 / 1.0)
elif "IW" in kk and "OCN" in kk:
vols.append(15.0 / 1000.0)
else:
vols.append(3.8/1000.)
vols.append(3.8 / 1000.0)
# if "EW" in kk or "WV" in kk:
# raise Exception("mode no configured")
# if "1SDV" in kk or "1SDH" in kk:
# vols.append(7.8 / 1000.0)
# else:
# vols.append(3.8 / 1000.0)
collected_data_norm["volume"] = vols
return collected_data_norm


def volume_per_year(collected_data_norm, title,freq = "AS"):
def volume_per_year(collected_data_norm, title, freq="AS"):
"""
:param collected_data_norm:
Expand All @@ -277,7 +285,7 @@ def volume_per_year(collected_data_norm, title,freq = "AS"):
collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
plt.figure(figsize=(10, 6), dpi=110)
cummul_grp = None
# not Y because anchored date is offset to year+1
# not Y because anchored date is offset to year+1
# freq = "M" # for a test
if freq == "AS":
width = 300
Expand Down Expand Up @@ -328,6 +336,173 @@ def volume_per_year(collected_data_norm, title,freq = "AS"):
plt.show()


def count_per_year_with_labels(
    collected_data_norm, title, freq="AS", yearmin=2014, yearmax=2023
):
    """Plot a stacked bar chart of yearly product counts, split by polarization.

    Parameters
    ----------
    collected_data_norm : pandas.DataFrame
        CDSE inventory; must have a "Name" column of SAFE product names
        (the polarization and start year are parsed from the name itself,
        e.g. the substring "1SDV_2017").
    title : str
        Figure title.
    freq : str, optional
        Kept for interface compatibility; grouping is done by string-matching
        "<pol>_<year>" in the product names, so this argument is unused.
    yearmin, yearmax : int, optional
        Inclusive range of years to count (defaults reproduce the original
        hard-coded 2014..2023 range).

    Returns
    -------
    None
        Displays the matplotlib figure (``plt.show()``).
    """
    # Columns added for consistency with the other inventory figures, even
    # though only "Name" is actually read below.
    collected_data_norm = add_volumetry_column(collected_data_norm)
    collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
    plt.figure(figsize=(10, 6), dpi=110)

    polarizations = ["1SDV", "1SSV", "1SSH", "1SDH"]
    counts_per_pol = {pol: [] for pol in polarizations}
    years = list(range(yearmin, yearmax + 1))
    for year in years:
        for pol in polarizations:
            # A SAFE name embeds "<pol>_<startdate>", so "1SDV_2017" matches
            # dual-pol VV/VH products started in 2017.
            mask = collected_data_norm["Name"].str.contains(pol + "_" + str(year))
            counts_per_pol[pol].append(collected_data_norm.loc[mask, "Name"].count())

    newdf = pd.DataFrame(counts_per_pol, index=years)
    ax = newdf.plot(
        kind="bar", stacked=True, figsize=(8, 6), rot=0, xlabel="year", ylabel="Count"
    )
    for container in ax.containers:
        # Hide the label on empty segments so the chart stays readable.
        labels = [v.get_height() if v.get_height() > 0 else "" for v in container]
        ax.bar_label(container, labels=labels, label_type="center")
    plt.legend(fontsize=10, loc=2)
    plt.grid(True)
    plt.title(title, fontsize=18)
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(
        "Count IW SLC products available \nstacked histogram",
        fontsize=15,
    )
    plt.show()


def count_per_year_with_labels_unit(
    collected_data_norm,
    title,
    freq="AS",
    yearmin=2013,
    yearmax=2024,
    addlegendonlyifcountnotnull=True,
):
    """Plot a stacked bar chart of yearly counts per (satellite unit, polarization).

    Parameters
    ----------
    collected_data_norm : pandas.DataFrame
        CDSE inventory; must have a "Name" column of SAFE product names.
        Each product is classified by matching "S1A"/"S1B" and
        "<pol>_<year>" substrings in the name.
    title : str
        Figure title.
    freq : str, optional
        Kept for interface compatibility; grouping is done by string-matching
        in the product names, so this argument is unused.
    yearmin, yearmax : int, optional
        Inclusive range of years to count.
    addlegendonlyifcountnotnull : bool, optional
        When True, a (unit, polarization) class is only added to the legend /
        DataFrame for the years where its count is non-zero.

    Returns
    -------
    None
        Displays the matplotlib figure (``plt.show()``).
    """
    # Columns added for consistency with the other inventory figures, even
    # though only "Name" is actually read below.
    collected_data_norm = add_volumetry_column(collected_data_norm)
    collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
    plt.figure(figsize=(10, 6), dpi=110)

    counts_per_class = {}
    years = []
    for year in range(yearmin, yearmax + 1):
        years.append(year)
        for sarunit in ["S1A", "S1B"]:
            for pol in ["1SDV", "1SSV", "1SSH", "1SDH"]:
                names = collected_data_norm["Name"]
                subset = collected_data_norm[
                    names.str.contains(pol + "_" + str(year))
                    & names.str.contains(sarunit)
                ]
                countsafe = subset["Name"].count()
                # NOTE(review): when addlegendonlyifcountnotnull is True,
                # zero counts are skipped entirely, so the per-class lists can
                # end up shorter than `years` and the DataFrame construction
                # below will raise — confirm the inputs always cover every
                # retained class in every year.
                if countsafe > 0 or not addlegendonlyifcountnotnull:
                    key = sarunit + "_" + pol
                    counts_per_class.setdefault(key, []).append(countsafe)

    newdf = pd.DataFrame(counts_per_class, index=years)
    ax = newdf.plot(
        kind="bar",
        stacked=True,
        figsize=(8, 6),
        rot=0,
        xlabel="year",
        ylabel="Count",
        edgecolor="k",
    )
    for container in ax.containers:
        # Hide the label on empty segments so the chart stays readable.
        labels = [v.get_height() if v.get_height() > 0 else "" for v in container]
        ax.bar_label(container, labels=labels, label_type="center")
    plt.legend(fontsize=10, ncols=4, bbox_to_anchor=(1, -0.1))
    plt.grid(True)
    plt.title(title, fontsize=18)
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(
        "Count SAFE products available on CDSE \nstacked histogram",
        fontsize=15,
    )
    plt.show()


def volume_wrt_sea_percent(collected_data_norm, title):
collected_data_norm = add_volumetry_column(collected_data_norm)
delta = 10
Expand Down

0 comments on commit 6387024

Please sign in to comment.