Skip to content

Commit

Permalink
Merge pull request #71 from umr-lops/bugfix_corrupted_input
Browse files Browse the repository at this point in the history
Bugfix corrupted input
  • Loading branch information
agrouaze authored Mar 11, 2024
2 parents 2292e1b + c688d9c commit 6387024
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 34 deletions.
16 changes: 8 additions & 8 deletions cdsodatacli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,10 @@ def add_missing_cdse_hash_ids_in_listing(listing_path):
-------
"""
res = pd.DataFrame({"id": [], "safename": []})
df_raw = pd.read_csv(listing_path, names=["safenames"])
df_raw = df_raw[df_raw['safenames'].str.contains('.SAFE')]
list_safe_a = df_raw["safenames"].values

delta = datetime.timedelta(seconds=1)
gdf = gpd.GeoDataFrame(
{
Expand All @@ -535,13 +536,12 @@ def add_missing_cdse_hash_ids_in_listing(listing_path):
}
)
sea_min_pct = 0
collected_data_norm = fetch_data(gdf, min_sea_percent=sea_min_pct)
if collected_data_norm is None:
res = pd.DataFrame({"id": [], "safename": []})
else:
res = collected_data_norm[["Id", "Name"]]
res.rename(columns={"Name": "safename"},inplace=True)
res.rename(columns={"Id": "id"},inplace=True)
if len(gdf['geometry'])>0:
collected_data_norm = fetch_data(gdf, min_sea_percent=sea_min_pct)
if not collected_data_norm is None:
res = collected_data_norm[["Id", "Name"]]
res.rename(columns={"Name": "safename"},inplace=True)
res.rename(columns={"Id": "id"},inplace=True)
return res


Expand Down
19 changes: 14 additions & 5 deletions cdsodatacli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,26 @@ def WhichArchiveDir(safe):
Args:
safe (str): safe base name
"""

firstdate = safe[17:25]
logging.debug('safe: %s',safe)
if 'S1' in safe:
firstdate = safe[17:32]
elif 'S2' in safe:
firstdate = safe[11:26]
year = firstdate[0:4]
doy = str(
datetime.datetime.strptime(firstdate, "%Y%m%d").timetuple().tm_yday
).zfill(3)
# try:
# doy = str(
# datetime.datetime.strptime(firstdate, "%Y%m%d").timetuple().tm_yday
# ).zfill(3)
doy = datetime.datetime.strptime(firstdate, "%Y%m%dT%H%M%S").strftime('%j')
sat = safe.split("_")[0]
if sat == "S1A":
satdir = "sentinel-1a"
elif sat == "S1B":
satdir = "sentinel-1b"
elif sat =='S2B':
satdir = 'sentinel-2b'
elif sat =='S2A':
satdir = 'sentinel-2a'
else:
satdir = ""
logging.error("%s is not a good satellite name", sat)
Expand Down
19 changes: 11 additions & 8 deletions tests_metiers/download_multithread_multiuser.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,15 @@
if not os.path.exists(outputdir):
logging.debug("mkdir on %s", outputdir)
os.makedirs(outputdir, 0o0775)
dfout = download_list_product_multithread_v2(
list_id=inputdf["id"].values,
list_safename=inputdf["safename"].values,
outputdir=outputdir,
hideProgressBar=False,
account_group=logins_group,
check_on_disk=args.forcedownload == False,
)
if len(inputdf['id'])>0:
dfout = download_list_product_multithread_v2(
list_id=inputdf["id"].values,
list_safename=inputdf["safename"].values,
outputdir=outputdir,
hideProgressBar=False,
account_group=logins_group,
check_on_disk=args.forcedownload == False,
)
else:
logging.info('empty listing to treat')
logging.info("end of function")
201 changes: 188 additions & 13 deletions tests_metiers/inventories_CDS_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def map_footprints(geometry_request, collected_data_norm, title):
cpt["unary_union_not_sufficient"] += 1
pass
# print('unaryunion passe pas')
elif uu.geom_type=='Polygon':
elif uu.geom_type == "Polygon":
plt.plot(*uu.exterior.xy, "b--", lw=0.7, alpha=0.8)
else:
print('strange geometry',uu)
print("strange geometry", uu)
# plt.title('Sentinel-1 IW SLC available products %s since 2014'%(len(collected_data_norm['Name'])),fontsize=22)
plt.title(title)
plt.show()
Expand Down Expand Up @@ -138,8 +138,8 @@ def number_product_per_month(collected_data_norm, title):
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=18)
plt.xticks(fontsize=18,rotation=45)
plt.xlim(ix[0],ix[-1])
plt.xticks(fontsize=18, rotation=45)
plt.xlim(ix[0], ix[-1])
plt.ylabel("Number of IW SLC products available\nstacked histogram", fontsize=17)
plt.show()

Expand All @@ -163,7 +163,7 @@ def number_of_product_per_climato_month(collected_data_norm, title):
markeredgecolor="k",
lw=0.7,
)
plt.legend(fontsize=15,bbox_to_anchor=(1,1))
plt.legend(fontsize=15, bbox_to_anchor=(1, 1))
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=18)
Expand Down Expand Up @@ -238,7 +238,7 @@ def number_of_product_per_year_asc_desc(collected_data_norm, title):
cummul_grp += grp["Name"].fillna(0)
# print('cummul_grp',cummul_grp)
cptu += 1
plt.legend(fontsize=12, bbox_to_anchor=(1,1))
plt.legend(fontsize=12, bbox_to_anchor=(1, 1))
plt.grid(True)
plt.title(title, fontsize=18)
plt.yticks(fontsize=12)
Expand All @@ -255,17 +255,25 @@ def add_volumetry_column(collected_data_norm):
"""
vols = []
for kk in collected_data_norm["Name"]:
if "EW" in kk or "WV" in kk:
raise Exception("mode no configured")
if "1SDV" in kk or "1SDH" in kk:
vols.append(7.8/1000.)
if "EW" in kk and "OCN" in kk:
vols.append(37.0 / 1000.0)
elif "EW" in kk and "SLC" in kk:
vols.append(3.7 / 1.0)
elif "IW" in kk and "OCN" in kk:
vols.append(15.0 / 1000.0)
else:
vols.append(3.8/1000.)
vols.append(3.8 / 1000.0)
# if "EW" in kk or "WV" in kk:
# raise Exception("mode no configured")
# if "1SDV" in kk or "1SDH" in kk:
# vols.append(7.8 / 1000.0)
# else:
# vols.append(3.8 / 1000.0)
collected_data_norm["volume"] = vols
return collected_data_norm


def volume_per_year(collected_data_norm, title,freq = "AS"):
def volume_per_year(collected_data_norm, title, freq="AS"):
"""
:param collected_data_norm:
Expand All @@ -277,7 +285,7 @@ def volume_per_year(collected_data_norm, title,freq = "AS"):
collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
plt.figure(figsize=(10, 6), dpi=110)
cummul_grp = None
# not Y because anchored date is offset to year+1
# not Y because anchored date is offset to year+1
# freq = "M" # for a test
if freq == "AS":
width = 300
Expand Down Expand Up @@ -328,6 +336,173 @@ def volume_per_year(collected_data_norm, title,freq = "AS"):
plt.show()


def count_per_year_with_labels(
    collected_data_norm, title, freq="AS", yearmin=2014, yearmax=2023
):
    """Plot a stacked bar chart of yearly product counts, split by polarization.

    Parameters
    ----------
    collected_data_norm : pandas.DataFrame
        CDSE inventory; must have a "Name" column of SAFE product names
        (the polarization and start year are parsed from the name itself,
        e.g. the substring "1SDV_2017").
    title : str
        Figure title.
    freq : str, optional
        Kept for interface compatibility; grouping is done by string-matching
        "<pol>_<year>" in the product names, so this argument is unused.
    yearmin, yearmax : int, optional
        Inclusive range of years to count (defaults reproduce the original
        hard-coded 2014..2023 range).

    Returns
    -------
    None
        Displays the matplotlib figure (``plt.show()``).
    """
    # Columns added for consistency with the other inventory figures, even
    # though only "Name" is actually read below.
    collected_data_norm = add_volumetry_column(collected_data_norm)
    collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
    plt.figure(figsize=(10, 6), dpi=110)

    polarizations = ["1SDV", "1SSV", "1SSH", "1SDH"]
    counts_per_pol = {pol: [] for pol in polarizations}
    years = list(range(yearmin, yearmax + 1))
    for year in years:
        for pol in polarizations:
            # A SAFE name embeds "<pol>_<startdate>", so "1SDV_2017" matches
            # dual-pol VV/VH products started in 2017.
            mask = collected_data_norm["Name"].str.contains(pol + "_" + str(year))
            counts_per_pol[pol].append(collected_data_norm.loc[mask, "Name"].count())

    newdf = pd.DataFrame(counts_per_pol, index=years)
    ax = newdf.plot(
        kind="bar", stacked=True, figsize=(8, 6), rot=0, xlabel="year", ylabel="Count"
    )
    for container in ax.containers:
        # Hide the label on empty segments so the chart stays readable.
        labels = [v.get_height() if v.get_height() > 0 else "" for v in container]
        ax.bar_label(container, labels=labels, label_type="center")
    plt.legend(fontsize=10, loc=2)
    plt.grid(True)
    plt.title(title, fontsize=18)
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(
        "Count IW SLC products available \nstacked histogram",
        fontsize=15,
    )
    plt.show()


def count_per_year_with_labels_unit(
    collected_data_norm,
    title,
    freq="AS",
    yearmin=2013,
    yearmax=2024,
    addlegendonlyifcountnotnull=True,
):
    """Plot a stacked bar chart of yearly counts per (satellite unit, polarization).

    Parameters
    ----------
    collected_data_norm : pandas.DataFrame
        CDSE inventory; must have a "Name" column of SAFE product names.
        Each product is classified by matching "S1A"/"S1B" and
        "<pol>_<year>" substrings in the name.
    title : str
        Figure title.
    freq : str, optional
        Kept for interface compatibility; grouping is done by string-matching
        in the product names, so this argument is unused.
    yearmin, yearmax : int, optional
        Inclusive range of years to count.
    addlegendonlyifcountnotnull : bool, optional
        When True, a (unit, polarization) class is only added to the legend /
        DataFrame for the years where its count is non-zero.

    Returns
    -------
    None
        Displays the matplotlib figure (``plt.show()``).
    """
    # Columns added for consistency with the other inventory figures, even
    # though only "Name" is actually read below.
    collected_data_norm = add_volumetry_column(collected_data_norm)
    collected_data_norm = add_time_index_based_onstardtate(collected_data_norm)
    plt.figure(figsize=(10, 6), dpi=110)

    counts_per_class = {}
    years = []
    for year in range(yearmin, yearmax + 1):
        years.append(year)
        for sarunit in ["S1A", "S1B"]:
            for pol in ["1SDV", "1SSV", "1SSH", "1SDH"]:
                names = collected_data_norm["Name"]
                subset = collected_data_norm[
                    names.str.contains(pol + "_" + str(year))
                    & names.str.contains(sarunit)
                ]
                countsafe = subset["Name"].count()
                # NOTE(review): when addlegendonlyifcountnotnull is True,
                # zero counts are skipped entirely, so the per-class lists can
                # end up shorter than `years` and the DataFrame construction
                # below will raise — confirm the inputs always cover every
                # retained class in every year.
                if countsafe > 0 or not addlegendonlyifcountnotnull:
                    key = sarunit + "_" + pol
                    counts_per_class.setdefault(key, []).append(countsafe)

    newdf = pd.DataFrame(counts_per_class, index=years)
    ax = newdf.plot(
        kind="bar",
        stacked=True,
        figsize=(8, 6),
        rot=0,
        xlabel="year",
        ylabel="Count",
        edgecolor="k",
    )
    for container in ax.containers:
        # Hide the label on empty segments so the chart stays readable.
        labels = [v.get_height() if v.get_height() > 0 else "" for v in container]
        ax.bar_label(container, labels=labels, label_type="center")
    plt.legend(fontsize=10, ncols=4, bbox_to_anchor=(1, -0.1))
    plt.grid(True)
    plt.title(title, fontsize=18)
    plt.yticks(fontsize=12)
    plt.xticks(fontsize=12)
    plt.ylabel(
        "Count SAFE products available on CDSE \nstacked histogram",
        fontsize=15,
    )
    plt.show()


def volume_wrt_sea_percent(collected_data_norm, title):
collected_data_norm = add_volumetry_column(collected_data_norm)
delta = 10
Expand Down

0 comments on commit 6387024

Please sign in to comment.