Dynamic Pagination in CPT Dashboard #135

Merged 9 commits on Dec 13, 2024.
Changes shown below are from 6 commits.
backend/app/api/v1/commons/hce.py (16 changes: 10 additions & 6 deletions)

```diff
@@ -3,23 +3,27 @@
 from app.services.search import ElasticService
 
 
-async def getData(start_datetime: date, end_datetime: date, configpath: str):
+async def getData(
+    start_datetime: date, end_datetime: date, size: int, offset: int, configpath: str
+):
     query = {
         "query": {"bool": {"filter": {"range": {"date": {"format": "yyyy-MM-dd"}}}}}
     }
 
     es = ElasticService(configpath=configpath)
     response = await es.post(
         query=query,
+        size=size,
         start_date=start_datetime,
         end_date=end_datetime,
         timestamp_field="date",
     )
     await es.close()
-    tasks = [item["_source"] for item in response]
+    tasks = [item["_source"] for item in response["data"]]
     jobs = pd.json_normalize(tasks)
+    if len(jobs) == 0:
+        return {"data": jobs, "total": response["total"]}
 
     jobs[["group"]] = jobs[["group"]].fillna(0)
     jobs.fillna("", inplace=True)
-    if len(jobs) == 0:
-        return jobs
-    return jobs
+    return {"data": jobs, "total": response["total"]}
```

Comment on lines +23 to +28

Member: It would reduce the code complexity if you just included those two lines in a `len(jobs) != 0` case instead of having an identical early return.

Collaborator: There are a lot of potential refactoring ideas we should pursue to make the somewhat messy codebase more maintainable.

Right now, we've got a long list of PRs we'd really like to land, and we need to focus on the most critical concerns ... starting with landing this PR, #138, and then a follow-on filtering PR which ought to complete the "revamp" branch. Once we land that onto "main" we can churn through my backlog of ilab PRs.

At that point, our focus should be cleaning up the code base (including unit testing, functional testing, lint and format checkers) to make it more maintainable with a viable CI.

As much as I hate being "pragmatic" in things like this, let's not let minor stuff like this bog us down at this point. Maybe leaving these comments in place and open as a reference for later work isn't a bad idea ...
backend/app/api/v1/commons/ocm.py (15 changes: 10 additions & 5 deletions)

```diff
@@ -3,35 +3,40 @@
 from app.services.search import ElasticService
 
 
-async def getData(start_datetime: date, end_datetime: date, configpath: str):
+async def getData(
+    start_datetime: date, end_datetime: date, size: int, offset: int, configpath: str
+):
     query = {
+        "size": size,
+        "from": offset,
         "query": {
             "bool": {
                 "filter": {"range": {"metrics.earliest": {"format": "yyyy-MM-dd"}}}
             }
-        }
+        },
     }
 
     es = ElasticService(configpath=configpath)
     response = await es.post(
         query=query,
+        size=size,
         start_date=start_datetime,
         end_date=end_datetime,
         timestamp_field="metrics.earliest",
     )
     await es.close()
-    tasks = [item["_source"] for item in response]
+    tasks = [item["_source"] for item in response["data"]]
     jobs = pd.json_normalize(tasks)
     if len(jobs) == 0:
-        return jobs
+        return {"data": jobs, "total": response["total"]}
 
     if "buildUrl" not in jobs.columns:
         jobs.insert(len(jobs.columns), "buildUrl", "")
     if "ciSystem" not in jobs.columns:
         jobs.insert(len(jobs.columns), "ciSystem", "")
     jobs.fillna("", inplace=True)
     jobs["jobStatus"] = jobs.apply(convertJobStatus, axis=1)
-    return jobs
+    return {"data": jobs, "total": response["total"]}
 
 
 def fillCiSystem(row):
```

(jaredoconnell marked a conversation on the `jobStatus` line as resolved.)
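The `"size"`/`"from"` keys added to the query body are Elasticsearch's standard offset pagination: `"size"` caps the page length and `"from"` skips that many earlier hits. A small helper, hypothetical but mirroring the query shape in these diffs, shows how a page is requested:

```python
# Hypothetical helper mirroring the paginated query bodies in this PR.
def build_paginated_query(size: int, offset: int, timestamp_field: str) -> dict:
    return {
        "size": size,        # page length
        "from": offset,      # number of earlier hits to skip
        "query": {
            "bool": {
                "filter": {"range": {timestamp_field: {"format": "yyyy-MM-dd"}}}
            }
        },
    }

# Third page of 25 results, filtered on the ocm timestamp field.
q = build_paginated_query(size=25, offset=50, timestamp_field="metrics.earliest")
```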
backend/app/api/v1/commons/ocp.py (15 changes: 10 additions & 5 deletions)

```diff
@@ -4,25 +4,30 @@
 from app.services.search import ElasticService
 
 
-async def getData(start_datetime: date, end_datetime: date, configpath: str):
+async def getData(
+    start_datetime: date, end_datetime: date, size: int, offset: int, configpath: str
+):
     query = {
+        "size": size,
+        "from": offset,
         "query": {
             "bool": {"filter": {"range": {"timestamp": {"format": "yyyy-MM-dd"}}}}
-        }
+        },
     }
 
     es = ElasticService(configpath=configpath)
     response = await es.post(
         query=query,
+        size=size,
         start_date=start_datetime,
         end_date=end_datetime,
         timestamp_field="timestamp",
     )
     await es.close()
-    tasks = [item["_source"] for item in response]
+    tasks = [item["_source"] for item in response["data"]]
     jobs = pd.json_normalize(tasks)
     if len(jobs) == 0:
-        return jobs
+        return {"data": jobs, "total": response["total"]}
 
     jobs[
         ["masterNodesCount", "workerNodesCount", "infraNodesCount", "totalNodesCount"]
@@ -52,7 +57,7 @@ async def getData(start_datetime: date, end_datetime: date, configpath: str):
     jbs = cleanJobs
     jbs["shortVersion"] = jbs["ocpVersion"].str.slice(0, 4)
 
-    return jbs
+    return {"data": jbs, "total": response["total"]}
 
 
 def fillEncryptionType(row):
```

Comment on lines 57 to +60

Member: What does cleanJobs come from? It may benefit from a comment.

And I do not think `jbs` makes sense as a variable name. It's just confusing and looks like a typo. Dave also questioned this.

Collaborator: `cleanJobs` is the result of a pandas dataframe filter to remove rows, although my understanding of the complicated pandas infrastructure is minimal.

What bugs me here is the `jbs = cleanJobs` to effectively just obscure the name before returning the data using `jbs`. That's not new with Varshini's changes, though, and I don't think it's practical to push her to rewrite more existing logic than necessary for the revamp. (On the other hand, simply replacing `cleanJobs` with `jbs` or dropping the intermediary `jbs` would make the code easier to read, and it'd be "simple"... it's just that an endless list of "simple" things isn't simple anymore. 🤔 💣 )

Member: We will definitely need to keep a note of this for a future refactor PR.
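The cleanup the reviewers defer here, dropping the `jbs = cleanJobs` aliasing, could be as small as the following sketch. It is hypothetical: plain dicts stand in for the pandas DataFrame, and the actual filter that produces `cleanJobs` sits outside the visible diff.

```python
# Sketch of the deferred refactor: operate on the filtered rows directly and
# return them, with no intermediate jbs alias. Dicts stand in for the frame.
def finalize(clean_jobs: list, total: int) -> dict:
    for job in clean_jobs:
        # mirrors jbs["ocpVersion"].str.slice(0, 4)
        job["shortVersion"] = job.get("ocpVersion", "")[:4]
    return {"data": clean_jobs, "total": total}

out = finalize([{"ocpVersion": "4.15.0-rc1"}], total=1)
```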
backend/app/api/v1/commons/quay.py (17 changes: 12 additions & 5 deletions)

```diff
@@ -4,11 +4,15 @@
 from app.services.search import ElasticService
 
 
-async def getData(start_datetime: date, end_datetime: date, configpath: str):
+async def getData(
+    start_datetime: date, end_datetime: date, size, offset, configpath: str
+):
     query = {
+        "size": size,
+        "from": offset,
         "query": {
             "bool": {"filter": {"range": {"timestamp": {"format": "yyyy-MM-dd"}}}}
-        }
+        },
     }
 
     es = ElasticService(configpath=configpath)
@@ -19,10 +23,10 @@ async def getData(start_datetime: date, end_datetime: date, configpath: str):
         timestamp_field="timestamp",
     )
     await es.close()
-    tasks = [item["_source"] for item in response]
+    tasks = [item["_source"] for item in response["data"]]
     jobs = pd.json_normalize(tasks)
     if len(jobs) == 0:
-        return jobs
+        return {"data": jobs, "total": response["total"]}
 
     jobs[
         ["masterNodesCount", "workerNodesCount", "infraNodesCount", "totalNodesCount"]
@@ -38,4 +42,7 @@ async def getData(start_datetime: date, end_datetime: date, configpath: str):
     jobs["build"] = jobs.apply(utils.getBuild, axis=1)
     jobs["shortVersion"] = jobs["ocpVersion"].str.slice(0, 4)
 
-    return jobs[jobs["platform"] != ""]
+    cleanJobs = jobs[jobs["platform"] != ""]
+
+    jbs = cleanJobs
+    return {"data": jbs, "total": response["total"]}
```

(dbutenhof marked a conversation on the new return block as resolved.)
backend/app/api/v1/commons/telco.py (14 changes: 8 additions & 6 deletions)

```diff
@@ -8,7 +8,9 @@
 import app.api.v1.endpoints.telco.telcoGraphs as telcoGraphs
 
 
-async def getData(start_datetime: date, end_datetime: date, configpath: str):
+async def getData(
+    start_datetime: date, end_datetime: date, size: int, offset: int, configpath: str
+):
     test_types = [
         "oslat",
         "cyclictest",
@@ -41,10 +43,12 @@ async def getData(start_datetime: date, end_datetime: date, configpath: str):
         ['test_type="{}"'.format(test_type) for test_type in test_types]
     )
     splunk = SplunkService(configpath=configpath)
-    response = await splunk.query(query=query, searchList=searchList)
+    response = await splunk.query(
+        query=query, size=size, offset=offset, searchList=searchList
+    )
     mapped_list = []
 
-    for each_response in response:
+    for each_response in response["data"]:
         end_timestamp = int(each_response["timestamp"])
         test_data = each_response["data"]
         threshold = await telcoGraphs.process_json(test_data, True)
@@ -83,7 +87,5 @@ async def getData(start_datetime: date, end_datetime: date, configpath: str):
     )
 
     jobs = pd.json_normalize(mapped_list)
-    if len(jobs) == 0:
-        return jobs
-
-    return jobs
+    return {"data": jobs, "total": response["total"]}
```
backend/app/api/v1/commons/utils.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -7,7 +7,7 @@ async def getMetadata(uuid: str, configpath: str):
     es = ElasticService(configpath=configpath)
     response = await es.post(query=query)
     await es.close()
-    meta = [item["_source"] for item in response]
+    meta = [item["_source"] for item in response["data"]]
     return meta[0]
```

(dbutenhof marked a conversation here as resolved.)
backend/app/api/v1/endpoints/cpt/cptJobs.py (105 changes: 78 additions & 27 deletions)

```diff
@@ -28,7 +28,7 @@
 @router.get(
     "/api/v1/cpt/jobs",
     summary="Returns a job list from all the products.",
-    description="Returns a list of jobs in the specified dates. \
+    description="Returns a list of jobs in the specified dates of requested size \
     If not dates are provided the API will default the values. \
     `startDate`: will be set to the day of the request minus 5 days.\
     `endDate`: will be set to the day of the request.",
@@ -48,7 +48,10 @@ async def jobs(
         description="End date for searching jobs, format: 'YYYY-MM-DD'",
         examples=["2020-11-15"],
     ),
-    pretty: bool = Query(False, description="Output contet in pretty format."),
+    pretty: bool = Query(False, description="Output content in pretty format."),
+    size: int = Query(None, description="Number of jobs to fetch"),
+    offset: int = Query(None, description="Offset Number to fetch jobs from"),
+    totalJobs: int = Query(None, description="Total number of jobs"),
 ):
     if start_date is None:
         start_date = datetime.utcnow().date()
```
```diff
@@ -66,23 +69,35 @@
     )
 
     results_df = pd.DataFrame()
+    total_dict = {}
+    total = 0
     with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
         futures = {
-            executor.submit(fetch_product, product, start_date, end_date): product
+            executor.submit(
+                fetch_product, product, start_date, end_date, size, offset
+            ): product
             for product in products
         }
         for future in as_completed(futures):
             product = futures[future]
             try:
                 result = future.result()
-                results_df = pd.concat([results_df, result])
+                total_dict[product] = result["total"]
+                results_df = pd.concat([results_df, result["data"]])
             except Exception as e:
                 print(f"Error fetching data for product {product}: {e}")
 
+    # on first hit, totalJobs is 0
+    if totalJobs == 0:
+        for product in total_dict:
+            total += int(total_dict[product])
+        totalJobs = total
     response = {
         "startDate": start_date.__str__(),
         "endDate": end_date.__str__(),
         "results": results_df.to_dict("records"),
        "total": totalJobs,
+        "offset": offset + size,
     }
 
     if pretty:
```

(dbutenhof marked a conversation on the `totalJobs` accounting as resolved.)
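The first-request accounting above can be traced by hand. A sketch with hypothetical per-product totals: the client sends `totalJobs=0` on its first request, the endpoint sums the totals each product reported alongside its page, and later requests simply echo the figure back.

```python
# Hypothetical per-product totals, mirroring the totalJobs == 0 branch above.
total_dict = {"ocp": 40, "quay": 10, "telco": 7}

def resolve_total(totalJobs: int, totals: dict) -> int:
    # on first hit, totalJobs is 0, so sum what each product reported
    if totalJobs == 0:
        totalJobs = sum(int(v) for v in totals.values())
    return totalJobs

first = resolve_total(0, total_dict)      # first page: computed from products
later = resolve_total(first, total_dict)  # later pages: passed through as-is
```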
```diff
@@ -93,34 +108,70 @@
         return jsonstring
 
 
-async def fetch_product_async(product, start_date, end_date):
+async def fetch_product_async(product, start_date, end_date, size, offset):
     try:
-        df = await products[product](start_date, end_date)
-        return (
-            df.loc[
-                :,
-                [
-                    "ciSystem",
-                    "uuid",
-                    "releaseStream",
-                    "jobStatus",
-                    "buildUrl",
-                    "startDate",
-                    "endDate",
-                    "product",
-                    "version",
-                    "testName",
-                ],
-            ]
-            if len(df) != 0
-            else df
-        )
+        response = await products[product](start_date, end_date, size, offset)
+        if response:
+            df = response["data"]
+            return {
+                "data": (
+                    df.loc[
+                        :,
+                        [
+                            "ciSystem",
+                            "uuid",
+                            "releaseStream",
+                            "jobStatus",
+                            "buildUrl",
+                            "startDate",
+                            "endDate",
+                            "product",
+                            "version",
+                            "testName",
+                        ],
+                    ]
+                    if len(df) != 0
+                    else df
+                ),
+                "total": response["total"],
+            }
     except ConnectionError:
         print("Connection Error in mapper for product " + product)
     except Exception as e:
         print(f"Error in mapper for product {product}: {e}")
     return pd.DataFrame()
 
 
-def fetch_product(product, start_date, end_date):
-    return asyncio.run(fetch_product_async(product, start_date, end_date))
+def fetch_product(product, start_date, end_date, size, offset):
+    return asyncio.run(fetch_product_async(product, start_date, end_date, size, offset))
+
+
+def is_requested_size_available(total_count, offset, requested_size):
+    """
+    Check if the requested size of data is available starting from a given offset.
+
+    Args:
+        total_count (int): Total number of available records.
+        offset (int): The starting position in the dataset.
+        requested_size (int): The number of records requested.
+
+    Returns:
+        bool: True if the requested size is available, False otherwise.
+    """
+    return (offset + requested_size) <= total_count
+
+
+def calculate_remaining_data(total_count, offset, requested_size):
+    """
+    Calculate the remaining number of data items that can be fetched based on the requested size.
+
+    Args:
+        total_count (int): Total number of available records.
+        offset (int): The starting position in the dataset.
+        requested_size (int): The number of records requested.
+
+    Returns:
+        int: The number of records that can be fetched, which may be less than or equal to requested_size.
+    """
+    available_data = total_count - offset  # Data available from the offset
+    return min(available_data, requested_size)
```

(dbutenhof marked a conversation on `calculate_remaining_data` as resolved.)
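The two new helpers are pure functions, so their behavior is easy to check directly. This reproduces them from the diff with a worked boundary case:

```python
def is_requested_size_available(total_count, offset, requested_size):
    # True only when a full page of requested_size exists at this offset.
    return (offset + requested_size) <= total_count

def calculate_remaining_data(total_count, offset, requested_size):
    # Clamp the page to whatever actually remains past the offset.
    available_data = total_count - offset
    return min(available_data, requested_size)

# With 100 records at offset 90, a request for 25 overruns the dataset:
print(is_requested_size_available(100, 90, 25))  # False
print(calculate_remaining_data(100, 90, 25))     # 10
```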