From 784e8f6b777f53bf20cf234d27c7fb1240045631 Mon Sep 17 00:00:00 2001 From: Vitor Bellini Date: Tue, 9 Apr 2024 17:47:52 -0300 Subject: [PATCH] sort notification values --- src/hooks/inlabs_hook.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/hooks/inlabs_hook.py b/src/hooks/inlabs_hook.py index 7f967e3..27f634d 100644 --- a/src/hooks/inlabs_hook.py +++ b/src/hooks/inlabs_hook.py @@ -3,8 +3,8 @@ import re from datetime import datetime, timedelta, date -import pandas as pd import unicodedata +import pandas as pd import html2text from airflow.hooks.base import BaseHook @@ -177,6 +177,9 @@ def transform_search_results( """ df = response.copy() + # `identifica` column is the publication title. If it is None, + # the entry can be a table or other text content that is not inside + # a publication. df.dropna(subset=["identifica"], inplace=True) df["pubname"] = df["pubname"].apply(self._rename_section) df["identifica"] = df["identifica"].apply(self._remove_html_tags) @@ -226,7 +229,9 @@ def transform_search_results( {} if df.empty else self._group_to_dict( - df.sort_values(by="matches"), "matches", cols_output + df.sort_values(by=["matches", "section", "title"]), + "matches", + cols_output, ) )