diff --git a/dag_confs/examples_and_tests/all_parameters_example.yaml b/dag_confs/examples_and_tests/all_parameters_example.yaml index eba3db9..7acba27 100644 --- a/dag_confs/examples_and_tests/all_parameters_example.yaml +++ b/dag_confs/examples_and_tests/all_parameters_example.yaml @@ -9,6 +9,7 @@ dag: - pessoa 2 schedule: 0 8 * * MON-FRI search: + header: Pesquisa no DOU terms: - dados abertos - governo aberto diff --git a/src/dou_dag_generator.py b/src/dou_dag_generator.py index 321ca17..80ac49b 100755 --- a/src/dou_dag_generator.py +++ b/src/dou_dag_generator.py @@ -13,7 +13,6 @@ import os import sys import textwrap -from dataclasses import asdict from datetime import datetime, timedelta from typing import Dict, List, Optional, Union from functools import reduce @@ -35,6 +34,7 @@ from utils.date import get_trigger_date, template_ano_mes_dia_trigger_local_time from notification.notifier import Notifier from parsers import DAGConfig, YAMLParser +from schemas import FetchTermsConfig from searchers import BaseSearcher, DOUSearcher, QDSearcher, INLABSSearcher @@ -91,7 +91,7 @@ def merge_two(dict1, dict2): def result_as_html(specs: DAGConfig) -> bool: """Só utiliza resultado HTML apenas para email""" - return specs.discord_webhook and specs.slack_webhook + return specs.report.discord and specs.report.slack class DouDigestDagGenerator: @@ -153,7 +153,7 @@ def prepare_doc_md(specs: DAGConfig, config_file: str) -> str: Returns: str: The DAG documentation in markdown format. """ - config = asdict(specs) + config = specs.model_dump() # options that won't show in the "DAG Docs" del config["description"] del config["doc_md"] @@ -201,7 +201,7 @@ def _get_safe_schedule(self, specs: DAGConfig, default_schedule: str) -> str: """ schedule = default_schedule - id_based_minute = self._hash_dag_id(specs.dag_id, 60) + id_based_minute = self._hash_dag_id(specs.id, 60) schedule_without_min = " ".join(schedule.split(" ")[1:]) schedule = f"{id_based_minute} {schedule_without_min}" @@ -262,7 +262,7 @@ def generate_dags(self): for filepath in files_list: dag_specs = self.parser(filepath).parse() - dag_id = dag_specs.dag_id + dag_id = dag_specs.id globals()[dag_id] = self.create_dag(dag_specs, filepath) def perform_searches( @@ -385,9 +385,7 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG: the term_list from a database """ # Prepare the markdown documentation - doc_md = ( - self.prepare_doc_md(specs, config_file) if specs.doc_md else specs.doc_md - ) + doc_md = self.prepare_doc_md(specs, config_file) if specs.doc_md else None # DAG parameters default_args = { "owner": specs.owner, @@ -401,64 +399,78 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG: schedule = self._update_schedule(specs) dag = DAG( - specs.dag_id, + specs.id, default_args=default_args, schedule=schedule, description=specs.description, doc_md=doc_md, catchup=False, params={"trigger_date": "2022-01-02T12:00"}, - tags=specs.dag_tags, + tags=specs.tags, ) with dag: with TaskGroup(group_id="exec_searchs") as tg_exec_searchs: - counter = 0 - for subsearch in specs.search: - counter += 1 - if subsearch["sql"]: + + # is it a single search or a list of searchers? + if isinstance(specs.search, list): + searches = specs.search + else: + searches = [specs.search] + + for counter, subsearch in enumerate(searches, 1): + + # are terms to be fetched from a database? 
+ terms_come_from_db: bool = isinstance( + subsearch.terms, FetchTermsConfig + ) and getattr(subsearch.terms, "from_db_select", None) + + # determine the terms list + term_list = [] + # is it a directly defined list of terms or is it a + # configuration for fetching terms from a data source? + if isinstance(subsearch.terms, list): + term_list = subsearch.terms + elif terms_come_from_db: select_terms_from_db_task = PythonOperator( task_id=f"select_terms_from_db_{counter}", python_callable=self.select_terms_from_db, op_kwargs={ - "sql": subsearch["sql"], - "conn_id": subsearch["conn_id"], + "sql": subsearch.terms.from_db_select.sql, + "conn_id": subsearch.terms.from_db_select.conn_id, }, ) - term_list = ( - "{{ ti.xcom_pull(task_ids='exec_searchs.select_terms_from_db_" - + str(counter) - + "') }}" - ) + term_list = ( + "{{ ti.xcom_pull(task_ids='exec_searchs.select_terms_from_db_" + + str(counter) + + "') }}" + ) exec_search_task = PythonOperator( task_id=f"exec_search_{counter}", python_callable=self.perform_searches, op_kwargs={ - "header": subsearch["header"], - "sources": subsearch["sources"], - "territory_id": subsearch["territory_id"], - "term_list": subsearch["terms"] or term_list, - "dou_sections": subsearch["dou_sections"], - "search_date": subsearch["search_date"], - "field": subsearch["field"], - "is_exact_search": subsearch["is_exact_search"], - "ignore_signature_match": subsearch[ - "ignore_signature_match" - ], - "force_rematch": subsearch["force_rematch"], - "full_text": subsearch["full_text"], - "use_summary": subsearch["use_summary"], - "department": subsearch["department"], + "header": subsearch.header, + "sources": subsearch.sources, + "territory_id": subsearch.territory_id, + "term_list": term_list, + "dou_sections": subsearch.dou_sections, + "search_date": subsearch.date, + "field": subsearch.field, + "is_exact_search": subsearch.is_exact_search, + "ignore_signature_match": subsearch.ignore_signature_match, + "force_rematch": subsearch.force_rematch, + "full_text": subsearch.full_text, + "use_summary": subsearch.use_summary, + "department": subsearch.department, "result_as_email": result_as_html(specs), }, ) - if subsearch["sql"]: - ( - select_terms_from_db_task >> exec_search_task - ) # pylint: disable=pointless-statement + if terms_come_from_db: + # pylint: disable=pointless-statement + select_terms_from_db_task >> exec_search_task has_matches_task = BranchPythonOperator( task_id="has_matches", @@ -467,12 +479,12 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG: "search_result": "{{ ti.xcom_pull(task_ids=" + str( [ - f"exec_searchs.exec_search_{count + 1}" - for count in range(counter) + f"exec_searchs.exec_search_{count}" + for count in range(1, len(searches) + 1) ] ) + ") }}", - "skip_null": specs.skip_null, + "skip_null": specs.report.skip_null, }, ) @@ -485,8 +497,8 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG: "search_report": "{{ ti.xcom_pull(task_ids=" + str( [ - f"exec_searchs.exec_search_{count + 1}" - for count in range(counter) + f"exec_searchs.exec_search_{count}" + for count in range(1, len(searches) + 1) ] ) + ") }}", @@ -494,6 +506,7 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG: }, ) + # pylint: disable=pointless-statement tg_exec_searchs >> has_matches_task has_matches_task >> [send_notification_task, skip_notification_task] diff --git a/src/notification/discord_sender.py b/src/notification/discord_sender.py index ca8046e..c1a2072 100644 --- a/src/notification/discord_sender.py +++ 
b/src/notification/discord_sender.py @@ -1,17 +1,20 @@ -import requests import re + +import requests + from notification.isender import ISender +from schemas import ReportConfig class DiscordSender(ISender): highlight_tags = ("__", "__") - def __init__(self, specs) -> None: - self.webhook_url = specs.discord_webhook - self.hide_filters = specs.hide_filters - self.header_text = specs.header_text - self.footer_text = specs.footer_text - self.no_results_found_text = specs.no_results_found_text + def __init__(self, report_config: ReportConfig) -> None: + self.webhook_url = report_config.discord["webhook"] + self.hide_filters = report_config.hide_filters + self.header_text = report_config.header_text + self.footer_text = report_config.footer_text + self.no_results_found_text = report_config.no_results_found_text def send(self, search_report: list, report_date: str = None): """Parse the content, and send message to Discord""" @@ -73,4 +76,4 @@ def _remove_html_tags(self, text): # Define a regular expression pattern to match HTML tags clean = re.compile('<.*?>') # Substitute HTML tags with an empty string - return re.sub(clean, '', text) \ No newline at end of file + return re.sub(clean, '', text) diff --git a/src/notification/email_sender.py b/src/notification/email_sender.py index 021d5f1..fd177e8 100644 --- a/src/notification/email_sender.py +++ b/src/notification/email_sender.py @@ -1,7 +1,10 @@ +"""Module for sending emails. +""" + import os import sys -import textwrap from tempfile import NamedTemporaryFile +import textwrap import markdown import pandas as pd @@ -14,12 +17,17 @@ sys.path.insert(0, parent_dir) from notification.isender import ISender +from schemas import ReportConfig class EmailSender(ISender): + """Prepare and send e-mails with the reports.""" + highlight_tags = ("", "") - def __init__(self, specs) -> None: - self.specs = specs + + def __init__(self, report_config: ReportConfig) -> None: + self.report_config = report_config + self.search_report = "" self.watermark = """
Esta pesquisa foi realizada automaticamente pelo Ro-DOU @@ -29,27 +37,27 @@ def __init__(self, specs) -> None: def send(self, search_report: list, report_date: str): """Builds the email content, the CSV if applies, and send it""" self.search_report = search_report - full_subject = f"{self.specs.subject} - DOs de {report_date}" + full_subject = f"{self.report_config.subject} - DOs de {report_date}" skip_notification = True for search in self.search_report: items = ["contains" for k, v in search["result"].items() if v] if items: skip_notification = False else: - content = self.specs.no_results_found_text + content = self.report_config.no_results_found_text if skip_notification: - if self.specs.skip_null: + if self.report_config.skip_null: return "skip_notification" else: content = self.generate_email_content() content += self.watermark - if self.specs.attach_csv and skip_notification is False: + if self.report_config.attach_csv and skip_notification is False: with self.get_csv_tempfile() as csv_file: send_email( - to=self.specs.emails, + to=self.report_config.emails, subject=full_subject, files=[csv_file.name], html_content=content, @@ -57,7 +65,7 @@ def send(self, search_report: list, report_date: str): ) else: send_email( - to=self.specs.emails, + to=self.report_config.emails, subject=full_subject, html_content=content, mime_charset="utf-8", @@ -72,18 +80,18 @@ def generate_email_content(self) -> str: parent_directory = os.path.dirname(current_directory) file_path = os.path.join(parent_directory, "report_style.css") - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: blocks = [f""] - if self.specs.header_text: - blocks.append(self.specs.header_text) + if self.report_config.header_text: + blocks.append(self.report_config.header_text) for search in self.search_report: if search["header"]: blocks.append(f"
<h2>{search['header']}</h2>
") - if not self.specs.hide_filters: + if not self.report_config.hide_filters: if search["department"]: blocks.append( """
<p>Filtrando resultados somente para:</p>
""" @@ -95,47 +103,50 @@ def generate_email_content(self) -> str: for group, search_results in search["result"].items(): - if not search_results: - blocks.append( - f"
<p>{self.specs.no_results_found_text}.</p>
" - ) + if not results: + blocks.append(f"
<p>{self.report_config.no_results_found_text}.</p>
") else: - if not self.specs.hide_filters: - if group != "single_group": - blocks.append("\n") - blocks.append(f"**Grupo: {group}**") - blocks.append("\n\n") - - for term, term_results in search_results.items(): + if not self.report_config.hide_filters: + if group != "single_group": blocks.append("\n") - if not self.specs.hide_filters: - blocks.append(f"* # Resultados para: {term}") - - for department, results in term_results.items(): - - if not self.specs.hide_filters and department != 'single_department': - blocks.append(f"**{department}**") - - for result in results: - if not self.specs.hide_filters: - sec_desc = result["section"] - item_html = f""" -
<p>{sec_desc}</p>
- ### [{result['title']}]({result['href']})
- <p>{result['abstract']}</p>
- <p>{result['date']}</p>
""" - blocks.append( - textwrap.indent(textwrap.dedent(item_html), " " * 4) + blocks.append(f"**Grupo: {group}**") + blocks.append("\n\n") + + for term, term_results in results.items(): + blocks.append("\n") + if not self.report_config.hide_filters: + blocks.append(f"* # Resultados para: {term}") + + for department, results in term_results.items(): + + if ( + not self.report_config.hide_filters + and department != "single_department" + ): + blocks.append(f"**{department}**") + + for result in results: + if not self.report_config.hide_filters: + sec_desc = result["section"] + item_html = f""" +
<p>{sec_desc}</p>
+ ### [{result['title']}]({result['href']})
+ <p>{result['abstract']}</p>
+ <p>{result['date']}</p>
""" + blocks.append( + textwrap.indent( + textwrap.dedent(item_html), " " * 4 ) - else: - item_html = f""" - ### [{result['title']}]({result['href']}) -
<p>{result['abstract']}</p>
""" - blocks.append(textwrap.dedent(item_html)) + ) + else: + item_html = f""" + ### [{result['title']}]({result['href']}) +
<p>{result['abstract']}</p>
""" + blocks.append(textwrap.dedent(item_html)) blocks.append("---") - if self.specs.footer_text: - blocks.append(self.specs.footer_text) + if self.report_config.footer_text: + blocks.append(self.report_config.footer_text) return markdown.markdown("\n".join(blocks)) @@ -188,11 +199,15 @@ def convert_report_dict_to_tuple_list(self) -> list: for term, departments in results.items(): for department, dpt_matches in departments.items(): for match in dpt_matches: - tuple_list.append(repack_match(header, group, term, department, match)) + tuple_list.append( + repack_match(header, group, term, department, match) + ) return tuple_list -def repack_match(header: str, group: str, search_term: str, department: str, match: dict) -> tuple: +def repack_match( + header: str, group: str, search_term: str, department: str, match: dict +) -> tuple: return ( header, group, diff --git a/src/notification/notifier.py b/src/notification/notifier.py index 84f8264..604801d 100644 --- a/src/notification/notifier.py +++ b/src/notification/notifier.py @@ -26,15 +26,21 @@ class Notifier: def __init__(self, specs: DAGConfig) -> None: self.senders = [] - if specs.emails: - self.senders.append(EmailSender(specs)) - if specs.discord_webhook: - self.senders.append(DiscordSender(specs)) - if specs.slack_webhook: - self.senders.append(SlackSender(specs)) + if specs.report.emails: + self.senders.append(EmailSender(specs.report)) + if specs.report.discord: + self.senders.append(DiscordSender(specs.report)) + if specs.report.slack: + self.senders.append(SlackSender(specs.report)) def send_notification(self, search_report: str, report_date: str): + """Sends the notification to the specified email, Discord or Slack + + Args: + search_report (str): The report to be sent + report_date (str): The date of the report + """ # Convert to data structure after it's retrieved from xcom search_report = ast.literal_eval(search_report) diff --git a/src/notification/slack_sender.py b/src/notification/slack_sender.py index 3158f49..8b636d5 100644 --- a/src/notification/slack_sender.py +++ b/src/notification/slack_sender.py @@ -1,20 +1,27 @@ +"""Send reports to Slack. +""" + from datetime import datetime +import re import requests -import re from notification.isender import ISender +from schemas import ReportConfig + class SlackSender(ISender): + """Prepare a report and send it to Slack. 
+ """ highlight_tags = ("*", "*") - def __init__(self, specs) -> None: - self.webhook_url = specs.slack_webhook + def __init__(self, report_config: ReportConfig) -> None: + self.webhook_url = report_config.slack["webhook"] self.blocks = [] - self.hide_filters = specs.hide_filters - self.header_text = specs.header_text - self.footer_text = specs.footer_text - self.no_results_found_text = specs.no_results_found_text + self.hide_filters = report_config.hide_filters + self.header_text = report_config.header_text + self.footer_text = report_config.footer_text + self.no_results_found_text = report_config.no_results_found_text def send(self, search_report: list, report_date: str = None): """Parse the content, and send message to Slack""" @@ -42,7 +49,7 @@ def send(self, search_report: list, report_date: str = None): for department, results in term_results.items(): if not self.hide_filters and department != 'single_department': - self._add_header(f"{department}") + self._add_header(f"{department}") for result in results: self._add_block(result) @@ -117,8 +124,9 @@ def _format_date(date_str: str) -> str: _from, _to = WEEKDAYS_EN_TO_PT[date.weekday()] return date.strftime("%a %d/%m").replace(_from, _to) + def _remove_html_tags(text): # Define a regular expression pattern to match HTML tags - clean = re.compile('<.*?>') + clean = re.compile("<.*?>") # Substitute HTML tags with an empty string - return re.sub(clean, '', text) \ No newline at end of file + return re.sub(clean, "", text) diff --git a/src/parsers.py b/src/parsers.py index 269f2d5..f895c23 100644 --- a/src/parsers.py +++ b/src/parsers.py @@ -1,64 +1,18 @@ """Abstract and concrete classes to parse DAG configuration from a file.""" -import ast -import os +# from dataclasses import dataclass +import json import textwrap -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import List, Set, Tuple, Union - +from typing import List, Tuple import yaml + from airflow import Dataset from airflow.models import Variable -@dataclass -class SearchConfig: - header: str - sources: List[str] - territory_id: int - dou_sections: List[str] - field: str - search_date: str - is_exact_search: bool - ignore_signature_match: bool - force_rematch: bool - full_text: bool - use_summary: bool - terms: List[str] - sql: str - conn_id: str - department: List[str] - - -@dataclass -class DAGConfig: - dag_id: str - search: List[SearchConfig] - emails: List[str] - subject: str - attach_csv: bool - discord_webhook: str - slack_webhook: str - schedule: str - dataset: str - description: str - skip_null: bool - doc_md: str - dag_tags: Set[str] - owner: str - hide_filters: bool - header_text: str - footer_text: str - no_results_found_text: str - - -class FileParser(ABC): - """Abstract class to build file parsers with DAG configuration.""" - - @abstractmethod - def parse(self): - pass -class YAMLParser(FileParser): +from schemas import RoDouConfig, DAGConfig + + +class YAMLParser: """Parses YAML file and get the DAG parameters. 
It guarantees that mandatory fields are in place and are properly @@ -68,16 +22,20 @@ class YAMLParser(FileParser): def __init__(self, filepath: str): self.filepath = filepath - def parse(self) -> DAGConfig: - return self._parse_yaml() + def read(self) -> dict: + """Reads the contents of the YAML file.""" + with open(self.filepath, "r", encoding="utf-8") as file: + dag_config_dict = yaml.safe_load(file) + return dag_config_dict - def _parse_yaml(self) -> DAGConfig: + def parse(self) -> DAGConfig: """Processes the config file in order to instantiate the DAG in Airflow. """ - with open(self.filepath, "r") as file: - dag_config_dict = yaml.safe_load(file) + config = RoDouConfig(**self.read()) + return config.dag + # TODO: remove old parser code dag = self._try_get(dag_config_dict, "dag") dag_id = self._try_get(dag, "id") description = self._try_get(dag, "description") @@ -169,7 +127,7 @@ def _get_terms_params(self, search) -> Tuple[List[str], str, str]: if "from_airflow_variable" in terms: var_value = Variable.get(terms.get("from_airflow_variable")) try: - terms = ast.literal_eval(var_value) + terms = json.loads(var_value) except (ValueError, SyntaxError): terms = var_value.splitlines() elif "from_db_select" in terms: diff --git a/src/schemas.py b/src/schemas.py new file mode 100644 index 0000000..196bdb5 --- /dev/null +++ b/src/schemas.py @@ -0,0 +1,229 @@ +""" +This module defines the Pydantic models for validating the structure of +the YAML files used in the application. + +The main classes are: + +- `SearchTerms`: search terms in the YAML file. +- `Search`: search configuration in the YAML file. +- `Report`: report configuration in the YAML file. +- `DAG`: DAG defined in the YAML file. +- `Config`: overall configuration in the YAML file. + +These models are used to validate the YAML files using the Pydantic +library. +""" + +import textwrap +from typing import List, Optional, Set, Union +from pydantic import AnyHttpUrl, BaseModel, EmailStr, Field +from pydantic import field_validator + + +class DBSelect(BaseModel): + """Represents the structure of the 'from_db_select' field in the YAML file.""" + + sql: str = Field(description="SQL query to fetch the search terms") + conn_id: str = Field(description="Airflow connection ID to use for the SQL query") + + +class FetchTermsConfig(BaseModel): + """Represents configuration information for fetching search terms from + a data source.""" + + from_airflow_variable: Optional[str] = Field( + default=None, + description="Variável do Airflow a ser usada como termos de pesquisa", + ) + from_db_select: Optional[DBSelect] = Field( + default=None, + description="Consulta SQL para buscar os termos de pesquisa em um " + "banco de dados", + ) + + +class SearchField(BaseModel): + """Represents the field for search in the YAML file.""" + + description: str + value: str + + +class SearchConfig(BaseModel): + """Represents the search configuration in the YAML file.""" + + header: Optional[str] = Field( + default=None, description="Cabeçalho da consulta de pesquisa" + ) + sources: Optional[List[str]] = Field( + default=["DOU"], + description="Lista de fontes de dados para pesquisar (Querido Diário [QD], " + "Diário Oficial da União [DOU], INLABS). Default: DOU.", + ) + territory_id: Optional[int] = Field( + default=None, + description="ID do território no Querido Diário para filtragem " + "baseada em localização", + ) + date: Optional[str] = Field( + default="DIA", + description="Intervalo de data para busca. Valores: DIA, SEMANA, " + "MES, ANO. 
Default: DIA", + ) + dou_sections: Optional[List[str]] = Field( + default=["TODOS"], + description=textwrap.dedent( + """ + Seção do Diário Oficial a procurar: + + - SECAO_1 + - SECAO_2 + - SECAO_3 + - EDICAO_EXTRA + - EDICAO_EXTRA_1A + - EDICAO_EXTRA_1B + - EDICAO_EXTRA_1D + - EDICAO_EXTRA_2A + - EDICAO_EXTRA_2B + - EDICAO_EXTRA_2D + - EDICAO_EXTRA_3A + - EDICAO_EXTRA_3B + - EDICAO_EXTRA_3D + - EDICAO_SUPLEMENTAR + - TODOS + + Default: TODOS + """ + ), + ) + department: Optional[List[str]] = Field( + default=None, description="Lista de departamentos para filtrar a pesquisa" + ) + terms: Union[List[str], FetchTermsConfig] = Field( + description="Lista de termos de pesquisa ou uma forma de buscá-los" + ) + field: Optional[str] = Field( + default="TUDO", + description="Campos dos quais os termos devem ser pesquisados. " + "Valores: TUDO, TITULO, CONTEUDO. Default: TUDO", + ) + is_exact_search: Optional[bool] = Field( + default=True, + description="Busca somente o termo exato. Valores: True ou False. " + "Default: True.", + ) + ignore_signature_match: Optional[bool] = Field( + default=False, + description="Busca somente o termo exato. Valores: True ou False. " + "Default: True.", + ) + force_rematch: Optional[bool] = Field( + default=False, + description="Indica que a busca deve ser forçada, mesmo que já " + "tenha sido feita anteriormente. Valores: True ou False. " + "Default: False.", + ) + full_text: Optional[bool] = Field( + default=False, + description="Define se no relatório será exibido o texto completo, " + "ao invés de um resumo. Valores: True ou False. Default: False. " + "(Funcionalidade disponível apenas no INLABS)", + ) + use_summary: Optional[bool] = Field( + default=False, + description="Define se no relatório será exibido a ementa, se existir. " + "Valores: True ou False. Default: False. " + "(Funcionalidade disponível apenas no INLABS)", + ) + + +class ReportConfig(BaseModel): + """Represents the report configuration in the YAML file.""" + + slack: Optional[dict] = Field( + default=None, description="Configuração do webhook do Slack para relatórios" + ) + discord: Optional[dict] = Field( + default=None, description="Configuração do webhook do Discord para relatórios" + ) + emails: Optional[List[EmailStr]] = Field( + default=None, description="Lista de endereços de e-mail para enviar o relatório" + ) + attach_csv: Optional[bool] = Field( + default=False, + description="Se deve anexar um arquivo CSV com os resultados da pesquisa." + "Default: False.", + ) + subject: Optional[str] = Field( + default=None, description="Assunto do relatório por e-mail" + ) + skip_null: Optional[bool] = Field( + default=True, + description="Se deve pular a notificação de resultados nulos/vazios. " + "Default: True.", + ) + hide_filters: Optional[bool] = Field( + default=False, + description="Se deve ocultar os filtros aplicados no relatório." 
+ "Default: False.", + ) + header_text: Optional[str] = Field( + default=None, description="Texto a ser incluído no cabeçalho do relatório" + ) + footer_text: Optional[str] = Field( + default=None, description="Texto a ser incluído no rodapé do relatório" + ) + no_results_found_text: Optional[str] = Field( + default="Nenhum dos termos pesquisados foi encontrado nesta consulta", + description="Texto a ser exibido quando não há resultados", + ) + + +class DAGConfig(BaseModel): + """Represents the DAG configuration in the YAML file.""" + + id: str = Field(description="Nome único da DAG") + description: str = Field(description="Descrição da DAG") + tags: Optional[Set[str]] = Field( + default={"dou", "generated_dag"}, + description="Conjunto de tags para filtragem da DAG no Airflow", + ) + owner: Optional[List[str]] = Field( + default=[], description="Lista de owners para filtragem da DAG no Airflow" + ) + schedule: Optional[str] = Field(default=None, description="Expressão cron") + dataset: Optional[str] = Field(default=None, description="Nome do Dataset") + search: Union[List[SearchConfig], SearchConfig] = Field( + description="Seção para definição da busca no Diário" + ) + doc_md: Optional[str] = Field(default=None, description="description") + report: ReportConfig = Field( + description="Aceita: `slack`, `discord`, `emails`, `attach_csv`, " + "`subject`, `skip_null`" + ) + + @field_validator("search") + @staticmethod + def cast_to_list( + search_param: Union[List[SearchConfig], SearchConfig] + ) -> List[SearchConfig]: + """Cast the value of "search" parameter to always be a list. + If the yaml configuration file does not use a list, convert to + a list with a single search. + """ + if not isinstance(search_param, list): + return [search_param] + return search_param + + @field_validator("tags") + @staticmethod + def add_default_tags(tags_param: Optional[Set[str]]) -> Set[str]: + """Add default tags to the list of tags.""" + tags_param.update({"dou", "generated_dag"}) + return tags_param + + +class RoDouConfig(BaseModel): + """Represents the overall configuration in the YAML file.""" + + dag: DAGConfig = Field(description="Instanciação da DAG") diff --git a/tests/discord_sender_test.py b/tests/discord_sender_test.py index 071c3ac..1d3c18a 100644 --- a/tests/discord_sender_test.py +++ b/tests/discord_sender_test.py @@ -12,7 +12,7 @@ def mocked_specs(): Specs = namedtuple( "Specs", [ - "discord_webhook", + "discord", "hide_filters", "header_text", "footer_text", @@ -20,7 +20,7 @@ def mocked_specs(): ], ) return Specs( - WEBHOOK, + {"webhook": WEBHOOK}, False, None, None, diff --git a/tests/parsers_test.py b/tests/parsers_test.py index 442f1ed..ef2bb4e 100644 --- a/tests/parsers_test.py +++ b/tests/parsers_test.py @@ -5,7 +5,7 @@ import sys import inspect import textwrap -import yaml + import pytest currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) @@ -14,14 +14,14 @@ from dou_dag_generator import DouDigestDagGenerator, YAMLParser, DAGConfig - @pytest.mark.parametrize( "filepath, result_tuple", [ ( "basic_example.yaml", { - "dag_id": "basic_example", + "id": "basic_example", + "description": "DAG de teste", "search": [ { "terms": [ @@ -39,34 +39,43 @@ "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do 
Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": None, - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "discord_webhook": None, + "slack_webhook": None, + "schedule": None, + "dataset": None, + "description": "DAG de teste", + "skip_null": True, + "doc_md": None, + "tags": {"dou", "generated_dag"}, + "owner": [], + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados " + "foi encontrado nesta consulta", + }, }, ), ( "all_parameters_example.yaml", { - "dag_id": "all_parameters_example", + "id": "all_parameters_example", + "schedule": "0 8 * * MON-FRI", + "dataset": None, + "description": "DAG exemplo utilizando todos os demais parâmetros.", + "doc_md": None, + "tags": {"dou", "generated_dag", "projeto_a", "departamento_x"}, + "owner": ["pessoa 1", "pessoa 2"], "search": [ { "terms": [ @@ -74,13 +83,13 @@ "governo aberto", "lei de acesso à informação", ], - "header": None, + "header": "Pesquisa no DOU", "sources": ["DOU"], "sql": None, "conn_id": None, "territory_id": None, "dou_sections": ["SECAO_1", "EDICAO_SUPLEMENTAR"], - "search_date": "MES", + "date": "MES", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": True, @@ -90,76 +99,85 @@ "department": None, } ], - "emails": ["dest1@economia.gov.br", "dest2@economia.gov.br"], - "subject": "Assunto do Email", - "attach_csv": True, - "discord_webhook": None, - "slack_webhook": None, - "schedule": "0 8 * * MON-FRI", - "dataset": None, - "description": "DAG exemplo utilizando todos os demais parâmetros.", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag", "projeto_a", "departamento_x"}, - "owner": "pessoa 1, pessoa 2", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "skip_null": True, + "emails": ["dest1@economia.gov.br", "dest2@economia.gov.br"], + "subject": "Assunto do Email", + "attach_csv": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi " + "encontrado nesta consulta", + }, }, ), ( "terms_from_db_example.yaml", { - "dag_id": "terms_from_db_example", + "id": "terms_from_db_example", + "description": "DAG de teste", + "doc_md": None, + "tags": {"dou", "generated_dag"}, + "owner": [], + "schedule": None, + "dataset": None, "search": [ { - "terms": [], + "terms": { + "from_airflow_variable": None, + "from_db_select": { + "sql": ( + "SELECT 'cloroquina' as TERMO, 'Ações inefetivas' as GRUPO " + "UNION SELECT 'ivermectina' as TERMO, 'Ações inefetivas' as GRUPO " + "UNION SELECT 'vacina contra covid' as TERMO, 'Ações efetivas' as GRUPO " + "UNION SELECT 'higienização das mãos' as TERMO, 'Ações efetivas' as GRUPO " + "UNION SELECT 'uso de máscara' as TERMO, 'Ações efetivas' as GRUPO " + "UNION SELECT 'distanciamento social' as TERMO, 'Ações efetivas' as GRUPO\n" + ), + "conn_id": "example_database_conn", + } + }, "header": None, "sources": ["DOU"], 
- "sql": ( - "SELECT 'cloroquina' as TERMO, 'Ações inefetivas' as GRUPO " - "UNION SELECT 'ivermectina' as TERMO, 'Ações inefetivas' as GRUPO " - "UNION SELECT 'vacina contra covid' as TERMO, 'Ações efetivas' as GRUPO " - "UNION SELECT 'higienização das mãos' as TERMO, 'Ações efetivas' as GRUPO " - "UNION SELECT 'uso de máscara' as TERMO, 'Ações efetivas' as GRUPO " - "UNION SELECT 'distanciamento social' as TERMO, 'Ações efetivas' as GRUPO\n" - ), - "conn_id": "example_database_conn", "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "MES", + "date": "MES", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "[String] com caracteres especiais deve estar entre aspas", - "attach_csv": True, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": None, - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "[String] com caracteres especiais deve estar entre aspas", + "attach_csv": True, + "discord_webhook": None, + "slack_webhook": None, + "skip_null": True, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "basic_example_skip_null.yaml", { - "dag_id": "basic_example_skip_null", + "id": "basic_example_skip_null", + "schedule": None, + "dataset": None, + "description": "DAG de teste", + "doc_md": None, + "tags": {"dou", "generated_dag"}, + "owner": [], "search": [ { "terms": ["cimentodaaroeira"], @@ -169,38 +187,48 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": None, - "description": "DAG de teste", - "skip_null": False, - "doc_md": None, - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "discord_webhook": None, + "slack_webhook": None, + "skip_null": False, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "markdown_docs_example.yaml", { - "dag_id": "markdown_docs_example", + "id": "markdown_docs_example", + "schedule": None, + "dataset": None, + "description": "DAG com documentação em markdown", + "doc_md": textwrap.dedent( + """ + ## Ola! + Esta é uma DAG de exemplo com documentação em markdown. 
Esta descrição é opcional e pode ser definida no parâmetro `doc_md`. + + * Ah, aqui você também pode usar *markdown* para + * escrever listas, por exemplo, + * ou colocar [links](graph)!""" + ).strip(), + "tags": {"dou", "generated_dag"}, + "owner": [], "search": [ { "terms": [ @@ -214,46 +242,40 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": None, - "description": "DAG com documentação em markdown", - "skip_null": True, - "doc_md": textwrap.dedent( - """ - ## Ola! - Esta é uma DAG de exemplo com documentação em markdown. Esta descrição é opcional e pode ser definida no parâmetro `doc_md`. - - * Ah, aqui você também pode usar *markdown* para - * escrever listas, por exemplo, - * ou colocar [links](graph)!""" - ).strip(), - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "skip_null": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "department_example.yaml", { - "dag_id": "department_example", + "id": "department_example", + "schedule": None, + "dataset": None, + "description": "DAG de teste (filtro por departamento)", + "doc_md": None, + "tags": {"dou", "generated_dag"}, + "owner": [], "search": [ { "terms": ["dados abertos"], @@ -263,41 +285,43 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": [ "Ministério da Gestão e da Inovação em Serviços Públicos", "Ministério da Defesa", ], } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": None, - "description": "DAG de teste (filtro por departamento)", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "skip_null": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "inlabs_example.yaml", { - "dag_id": "inlabs_example", + "id": "inlabs_example", + "schedule": "0 8 * * MON-FRI", + "dataset": "inlabs", 
+ "description": "DAG de teste", + "doc_md": None, + "tags": {"dou", "generated_dag", "inlabs"}, + "owner": ["cdata"], "search": [ { "terms": ["tecnologia", "informação"], @@ -307,38 +331,41 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, + "force_rematch": False, + "full_text": False, "use_summary": True, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": True, - "discord_webhook": None, - "slack_webhook": None, - "schedule": "0 8 * * MON-FRI", - "dataset": "inlabs", - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag", "inlabs"}, - "owner": "cdata", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": True, + "skip_null": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "inlabs_advanced_search_example.yaml", { - "dag_id": "inlabs_advanced_search_example", + "id": "inlabs_advanced_search_example", + "schedule": None, + "dataset": "inlabs", + "description": "DAG de teste", + "skip_null": True, + "doc_md": None, + "tags": {"dou", "generated_dag", "inlabs"}, + "owner": ["cdata"], "search": [ { "terms": [ @@ -351,38 +378,39 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": True, - "discord_webhook": None, - "slack_webhook": None, - "schedule": None, - "dataset": "inlabs", - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag", "inlabs"}, - "owner": "cdata", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "multiple_searchs_example.yaml", { - "dag_id": "multiple_searchs_example", + "id": "multiple_searchs_example", + "schedule": "0 8 * * MON-FRI", + "dataset": None, + "description": "DAG de teste com múltiplas buscas", + "doc_md": None, + "tags": {"dou", "generated_dag", "inlabs"}, + "owner": [], "search": [ { "terms": [ @@ -396,13 +424,13 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": True, "force_rematch": True, - "full_text": None, - "use_summary": None, + "full_text": False, + 
"use_summary": False, "department": None, }, { @@ -417,38 +445,40 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": True, "force_rematch": True, - "full_text": None, - "use_summary": None, + "full_text": False, + "use_summary": False, "department": None, }, ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": "0 8 * * MON-FRI", - "dataset": None, - "description": "DAG de teste com múltiplas buscas", - "skip_null": False, - "doc_md": None, - "dag_tags": {"dou", "generated_dag", "inlabs"}, - "owner": "", - "hide_filters": False, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "skip_null": False, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "hide_filters_example.yaml", { - "dag_id": "hide_filters_example", + "id": "hide_filters_example", + "schedule": "0 8 * * MON-FRI", + "dataset": None, + "description": "DAG de teste", + "doc_md": None, + "tags": {"dou", "inlabs", "generated_dag"}, + "owner": [], "search": [ { "terms": ["tecnologia", "informação"], @@ -458,41 +488,43 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + "use_summary": False, "department": [ "Ministério da Gestão e da Inovação em Serviços Públicos", "Ministério da Defesa", ], } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": True, - "discord_webhook": None, - "slack_webhook": None, - "schedule": "0 8 * * MON-FRI", - "dataset": None, - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "inlabs", "generated_dag"}, - "owner": "", - "hide_filters": True, - "header_text": None, - "footer_text": None, - "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": True, + "skip_null": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": True, + "header_text": None, + "footer_text": None, + "no_results_found_text": "Nenhum dos termos pesquisados foi encontrado nesta consulta", + }, }, ), ( "header_and_footer_example.yaml", { - "dag_id": "header_and_footer_example", + "id": "header_and_footer_example", + "schedule": "0 8 * * MON-FRI", + "dataset": None, + "description": "DAG de teste", + "doc_md": None, + "tags": {"dou", "generated_dag"}, + "owner": [], "search": [ { "terms": ["tecnologia", "informação"], @@ -502,41 +534,36 @@ "conn_id": None, "territory_id": None, "dou_sections": ["TODOS"], - "search_date": "DIA", + "date": "DIA", "field": "TUDO", "is_exact_search": True, "ignore_signature_match": False, - "force_rematch": None, - "full_text": None, - "use_summary": None, + "force_rematch": False, + "full_text": False, + 
"use_summary": False, "department": None, } ], - "emails": ["destination@economia.gov.br"], - "subject": "Teste do Ro-dou", - "attach_csv": False, - "discord_webhook": None, - "slack_webhook": None, - "schedule": "0 8 * * MON-FRI", - "dataset": None, - "description": "DAG de teste", - "skip_null": True, - "doc_md": None, - "dag_tags": {"dou", "generated_dag"}, - "owner": "", - "hide_filters": False, - "header_text": "
<p>Greetings</p>
", - "footer_text": "
<p>Best Regards</p>
", - "no_results_found_text": "No results found", + "report": { + "emails": ["destination@economia.gov.br"], + "subject": "Teste do Ro-dou", + "attach_csv": False, + "skip_null": True, + "discord_webhook": None, + "slack_webhook": None, + "hide_filters": False, + "header_text": "
<p>Greetings</p>
", + "footer_text": "
<p>Best Regards</p>
", + "no_results_found_text": "No results found", + }, }, ), ], ) - def test_parse(filepath, result_tuple): filepath = os.path.join( DouDigestDagGenerator().YAMLS_DIR, "examples_and_tests", filepath ) parsed = YAMLParser(filepath=filepath).parse() - assert parsed == DAGConfig(**result_tuple) + assert parsed.model_dump() == DAGConfig(**result_tuple).model_dump() diff --git a/tests/test_validate_yaml_schemas.py b/tests/test_validate_yaml_schemas.py index 4cd91fc..fedfadd 100644 --- a/tests/test_validate_yaml_schemas.py +++ b/tests/test_validate_yaml_schemas.py @@ -1,36 +1,20 @@ -import json -import jsonschema -import pytest +"""Test validation of yaml files according to the defined schemas. +""" + import glob +import os +import sys + +from pydantic import ValidationError +import pytest import yaml -import requests -from urllib.parse import urlparse +# add module path so we can import from other modules +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) +from schemas import RoDouConfig YAMLS_DIR = "../dags/ro_dou/dag_confs" -SCHEMA_FILEPATH = "../schemas/ro-dou.json" -# or -# SCHEMA_FILEPATH = "https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/schemas/ro-dou.json" - - -def get_schema(filepath): - def _is_valid_url(url): - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except ValueError: - return False - - if _is_valid_url(filepath): - response = requests.get(filepath) - response.raise_for_status() - return json.loads(response.text) - else: - with open(filepath) as f: - return json.load(f) - -SCHEMA = get_schema(SCHEMA_FILEPATH) @pytest.mark.parametrize( "data_file", @@ -40,8 +24,11 @@ def _is_valid_url(url): + glob.glob(f"{YAMLS_DIR}/**/*.yaml", recursive=True) ], ) -def test_json_schema_validation(data_file): +def test_pydantic_validation(data_file): with open(data_file) as data_fp: data = yaml.safe_load(data_fp) - jsonschema.validate(instance=data, schema=SCHEMA) + try: + RoDouConfig(**data) + except ValidationError as e: + pytest.fail(f"YAML file {data_file} is not valid:\n{e}")