From b7a0f070f58cba20c60c087fb14b3f0de1045b4c Mon Sep 17 00:00:00 2001 From: Jarno Bakker Date: Tue, 23 Jul 2024 13:47:03 +0200 Subject: [PATCH] python linter cleanup --- src/import/article.py | 62 +++++++++++++++++++----------------- src/import/test_import.py | 10 +++--- src/service/app.py | 17 ++++++++-- src/service/elastic_index.py | 62 +++++++++++++++++++++++++++++++----- 4 files changed, 106 insertions(+), 45 deletions(-) diff --git a/src/import/article.py b/src/import/article.py index 3214f40..e3e3d41 100644 --- a/src/import/article.py +++ b/src/import/article.py @@ -13,25 +13,24 @@ class Article: Represents a single article. """ - def __init__(self, articleId, folder, path, from_raw, newsgroups, subject, message_id, date, x_gateway, lines, xref, - body, references): + def __init__(self, article_id, data): self.from_name = None self.from_email = None self.date = None - self.articleId = articleId - self.location = folder - self.path = path - self.newsgroups = newsgroups.split(',') - self.subject = subject - self.message_id = message_id - self.x_gateway = x_gateway - self.lines = lines - self.xref = xref - self.body = body - self.references = references - - self.set_from(from_raw) - self.set_date(date) + self.article_id = article_id + self.location = data['folder'] + self.path = data['path'] + self.newsgroups = data['newsgroups'].split(',') + self.subject = data['subject'] + self.message_id = data['message_id'] + self.x_gateway = data['x_gateway'] + self.lines = data['lines'] + self.xref = data['xref'] + self.body = data['body'] + self.references = data['references'] + + self.set_from(data['from_raw']) + self.set_date(data['date']) def set_date(self, date): """ @@ -78,7 +77,7 @@ def to_dict(self): :return: """ return { - 'id': self.articleId, + 'id': self.article_id, 'path': self.path, 'folder': self.location, 'from_name': self.from_name, @@ -126,18 +125,23 @@ def from_file(path): pathstr = str(path) path_parts = pathstr.split('/') article_id = '-'.join(path_parts[-3:]) + + data = { + 'path': msg['Path'], + 'folder': '/'.join(path_parts[-3:-1]), + 'from_raw': msg['From'], + 'newsgroups': msg.get('Newsgroups', ''), + 'subject': msg['Subject'], + 'message_id': msg['Message-ID'], + 'date': msg['Date'], + 'x_gateway': msg.get('X-Gateway', ''), + 'lines': msg['Lines'], + 'xref': msg['X-Reference'], + 'references': msg.get('References', ''), + 'body': body.strip(), + } + return Article( article_id, - '/'.join(path_parts[-3:-1]), - msg['Path'], - msg['From'], - msg.get('Newsgroups', ''), - msg['Subject'], - msg['Message-ID'], - msg['Date'], - msg.get('X-Gateway', ''), - msg['Lines'], - msg.get('Xref', ''), - body.strip(), - msg.get('References', '') + data ) diff --git a/src/import/test_import.py b/src/import/test_import.py index f193027..287e6bd 100755 --- a/src/import/test_import.py +++ b/src/import/test_import.py @@ -4,15 +4,16 @@ This is a test import script which imports data from zipped Usenet news files. """ +from elasticsearch.helpers import BulkIndexError -from article import Article import os import sys from pathlib import Path -from elasticsearch import Elasticsearch, helpers import tempfile import zipfile import concurrent.futures +from article import Article +from elasticsearch import Elasticsearch, helpers import yaml @@ -63,7 +64,6 @@ def create_mapping(name): client.indices.create( index=name, - ignore=400, body=settings, ) @@ -118,8 +118,8 @@ def process_bulk(filenames): for article in articles ] try: - res = helpers.bulk(client, bulk) - except BaseException as e: + helpers.bulk(client, bulk) + except BulkIndexError as e: print("Error!") print(e.__class__.__name__) print(e) diff --git a/src/service/app.py b/src/service/app.py index 0097725..54f197e 100644 --- a/src/service/app.py +++ b/src/service/app.py @@ -1,3 +1,8 @@ +""" +The Flask application, with routes for handling the +back-end of the application. +""" + import os from elastic_index import Index from flask import Flask, request, jsonify @@ -35,12 +40,13 @@ def catch_all(): return app.send_static_file("index.html") -@app.route("/detail/") -def detail(id): +@app.route("/detail/") +def detail(article_id): """ Return the front-end, pages are handled by React :return: """ + print(f"Requesting page for id: {article_id}") return app.send_static_file("index.html") @@ -66,7 +72,12 @@ def get_facet(): :return: """ struc = request.get_json() - ret_struc = index.get_facet(struc["name"], struc["amount"], struc["filter"], struc["searchvalues"]) + ret_struc = index.get_facet( + struc["name"], + struc["amount"], + struc["filter"], + struc["searchvalues"] + ) return jsonify(ret_struc) diff --git a/src/service/elastic_index.py b/src/service/elastic_index.py index 28c234b..e7eb4a4 100644 --- a/src/service/elastic_index.py +++ b/src/service/elastic_index.py @@ -1,8 +1,11 @@ -import json +""" +elastic_index.py +This includes class Index for dealing with Elasticsearch. +Contains methods for finding articles. +""" import yaml from elasticsearch import Elasticsearch -import string import math from werkzeug.exceptions import NotFound @@ -12,7 +15,13 @@ def __init__(self, config): self.config = config self.client = Elasticsearch([{"host": self.config["url"]}]) - def no_case(selfself, str_in): + @staticmethod + def no_case(str_in): + """ + Create query from string, case insensitive. + :param str_in: + :return: + """ str = str_in.strip() ret_str = "" if str != "": @@ -22,6 +31,11 @@ def no_case(selfself, str_in): @staticmethod def make_matches(searchvalues): + """ + Create match queries. + :param searchvalues: + :return: + """ must_collection = [] for item in searchvalues: if item["field"] == "FREE_TEXT": @@ -37,6 +51,14 @@ def make_matches(searchvalues): return must_collection def get_facet(self, field, amount, facet_filter, search_values): + """ + Get a facet. + :param field: + :param amount: + :param facet_filter: + :param search_values: + :return: + """ terms = { "field": field + ".keyword", "size": amount, @@ -46,8 +68,6 @@ def get_facet(self, field, amount, facet_filter, search_values): } if facet_filter: - # filtered_filter = facet_filter.translate(str.maketrans('', '', string.punctuation)) - # filtered_filter = ''.join([f"[{char.upper()}{char.lower()}]" for char in filtered_filter]) filtered_filter = ''.join([f"[{char.upper()}{char.lower()}]" for char in facet_filter]) terms["include"] = f'.*{filtered_filter}.*' @@ -72,6 +92,13 @@ def get_facet(self, field, amount, facet_filter, search_values): for hits in response["aggregations"]["names"]["buckets"]] def get_filter_facet(self, field, amount, facet_filter): + """ + Get a filter facet. + :param field: + :param amount: + :param facet_filter: + :return: + """ ret_array = [] response = self.client.search( index="articles", @@ -103,6 +130,13 @@ def get_filter_facet(self, field, amount, facet_filter): return ret_array def get_nested_facet(self, field, amount, facet_filter): + """ + Get a nested facet. + :param field: + :param amount: + :param facet_filter: + :return: + """ ret_array = [] path = field.split('.')[0] response = self.client.search( @@ -154,6 +188,13 @@ def get_min_max(self, fields): return tmp def browse(self, page, length, search_values): + """ + Search for articles. + :param page: + :param length: + :param search_values: + :return: + """ int_page = int(page) start = (int_page - 1) * length @@ -184,6 +225,11 @@ def browse(self, page, length, search_values): "items": [item["_source"] for item in response["hits"]["hits"]]} def get_facets(self): + """ + Get all facets. Parses the configuration YAML file for determining + what facets are available. + :return: + """ with open("fields.yaml", 'r') as stream: data = yaml.safe_load(stream) tmp = {} @@ -252,13 +298,13 @@ def get_replies(self, message_id): }) return response["hits"]['hits'] - def by_id(self, id): + def by_id(self, article_id): """ Get an article by id - :param id: + :param article_id: :return: """ - res = self.client.get(index='articles', id=id) + res = self.client.get(index='articles', id=article_id) if not res['found']: raise NotFound('Article not found')