Skip to content

Commit

Permalink
python linter cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jarno-knaw committed Jul 23, 2024
1 parent e972051 commit b7a0f07
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 45 deletions.
62 changes: 33 additions & 29 deletions src/import/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,24 @@ class Article:
Represents a single article.
"""

def __init__(self, articleId, folder, path, from_raw, newsgroups, subject, message_id, date, x_gateway, lines, xref,
body, references):
def __init__(self, article_id, data):
self.from_name = None
self.from_email = None
self.date = None
self.articleId = articleId
self.location = folder
self.path = path
self.newsgroups = newsgroups.split(',')
self.subject = subject
self.message_id = message_id
self.x_gateway = x_gateway
self.lines = lines
self.xref = xref
self.body = body
self.references = references

self.set_from(from_raw)
self.set_date(date)
self.article_id = article_id
self.location = data['folder']
self.path = data['path']
self.newsgroups = data['newsgroups'].split(',')
self.subject = data['subject']
self.message_id = data['message_id']
self.x_gateway = data['x_gateway']
self.lines = data['lines']
self.xref = data['xref']
self.body = data['body']
self.references = data['references']

self.set_from(data['from_raw'])
self.set_date(data['date'])

def set_date(self, date):
"""
Expand Down Expand Up @@ -78,7 +77,7 @@ def to_dict(self):
:return:
"""
return {
'id': self.articleId,
'id': self.article_id,
'path': self.path,
'folder': self.location,
'from_name': self.from_name,
Expand Down Expand Up @@ -126,18 +125,23 @@ def from_file(path):
pathstr = str(path)
path_parts = pathstr.split('/')
article_id = '-'.join(path_parts[-3:])

# Gather the parsed headers and the body into the single mapping that
# Article.__init__ reads its fields from.
# NOTE(review): msg is presumably an email.message.Message-like object
# (it is used with both msg[...] and msg.get(...)) - confirm against the
# part of from_file outside this view.
data = {
    'path': msg['Path'],
    'folder': '/'.join(path_parts[-3:-1]),
    'from_raw': msg['From'],
    'newsgroups': msg.get('Newsgroups', ''),
    'subject': msg['Subject'],
    'message_id': msg['Message-ID'],
    'date': msg['Date'],
    'x_gateway': msg.get('X-Gateway', ''),
    'lines': msg['Lines'],
    # The cross-reference header is named "Xref" (not "X-Reference").
    # Look it up with an empty-string default, like the other optional
    # headers, so the field matches what was passed before the refactor.
    'xref': msg.get('Xref', ''),
    'references': msg.get('References', ''),
    'body': body.strip(),
}

return Article(
article_id,
'/'.join(path_parts[-3:-1]),
msg['Path'],
msg['From'],
msg.get('Newsgroups', ''),
msg['Subject'],
msg['Message-ID'],
msg['Date'],
msg.get('X-Gateway', ''),
msg['Lines'],
msg.get('Xref', ''),
body.strip(),
msg.get('References', '')
data
)
10 changes: 5 additions & 5 deletions src/import/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
This is a test import script which imports data from zipped
Usenet news files.
"""
from elasticsearch.helpers import BulkIndexError

from article import Article
import os
import sys
from pathlib import Path
from elasticsearch import Elasticsearch, helpers
import tempfile
import zipfile
import concurrent.futures
from article import Article
from elasticsearch import Elasticsearch, helpers
import yaml


Expand Down Expand Up @@ -63,7 +64,6 @@ def create_mapping(name):

client.indices.create(
index=name,
ignore=400,
body=settings,
)

Expand Down Expand Up @@ -118,8 +118,8 @@ def process_bulk(filenames):
for article in articles
]
try:
res = helpers.bulk(client, bulk)
except BaseException as e:
helpers.bulk(client, bulk)
except BulkIndexError as e:
print("Error!")
print(e.__class__.__name__)
print(e)
Expand Down
17 changes: 14 additions & 3 deletions src/service/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
The Flask application, with routes for handling the
back-end of the application.
"""

import os
from elastic_index import Index
from flask import Flask, request, jsonify
Expand Down Expand Up @@ -35,12 +40,13 @@ def catch_all():
return app.send_static_file("index.html")


@app.route("/detail/<id>")
def detail(id):
@app.route("/detail/<article_id>")
def detail(article_id):
    """
    Serve the front-end entry page for an article detail URL.
    The requested id is only printed here; the page itself is the
    static index.html (pages are handled by React).
    :param article_id: id taken from the URL path
    :return: the static index.html response
    """
    message = f"Requesting page for id: {article_id}"
    print(message)
    front_end = app.send_static_file("index.html")
    return front_end


Expand All @@ -66,7 +72,12 @@ def get_facet():
:return:
"""
struc = request.get_json()
ret_struc = index.get_facet(struc["name"], struc["amount"], struc["filter"], struc["searchvalues"])
ret_struc = index.get_facet(
struc["name"],
struc["amount"],
struc["filter"],
struc["searchvalues"]
)
return jsonify(ret_struc)


Expand Down
62 changes: 54 additions & 8 deletions src/service/elastic_index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import json
"""
elastic_index.py
This includes class Index for dealing with Elasticsearch.
Contains methods for finding articles.
"""

import yaml
from elasticsearch import Elasticsearch
import string
import math
from werkzeug.exceptions import NotFound

Expand All @@ -12,7 +15,13 @@ def __init__(self, config):
self.config = config
self.client = Elasticsearch([{"host": self.config["url"]}])

def no_case(selfself, str_in):
@staticmethod
def no_case(str_in):
"""
Create query from string, case insensitive.
:param str_in:
:return:
"""
str = str_in.strip()
ret_str = ""
if str != "":
Expand All @@ -22,6 +31,11 @@ def no_case(selfself, str_in):

@staticmethod
def make_matches(searchvalues):
"""
Create match queries.
:param searchvalues:
:return:
"""
must_collection = []
for item in searchvalues:
if item["field"] == "FREE_TEXT":
Expand All @@ -37,6 +51,14 @@ def make_matches(searchvalues):
return must_collection

def get_facet(self, field, amount, facet_filter, search_values):
"""
Get a facet.
:param field:
:param amount:
:param facet_filter:
:param search_values:
:return:
"""
terms = {
"field": field + ".keyword",
"size": amount,
Expand All @@ -46,8 +68,6 @@ def get_facet(self, field, amount, facet_filter, search_values):
}

if facet_filter:
# filtered_filter = facet_filter.translate(str.maketrans('', '', string.punctuation))
# filtered_filter = ''.join([f"[{char.upper()}{char.lower()}]" for char in filtered_filter])
filtered_filter = ''.join([f"[{char.upper()}{char.lower()}]" for char in facet_filter])
terms["include"] = f'.*{filtered_filter}.*'

Expand All @@ -72,6 +92,13 @@ def get_facet(self, field, amount, facet_filter, search_values):
for hits in response["aggregations"]["names"]["buckets"]]

def get_filter_facet(self, field, amount, facet_filter):
"""
Get a filter facet.
:param field:
:param amount:
:param facet_filter:
:return:
"""
ret_array = []
response = self.client.search(
index="articles",
Expand Down Expand Up @@ -103,6 +130,13 @@ def get_filter_facet(self, field, amount, facet_filter):
return ret_array

def get_nested_facet(self, field, amount, facet_filter):
"""
Get a nested facet.
:param field:
:param amount:
:param facet_filter:
:return:
"""
ret_array = []
path = field.split('.')[0]
response = self.client.search(
Expand Down Expand Up @@ -154,6 +188,13 @@ def get_min_max(self, fields):
return tmp

def browse(self, page, length, search_values):
"""
Search for articles.
:param page:
:param length:
:param search_values:
:return:
"""
int_page = int(page)
start = (int_page - 1) * length

Expand Down Expand Up @@ -184,6 +225,11 @@ def browse(self, page, length, search_values):
"items": [item["_source"] for item in response["hits"]["hits"]]}

def get_facets(self):
"""
Get all facets. Parses the configuration YAML file for determining
what facets are available.
:return:
"""
with open("fields.yaml", 'r') as stream:
data = yaml.safe_load(stream)
tmp = {}
Expand Down Expand Up @@ -252,13 +298,13 @@ def get_replies(self, message_id):
})
return response["hits"]['hits']

def by_id(self, id):
def by_id(self, article_id):
"""
Get an article by id
:param id:
:param article_id:
:return:
"""
res = self.client.get(index='articles', id=id)
res = self.client.get(index='articles', id=article_id)
if not res['found']:
raise NotFound('Article not found')

Expand Down

0 comments on commit b7a0f07

Please sign in to comment.