This repository has been archived by the owner on Nov 18, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
executable file
·174 lines (141 loc) · 6.3 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python
# Python 3.6+
import elasticsearch
from flask import Flask, request, render_template, send_from_directory
from flask_paginate import Pagination
from elasticsearch import Elasticsearch
from elasticsearch_dsl import FacetedSearch, RangeFacet, TermsFacet
from elasticsearch_dsl.connections import connections
from bleach import clean
from markupsafe import Markup
from operator import itemgetter
# Name of the Elasticsearch index that SouSearch queries.
ES_INDEX_NAME = 'sou2'
# Register the elasticsearch_dsl connection, pointing at a node on localhost.
connections.create_connection(hosts=['localhost'])
# Separate low-level client built with default settings.
# NOTE(review): `es` is not referenced anywhere else in the visible code.
es = Elasticsearch()
# The Flask application; static files are served from the 'static' folder.
app = Flask(__name__, static_folder='static')
# https://stackoverflow.com/a/14625619
@app.route('/robots.txt')
def static_from_root():
    """Serve /robots.txt from the application's static folder.

    The file name is taken from the request path with its leading
    character (the slash) removed.
    """
    filename = request.path[1:]
    return send_from_directory(app.static_folder, filename)
# https://stackoverflow.com/a/27119458
@app.template_filter('clean')
def do_clean(text, **kw):
    """Sanitise ``text`` with ``clean`` and return it wrapped in ``Markup``.

    Wrapping the result in ``Markup`` marks it as safe, which keeps Jinja
    from escaping it a second time. Any keyword arguments are forwarded
    unchanged to ``clean``.
    """
    sanitised = clean(text, **kw)
    return Markup(sanitised)
@app.template_filter('translate_tag')
def translate_tag(value):
    """Map a known document-type tag to its display label.

    ``'sou'`` becomes ``'SOU'`` and ``'ds'`` becomes ``'Ds'``; any other
    value is returned unchanged.
    """
    known_labels = (('sou', 'SOU'), ('ds', 'Ds'))
    for tag, label in known_labels:
        if value == tag:
            return label
    return value
@app.template_filter('build_query_string')
def build_query_string(query_dict):
    """Serialise the search-related request parameters into a query string.

    Only the ``q``, ``year`` and ``type`` parameters are kept; every other
    key in ``query_dict`` is dropped. All values of a repeated key are
    emitted, in the order ``query_dict.lists()`` yields them.

    Keys and values are percent-encoded, so characters such as ``&``, ``=``,
    ``#``, ``+`` and spaces inside a value can no longer break the link or
    inject additional parameters.

    Args:
        query_dict: an object whose ``lists()`` method yields
            ``(key, list_of_values)`` pairs.

    Returns:
        A ``key=value&key=value`` string (without a leading ``?``); empty
        when none of the kept keys are present.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlencode

    kept_keys = ('q', 'year', 'type')
    pairs = [
        (key, item)
        for key, itemlist in query_dict.lists()
        if key in kept_keys
        for item in itemlist
    ]
    return urlencode(pairs)
class SouSearch(FacetedSearch):
    """Faceted full-text search over the index named by ``ES_INDEX_NAME``.

    Free-text queries are matched against the fields listed in ``fields``
    and results can be narrowed by document type and by year range.
    """

    # Index to search
    index = ES_INDEX_NAME
    # Fields to search
    fields = ['id_year_number^2', 'title^3', 'full_text']
    facets = {
        'type': TermsFacet(field='type'),
        'year': RangeFacet(
            field='year',
            ranges=[
                # The first bucket starts at 1922 rather than on a decade boundary.
                ('1922-1929', (1922, 1930)),
                # Full decades 1930-1939 through 2010-2019.
                *[
                    (f'{start}-{start + 9}', (start, start + 10))
                    for start in range(1930, 2020, 10)
                ],
                ('2020-', (2020, 2030)),
            ],
        ),
    }

    def highlight(self, search):
        """Request highlighted fragments from the title and the full text."""
        highlight_options = {'fragment_size': 150, 'number_of_fragments': 4}
        return search.highlight('title', 'full_text', **highlight_options)

    def search(self):
        """Build the base search with ``full_text`` excluded from the source.

        The full text itself is not needed in the result; only the
        highlighted extract is used.
        """
        base = super().search()
        return base.source(excludes=["full_text"])

    def query(self, search, query):
        """Attach a ``query_string`` query (AND semantics) when a query is given.

        With an empty ``query`` the search is returned untouched. The
        ``fields`` restriction is only sent when ``self.fields`` is non-empty.
        """
        if not query:
            return search
        params = {'query': query, 'default_operator': 'and'}
        if self.fields:
            params['fields'] = self.fields
        return search.query('query_string', **params)
@app.route('/')
def index():
    """Render the search front page: run the faceted search and paginate it.

    Request parameters read from the query string:
        q: the search string; may be empty.
        year, type: facet filters; each may be given several times.
        sort_by: ``year_number_sort``, ``title_sort`` or ``_score``; anything
            else falls back to ``_score``.
        order_by: ``asc`` or ``desc``; anything else falls back to ``desc``.
        page: page number, starting at 1; smaller values are treated as 1.

    Returns:
        The rendered ``sou/front.html`` on success. On failure,
        ``sou/error.html`` with status 500 when Elasticsearch cannot be
        reached or an unexpected error occurs, and with status 200 when
        Elasticsearch rejects the request.
    """
    hits_per_page = 12
    q = request.args.get('q', '')
    year = request.args.getlist('year')
    doc_type = request.args.getlist('type')

    # If there's no query and no sort option explicitly set - e.g. if user just
    # arrived - sort by year/number in descending order
    if not q and not request.args.get('sort_by'):
        sort_by = 'year_number_sort'
    else:
        sort_by = request.args.get('sort_by', '_score')
    order_by = request.args.get('order_by', 'desc')

    # Sort by score (relevance) by default, and don't let users sort by
    # anything other than what's specified below
    if sort_by not in ('year_number_sort', 'title_sort', '_score'):
        sort_by = '_score'
    # Same whitelist treatment for the direction, so an arbitrary value is
    # never forwarded to Elasticsearch.
    if order_by not in ('asc', 'desc'):
        order_by = 'desc'
    sort = [{sort_by: {'order': order_by}}]

    # The following is to make sure we can create appropriate sort links.
    # If current sort is asc, then clicking again should make it desc, and
    # vice versa. With no (or an unknown) order in the request, the next
    # order is asc.
    order_by_next = 'desc' if request.args.get('order_by') == 'asc' else 'asc'

    # Display name, actual sort field, default order
    sort_options = [
        ('relevans', '_score', 'desc'),
        ('år och nummer', 'year_number_sort', 'asc'),
        ('titel', 'title_sort', 'asc'),
    ]

    # Dictionary of possible facets
    filters = {'year': year, 'type': doc_type}

    try:
        # Figure out total number of hits (but don't actually fetch them)
        rs_count = SouSearch(q, filters=filters, sort=sort)
        response_count = rs_count[0:0].execute()

        # What page are we on? Zero or negative page numbers would produce a
        # negative slice further down, so treat them as the first page.
        page = max(request.args.get('page', type=int, default=1), 1)

        # Create a pagination object based on the number of hits and the current page number
        pagination = Pagination(page=page, total=response_count.hits.total.value,
                                record_name='doc', per_page=hits_per_page, bs_version=4,
                                inner_window=1, outer_window=0)

        # Make sure page number stays within the realm of possibility
        if page > pagination.total_pages > 0:
            page = pagination.total_pages

        # Figure out which results we should fetch from ES
        doc_from = (page - 1) * hits_per_page
        doc_to = page * hits_per_page

        # Now fetch them
        rs = SouSearch(q, filters=filters, sort=sort)
        response = rs[doc_from:doc_to].execute()

        # Keep only year buckets whose second element (the hit count) is
        # above 1, then order them by label, latest period first, rather than
        # by total number of hits.
        # NOTE(review): `> 1` also hides buckets with exactly one hit -
        # confirm that `> 0` was not intended.
        # TODO: let ES do that instead
        response.facets.year = [t for t in response.facets.year if t[1] > 1]
        response.facets.year = sorted(response.facets.year, key=itemgetter(0), reverse=True)

        return render_template("sou/front.html", response=response, total=response.hits.total,
                               pagination=pagination, q=q, sort_options=sort_options, sort_by=sort_by,
                               order_by=order_by, order_by_next=order_by_next, doc_from=doc_from+1, doc_to=doc_to)
    except elasticsearch.exceptions.ConnectionError:
        return render_template("sou/error.html", error_title='Ett fel uppstod', error_message='Kunde inte ansluta till sökmotorn.'), 500
    except elasticsearch.exceptions.RequestError:
        return render_template("sou/error.html", error_title='Ogiltig sökfråga', error_message='Se över söksträngen och prova på nytt.'), 200
    except Exception:
        # Catch-all boundary for the view: log the traceback instead of
        # silently discarding it, and let SystemExit/KeyboardInterrupt through.
        app.logger.exception('Unexpected error while handling search request')
        return render_template("sou/error.html", error_message='Något gick galet.'), 500
# Start the application with Flask's own server when this file is run as a
# script rather than imported.
if __name__ == "__main__":
    app.run()