diff --git a/views/results.tpl b/views/results.tpl
index 3d0db6b..9b75786 100644
--- a/views/results.tpl
+++ b/views/results.tpl
@@ -7,8 +7,8 @@
 %if len(res) > 0:
-	JSON
-	CSV
+	JSON
+	CSV
 %end
diff --git a/webui.py b/webui.py
index e86885f..ca2c0fa 100755
--- a/webui.py
+++ b/webui.py
@@ -1,4 +1,12 @@
 #!/usr/bin/env python
+#{{{ debug
+from __future__ import print_function
+import sys
+
+# print debug/diagnostic messages to stderr, keeping stdout clean
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+#}}}
 #{{{ imports
 import os
 import bottle
@@ -7,13 +15,20 @@
 import datetime
 import glob
 import hashlib
-import json
 import csv
 import StringIO
 import ConfigParser
 import string
 import shlex
 import urllib
+
+# use ujson if available (faster than the built-in json module)
+try:
+    import ujson as json
+except ImportError:
+    import json
+    eprint("ujson module not found, using (slower) built-in json module instead")
+
 # import recoll and rclextract
 try:
     from recoll import recoll
@@ -34,7 +49,7 @@
     'context': 30,
     'stem': 1,
     'timefmt': '%c',
-    'dirdepth': 3,
+    'dirdepth': 2,
     'maxchars': 500,
     'maxresults': 0,
     'perpage': 25,
@@ -100,6 +115,10 @@ def normalise_filename(fn):
         else:
             out += "_"
     return out
+
+# read the topdirs parameter of the recoll config a xapiandb path belongs to
+def get_topdirs(db):
+    rclconf = rclconfig.RclConfig(os.path.dirname(db))
+    return rclconf.getConfParam('topdirs')
 #}}}
 #{{{ get_config
 def get_config():
@@ -107,8 +126,15 @@ def get_config():
     # get useful things from recoll.conf
     rclconf = rclconfig.RclConfig()
     config['confdir'] = rclconf.getConfDir()
-    config['dirs'] = [os.path.expanduser(d) for d in
-        shlex.split(rclconf.getConfParam('topdirs'))]
+    # extra databases: a colon-separated list of xapiandb paths
+    config['extradbs'] = []
+    if 'RECOLL_EXTRA_DBS' in os.environ:
+        config['extradbs'] = os.environ.get('RECOLL_EXTRA_DBS').split(':')
+    # map each topdir to the xapiandb directory that indexes it
+    config['dirs'] = {}
+    for dir in [os.path.expanduser(d) for d in
+                shlex.split(rclconf.getConfParam('topdirs'))]:
+        config['dirs'][dir] = os.path.join(config['confdir'], 'xapiandb')
+    # global options as set by the default recoll config are also used
+    # for the extra databases when searching the entire set
     config['stemlang'] = rclconf.getConfParam('indexstemminglanguages')
     # get config from cookies or defaults
     for k, v in DEFAULTS.items():
@@ -119,22 +145,27 @@ def get_config():
     ncf = [f for f in cf if f in FIELDS]
     config['csvfields'] = ' '.join(ncf)
     config['fields'] = ' '.join(FIELDS)
+    # merge in the topdirs of the additional databases
+    for e in config['extradbs']:
+        for t in [os.path.expanduser(d) for d in
+                  shlex.split(get_topdirs(e))]:
+            config['dirs'][t] = e
     # get mountpoints
     config['mounts'] = {}
-    for d in config['dirs']:
+    for d, db in config['dirs'].items():
         name = 'mount_%s' % urllib.quote(d, '')
         config['mounts'][d] = select([bottle.request.get_cookie(name), 'file://%s' % d], [None, ''])
     return config
 #}}}
 #{{{ get_dirs
-def get_dirs(tops, depth):
+def get_dirs(dirs, depth):
     v = []
-    for top in tops:
-        dirs = [top]
+    for dir, db in dirs.items():
+        dirs = [dir]
         for d in range(1, depth+1):
-            dirs = dirs + glob.glob(top + '/*' * d)
+            dirs = dirs + glob.glob(dir + '/*' * d)
         dirs = filter(lambda f: os.path.isdir(f), dirs)
-        top_path = top.rsplit('/', 1)[0]
+        top_path = dir.rsplit('/', 1)[0]
         dirs = [w.replace(top_path+'/', '', 1) for w in dirs]
         v = v + dirs
     return [''] + v
@@ -149,6 +180,8 @@ def get_query():
         'sort': select([bottle.request.query.get('sort'), SORTS[0][0]]),
         'ascending': int(select([bottle.request.query.get('ascending'), 0])),
         'page': int(select([bottle.request.query.get('page'), 0])),
+        'highlight': int(select([bottle.request.query.get('highlight'), 1])),
+        'snippets': int(select([bottle.request.query.get('snippets'), 1])),
     }
     return query
 #}}}
@@ -164,7 +197,25 @@ def query_to_recoll_string(q):
 #{{{ recoll_initsearch
 def recoll_initsearch(q):
     config = get_config()
-    db = recoll.connect(config['confdir'])
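+    # config['dirs'] (built by get_config() from recoll.conf and
+    # RECOLL_EXTRA_DBS) maps every indexed topdir to its xapiandb
+    # directory; q['dir'] is matched against those topdirs below.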
+    # The reason for this somewhat elaborate scheme is to keep the
+    # result set as small as possible by searching only those
+    # databases with matching topdirs.
+    if q['dir'] == '':
+        db = recoll.connect(config['confdir'], config['extradbs'])
+    else:
+        dbs = []
+        for d, db in config['dirs'].items():
+            if os.path.commonprefix([os.path.basename(d), q['dir']]) == q['dir']:
+                dbs.append(db)
+        if len(dbs) == 0:
+            # should not happen unless q['dir'] names a non-indexed directory
+            db = recoll.connect(config['confdir'], config['extradbs'])
+        elif len(dbs) == 1:
+            # only one db (the most common situation)
+            db = recoll.connect(os.path.dirname(dbs[0]))
+        else:
+            # more than one db with a matching topdir, use them all
+            db = recoll.connect(dbs[0], dbs[1:])
     db.setAbstractParams(config['maxchars'], config['context'])
     query = db.query()
     query.sortby(q['sort'], q['ascending'])
@@ -183,9 +234,10 @@ def endMatch(self):
         return ''
 #}}}
 #{{{ recoll_search
-def recoll_search(q, dosnippets=True):
+def recoll_search(q):
     config = get_config()
     tstart = datetime.datetime.now()
+    highlighter = HlMeths()
     results = []
     query = recoll_initsearch(q)
     nres = query.rowcount
@@ -199,31 +251,33 @@
         q['page'] = 1
     offset = (q['page'] - 1) * config['perpage']
 
-    if query.rowcount > 0:
+    # don't try to scroll past the end of the result set
+    if query.rowcount > 0 and offset < query.rowcount:
         if type(query.next) == int:
             query.next = offset
         else:
             query.scroll(offset, mode='absolute')
-        highlighter = HlMeths()
-        for i in range(config['perpage']):
-            try:
-                doc = query.fetchone()
-            except:
-                break
-            d = {}
-            for f in FIELDS:
-                v = getattr(doc, f)
-                if v is not None:
-                    d[f] = v.encode('utf-8')
-                else:
-                    d[f] = ''
-            d['label'] = select([d['title'], d['filename'], '?'], [None, ''])
-            d['sha'] = hashlib.sha1(d['url']+d['ipath']).hexdigest()
-            d['time'] = timestr(d['mtime'], config['timefmt'])
-            if dosnippets:
-                d['snippet'] = query.makedocabstract(doc, highlighter).encode('utf-8')
-            results.append(d)
+    for i in range(config['perpage']):
+        try:
+            doc = query.fetchone()
+        except:
+            break
+        d = {}
+        for f in FIELDS:
+            v = getattr(doc, f)
+            if v is not None:
+                d[f] = v.encode('utf-8')
+            else:
+                d[f] = ''
+        d['label'] = select([d['title'], d['filename'], '?'], [None, ''])
+        d['sha'] = hashlib.sha1(d['url']+d['ipath']).hexdigest()
+        d['time'] = timestr(d['mtime'], config['timefmt'])
+        if q['snippets']:
+            if q['highlight']:
+                d['snippet'] = query.makedocabstract(doc, highlighter).encode('utf-8')
+            else:
+                d['snippet'] = query.makedocabstract(doc).encode('utf-8')
+        results.append(d)
     tend = datetime.datetime.now()
     return results, nres, tend - tstart
 #}}}
@@ -315,13 +369,12 @@ def edit(resnum):
 @bottle.route('/json')
 def get_json():
     query = get_query()
-    query['page'] = 0
     qs = query_to_recoll_string(query)
     bottle.response.headers['Content-Type'] = 'application/json'
     bottle.response.headers['Content-Disposition'] = 'attachment; filename=recoll-%s.json' % normalise_filename(qs)
     res, nres, timer = recoll_search(query)
-    return json.dumps({ 'query': query, 'results': res })
+    return json.dumps({ 'query': query, 'nres': nres, 'results': res })
 #}}}
 #{{{ csv
 @bottle.route('/csv')
@@ -329,10 +382,11 @@ def get_csv():
     config = get_config()
     query = get_query()
     query['page'] = 0
+    query['snippets'] = 0
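+    # the CSV fields never include snippets, so skip the costly
+    # abstract generation for every result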
     qs = query_to_recoll_string(query)
     bottle.response.headers['Content-Type'] = 'text/csv'
     bottle.response.headers['Content-Disposition'] = 'attachment; filename=recoll-%s.csv' % normalise_filename(qs)
-    res, nres, timer = recoll_search(query, False)
+    res, nres, timer = recoll_search(query)
     si = StringIO.StringIO()
     cw = csv.writer(si)
     fields = config['csvfields'].split()
@@ -355,7 +409,7 @@ def set():
     config = get_config()
     for k, v in DEFAULTS.items():
         bottle.response.set_cookie(k, str(bottle.request.query.get(k)), max_age=3153600000, expires=315360000)
-    for d in config['dirs']:
+    for d, db in config['dirs'].items():
         cookie_name = 'mount_%s' % urllib.quote(d, '')
         bottle.response.set_cookie(cookie_name, str(bottle.request.query.get('mount_%s' % d)), max_age=3153600000, expires=315360000)
     bottle.redirect('./')
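
With this change the web UI can search several indexes at once. A minimal
usage sketch (paths are illustrative, and the stock webui-standalone.py
launcher is assumed):

    # point RECOLL_EXTRA_DBS at xapiandb directories that live inside
    # their recoll configuration directories, separated by colons
    RECOLL_EXTRA_DBS=$HOME/.recoll-work/xapiandb:$HOME/.recoll-mail/xapiandb \
        ./webui-standalone.py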