diff --git a/views/results.tpl b/views/results.tpl
index 3d0db6b..9b75786 100644
--- a/views/results.tpl
+++ b/views/results.tpl
@@ -7,8 +7,8 @@
%if len(res) > 0:
%end
diff --git a/webui.py b/webui.py
index e86885f..ca2c0fa 100755
--- a/webui.py
+++ b/webui.py
@@ -1,4 +1,12 @@
#!/usr/bin/env python
+#{{{ debug
+# debug helpers: route diagnostics to stderr
+from __future__ import print_function
+import sys
+
+def eprint(*args, **kwargs):
+ print(*args, file=sys.stderr, **kwargs)
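+# usage sketch (hypothetical arguments): eprint('debug:', value) behaves
+# like print() but writes to sys.stderr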
+#}}}
#{{{ imports
import os
import bottle
@@ -7,13 +15,20 @@
import datetime
import glob
import hashlib
-import json
import csv
import StringIO
import ConfigParser
import string
import shlex
import urllib
+
+# use ujson if available (faster than the built-in json module)
+try:
+ import ujson as json
+except ImportError:
+ import json
+ eprint("ujson module not found, using (slower) built-in json module instead")
+
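+# ujson is a drop-in replacement for the simple dumps() calls made in this
+# file, e.g. json.dumps({'query': q, 'results': res}) works with either module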
# import recoll and rclextract
try:
from recoll import recoll
@@ -34,7 +49,7 @@
'context': 30,
'stem': 1,
'timefmt': '%c',
- 'dirdepth': 3,
+ 'dirdepth': 2,
'maxchars': 500,
'maxresults': 0,
'perpage': 25,
@@ -100,6 +115,10 @@ def normalise_filename(fn):
else:
out += "_"
return out
+
+def get_topdirs(db):
+ rclconf = rclconfig.RclConfig(os.path.dirname(db))
+ return rclconf.getConfParam('topdirs')
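+
+# usage sketch (hypothetical path): get_topdirs('/home/me/.recoll-extra/xapiandb')
+# reads the recoll.conf next to that db and returns its raw 'topdirs' string,
+# ready for shlex.split()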
#}}}
#{{{ get_config
def get_config():
@@ -107,8 +126,15 @@ def get_config():
# get useful things from recoll.conf
rclconf = rclconfig.RclConfig()
config['confdir'] = rclconf.getConfDir()
- config['dirs'] = [os.path.expanduser(d) for d in
- shlex.split(rclconf.getConfParam('topdirs'))]
+ config['extradbs'] = []
+ if 'RECOLL_EXTRA_DBS' in os.environ:
+ config['extradbs'] = os.environ.get('RECOLL_EXTRA_DBS').split(':')
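+ # e.g. RECOLL_EXTRA_DBS=/home/me/.recoll-extra/xapiandb:/mnt/idx/xapiandb
+ # (a colon-separated list of xapian index directories; paths illustrative)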
+ config['dirs'] = {}
+ for dir in [os.path.expanduser(d) for d in
+ shlex.split(rclconf.getConfParam('topdirs'))]:
+ config['dirs'][dir] = os.path.join(config['confdir'], 'xapiandb')
+ # global options set by the default recoll config also apply to the
+ # extra databases when searching the entire set
config['stemlang'] = rclconf.getConfParam('indexstemminglanguages')
# get config from cookies or defaults
for k, v in DEFAULTS.items():
@@ -119,22 +145,27 @@ def get_config():
ncf = [f for f in cf if f in FIELDS]
config['csvfields'] = ' '.join(ncf)
config['fields'] = ' '.join(FIELDS)
+ # get additional databases
+ for e in config['extradbs']:
+ for t in [os.path.expanduser(d) for d in
+ shlex.split(get_topdirs(e))]:
+ config['dirs'][t] = e
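+ # config['dirs'] now maps each topdir to the database indexing it, e.g.
+ # {'/home/me/docs': '/home/me/.recoll/xapiandb',
+ # '/mnt/extra': '/mnt/extra-conf/xapiandb'} (paths illustrative)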
# get mountpoints
config['mounts'] = {}
- for d in config['dirs']:
+ for d, db in config['dirs'].items():
name = 'mount_%s' % urllib.quote(d,'')
config['mounts'][d] = select([bottle.request.get_cookie(name), 'file://%s' % d], [None, ''])
return config
#}}}
#{{{ get_dirs
-def get_dirs(tops, depth):
+def get_dirs(dirs, depth):
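+ # 'dirs' is now the topdir -> db mapping built in get_config(); only the
+ # keys are used here. Illustrative call: get_dirs({'/home/me/docs': db}, 1)
+ # returns ['', 'docs', 'docs/sub1', ...]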
v = []
- for top in tops:
- dirs = [top]
+ for dir in list(dirs):
+ dirs = [dir]
for d in range(1, depth+1):
- dirs = dirs + glob.glob(top + '/*' * d)
+ dirs = dirs + glob.glob(dir + '/*' * d)
dirs = filter(lambda f: os.path.isdir(f), dirs)
- top_path = top.rsplit('/', 1)[0]
+ top_path = dir.rsplit('/', 1)[0]
dirs = [w.replace(top_path+'/', '', 1) for w in dirs]
v = v + dirs
return [''] + v
@@ -149,6 +180,8 @@ def get_query():
'sort': select([bottle.request.query.get('sort'), SORTS[0][0]]),
'ascending': int(select([bottle.request.query.get('ascending'), 0])),
'page': int(select([bottle.request.query.get('page'), 0])),
+ 'highlight': int(select([bottle.request.query.get('highlight'), 1])),
+ 'snippets': int(select([bottle.request.query.get('snippets'), 1])),
}
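+ # both flags arrive as query-string parameters; e.g. appending
+ # &snippets=0&highlight=0 to a search URL turns off snippet generation
+ # and term highlighting (parameter names as defined above)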
return query
#}}}
@@ -164,7 +197,25 @@ def query_to_recoll_string(q):
#{{{ recoll_initsearch
def recoll_initsearch(q):
config = get_config()
- db = recoll.connect(config['confdir'])
+ # This somewhat elaborate scheme keeps the searched database set as small
+ # as possible by opening only the databases whose topdirs match q['dir'].
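+ # worked example (hypothetical setup): with config['dirs'] ==
+ # {'/home/me/docs': dbA, '/mnt/extra': dbB} and q['dir'] == 'docs',
+ # only dbA is selected and opened below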
+ if q['dir'] == '':
+ db = recoll.connect(config['confdir'], config['extradbs'])
+ else:
+ dbs = []
+ for d, db in config['dirs'].items():
+ if os.path.commonprefix([os.path.basename(d), q['dir']]) == q['dir']:
+ dbs.append(db)
+ if len(dbs) == 0:
+ # should not happen; was a non-existent q['dir'] requested?
+ db = recoll.connect(config['confdir'], config['extradbs'])
+ elif len(dbs) == 1:
+ # only one matching db (the most common situation)
+ db = recoll.connect(os.path.dirname(dbs[0]))
+ else:
+ # more than one db with a matching topdir, so use them all
+ db = recoll.connect(os.path.dirname(dbs[0]), dbs[1:])
db.setAbstractParams(config['maxchars'], config['context'])
query = db.query()
query.sortby(q['sort'], q['ascending'])
@@ -183,9 +234,10 @@ def endMatch(self):
return ''
#}}}
#{{{ recoll_search
-def recoll_search(q, dosnippets=True):
+def recoll_search(q):
config = get_config()
tstart = datetime.datetime.now()
+ highlighter = HlMeths()
results = []
query = recoll_initsearch(q)
nres = query.rowcount
@@ -199,31 +251,33 @@ def recoll_search(q, dosnippets=True):
q['page'] = 1
offset = (q['page'] - 1) * config['perpage']
- if query.rowcount > 0:
+ if query.rowcount > 0 and offset < query.rowcount:
if type(query.next) == int:
query.next = offset
else:
query.scroll(offset, mode='absolute')
- highlighter = HlMeths()
- for i in range(config['perpage']):
- try:
- doc = query.fetchone()
- except:
- break
- d = {}
- for f in FIELDS:
- v = getattr(doc, f)
- if v is not None:
- d[f] = v.encode('utf-8')
- else:
- d[f] = ''
- d['label'] = select([d['title'], d['filename'], '?'], [None, ''])
- d['sha'] = hashlib.sha1(d['url']+d['ipath']).hexdigest()
- d['time'] = timestr(d['mtime'], config['timefmt'])
- if dosnippets:
- d['snippet'] = query.makedocabstract(doc, highlighter).encode('utf-8')
- results.append(d)
+ for i in range(config['perpage']):
+ try:
+ doc = query.fetchone()
+ except: # fetchone() fails past the last result, so stop here
+ break
+ d = {}
+ for f in FIELDS:
+ v = getattr(doc, f)
+ if v is not None:
+ d[f] = v.encode('utf-8')
+ else:
+ d[f] = ''
+ d['label'] = select([d['title'], d['filename'], '?'], [None, ''])
+ d['sha'] = hashlib.sha1(d['url']+d['ipath']).hexdigest()
+ d['time'] = timestr(d['mtime'], config['timefmt'])
+ if q['snippets']:
+ if q['highlight']:
+ d['snippet'] = query.makedocabstract(doc, highlighter).encode('utf-8')
+ else:
+ d['snippet'] = query.makedocabstract(doc).encode('utf-8')
+ results.append(d)
tend = datetime.datetime.now()
return results, nres, tend - tstart
#}}}
@@ -315,13 +369,12 @@ def edit(resnum):
@bottle.route('/json')
def get_json():
query = get_query()
- query['page'] = 0
qs = query_to_recoll_string(query)
bottle.response.headers['Content-Type'] = 'application/json'
bottle.response.headers['Content-Disposition'] = 'attachment; filename=recoll-%s.json' % normalise_filename(qs)
res, nres, timer = recoll_search(query)
- return json.dumps({ 'query': query, 'results': res })
+ return json.dumps({ 'query': query, 'nres': nres, 'results': res })
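+# the payload now also carries the total hit count, e.g. (values illustrative):
+# {"query": {...}, "nres": 123, "results": [...]}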
#}}}
#{{{ csv
@bottle.route('/csv')
@@ -329,10 +382,11 @@ def get_csv():
config = get_config()
query = get_query()
query['page'] = 0
+ query['snippets'] = 0
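+ # snippets are not among the exported CSV fields, so skip the (expensive)
+ # abstract generation; this replaces the old dosnippets=False argument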
qs = query_to_recoll_string(query)
bottle.response.headers['Content-Type'] = 'text/csv'
bottle.response.headers['Content-Disposition'] = 'attachment; filename=recoll-%s.csv' % normalise_filename(qs)
- res, nres, timer = recoll_search(query, False)
+ res, nres, timer = recoll_search(query)
si = StringIO.StringIO()
cw = csv.writer(si)
fields = config['csvfields'].split()
@@ -355,7 +409,7 @@ def set():
config = get_config()
for k, v in DEFAULTS.items():
bottle.response.set_cookie(k, str(bottle.request.query.get(k)), max_age=3153600000, expires=315360000)
- for d in config['dirs']:
+ for d, db in config['dirs'].items():
cookie_name = 'mount_%s' % urllib.quote(d, '')
bottle.response.set_cookie(cookie_name, str(bottle.request.query.get('mount_%s' % d)), max_age=3153600000, expires=315360000)
bottle.redirect('./')