Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Natay committed Mar 17, 2021
1 parent f99fee4 commit 53781b2
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 34 deletions.
20 changes: 5 additions & 15 deletions biostar/forum/management/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,6 @@
LOCK = os.path.join(settings.INDEX_DIR, 'flag')


def handle_spam(posts):
    """
    Move each post in `posts` out of the search index and into the spam index.

    For every post, `search.remove_post` is called first and
    `spam.add_spam` second. Nothing is returned.
    """
    for spam_post in posts:
        # Drop the post from the regular search index.
        search.remove_post(post=spam_post)
        # Record the same post in the dedicated spam index.
        spam.add_spam(post=spam_post)


@check_lock(LOCK)
def build(size, remove=False):
"""
Expand All @@ -51,8 +38,11 @@ def build(size, remove=False):
spam_posts = Post.objects.filter(spam=Post.SPAM, indexed=False)[:size]
sids = [post.id for post in spam_posts]

# Remove spam post.
handle_spam(posts=spam_posts)
for post in spam_posts:
# Remove spam from search index.
search.remove_post(post=post)
# Add spam to its own index
spam.add_spam(post=post)

# Update the spam indexed flag.
Post.objects.filter(id__in=sids).update(indexed=True)
Expand Down
2 changes: 1 addition & 1 deletion biostar/forum/scripts/planet-download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ set -ue
export DJANGO_SETTINGS_MODULE=conf.run.site_settings

# Update latest five entries for each planet blog.
python manage.py planet --update ${UPDATE_COUNT}
python manage.py planet --download
3 changes: 3 additions & 0 deletions biostar/forum/scripts/search-index.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@ set -ue
# Set the configuration module.
export DJANGO_SETTINGS_MODULE=conf.run.site_settings

# Set the log level
# export ENGINE_LOG_LEVEL=DEBUG

# Add BATCH_SIZE posts to search index
python manage.py index --size ${BATCH_SIZE}
23 changes: 8 additions & 15 deletions biostar/forum/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from django.db.models import Q
from whoosh import writing, classify
from whoosh.analysis import StemmingAnalyzer
from whoosh.writing import AsyncWriter
from whoosh.writing import AsyncWriter, BufferedWriter
from whoosh.searching import Results

import html2markdown
Expand Down Expand Up @@ -40,13 +40,13 @@ def elapsed(msg):
now = time.time()
sec = round(now - last, 1)
last = now
print(f"{msg} in {sec} seconds")
return f"{msg} in {sec} seconds"

def progress(index, step=500, total=0, msg=""):
nonlocal last
if index % step == 0:
percent = int((index / total) * 100) if total >= index else index
elapsed(f"... {percent}% ({index} out of {total}). {step} {msg}")
return f"... {percent}% ({index} out of {total}). {step} {msg}"

return elapsed, progress

Expand Down Expand Up @@ -236,17 +236,20 @@ def index_posts(posts, ix=None, overwrite=False, add_func=add_index):

# Loop through posts and add to index
for step, post in stream:
progress(step, total=total, msg="posts indexed")
pr = progress(step, total=total, msg="posts indexed")
logger.debug(pr)
add_func(post=post, writer=writer)

# Commit to index
if overwrite:
logger.info("Overwriting the old index")
writer.commit(mergetype=writing.CLEAR)
else:
logger.debug("Committing to index")
writer.commit()

#elapsed(f"Indexed posts={total}")
elapsed = elapsed(f"Committed {total} posts to index.")
logger.debug(elapsed)


def crawl(reindex=False, overwrite=False, limit=1000):
Expand Down Expand Up @@ -307,19 +310,9 @@ def preform_whoosh_search(query, ix=None, fields=None, page=None, per_page=None,
results.fragmenter.maxchars = 100
results.fragmenter.surround = 100

#logger.info("Preformed index search")

return results


def clean_index():
    """
    Placeholder for purging invalid posts (deleted, spam, etc.) from the
    search index.

    The body currently performs no work and simply returns None.
    """
    return None


def preform_search(query, fields=None, top=0, sortedby=[], more_like_this=False):

top = top or settings.SIMILAR_FEED_COUNT
Expand Down
2 changes: 1 addition & 1 deletion biostar/forum/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def finalize_post(sender, instance, created, **kwargs):
instance.tags.add(*tags)

# Classify post as spam/ham.
tasks.classify_spam.spool(uid=instance.uid)
#tasks.classify_spam.spool(uid=instance.uid)

# Ensure posts get re-indexed after being edited.
Post.objects.filter(uid=instance.uid).update(indexed=False)
Expand Down
2 changes: 1 addition & 1 deletion biostar/forum/templates/accounts/login.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

{% block content %}

{% if 0 %}
{% if social_login %}
<div class="ui segment inputcolor socialbox">
<div class="ui header">
<i class="universal access icon"></i> Automatic Login
Expand Down
2 changes: 1 addition & 1 deletion biostar/transfer/management/commands/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def gen_profile():
score=user.score,
twitter=user.profile.twitter_id,
my_tags=user.profile.my_tags,
digest_prefs=user.profile.digest_prefs,
digest_prefs=Profile.NO_DIGEST,
new_messages=user.new_messages)

yield profile
Expand Down

0 comments on commit 53781b2

Please sign in to comment.