Add defaults to error handling in the CSV data loader command
thriuin committed Oct 27, 2023
1 parent 399d6fd commit 03849de
Showing 2 changed files with 19 additions and 20 deletions.
6 changes: 6 additions & 0 deletions oc_search/settings-sample.py
@@ -293,3 +293,9 @@
 SD_COMMENTS_BASE_FR = "http://127.0.0.1:8000/static/sd/"
 SD_VOTES_BASE_EN = "http://127.0.0.1:8000/static/sd/"
 SD_VOTES_BASE_FR = "http://127.0.0.1:8000/static/sd/"
+
+# Used by the import_data_csv console command
+
+IMPORT_DATA_CSV_DEFAULT_DEBUG = False
+IMPORT_DATA_CSV_SOLR_INDEX_GROUP_SIZE = 10
+IMPORT_DATA_CSV_DEFAULT_SOLR_INDEX_GROUP_SIZE = 500
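
Since these values are only added to settings-sample.py, an existing deployment's settings.py may not define them yet, and a bare settings.IMPORT_DATA_CSV_... lookup would raise AttributeError at runtime. A minimal defensive sketch (not what the command does here, just Django's usual getattr fallback idiom):

    from django.conf import settings

    # Fall back to the sample defaults when a deployed settings.py predates them.
    DEFAULT_DEBUG = getattr(settings, 'IMPORT_DATA_CSV_DEFAULT_DEBUG', False)
    DEBUG_GROUP_SIZE = getattr(settings, 'IMPORT_DATA_CSV_SOLR_INDEX_GROUP_SIZE', 10)
    DEFAULT_GROUP_SIZE = getattr(settings, 'IMPORT_DATA_CSV_DEFAULT_SOLR_INDEX_GROUP_SIZE', 500)
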
33 changes: 13 additions & 20 deletions search/management/commands/import_data_csv.py
@@ -1,3 +1,5 @@
+import sys
+
 from babel.dates import format_date
 from babel.numbers import format_currency, format_decimal, parse_decimal
 from datetime import datetime
@@ -45,7 +47,7 @@ class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument('--search', type=str, help='The Search ID that is being loaded', required=True)
         parser.add_argument('--csv', type=str, help='CSV filename to import', required=True)
-        parser.add_argument('--debug', required=False, action='store_true', default=False,
+        parser.add_argument('--debug', required=False, action='store_true', default=settings.IMPORT_DATA_CSV_DEFAULT_DEBUG,
                             help='Only display error messages')
         parser.add_argument('--quiet', required=False, action='store_true', default=False,
                             help='Only display error messages')
@@ -368,7 +370,9 @@ def handle(self, *args, **options):
                 solr_items.append(solr_record)
                 total += 1

-                if (options['debug'] and index_cycle > 10) or index_cycle > 500:
+                # In debug mode, index the data to Solr much more frequently. This can be helpful for
+                # isolating problem rows. Otherwise, use large batches.
+                if (options['debug'] and index_cycle > settings.IMPORT_DATA_CSV_SOLR_INDEX_GROUP_SIZE) or index_cycle > settings.IMPORT_DATA_CSV_DEFAULT_SOLR_INDEX_GROUP_SIZE:
                     try:
                         solr.index(self.solr_core, solr_items)
                         commit_count += len(solr_items)
@@ -377,30 +381,19 @@ def handle(self, *args, **options):
self.logger.info(f"Solr error on row {total}. Row data {solr_items}")
self.logger.error(cex)
error_count += 1
time.sleep(10)
# Force a delay to give the network/system time to recover - hopefully
time.sleep(5)

finally:
solr_items.clear()
index_cycle = 0

# Write to Solr whenever the cycle threshold is reached
# Commit to Solr whenever the cycle threshold is reached
if cycle >= self.cycle_on:
# try to connect to Solr up to 3 times
for countdown in reversed(range(3)):
try:
solr.commit(self.solr_core, softCommit=True, waitSearcher=True)
cycle = 0
if not options['quiet']:
self.logger.info(f"{total} rows processed")
break
except ConnectionError as cex:
if not countdown:
raise
self.logger.info(
f"Solr error: {0}. Waiting to try again ... {countdown}")
time.sleep((10 - countdown) * 5)
error_count += 1
if error_count > 10:
if not options['quiet']:
sys.stdout.write(f"{total} rows processed\r")
cycle = 0
if error_count > 100:
break

except Exception as x:
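Taken together, the loader now follows a batch-with-error-budget pattern: buffer rows, index each batch when the debug- or default-sized group fills, pause briefly after a connection error, and stop the run once too many errors accumulate. A self-contained sketch of that pattern (load_rows, index_batch, and MAX_ERRORS are illustrative names, not part of the command):

    import sys
    import time

    DEBUG_GROUP_SIZE = 10      # mirrors IMPORT_DATA_CSV_SOLR_INDEX_GROUP_SIZE
    DEFAULT_GROUP_SIZE = 500   # mirrors IMPORT_DATA_CSV_DEFAULT_SOLR_INDEX_GROUP_SIZE
    MAX_ERRORS = 100           # illustrative stand-in for the error budget

    def load_rows(rows, index_batch, debug=False):
        """Index rows in batches; index_batch is any callable that may raise ConnectionError."""
        group_size = DEBUG_GROUP_SIZE if debug else DEFAULT_GROUP_SIZE
        batch, total, errors = [], 0, 0
        for row in rows:
            batch.append(row)
            total += 1
            if len(batch) >= group_size:
                try:
                    index_batch(batch)
                except ConnectionError:
                    errors += 1
                    time.sleep(5)   # give the network/Solr a moment to recover
                finally:
                    batch.clear()   # the command drops the batch either way
                sys.stdout.write(f"{total} rows processed\r")
            if errors > MAX_ERRORS:
                break               # error budget exhausted; abandon the run
        if batch:
            index_batch(batch)      # flush any remainder
        return total, errors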
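For reference, this is a standard Django management command; per add_arguments, --search and --csv are required (the values below are placeholders):

    python manage.py import_data_csv --search <search_id> --csv path/to/data.csv --debug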
