Cold migration (with downtime)

Deploy the code and run the following:
invenio index destroy --yes-i-know
invenio index init
invenio rdm rebuild-all-indices
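To sanity-check the rebuild afterwards, a minimal sketch from an `invenio shell` session (the `rdmrecords-records` alias name is illustrative; instances configured with a search index prefix will differ):

from invenio_search.proxies import current_search_client

# Count the documents behind the RDM records read alias
print(current_search_client.count(index="rdmrecords-records")["count"])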
For the invenio-jobs changes, run the database migrations:
invenio alembic upgrade
Live migration (no downtime)
Deploy the code in a separate environment and use the following script:
import json
import time
from datetime import datetime, timedelta
from textwrap import dedent

import humanize
from flask import current_app
from invenio_access.permissions import system_identity
from invenio_oaiserver.percolator import (
    PERCOLATOR_MAPPING,
    _build_percolator_index_name,
)
from invenio_rdm_records.proxies import current_rdm_records
# Needed for the custom fields handling in `run_pre_deploy`
from invenio_records_resources.services.custom_fields.mappings import Mapping
from invenio_search.proxies import current_search, current_search_client
from invenio_search.utils import build_alias_name


def get_index_info(index):
    """Return the concrete index name, its write alias, and its read aliases."""
    write_alias = build_alias_name(index)
    indices = current_search_client.indices.get_alias(name=write_alias, ignore=[404])
    if indices.get("status") == 404:
        return None, None, None
    assert len(indices) == 1
    index_name = list(indices.keys())[0]
    aliases_resp = current_search_client.indices.get_alias(index=index_name)
    read_aliases = [
        a for a in aliases_resp[index_name]["aliases"].keys() if a != write_alias
    ]
    return index_name, write_alias, read_aliases


def reindex(old_index_name, new_index_name):
    """Start an async reindex of all documents and return the task info."""
    # Set replicas to 0
    print(f"Setting replicas to 0 for {new_index_name}")
    current_search_client.indices.put_settings(
        index=new_index_name,
        body={"index": {"number_of_replicas": 0}},
    )
    # Reindex all records (this will return a Task ID)
    print(f"Reindexing {old_index_name} to {new_index_name}")
    task = current_search_client.reindex(
        body={
            "source": {"index": old_index_name},
            "dest": {
                "index": new_index_name,
                "version_type": "external_gte",
            },
        },
        wait_for_completion=False,
    )
    print(
        f"Task ID for reindexing {old_index_name} to {new_index_name}: {task['task']}"
    )
    return task


def reindex_delta(old_index_name, new_index_name, since):
    """Start an async reindex of documents updated since the given timestamp."""
    total_docs = current_search_client.count(
        index=old_index_name,
        body={"query": {"range": {"updated": {"gte": since}}}},
    )["count"]
    print(
        f"Reindexing {old_index_name} to {new_index_name} since {since} ({total_docs} docs)"
    )
    task = current_search_client.reindex(
        body={
            "source": {
                "index": old_index_name,
                "query": {"range": {"updated": {"gte": since}}},
            },
            "dest": {
                "index": new_index_name,
                "version_type": "external_gte",
            },
        },
        wait_for_completion=False,
    )
    print(
        f"Task ID for reindexing {old_index_name} to {new_index_name}: {task['task']}"
    )
    return task


def get_last_updated_ts(index_name):
    """Return the latest `updated` timestamp present in the index."""
    res = current_search_client.search(
        index=index_name,
        body={"size": 0, "aggs": {"last_updated": {"max": {"field": "updated"}}}},
    )
    return res["aggregations"]["last_updated"]["value_as_string"]


def check_progress(task_id):
    """Report reindexing progress; return True once the task has completed."""
    progress = current_search_client.tasks.get(task_id=task_id)
    if not progress["completed"]:
        total = progress["task"]["status"]["total"]
        created = progress["task"]["status"]["created"]
        if total == 0 or created == 0:
            print("Reindexing in progress: no records reindexed yet.")
            return False
        percentage = round((created / total) * 100, 2)
        eta_seconds = (
            progress["task"]["running_time_in_nanos"]
            / created
            * (total - created)
            / 1_000_000_000
        )
        eta = datetime.now() + timedelta(seconds=eta_seconds)
        print(
            dedent(
                f"""\
                Reindexing in progress: {created}/{total} ({percentage}%) records reindexed.
                ETA: {humanize.naturaldelta(eta_seconds)} ({eta.isoformat()})
                """
            )
        )
        return False

    # The task description looks like "reindex from [old-index] to [new-index]"
    index_name = progress["task"]["description"].split(" to ")[1][1:-1]
    total_time = progress["task"]["running_time_in_nanos"] / 1_000_000_000
    print(f"Reindexing completed in {humanize.naturaldelta(total_time)}")
    # Refresh the index
    print(f"Refreshing {index_name}...")
    current_search_client.indices.refresh(index=index_name)
    print(f"Refreshed {index_name}")
    # Set replicas to 2
    print(f"Updating replicas for {index_name}")
    current_search_client.indices.put_settings(
        index=index_name,
        body={"index": {"number_of_replicas": 2}},
    )
    return True


def rollover_index(old_index, new_index):
    """Move the write and read aliases from the old to the new index."""
    old_index_name, old_index_alias, read_aliases = get_index_info(old_index)
    new_index_name, new_index_alias, _ = get_index_info(new_index)
    # Update aliases
    alias_ops = []
    alias_ops.append({"remove": {"index": old_index_name, "alias": new_index_alias}})
    alias_ops.append({"add": {"index": new_index_name, "alias": new_index_alias}})
    for alias in read_aliases:
        # Skip aliases that are not part of the new index name
        if alias not in new_index_name:
            continue
        alias_ops.append({"remove": {"index": old_index_name, "alias": alias}})
        alias_ops.append({"add": {"index": new_index_name, "alias": alias}})
    current_search_client.indices.update_aliases(body={"actions": alias_ops})


def delete_old_index(old_index):
    old_index_name, _, _ = get_index_info(old_index)
    # Delete old index
    current_search_client.indices.delete(index=old_index_name)


def run_pre_deploy(old_index, new_index, custom_fields_cfg=None):
    """Create the new index and reindex all documents into it."""
    old_index_name, old_index_alias, read_aliases = get_index_info(old_index)
    new_index_name, new_index_alias, _ = get_index_info(new_index)
    assert new_index_name is None, f"New index {new_index_name} already exists."
    # Create the new index
    (new_index_name, _), (new_index_alias, _) = current_search.create_index(
        index=new_index, create_write_alias=True
    )
    # Handle custom fields
    if custom_fields_cfg:
        custom_fields = current_app.config[custom_fields_cfg]
        properties = Mapping.properties_for_fields(None, custom_fields)
        current_search_client.indices.put_mapping(
            index=new_index_name,
            body={"properties": properties},
        )
    # Reindex all records
    task = reindex(old_index_name, new_index_name)
    while not check_progress(task["task"]):
        print("Waiting 10sec for reindexing to complete...")
        time.sleep(10)


def run_sync(old_index, new_index):
    """Repeatedly copy documents updated since the last sync until caught up."""
    old_index_name, old_index_alias, read_aliases = get_index_info(old_index)
    new_index_name, new_index_alias, _ = get_index_info(new_index)
    # Reindex all records since the last update
    since = get_last_updated_ts(new_index_name)
    while True:
        task = reindex_delta(old_index_name, new_index_name, since)
        while not check_progress(task["task"]):
            print("Waiting 10sec for reindexing to complete...")
            time.sleep(10)
        # Refresh
        current_search_client.indices.refresh(index=new_index_name)
        # Check if there are newer documents
        new_index_latest = get_last_updated_ts(new_index_name)
        old_index_latest = get_last_updated_ts(old_index_name)
        if new_index_latest >= old_index_latest:
            print("No new documents to sync.")
            break
        print(f"More documents to sync: {new_index_latest} > {old_index_latest}")
        # Reindex since we started the current reindexing task
        since = new_index_latest
        # Give an opportunity to interrupt the sync
        print("Press Ctrl+C to stop the sync...")
        time.sleep(10)


def run_post_deploy(old_index, new_index):
    # Rollover the index
    rollover_index(old_index, new_index)


def update_records_percolator(index=None):
    """Recreate the percolator index for the new records index mapping."""
    index = index or current_app.config["OAISERVER_RECORD_INDEX"]
    percolator_index = _build_percolator_index_name(index)
    mapping_path = current_search.mappings[index]
    with open(mapping_path, "r") as body:
        mapping = json.load(body)
    mapping["mappings"]["properties"].update(PERCOLATOR_MAPPING["properties"])
    current_search_client.indices.create(index=percolator_index, body=mapping)
    # Reindex all percolator queries from OAISets
    oaipmh_service = current_rdm_records.oaipmh_server_service
    oaipmh_service.rebuild_index(identity=system_identity)
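These helpers need an application context; one way to load them (the filename is hypothetical, adjust to wherever you saved the script) is from an `invenio shell` session:

# Inside `invenio shell` on the environment running the new code
exec(open("live_index_migration.py").read())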
Then run the following commands in the same session where the helpers above are defined:
#
# Affiliations
#
OLD_AFFILIATIONS_INDEX = "affiliations-affiliation-v1.0.0"
NEW_AFFILIATIONS_INDEX = "affiliations-affiliation-v2.0.0"
run_pre_deploy(OLD_AFFILIATIONS_INDEX, NEW_AFFILIATIONS_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_AFFILIATIONS_INDEX, NEW_AFFILIATIONS_INDEX)

#
# Funders
#
OLD_FUNDERS_INDEX = "funders-funder-v1.0.0"
NEW_FUNDERS_INDEX = "funders-funder-v2.0.0"
run_pre_deploy(OLD_FUNDERS_INDEX, NEW_FUNDERS_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_FUNDERS_INDEX, NEW_FUNDERS_INDEX)

#
# Names
#
OLD_NAMES_INDEX = "names-name-v1.0.0"
NEW_NAMES_INDEX = "names-name-v2.0.0"
run_pre_deploy(OLD_NAMES_INDEX, NEW_NAMES_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_NAMES_INDEX, NEW_NAMES_INDEX)

#
# Communities
#
OLD_COMMUNITIES_INDEX = "communities-communities-v1.0.0"
NEW_COMMUNITIES_INDEX = "communities-communities-v2.0.0"
run_pre_deploy(
    OLD_COMMUNITIES_INDEX,
    NEW_COMMUNITIES_INDEX,
    custom_fields_cfg="COMMUNITIES_CUSTOM_FIELDS",
)
# Sync new and updated documents
run_sync(OLD_COMMUNITIES_INDEX, NEW_COMMUNITIES_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_COMMUNITIES_INDEX, NEW_COMMUNITIES_INDEX)
# Run a last sync to make sure all documents are up-to-date
run_sync(OLD_COMMUNITIES_INDEX, NEW_COMMUNITIES_INDEX)

#
# Users
#
OLD_USERS_INDEX = "users-user-v2.0.0"
NEW_USERS_INDEX = "users-user-v3.0.0"
run_pre_deploy(OLD_USERS_INDEX, NEW_USERS_INDEX)
# Sync new and updated documents
run_sync(OLD_USERS_INDEX, NEW_USERS_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_USERS_INDEX, NEW_USERS_INDEX)
# Run a last sync to make sure all documents are up-to-date
run_sync(OLD_USERS_INDEX, NEW_USERS_INDEX)

#
# Records
#
OLD_RECORDS_INDEX = "rdmrecords-records-record-v6.0.0"
NEW_RECORDS_INDEX = "rdmrecords-records-record-v7.0.0"
run_pre_deploy(OLD_RECORDS_INDEX, NEW_RECORDS_INDEX, custom_fields_cfg="RDM_CUSTOM_FIELDS")
update_records_percolator(index=NEW_RECORDS_INDEX)
# Sync new and updated documents
run_sync(OLD_RECORDS_INDEX, NEW_RECORDS_INDEX)
# Once code is deployed, rollover the index
run_post_deploy(OLD_RECORDS_INDEX, NEW_RECORDS_INDEX)
# Run a last sync to make sure all documents are up-to-date
run_sync(OLD_RECORDS_INDEX, NEW_RECORDS_INDEX)
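After the rollover is verified and the final sync has completed, the old indices can be removed with the `delete_old_index` helper defined above (shown here for records; repeat for the other indices once you are confident):

# Only run this once the new index is confirmed to serve reads and writes
delete_old_index(OLD_RECORDS_INDEX)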
#
# Update record view stats events template to add `is_machine`
#
from invenio_search.proxies import current_search

# Will update all templates (including the record view events)
list(current_search.put_templates())

# You'll also need to update the latest record view events index manually:
"""
PUT /events-stats-record-view-2024-09/_mapping
{
  "properties": {
    "is_machine": {
      "type": "boolean"
    }
  }
}
"""
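Equivalently, this mapping update can be applied from the same shell session via the search client (the index name is the example from above; point it at your latest monthly events index):

current_search_client.indices.put_mapping(
    index="events-stats-record-view-2024-09",
    body={"properties": {"is_machine": {"type": "boolean"}}},
)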