From 3365d3d5a7847f1c48d0b17390a5990c46a8591b Mon Sep 17 00:00:00 2001 From: Dung Cao Date: Fri, 23 Feb 2024 08:25:45 +0000 Subject: [PATCH] meta-ampere: mtmitchell: Support always retrieve BERT when power cycle/reset/off When a BERT crash happens, depending on the Host UEFI firmware configuration, the Host might not reboot immediately, so there is no REBOOT_ACK from the Host to prompt the BMC to check for the BERT crash. As a result, the BERT crash might not be handled. This commit checks BERT every time the Host is power cycled, reset or powered off to handle the above case. Signed-off-by: Dung Cao --- .../host/ampere-hostctrl.bbappend | 14 +++++- .../ampere-bert-power-handle.service | 10 ++++ .../ampere-bert-power-handle.sh | 43 ++++++++++++++++ .../ampere-utils/ampere_power_util.sh | 49 ++----------------- 4 files changed, 70 insertions(+), 46 deletions(-) create mode 100644 meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.service create mode 100644 meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.sh diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl.bbappend b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl.bbappend index 6a3bf862d454..6d5a6daecebb 100644 --- a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl.bbappend +++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl.bbappend @@ -3,12 +3,24 @@ FILESEXTRAPATHS:append := "${THISDIR}/${PN}:" SRC_URI += " \ file://ampere_power_on_failure_check.sh \ file://ampere-host-on-host-check-override.conf \ + file://ampere-bert-power-handle.service \ + file://ampere-bert-power-handle.sh \ " -FILES:${PN} += "${systemd_system_unitdir}/ampere-host-on-host-check@0.service.d" +FILES:${PN} += " \ + ${systemd_system_unitdir}/ampere-host-on-host-check@0.service.d \ + ${systemd_system_unitdir}/ampere-bert-power-handle.service \ + " +AMPERE_BERT_TMPL = "ampere-bert-power-handle.service" +AMPERE_BERT_INSTMPL = 
"ampere-bert-power-handle.service" +AMPERE_BERT_TGTFMT = "obmc-host-stop@{0}.target" +AMPERE_BERT_FMT = "../${AMPERE_BERT_TMPL}:${AMPERE_BERT_TGTFMT}.requires/${AMPERE_BERT_INSTMPL}" +SYSTEMD_LINK:${PN} += "${@compose_list_zip(d, 'AMPERE_BERT_FMT', 'OBMC_HOST_INSTANCES')}" +SYSTEMD_SERVICE:${PN} += "${AMPERE_BERT_TMPL}" do_install:append() { install -m 0755 ${WORKDIR}/ampere_power_on_failure_check.sh ${D}/${sbindir}/ + install -m 0755 ${WORKDIR}/ampere-bert-power-handle.sh ${D}/${sbindir}/ install -d ${D}${systemd_system_unitdir}/ampere-host-on-host-check@0.service.d install -m 644 ${WORKDIR}/ampere-host-on-host-check-override.conf \ diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.service b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.service new file mode 100644 index 000000000000..58fb06a83b2d --- /dev/null +++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.service @@ -0,0 +1,10 @@ +[Unit] +Description=Ampere BERT handler for power action of the host +After=obmc-host-stop@0.target +Before=obmc-host-reboot@0.target + +[Service] +Restart=no +ExecStart=/usr/sbin/ampere-bert-power-handle.sh +SyslogIdentifier=ampere-bert-power-handle +Type=oneshot diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.sh b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.sh new file mode 100644 index 000000000000..e7ed39aa982b --- /dev/null +++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-hostctrl/ampere-bert-power-handle.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# shellcheck disable=SC2046 +# shellcheck source=meta-ampere/meta-common/recipes-ampere/platform/ampere-utils/ampere_power_control_lock.sh +source /usr/sbin/ampere_power_control_lock.sh + +wait_bert_complete() { + # Wait maximum 60 seconds for BERT completed + cnt=20 + while [ $cnt -gt 0 ] + do + 
bert_done=$(busctl get-property com.ampere.CrashCapture.Trigger /com/ampere/crashcapture/trigger com.ampere.CrashCapture.Trigger TriggerActions | cut -d"." -f6) + if ! [ "$bert_done" == "Done\"" ]; then + sleep 3 + cnt=$((cnt - 1)) + else + break + fi + done + if [ "$cnt" -eq "0" ]; then + echo "Timeout 60 seconds, BERT is still not completed" + return 1 + fi + return 0 +} + + +echo "Notify Crash Capture to read BERT." +busctl set-property com.ampere.CrashCapture.Trigger \ + /com/ampere/crashcapture/trigger \ + com.ampere.CrashCapture.Trigger \ + TriggerProcess b true +bert_timeout="0" +# Wait until RAS BERT process completed +wait_bert_complete +bert_timeout=$? +# If the crash capture process is crash or works unstable, it does +# not unmask the power action. We should call unmask here to make sure +# the power control is unmasked +if [[ "${bert_timeout}" == "1" ]]; then + unmask_reboot_targets + unmask_off_targets +fi diff --git a/meta-ampere/meta-mitchell/recipes-ampere/platform/ampere-utils/ampere_power_util.sh b/meta-ampere/meta-mitchell/recipes-ampere/platform/ampere-utils/ampere_power_util.sh index 807f7e7d8b68..624eeec4c2e3 100644 --- a/meta-ampere/meta-mitchell/recipes-ampere/platform/ampere-utils/ampere_power_util.sh +++ b/meta-ampere/meta-mitchell/recipes-ampere/platform/ampere-utils/ampere_power_util.sh @@ -1,8 +1,6 @@ #!/bin/bash # shellcheck disable=SC2046 -# shellcheck source=meta-ampere/meta-common/recipes-ampere/platform/ampere-utils/ampere_power_control_lock.sh -source /usr/sbin/ampere_power_control_lock.sh # Usage of this utility function usage() { @@ -85,44 +83,7 @@ force_reset() { gpioset $(gpiofind host0-sysreset-n)=1 } -wait_bert_complete() { - # Wait maximum 60 seconds for BERT completed - cnt=20 - while [ $cnt -gt 0 ] - do - bert_done=$(busctl get-property com.ampere.CrashCapture.Trigger /com/ampere/crashcapture/trigger com.ampere.CrashCapture.Trigger TriggerActions | cut -d"." -f6) - if ! 
[ "$bert_done" == "Done\"" ]; then - sleep 3 - cnt=$((cnt - 1)) - else - break - fi - done - if [ "$cnt" -eq "0" ]; then - echo "Timeout 60 seconds, BERT is still not completed" - return 1 - fi - return 0 -} - host_reboot_wa() { - echo "Notify Crash Capture reboot action from host." - busctl set-property com.ampere.CrashCapture.Trigger \ - /com/ampere/crashcapture/trigger \ - com.ampere.CrashCapture.Trigger \ - TriggerProcess b true - bert_timeout="0" - # Wait until RAS BERT process completed - wait_bert_complete - bert_timeout=$? - # If the crash capture process is crash or works unstable, it does - # not unmask the power action. We should call unmask here to make sure - # the power control is unmasked - if [[ "${bert_timeout}" == "1" ]]; then - unmask_reboot_targets - unmask_off_targets - fi - busctl set-property xyz.openbmc_project.State.Chassis \ /xyz/openbmc_project/state/chassis0 xyz.openbmc_project.State.Chassis \ RequestedPowerTransition s "xyz.openbmc_project.State.Chassis.Transition.Off" @@ -136,12 +97,10 @@ host_reboot_wa() { done echo "The power is already Off." - # Keep the system off if BERT is timeout - if [[ "${bert_timeout}" == "0" ]]; then - busctl set-property xyz.openbmc_project.State.Host \ - /xyz/openbmc_project/state/host0 xyz.openbmc_project.State.Host \ - RequestedHostTransition s "xyz.openbmc_project.State.Host.Transition.On" - fi + busctl set-property xyz.openbmc_project.State.Host \ + /xyz/openbmc_project/state/host0 xyz.openbmc_project.State.Host \ + RequestedHostTransition s "xyz.openbmc_project.State.Host.Transition.On" + } if [ ! -d "/run/openbmc/" ]; then