Skip to content

Commit

Permalink
meta-ampere: mtmitchell: Support always retrieve BERT when power cycl…
Browse files Browse the repository at this point in the history
…e/reset/off

When a BERT crash happens, depending on the Host UEFI firmware
configuration, the Host might not reboot immediately, so the BMC never
receives the REBOOT_ACK from the Host that triggers its BERT crash check.
As a result, the BERT crash might not be handled.
This commit changes the BMC to check BERT every time the Host is power
cycled, reset, or powered off, so that the above case is handled.

Signed-off-by: Dung Cao <[email protected]>
  • Loading branch information
dcao-ampere authored and thangqn-ampere committed May 6, 2024
1 parent 29f12ca commit 3365d3d
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,24 @@ FILESEXTRAPATHS:append := "${THISDIR}/${PN}:"
SRC_URI += " \
file://ampere_power_on_failure_check.sh \
file://ampere-host-on-host-check-override.conf \
file://ampere-bert-power-handle.service \
file://ampere-bert-power-handle.sh \
"

FILES:${PN} += "${systemd_system_unitdir}/[email protected]"
FILES:${PN} += " \
${systemd_system_unitdir}/[email protected] \
${systemd_system_unitdir}/ampere-bert-power-handle.service \
"
AMPERE_BERT_TMPL = "ampere-bert-power-handle.service"
AMPERE_BERT_INSTMPL = "ampere-bert-power-handle.service"
AMPERE_BERT_TGTFMT = "obmc-host-stop@{0}.target"
AMPERE_BERT_FMT = "../${AMPERE_BERT_TMPL}:${AMPERE_BERT_TGTFMT}.requires/${AMPERE_BERT_INSTMPL}"
SYSTEMD_LINK:${PN} += "${@compose_list_zip(d, 'AMPERE_BERT_FMT', 'OBMC_HOST_INSTANCES')}"
SYSTEMD_SERVICE:${PN} += "${AMPERE_BERT_TMPL}"

do_install:append() {
install -m 0755 ${WORKDIR}/ampere_power_on_failure_check.sh ${D}/${sbindir}/
install -m 0755 ${WORKDIR}/ampere-bert-power-handle.sh ${D}/${sbindir}/

install -d ${D}${systemd_system_unitdir}/ampere-host-on-host-check@0.service.d
install -m 644 ${WORKDIR}/ampere-host-on-host-check-override.conf \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[Unit]
Description=Ampere BERT handler for power action of the host
After[email protected]
Before[email protected]

[Service]
Restart=no
ExecStart=/usr/sbin/ampere-bert-power-handle.sh
SyslogIdentifier=ampere-bert-power-handle
Type=oneshot
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# shellcheck disable=SC2046
# shellcheck source=meta-ampere/meta-common/recipes-ampere/platform/ampere-utils/ampere_power_control_lock.sh
source /usr/sbin/ampere_power_control_lock.sh

#######################################
# Wait for the crash-capture service to report that BERT handling is done.
# Polls the TriggerActions property of com.ampere.CrashCapture.Trigger with
# busctl up to 20 times, sleeping 3 seconds after every unsuccessful poll
# (roughly 60 seconds in total).
# Globals:   none (loop state is kept local so callers are not clobbered)
# Arguments: none
# Outputs:   a timeout message on stdout if "Done" is never observed
# Returns:   0 as soon as the property reports Done, 1 on timeout
#######################################
wait_bert_complete() {
  local attempts=20
  local state

  while (( attempts > 0 )); do
    # The 6th dot-separated field of the busctl output is compared against
    # `Done"`; presumably the value is a quoted, dotted enum string whose
    # last component carries the closing quote.
    state=$(busctl get-property com.ampere.CrashCapture.Trigger \
      /com/ampere/crashcapture/trigger \
      com.ampere.CrashCapture.Trigger TriggerActions | cut -d"." -f6)
    if [[ "${state}" == 'Done"' ]]; then
      return 0
    fi
    sleep 3
    attempts=$((attempts - 1))
  done

  echo "Timeout 60 seconds, BERT is still not completed"
  return 1
}


# Ask the crash-capture service to start reading BERT.
echo "Notify Crash Capture to read BERT."
busctl set-property \
  com.ampere.CrashCapture.Trigger /com/ampere/crashcapture/trigger \
  com.ampere.CrashCapture.Trigger TriggerProcess b true

# Block until the RAS BERT process has completed (or timed out).
wait_bert_complete
bert_timeout=$?

# If the crash-capture process crashed or is unstable, it will not unmask
# the power actions itself. Unmask them here on timeout so that power
# control is never left masked.
case "${bert_timeout}" in
  1)
    unmask_reboot_targets
    unmask_off_targets
    ;;
esac
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/bin/bash

# shellcheck disable=SC2046
# shellcheck source=meta-ampere/meta-common/recipes-ampere/platform/ampere-utils/ampere_power_control_lock.sh
source /usr/sbin/ampere_power_control_lock.sh

# Usage of this utility
function usage() {
Expand Down Expand Up @@ -85,44 +83,7 @@ force_reset() {
gpioset $(gpiofind host0-sysreset-n)=1
}

# Poll the crash-capture TriggerActions property until it reports Done.
# Makes at most 20 attempts, pausing 3 seconds after each miss (60 seconds
# overall). Returns 0 when Done is seen; otherwise prints a timeout message
# and returns 1. Leaves its progress in the globals `cnt` and `bert_done`.
wait_bert_complete() {
  cnt=20
  until [ "$cnt" -le 0 ]; do
    bert_done=$(
      busctl get-property com.ampere.CrashCapture.Trigger \
        /com/ampere/crashcapture/trigger \
        com.ampere.CrashCapture.Trigger TriggerActions |
        cut -d"." -f6
    )
    # Stop polling as soon as the last enum component reads Done.
    [ "$bert_done" == "Done\"" ] && break
    sleep 3
    cnt=$((cnt - 1))
  done

  # A non-zero counter means the loop was left early, i.e. BERT finished.
  if [ "$cnt" -ne 0 ]; then
    return 0
  fi
  echo "Timeout 60 seconds, BERT is still not completed"
  return 1
}

host_reboot_wa() {
echo "Notify Crash Capture reboot action from host."
busctl set-property com.ampere.CrashCapture.Trigger \
/com/ampere/crashcapture/trigger \
com.ampere.CrashCapture.Trigger \
TriggerProcess b true
bert_timeout="0"
# Wait until RAS BERT process completed
wait_bert_complete
bert_timeout=$?
# If the crash capture process is crash or works unstable, it does
# not unmask the power action. We should call unmask here to make sure
# the power control is unmasked
if [[ "${bert_timeout}" == "1" ]]; then
unmask_reboot_targets
unmask_off_targets
fi

busctl set-property xyz.openbmc_project.State.Chassis \
/xyz/openbmc_project/state/chassis0 xyz.openbmc_project.State.Chassis \
RequestedPowerTransition s "xyz.openbmc_project.State.Chassis.Transition.Off"
Expand All @@ -136,12 +97,10 @@ host_reboot_wa() {
done
echo "The power is already Off."

# Keep the system off if BERT is timeout
if [[ "${bert_timeout}" == "0" ]]; then
busctl set-property xyz.openbmc_project.State.Host \
/xyz/openbmc_project/state/host0 xyz.openbmc_project.State.Host \
RequestedHostTransition s "xyz.openbmc_project.State.Host.Transition.On"
fi
busctl set-property xyz.openbmc_project.State.Host \
/xyz/openbmc_project/state/host0 xyz.openbmc_project.State.Host \
RequestedHostTransition s "xyz.openbmc_project.State.Host.Transition.On"

}

if [ ! -d "/run/openbmc/" ]; then
Expand Down

0 comments on commit 3365d3d

Please sign in to comment.