Skip to content

Commit

Permalink
meta-ampere: boot-progress: update /sys paths
Browse files Browse the repository at this point in the history
Update the smpro-misc and smpro-errmon sysfs paths to their new
locations so that boot progress handling can find the data it needs
for BootProgress logging and DIMM training failure reporting.

Verified by:
+ Checked boot progress is correctly reported

Signed-off-by: Quan Nguyen <[email protected]>
  • Loading branch information
qnguyen-ampere authored and thangqn-ampere committed Apr 29, 2022
1 parent 2048ee3 commit c898d18
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ uefi_code=0x00000000

# Publish a POST code on D-Bus (xyz.openbmc_project.State.Boot.Raw, "Value").
#
# Arguments:
#   $1 - boot stage   (formatted as 1 byte,  2 hex digits)
#   $2 - boot status  (formatted as 1 byte,  2 hex digits)
#   $3 - UEFI code    (formatted as 4 bytes, 8 hex digits)
# Globals:
#   postcode (written) - the packed value, e.g. 0x01 0x02 0x3 -> 0x010200000003
# Returns:
#   exit status of busctl
function set_postcode()
{
    # The three fields are packed into one hex literal. Arguments are left
    # unquoted as in the original script, hence the ShellCheck directive.
    # shellcheck disable=SC2086
    postcode=$( printf "0x%02x%02x%08x" $1 $2 $3 )

    # Signature "(tay)": a struct of a uint64 followed by a byte array; the
    # trailing 0 is the (empty) array's element count.
    busctl set-property xyz.openbmc_project.State.Boot.Raw \
        /xyz/openbmc_project/state/boot/raw0 \
        xyz.openbmc_project.State.Boot.Raw Value \(tay\) "$postcode" 0
}

function update_boot_progress()
Expand All @@ -20,7 +21,7 @@ function update_boot_progress()
/xyz/openbmc_project/state/host0 \
xyz.openbmc_project.State.Boot.Progress \
BootProgress s \
xyz.openbmc_project.State.Boot.Progress.ProgressStages.$bootprog
"xyz.openbmc_project.State.Boot.Progress.ProgressStages.$bootprog"
}

function get_boot_stage_string()
Expand Down Expand Up @@ -68,7 +69,7 @@ function get_boot_stage_string()

esac

echo $boot_stage_str
echo "$boot_stage_str"
}

function set_boot_progress()
Expand Down Expand Up @@ -110,7 +111,7 @@ EOF

function log_redfish_bios_panic_event()
{
boot_state_str=$(get_boot_stage_string $1 $2)
boot_state_str=$(get_boot_stage_string "$1" "$2")

logger-systemd --journald << EOF
MESSAGE=
Expand All @@ -130,12 +131,12 @@ while [ $cnt -lt 100 ];
do
# Sleep 200ms
usleep 200000
bg=(`cat /sys/bus/i2c/devices/2-004f/1e78a0c0.i2c-bus:smpro@4f:misc/boot_progress`)
if [ $? -ne 0 ]; then
if ! read -r -a bg <<< "$(cat /sys/bus/platform/devices/smpro-misc.2.auto/boot_progress)";
then
cnt=$((cnt + 1))
# If the host suddenly powers off or reboots while boot-progress is running,
# the /sys interface becomes unavailable. Stop executing the script.
if [ ${host_booted} == "0" ];
if [ "${host_booted}" == "0" ];
then
break
else
Expand All @@ -146,12 +147,12 @@ do
cnt=0

# Check if any update from previous check
if ([ "${boot_stage}" == "${bg[0]}" ] && [ "${boot_status}" == "${bg[1]}" ] && [ "${uefi_code}" == "${bg[2]}" ]); then
if [ "${boot_stage}" == "${bg[0]}" ] && [ "${boot_status}" == "${bg[1]}" ] && [ "${uefi_code}" == "${bg[2]}" ]; then
continue
fi

# Check whether the Host is already ON. If the Host has already booted, update boot progress and break.
if ([ ${boot_stage} == "0x00" ] && [ ${bg[0]} == "0x08" ]);
if [ "${boot_stage}" == "0x00" ] && [ "${bg[0]}" == "0x08" ];
then
update_boot_progress "OSStart"
break
Expand All @@ -165,24 +166,24 @@ do
echo "Boot Progress = ${boot_stage} ${boot_status} ${uefi_code}"

# Log Boot Progress to dbus
if [ ${boot_status} == "0x03" ]; then
if [ "${boot_status}" == "0x03" ]; then
# Log Redfish Event if failure.
log_redfish_bios_panic_event $boot_stage $uefi_code
log_redfish_bios_panic_event "$boot_stage" "$uefi_code"
# Dimm training failed, check errors
if [ ${boot_stage} == "0x04" ]; then
if [ "${boot_stage}" == "0x04" ]; then
/usr/sbin/dimm_train_fail_log.sh 0
/usr/sbin/dimm_train_fail_log.sh 1
fi
elif [ ${boot_status} == "0x01" ]; then
elif [ "${boot_status}" == "0x01" ]; then
# Check and set boot progress to dbus
set_boot_progress $boot_stage $uefi_code
set_boot_progress "$boot_stage" "$uefi_code"
fi

# Log POST Code to dbus.
set_postcode $boot_stage $boot_status $uefi_code
set_postcode "$boot_stage" "$boot_status" "$uefi_code"

# Stop the service when booting to OS
if ([ ${boot_stage} == "0x08" ] && [ ${boot_status} == "0x02" ]);
if [ "${boot_stage}" == "0x08" ] && [ "${boot_status}" == "0x02" ];
then
update_boot_progress "OSStart"
log_redfish_biosboot_ok_event
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
#!/bin/bash
# Print the sysfs path of the event_dimm_syndrome attribute for a socket.
#
# Arguments:
#   $1 - socket selector; the literal string "0" selects the first path,
#        any other value (including empty) selects the second.
# Outputs:
#   exactly one path on stdout
syndrome_path() {
    if [[ "$1" == "0" ]]; then
        echo "/sys/bus/platform/devices/smpro-errmon.1.auto/event_dimm_syndrome"
    else
        echo "/sys/bus/platform/devices/smpro-errmon.4.auto/event_dimm_syndrome"
    fi
}

function log_ampere_oem_redfish_event()
{
msg=$1
priority=$2
severity=$3
msgID=$4
msgArgs1=$5
msgArgs2=$6
logger-systemd --journald << EOF
msg=$1
priority=$2
severity=$3
msgID=$4
msgArgs1=$5
msgArgs2=$6

logger-systemd --journald << EOF
MESSAGE=${msg}
PRIORITY=${priority}
SEVERITY=${severity}
Expand All @@ -25,140 +26,141 @@ EOF
}

# Decode the second PHY syndrome byte into a human-readable message.
#
# Arguments:
#   $1 - syndrome value (any form accepted by shell arithmetic):
#        bits 0-3 = slice number, bit 4 = upper-nibble error flag,
#        bit 5 = lower-nibble error flag
# Outputs:
#   one line: "Slice <n>: <upper nibble status>, <lower nibble status>"
parse_phy_syndrome_s1_type() {
    local s1=$1
    local slice=$((s1 & 0xf))
    local ubit=$(((s1 & 0x10) >> 4))
    local lbit=$(((s1 & 0x20) >> 5))
    local uMsg="Upper Nibble: No Error"
    local lMsg="Lower Nibble: No Error"

    if [[ "$ubit" == 1 ]]; then
        uMsg="Upper Nibble: No rising edge error"
    fi
    if [[ "$lbit" == 1 ]]; then
        lMsg="Lower Nibble: No rising edge error"
    fi

    echo "Slice $slice: $uMsg, $lMsg"
}

# Translate a PHY training syndrome into a description.
#
# Arguments:
#   $1 - syndrome 0: failure class (1-5; anything else prints "N/A")
#   $2 - syndrome 1: extra detail, only consulted for class 2
#        (decoded by parse_phy_syndrome_s1_type)
# Outputs:
#   one descriptive line on stdout
parse_phy_syndrome() {
    local s0=$1
    local s1=$2
    local s1Msg

    case "$s0" in
        1)
            echo "PHY Training Setup failure"
            ;;
        2)
            s1Msg=$(parse_phy_syndrome_s1_type "$s1")
            echo "PHY Write Leveling failure: $s1Msg"
            ;;
        3)
            echo "PHY Read Gate Leveling failure"
            ;;
        4)
            echo "PHY Read Leveling failure"
            ;;
        5)
            echo "PHY Software Training failure"
            ;;
        *)
            echo "N/A"
            ;;
    esac
}

# Translate a DIMM training syndrome into a description.
#
# Arguments:
#   $1 - syndrome 0: failure class (1-3; anything else prints "N/A")
# Outputs:
#   one descriptive line on stdout
parse_dimm_syndrome() {
    local s0=$1

    case "$s0" in
        1)
            echo "DRAM VREFDQ Training failure"
            ;;
        2)
            echo "LRDIMM DB Training failure"
            ;;
        3)
            echo "LRDIMM DB Software Training failure"
            ;;
        *)
            echo "N/A"
            ;;
    esac
}

# Decode one DIMM-training syndrome record and log it as a Redfish event.
#
# Arguments (hex digits WITHOUT a leading "0x"; the prefix is added here):
#   $1 - unused by this function (kept for call compatibility)
#   $2 - channel / slot number
#   $3 - syndrome data word, decoded as:
#          bits 0-1  : error type (1 = PHY training, 2 = DIMM training)
#          bits 2-4  : MCU rank
#          bits 5-7  : syndrome 0
#          bits 8-15 : syndrome 1
# Side effects:
#   calls log_ampere_oem_redfish_event exactly once.
log_err_to_redfish_err() {
    local channel data
    # printf converts hex to decimal; conversion errors are intentionally
    # silenced, in which case printf still emits the digits it could parse.
    channel="$(printf '%d' "0x$2" 2>/dev/null)"
    data="$(printf '%d' "0x$3" 2>/dev/null)"

    local trErrType=$((data & 0x03))
    local rank=$(((data & 0x1C) >> 2))
    local syndrome0=$(((data & 0xE0) >> 5))
    local syndrome1=$(((data & 0xFF00) >> 8))

    local fType=""
    local redfishComp="DIMM"
    local redfishMsg=""

    if [[ "$trErrType" == 1 ]]; then
        # PHY syndrome errors
        fType="PHY training failure"
        redfishMsg=$(parse_phy_syndrome "$syndrome0" "$syndrome1")
    elif [[ "$trErrType" == 2 ]]; then
        # DIMM training errors
        fType="DIMM training failure"
        redfishMsg=$(parse_dimm_syndrome "$syndrome0")
    else
        fType="Invalid DIMM Syndrome error type"
        redfishMsg="NA"
    fi

    log_ampere_oem_redfish_event \
        "" 2 "" "OpenBMC.0.1.AmpereCritical.Critical" \
        "$redfishComp" "Slot $channel MCU rank $rank: $fType: $redfishMsg"
}

# Add an OEM IPMI SEL entry describing one DIMM-training syndrome record.
#
# Arguments (hex digits WITHOUT a leading "0x"; the prefix is added here):
#   $1 - unused by this function (kept for call compatibility)
#   $2 - channel number
#   $3 - syndrome data word; split into high byte (byte0) and low byte (byte1)
# Globals (read):
#   EVENT_DIR_ASSERTION, OEM_SENSOR_SPECIFIC, BOOT_SYNDROME_DATA,
#   SENSOR_TYPE_SYSTEM_FW_PROGRESS, SENSOR_BOOT_PROGRESS, socket
# Side effects:
#   one `busctl call ... IpmiSelAddOem` invocation.
log_err_to_sel_err() {
    local channel data
    # Conversion errors are intentionally silenced.
    channel="$(printf '%d' "0x$2" 2>/dev/null)"
    data="$(printf '%d' "0x$3" 2>/dev/null)"

    local byte0=$(((data & 0xff00) >> 8))
    local byte1=$((data & 0xff))
    local evtdata0=$((EVENT_DIR_ASSERTION | OEM_SENSOR_SPECIFIC))
    # channel in bits 4+, socket in bit 3, OR-ed with BOOT_SYNDROME_DATA
    local evtdata1=$(((channel << 4) | BOOT_SYNDROME_DATA | (socket << 3)))

    # PHY syndrome errors
    # OEM data bytes
    #   oem id: 3 bytes [0x3a 0xcd 0x00]
    #   sensor num: 1 byte
    #   sensor type: 1 byte
    #   data bytes: 4 bytes
    #   sel type: 4 bytes [0x00 0x00 0x00 0xC0]
    # Every argument is a separate word; each continuation line must keep a
    # space before its trailing backslash so adjacent values are not joined.
    busctl call xyz.openbmc_project.Logging.IPMI \
        /xyz/openbmc_project/Logging/IPMI \
        xyz.openbmc_project.Logging.IPMI IpmiSelAddOem \
        sayy "" 12 \
        0x3a 0xcd 0x00 \
        "$SENSOR_TYPE_SYSTEM_FW_PROGRESS" "$SENSOR_BOOT_PROGRESS" \
        "$evtdata0" "$evtdata1" "$byte0" "$byte1" \
        0x00 0x00 0x00 0xC0
}

# Constants used when composing the OEM SEL record in log_err_to_sel_err.
BOOT_SYNDROME_DATA=4
SENSOR_BOOT_PROGRESS=235
EVENT_DIR_ASSERTION=0x00
EVENT_DIR_DEASSERTION=0x01
OEM_SENSOR_SPECIFIC=0x70
SENSOR_TYPE_SYSTEM_FW_PROGRESS=0x0F

# $1 selects the socket whose event_dimm_syndrome file is processed.
socket=$1
filename=$(syndrome_path "$socket")
if [ ! -f "$filename" ]; then
    # A missing attribute is not treated as an error: report and exit 0.
    echo "Can not find event_dimm_syndrome of socket $socket"
    exit 0
fi

echo "File syndrome $filename"
# n counts processed records; it is incremented but not read afterwards.
n=0
# Split each record into fields with `read -a`: the loggers are handed
# fields [0], [1] and [2], which are only populated when `line` is an array.
# NOTE(review): assumes the fields are whitespace-separated - confirm against
# the smpro-errmon driver's output format.
# The `|| [ ... ]` clause also processes a final record lacking a newline.
while read -r -a line || [ "${#line[@]}" -gt 0 ]; do
    # Skip blank lines rather than logging an all-empty record.
    [ "${#line[@]}" -gt 0 ] || continue
    n=$((n+1))
    log_err_to_redfish_err "${line[0]}" "${line[1]}" "${line[2]}"
    log_err_to_sel_err "${line[0]}" "${line[1]}" "${line[2]}"
    # Pause 300 ms between records.
    usleep 300000
done < "$filename"

exit 0

0 comments on commit c898d18

Please sign in to comment.