Skip to content

Commit

Permalink
flashy: remediation: disable kernel hung_task_panic
Browse files Browse the repository at this point in the history
Summary:
Disable the "hung_task_panic" as a common remediation.

We have seen this kernel panic on some Yv4 systems.  According to the
kernel documentation the default behavior is disabled, but the real
default comes from the lesser-documented CONFIG_BOOTPARAM_HUNG_TASK_PANIC
option (which is somehow enabled at present).  By explicitly setting
hung_task_panic to disabled we can reduce this type of kernel panic
during flash updates.

Test Plan: Go tests.

Reviewed By: doranand

Differential Revision: D66163794

fbshipit-source-id: 37b9363800cc24a5c69f13c61dcef720dedd25ac
  • Loading branch information
williamspatrick authored and facebook-github-bot committed Nov 19, 2024
1 parent b87cabb commit b17b25b
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* Copyright 2020-present Facebook. All Rights Reserved.
*
* This program file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program in a file named COPYING; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*/

package common

import (
"time"

"github.com/facebook/openbmc/tools/flashy/lib/fileutils"
"github.com/facebook/openbmc/tools/flashy/lib/step"
"github.com/pkg/errors"
)

// init passes disableHangPanic to step.RegisterStep when this package is
// initialized.
func init() {
step.RegisterStep(disableHangPanic)
}

// disableHangPanicFilePath is the procfs sysctl file for the kernel's
// "hung_task_panic" setting, which controls whether the kernel panics when a
// hung task is detected.
const disableHangPanicFilePath = "/proc/sys/kernel/hung_task_panic"

// disableHangPanic turns off the kernel's "hung_task_panic" setting by writing
// "0" to disableHangPanicFilePath, giving the write up to 30 seconds.
//
// stepParams is not consulted. The function returns nil when the write
// succeeds; otherwise it returns a step.ExitSafeToReboot whose Err describes
// the failed write.
//
// NOTE(review): the error text spells the setting "hang_task_panic" rather
// than "hung_task_panic". The unit test asserts on this exact message, so the
// two must be changed together if the spelling is ever corrected.
func disableHangPanic(stepParams step.StepParams) step.StepExitError {
	const writeTimeout = 30 * time.Second

	if writeErr := fileutils.WriteFileWithTimeout(
		disableHangPanicFilePath, []byte("0"), 0644, writeTimeout,
	); writeErr != nil {
		return step.ExitSafeToReboot{
			Err: errors.Errorf(
				"Failed to write to hang_task_panic file '%v': %v",
				disableHangPanicFilePath, writeErr,
			),
		}
	}
	return nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
* Copyright 2020-present Facebook. All Rights Reserved.
*
* This program file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program in a file named COPYING; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*/

package common

import (
"os"
"testing"
"time"

"github.com/facebook/openbmc/tools/flashy/lib/fileutils"
"github.com/facebook/openbmc/tools/flashy/lib/step"
"github.com/pkg/errors"
)

// TestDisableHangPanic checks that disableHangPanic writes "0" to the
// hung_task_panic procfs file, returns nil when the write succeeds, and
// returns step.ExitSafeToReboot with the expected message when it fails.
func TestDisableHangPanic(t *testing.T) {
	// Arguments the fake writer insists on before returning the configured
	// result for the sub-test.
	const (
		expectedPath = "/proc/sys/kernel/hung_task_panic"
		expectedData = "0"
	)

	// fileutils.WriteFileWithTimeout is replaced in every sub-test; restore
	// the real implementation once the whole test finishes.
	realWriteFile := fileutils.WriteFileWithTimeout
	defer func() { fileutils.WriteFileWithTimeout = realWriteFile }()

	tests := []struct {
		name         string
		writeFileErr error
		want         step.StepExitError
	}{
		{
			name:         "succeeded",
			writeFileErr: nil,
			want:         nil,
		},
		{
			name:         "WriteFile failed",
			writeFileErr: errors.Errorf("WriteFile failed"),
			want: step.ExitSafeToReboot{
				Err: errors.Errorf(
					"Failed to write to hang_task_panic file '/proc/sys/kernel/hung_task_panic': WriteFile failed",
				),
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// The fake rejects unexpected arguments with a descriptive
			// error; otherwise it returns the error configured for this case.
			fileutils.WriteFileWithTimeout = func(path string, contents []byte, _ os.FileMode, _ time.Duration) error {
				switch {
				case path != expectedPath:
					return errors.Errorf("filename: want %v got %v", expectedPath, path)
				case string(contents) != expectedData:
					return errors.Errorf("data: want %v got %v", expectedData, string(contents))
				}
				return test.writeFileErr
			}

			result := disableHangPanic(step.StepParams{})
			step.CompareTestExitErrors(test.want, result, t)
		})
	}
}

0 comments on commit b17b25b

Please sign in to comment.