Skip to content

Commit

Permalink
overhauling the comp_and_load_drivers.sh script
Browse files Browse the repository at this point in the history
  • Loading branch information
ruck314 committed Jul 9, 2024
1 parent 7181767 commit a4e2c64
Showing 1 changed file with 28 additions and 22 deletions.
50 changes: 28 additions & 22 deletions data_gpu/driver/comp_and_load_drivers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,43 +7,49 @@ if [ "$EUID" -ne 0 ]; then
fi

# Get the gcc that kernel was built with
version_info=$(cat /proc/version)
CC=$(echo "$version_info" | grep -oP '\b\w+-\w+-gcc-\d+\b')
# Work out which compiler built the running kernel by scanning /proc/version
# for a versioned compiler name such as x86_64-linux-gnu-gcc-12.
version_content=$(cat /proc/version)
# head -n 1 keeps CC a single word even if the pattern matches more than once.
CC=$(echo "$version_content" | grep -oP 'x86_64-linux-gnu-gcc-\d+' | head -n 1)
if [ -z "$CC" ]; then
  # An empty CC would be handed to make as "CC=", blanking out the compiler;
  # fall back to the default compiler name instead.
  echo "Warning: no x86_64-linux-gnu-gcc-<N> found in /proc/version; falling back to gcc" >&2
  CC=gcc
fi
echo "CC: $CC"

# Define Nvidia path
output=$(find /usr -name nv-p2p.h)
NVIDIA_PATH=$(echo "$output" | grep -oP '^/usr/src/nvidia-\d+\.\d+\.\d+')
# Locate the Nvidia driver source tree: search /usr for nv-p2p.h and keep the
# first hit located under /usr/src/nvidia-<major>.<minor>.<patch>.
# Errors printed by find are discarded.
output=$(find /usr -name nv-p2p.h 2>/dev/null)
NVIDIA_PATH=$(echo "$output" | grep -oP '^/usr/src/nvidia-\d+\.\d+\.\d+' | head -n 1)
if [ -z "$NVIDIA_PATH" ]; then
  # Without a source tree nothing below can build or load; stop here with a
  # clear message instead of failing later on cd/make/insmod.
  echo "Error: Could not find nv-p2p.h under /usr/src/nvidia-<version>."
  exit 1
fi
echo "Using Nvidia path: $NVIDIA_PATH"

# Remember the directory the script was launched from so it can be returned to
# after working inside the Nvidia source tree.
RET_DIR="${PWD}"
printf 'Using RET_DIR: %s\n' "${RET_DIR}"

# Shut down any running display manager and nvidia-persistenced before
# unloading modules; otherwise rmmod fails with "Module XXX is in use by: YYY".
# https://forums.developer.nvidia.com/t/cant-install-new-driver-cannot-unload-module/63639
#   gdm     - GNOME Display Manager
#   lightdm - LightDM
#   sddm    - SDDM
for service in gdm lightdm sddm nvidia-persistenced; do
  systemctl stop "$service"
done

# Remove existing Nvidia modules (if any)
/usr/sbin/rmmod datagpu 2>/dev/null
/usr/sbin/rmmod nvidia-drm 2>/dev/null
/usr/sbin/rmmod nvidia-uvm 2>/dev/null
/usr/sbin/rmmod nvidia-modeset 2>/dev/null
/usr/sbin/rmmod nvidia 2>/dev/null
# Unload datagpu and then the Nvidia modules, one at a time, in this fixed
# order. The result of each rmmod is not checked, so a failure is only
# reported by rmmod itself and the script carries on.
# NOTE(review): order presumably unloads dependents before the core nvidia
# module — confirm before reordering.
for module in datagpu nvidia-drm nvidia-uvm nvidia-modeset nvidia; do
  /usr/sbin/rmmod "$module"
done

# Go to nvidia path and build
cd $NVIDIA_PATH
make
# Enter the Nvidia driver source tree and build it with the selected compiler.
# Abort if the build fails so that stale or missing .ko files are never loaded.
cd "$NVIDIA_PATH" || { echo "Error: Failed to change directory to $NVIDIA_PATH"; exit 1; }
make CC="$CC" || { echo "Error: Failed to build the Nvidia driver in $NVIDIA_PATH"; exit 1; }

# If modinfo can find an "ecc" module, load it with modprobe before the Nvidia
# modules are inserted; abort when that load fails.
# NOTE(review): presumably a dependency of the Nvidia modules on some kernels — confirm.
if modinfo ecc >/dev/null 2>&1; then
  if ! modprobe ecc; then
    echo "Error: Failed to insert ecc module."
    exit 1
  fi
fi

/usr/sbin/insmod nvidia.ko NVreg_OpenRmEnableUnsupportedGpus=1 NVreg_EnableStreamMemOPs=1 || { echo "Error: Failed to insert nvidia.ko."; exit 1; }

/usr/sbin/insmod nvidia-modeset.ko || { echo "Error: Failed to insert nvidia-modeset.ko."; exit 1; }

/usr/sbin/insmod nvidia-uvm.ko || { echo "Error: Failed to insert nvidia-uvm.ko."; exit 1; }

/usr/sbin/insmod nvidia-drm.ko modeset=1 || { echo "Error: Failed to insert nvidia-drm.ko."; exit 1; }
# Load the freshly built Nvidia kernel modules by absolute path, stopping at
# the first one that fails to insert. Paths are quoted so the expansion is
# never word-split or globbed.
/usr/sbin/insmod "$NVIDIA_PATH/nvidia.ko" NVreg_OpenRmEnableUnsupportedGpus=1 NVreg_EnableStreamMemOPs=1 || { echo "Error: Failed to insert nvidia.ko."; exit 1; }
/usr/sbin/insmod "$NVIDIA_PATH/nvidia-modeset.ko" || { echo "Error: Failed to insert nvidia-modeset.ko."; exit 1; }
/usr/sbin/insmod "$NVIDIA_PATH/nvidia-drm.ko" modeset=1 || { echo "Error: Failed to insert nvidia-drm.ko."; exit 1; }
/usr/sbin/insmod "$NVIDIA_PATH/nvidia-uvm.ko" || { echo "Error: Failed to insert nvidia-uvm.ko."; exit 1; }

cd $RET_DIR
# Return to the directory the script was started from, build the datagpu
# driver there against the Nvidia driver sources, and load it.
# Abort if the build fails so that a stale datagpu.ko is never inserted.
cd "$RET_DIR" || { echo "Error: Failed to change directory to $RET_DIR"; exit 1; }
make CC="$CC" NVIDIA_DRIVERS="$NVIDIA_PATH" || { echo "Error: Failed to build datagpu.ko."; exit 1; }
/usr/sbin/insmod "$RET_DIR/datagpu.ko" || { echo "Error: Failed to insert datagpu.ko."; exit 1; }

make NVIDIA_DRIVERS=$NVIDIA_PATH
/usr/sbin/insmod datagpu.ko || { echo "Error: Failed to insert datagpu.ko."; exit 1; }

0 comments on commit a4e2c64

Please sign in to comment.