Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing index & type errors in prime_offload_tester.py (Bugfix) #1588

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions providers/base/bin/prime_offload_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import sys
import re
import os
import typing as T
from shlex import split as sh_split


class PrimeOffloader:
Expand All @@ -48,7 +50,7 @@

def find_file_containing_string(
self, search_directory: str, filename_pattern: str, search_string: str
) -> str:
) -> T.Optional[str]:
"""
Search for a file matching a specific pattern
that contains a given string.
Expand Down Expand Up @@ -89,6 +91,7 @@
card_path = self.find_file_containing_string(
"/sys/kernel/debug/dri", "name", pci_bdf
)
assert card_path, "Couldn't find a card named: {}".format(pci_bdf)
return card_path.split("/")[5]
except IndexError as e:
raise SystemExit("return value format error {}".format(repr(e)))
Expand Down Expand Up @@ -139,7 +142,7 @@
return ""

def check_offload(
self, cmd: list, card_id: str, card_name: str, timeout: int
self, cmd: T.List[str], card_id: str, card_name: str, timeout: int
):
"""
Used to check if the provided command is executed on a specific GPU.
Expand Down Expand Up @@ -168,7 +171,7 @@
self.logger.info(" Device Name:[{}]".format(card_name))
return
self.logger.info("Checking fail:")
self.logger.info(" Couldn't find process [{}]".format(cmd))
self.logger.info(" Couldn't find process {}".format(cmd))
self.check_result = True

def _find_bdf(self, card_id: str):
Expand All @@ -182,7 +185,7 @@
data_in_name = f.read()
return data_in_name.split()[1].split("=")[1]

def find_offload(self, cmd: str, timeout: int):
def find_offload(self, cmd_str: str, timeout: int):
"""
Find the card that the command is running on.
This script looks for the card on which a specific command is running.
Expand All @@ -200,7 +203,7 @@

deadline = time.time() + timeout

cmd = cmd.split()
cmd = sh_split(cmd_str)

while time.time() < deadline:
time.sleep(delay)
Expand All @@ -209,15 +212,16 @@
card_path = self.find_file_containing_string(
directory, "clients", cmd[0]
)
if directory in card_path:

if card_path and directory in card_path:
try:
# The same graphics card may show up under several ids (such as 0 and
# 128) at the same time. Therefore, pick the first one
first_card = card_path.splitlines()[0]
card_id = first_card.split("/")[5]
bdf = self._find_bdf(card_id)
self.logger.info("Process is running on:")
self.logger.info(" process:[{}]".format(cmd))
self.logger.info(" process:[{}]".format(cmd[0]))
self.logger.info(
" Card ID:[{}]".format(self.find_card_id(bdf))
)
Expand All @@ -229,8 +233,9 @@
self.logger.info(
"Finding card information failed {}".format(repr(e))
)

self.logger.info("Checking fail:")
self.logger.info(" Couldn't find process [{}]".format(cmd))
self.logger.info(" Couldn't find process {}".format(cmd))
self.check_result = True

def check_nv_offload_env(self):
Expand Down Expand Up @@ -262,7 +267,7 @@
"No prime-select, it should be ok to run prime offload"
)

def cmd_runner(self, cmd: list, env: dict = None):
def cmd_runner(self, cmd: T.List[str], env: T.Optional[T.Dict] = None):
"""
Used to execute a command and pipe its output to the screen.
Expand All @@ -283,7 +288,8 @@

# redirect command output real time
while runner.poll() is None:
line = runner.stdout.readline().strip()
# when stdout=subprocess.PIPE, stdout is not None
line = runner.stdout.readline().strip() # type: ignore

Check warning on line 292 in providers/base/bin/prime_offload_tester.py

View check run for this annotation

Codecov / codecov/patch

providers/base/bin/prime_offload_tester.py#L292

Added line #L292 was not covered by tests
self.logger.info(line)
except subprocess.CalledProcessError as e:
raise SystemExit("run command failed {}".format(repr(e)))
Expand Down Expand Up @@ -313,7 +319,9 @@
if self.check_result:
raise SystemExit("Couldn't find process running on GPU")

def cmd_checker(self, cmd: str, pci_bdf: str, driver: str, timeout: int):
def cmd_checker(
self, cmd_str: str, pci_bdf: str, driver: str, timeout: int
):
"""
run offload command and check it runs on correct GPU
Expand All @@ -331,7 +339,7 @@
# run offload command in other process
dri_pci_bdf_format = re.sub("[:.]", "_", pci_bdf)

if "timeout" in cmd:
if "timeout" in cmd_str:
raise SystemExit("Put timeout in command isn't allowed")

env = os.environ.copy()
Expand All @@ -349,19 +357,22 @@
# if the nv driver is in nvidia mode, prime/reverse prime can't work.
self.check_nv_offload_env()

cmd = sh_split(cmd_str)
# use another thread to check whether the offload is correct or not
check_thread = threading.Thread(
target=self.check_offload, args=(cmd, card_id, card_name, timeout)
)
check_thread.start()
try:
run_with_timeout(self.cmd_runner, timeout, cmd.split(), env)
run_with_timeout(self.cmd_runner, timeout, cmd, env)
except TimeoutError:
self.logger.info("Test finished")
check_thread.join()

if self.check_result:
raise SystemExit("offload to specific GPU failed")
raise SystemExit(
"offload to specific GPU: {} failed".format(pci_bdf)
)

def parse_args(self, args=sys.argv[1:]):
"""
Expand Down Expand Up @@ -428,4 +439,5 @@


if __name__ == "__main__":
assert os.getuid() == 0, "This test must be run as root"
PrimeOffloader().main()
10 changes: 5 additions & 5 deletions providers/base/tests/test_prime_offload_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ def test_pci_name_digital_error_format_check(self, mock_cmd):
def test_empty_string_id_not_found(self, mock_cmd):
po = PrimeOffloader()
# empty string
mock_cmd.return_value = ""
with self.assertRaises(SystemExit):
mock_cmd.return_value = None
with self.assertRaises(AssertionError):
po.find_card_id("0000:00:00.0")
mock_cmd.assert_called_with(
"/sys/kernel/debug/dri",
Expand Down Expand Up @@ -578,7 +578,7 @@ def test_non_nv_driver_check(self):
os.environ.copy = MagicMock(return_value={})
po.cmd_checker("glxgears", "0000:00:00.0", "xxx", 0)
# check that the check_offload function gets the correct args
po.check_offload.assert_called_with("glxgears", "0", "Intel", 0)
po.check_offload.assert_called_with(["glxgears"], "0", "Intel", 0)

@patch("prime_offload_tester.run_with_timeout", MagicMock())
def test_nv_driver_check(self):
Expand All @@ -591,7 +591,7 @@ def test_nv_driver_check(self):
os.environ.copy = MagicMock(return_value={})
po.cmd_checker("glxgears", "0000:00:00.0", "nvidia", 1)
# check that the check_offload function gets the correct args
po.check_offload.assert_called_with("glxgears", "0", "NV", 1)
po.check_offload.assert_called_with(["glxgears"], "0", "NV", 1)

@patch("prime_offload_tester.run_with_timeout")
@patch("threading.Thread")
Expand All @@ -608,7 +608,7 @@ def test_not_found(self, mock_thread, mock_run_timeout):
po.cmd_checker("glxgears", "0000:00:00.0", "nvidia", 1)
# check that the check_offload function gets the correct args
mock_thread.assert_called_with(
target=po.check_offload, args=("glxgears", "0", "NV", 1)
target=po.check_offload, args=(["glxgears"], "0", "NV", 1)
)


Expand Down
Loading