Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CBG-4322 use unique output files #7352

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
extend-include = ["tools/sgcollect_info"]

[tool.ruff.lint]
extend-select = ["I"]
extend-select = ["I","B006"] # isort, mutable default argument
51 changes: 51 additions & 0 deletions tools-tests/sgcollect_info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,54 @@ def test_make_collect_logs_heap_profile(tmpdir):
assert [tasks[0].log_file] == [pprof_file.basename]
# ensure that this is not redacted task
assert tasks[0].description.startswith("Contents of")


@pytest.mark.parametrize("should_redact", [True, False])
def test_make_collect_logs_tasks_duplicate_files(should_redact, tmp_path):
    """Log files sharing a basename across the two configured log
    directories must be collected under distinct output names."""
    log_dir1 = tmp_path / "tmpdir1"
    log_dir2 = tmp_path / "tmpdir2"
    config = """
    {{"logfilepath": "{tmpdir1}",
    "logging": {{ "log_file_path": "{tmpdir2}" }}
    }}
    """
    # Seed both directories with identically-named log files to force a
    # basename collision when the tasks are built.
    for log_dir in (log_dir1, log_dir2):
        log_dir.mkdir()
        for log_name in ("sg_info.log", "sg_info-01.log.gz"):
            (log_dir / log_name).write_text("foo")

    # Backslashes are escaped so Windows paths survive JSON parsing.
    rendered_config = config.format(
        tmpdir1=str(log_dir1).replace("\\", "\\\\"),
        tmpdir2=str(log_dir2).replace("\\", "\\\\"),
    ).encode("utf-8")
    with unittest.mock.patch(
        "sgcollect_info.urlopen_with_basic_auth",
        return_value=io.BytesIO(rendered_config),
    ):
        tasks = sgcollect_info.make_collect_logs_tasks(
            sg_url="fakeurl",
            sg_config_file_path="",
            sg_username="",
            sg_password="",
        )
    # assert all tasks have unique log_file names
    collected_names = {task.log_file for task in tasks}
    assert len(collected_names) == len(tasks)
    assert collected_names == {
        "sg_info.log",
        "sg_info.log.1",
        "sg_info-01.log.gz",
        "sg_info-01.log.gz.1",
    }


@pytest.mark.parametrize(
    "basenames, filename, expected",
    [
        ({}, "foo", "foo"),
        ({"foo"}, "foo", "foo.1"),
        ({"foo", "foo.1"}, "foo", "foo.2"),
    ],
)
def test_get_unique_filename(basenames, filename, expected):
    """A colliding name gets an increasing numeric suffix; a free name is
    returned unchanged."""
    result = sgcollect_info.get_unique_filename(basenames, filename)
    assert result == expected
12 changes: 7 additions & 5 deletions tools-tests/tasks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,25 @@


@pytest.mark.parametrize("tag_userdata", [True, False])
def test_add_file_task(tmpdir, tag_userdata):
def test_add_file_task(tmp_path, tag_userdata):
if tag_userdata:
expected = REDACTED_OUTPUT
else:
expected = REDACTED_OUTPUT.replace("<ud>foo</ud>", "foo")

filename = "config.json"
config_file = tmpdir.join(filename)
config_file.write(INPUT_CONFIG)
config_file = tmp_path / filename
config_file.write_text(INPUT_CONFIG)
postprocessors = [password_remover.remove_passwords]
if tag_userdata:
postprocessors.append(password_remover.tag_userdata_in_server_config)
task = tasks.add_file_task(
config_file.strpath,
sourcefile_path=config_file,
output_path=config_file.name,
content_postprocessors=postprocessors,
)
output_dir = tmpdir.mkdir("output")
output_dir = tmp_path / "output"
output_dir.mkdir()
runner = tasks.TaskRunner(
verbosity=VERBOSE,
default_name="sg.log",
Expand Down
18 changes: 17 additions & 1 deletion tools/sgcollect_info
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,18 @@ def urlopen_with_basic_auth(url, username, password):
return urllib.request.urlopen(url)


def get_unique_filename(filenames: set[str], original_filename: str) -> str:
    """Return a filename not present in *filenames*.

    The original name is tried first; on collision, ``.1``, ``.2``, ... is
    appended until an unused name is found.
    """
    candidate = original_filename
    suffix = 0
    while candidate in filenames:
        suffix += 1
        candidate = f"{original_filename}.{suffix}"
    return candidate


def make_collect_logs_tasks(
sg_url: str,
sg_config_file_path: Optional[str],
Expand Down Expand Up @@ -391,6 +403,7 @@ def make_collect_logs_tasks(

# Keep a dictionary of log file paths we've added, to avoid adding duplicates
sg_log_file_paths: set[pathlib.Path] = set()
output_basenames: set[str] = set()

sg_tasks: List[PythonTask] = []

Expand All @@ -399,8 +412,10 @@ def make_collect_logs_tasks(
canonical_filename.resolve()
if canonical_filename in sg_log_file_paths:
return
output_basename = get_unique_filename(output_basenames, canonical_filename.name)
sg_log_file_paths.add(canonical_filename)
task = add_file_task(sourcefile_path=filename)
output_basenames.add(output_basename)
task = add_file_task(sourcefile_path=filename, output_path=output_basename)
task.no_header = True
sg_tasks.append(task)

Expand Down Expand Up @@ -538,6 +553,7 @@ def make_config_tasks(
for sg_config_file in sg_config_files:
task = add_file_task(
sourcefile_path=sg_config_file,
output_path=os.path.basename(sg_config_file),
content_postprocessors=server_config_postprocessors,
)
collect_config_tasks.append(task)
Expand Down
15 changes: 9 additions & 6 deletions tools/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def make_curl_task(
url,
user="",
password="",
content_postprocessors=[],
content_postprocessors: Optional[List[Callable]] = None,
timeout=60,
log_file="python_curl.log",
**kwargs,
Expand Down Expand Up @@ -496,8 +496,9 @@ def python_curl_task():
print("WARNING: Error connecting to url {0}: {1}".format(url, e))

response_string = response_file_handle.read()
for content_postprocessor in content_postprocessors:
response_string = content_postprocessor(response_string)
if content_postprocessors:
for content_postprocessor in content_postprocessors:
response_string = content_postprocessor(response_string)
return response_string

return PythonTask(
Expand All @@ -506,8 +507,10 @@ def python_curl_task():


def add_file_task(
sourcefile_path, content_postprocessors: Optional[List[Callable]] = None
) -> PythonTask:
sourcefile_path: Union[pathlib.Path, str],
output_path: Union[pathlib.Path, str],
content_postprocessors: Optional[List[Callable]] = None,
):
"""
Adds the contents of a file to the output zip

Expand All @@ -525,7 +528,7 @@ def python_add_file_task():
task = PythonTask(
description="Contents of {0}".format(sourcefile_path),
callable=python_add_file_task,
log_file=os.path.basename(sourcefile_path),
log_file=output_path,
log_exception=False,
)

Expand Down
Loading