-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Resource allocation colormap #3453
Changes from 12 commits
88dc031
447e01a
442e735
43aa8bf
3afba3a
a36dc8a
071eb5f
2ecfa1e
d5996f1
dd12143
1c30fc8
82c95e1
2cb3f37
2ea03e2
60ec9fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import pandas as pd | ||
|
||
# Example data loading | ||
filename = './data/jobs_2024-02-21.tsv.gz' | ||
df = pd.read_csv(filename, sep='\t', dtype={'extra_info': str}) | ||
|
||
# Convert string to timedelta, then to total seconds | ||
df['ElapsedRawTime'] = pd.to_timedelta( | ||
df['ElapsedRawTime']).apply( | ||
lambda x: x.total_seconds()) | ||
|
||
cname = "Validate" | ||
sname = "Diversity types - alpha_vector" | ||
df = df[(df.cName == cname) & (df.sName == sname)] | ||
|
||
df['samples'] = df['samples'].fillna(0).astype(int) | ||
df['columns'] = df['columns'].fillna(0).astype(int) | ||
df['input_size'] = df['input_size'].fillna(0).astype(int) | ||
df['MaxRSSRaw'] = df['MaxRSSRaw'].fillna(0).astype(int) | ||
df['ElapsedRawTime'] = df['ElapsedRawTime'].fillna(0).astype(int) | ||
|
||
COL_NAME = 'samples * columns' | ||
df[COL_NAME] = df['samples'] * df['columns'] | ||
columns = ["MaxRSSRaw", "ElapsedRawTime"] | ||
max_rows = [] | ||
|
||
for curr in columns: | ||
# Get the maximum value for 'curr' within each COL_NAME group | ||
max_values = df.groupby(COL_NAME)[curr].transform(max) | ||
# Filter rows where the current column's value | ||
# is the maximum within its group | ||
curr_rows = df[df[curr] == max_values] | ||
max_rows.append(curr_rows) | ||
|
||
filtered_df = pd.concat(max_rows).drop_duplicates().reset_index(drop=True) | ||
|
||
# INSERT INTO qiita.processing_job(processing_job_id, email, command_id, | ||
# command_parameters, processing_job_status_id) | ||
# VALUES('ca27ddbc-a678-4b09-8a1d-b65f52f8eb49', | ||
# '[email protected]', 1, '""'::json, 1); | ||
|
||
# INSERT INTO qiita.slurm_resource_allocations(processing_job_id, samples, | ||
# columns, input_size, extra_info, memory_used, walltime_used) | ||
# VALUES('ca27ddbc-a678-4b09-8a1d-b65f52f8eb49', 39, 81, 2, 'nan', | ||
# 327036000, 91); | ||
|
||
# processing_job_id uuid NOT NULL, | ||
# samples integer, | ||
# columns integer, | ||
# input_size bigint, | ||
# extra_info varchar DEFAULT NULL, | ||
# memory_used bigint, | ||
# walltime_used integer, | ||
|
||
res = "" | ||
|
||
for index, row in filtered_df.iterrows(): | ||
res += f"""('{row['QiitaID']}', '[email protected]', 1, '""'::json, 1),\n""" | ||
res += ";\n" | ||
res += "Split\n" | ||
for index, row in filtered_df.iterrows(): | ||
res += ( | ||
f"('{row['QiitaID']}', {int(row['samples'])}, " | ||
f"{int(row['columns'])}, {int(row['input_size'])}, " | ||
f"'{row['extra_info']}', {int(row['MaxRSSRaw'])}, " | ||
f"{int(row['ElapsedRawTime'])}),\n" | ||
) | ||
|
||
res += ";\n" | ||
|
||
with open("sql.txt", 'w') as filename: | ||
filename.write(res) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,3 +62,11 @@ CREATE INDEX IF NOT EXISTS processing_job_command_parameters_payload ON qiita.pr | |
-- Adding constraints for the slurm_reservation column | ||
ALTER TABLE qiita.analysis DROP CONSTRAINT IF EXISTS analysis_slurm_reservation_valid_chars; | ||
ALTER TABLE qiita.analysis ADD CONSTRAINT analysis_slurm_reservation_valid_chars CHECK ( slurm_reservation ~ '^[a-zA-Z0-9_]*$' ); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that we did a release this morning, so these lines need to be moved to 94.sql. |
||
-- Jan 7, 2025 | ||
-- Adding a table for formulas for resource allocations | ||
CREATE TABLE qiita.allocation_equations ( | ||
equation_id SERIAL PRIMARY KEY, | ||
equation_name TEXT NOT NULL, | ||
expression TEXT NOT NULL | ||
); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
INSERT INTO qiita.allocation_equations(equation_name, expression) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as the other SQL file: these lines need to be moved to 94.sql. |
||
VALUES | ||
('mem_model1', '(k * (np.log(x))) + (x * a) + b'), | ||
('mem_model2', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + a'), | ||
('mem_model3', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**3))'), | ||
('mem_model4', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**2.5))'), | ||
('time_model1', 'a + b + ((np.log(x)) * k)'), | ||
('time_model2', 'a + (b * x) + ((np.log(x)) * k)'), | ||
('time_model3', 'a + (b * ((np.log(x))**2)) + ((np.log(x)) * k)'), | ||
('time_model4', '(a * ((np.log(x))**3)) + (b * ((np.log(x))**2)) + ((np.log(x)) * k)'); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1327,8 +1327,7 @@ def setUp(self): | |
|
||
def test_plot_return(self): | ||
# check the plot returns correct objects | ||
fig1, axs1 = qdb.util.resource_allocation_plot( | ||
self.df, self.cname, self.sname, self.col_name) | ||
fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name) | ||
self.assertIsInstance( | ||
fig1, Figure, | ||
"Returned object fig1 is not a Matplotlib Figure") | ||
|
@@ -1344,46 +1343,46 @@ def test_minimize_const(self): | |
self.df[self.col_name] = self.df.samples * self.df['columns'] | ||
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False) | ||
|
||
bm, options = qdb.util._resource_allocation_plot_helper( | ||
self.df, axs[0], self.cname, self.sname, 'MaxRSSRaw', | ||
qdb.util.MODELS_MEM, self.col_name) | ||
mem_models, time_models = qdb.util._retrieve_equations() | ||
bm_name, bm, options = qdb.util._resource_allocation_plot_helper( | ||
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name) | ||
# check that the algorithm chooses correct model for MaxRSSRaw and | ||
# has 0 failures | ||
k, a, b = options.x | ||
failures_df = qdb.util._resource_allocation_failures( | ||
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw') | ||
failures_df = qdb.util._resource_allocation_success_failures( | ||
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1] | ||
failures = failures_df.shape[0] | ||
self.assertEqual(bm, qdb.util.mem_model3, | ||
|
||
self.assertEqual(bm_name, 'mem_model4', | ||
msg=f"""Best memory model | ||
doesn't match | ||
{bm_name} != 'mem_model4'""") | ||
self.assertEqual(bm, mem_models['mem_model4'], | ||
msg=f"""Best memory model | ||
doesn't match | ||
Coefficients:{k} {a} {b} | ||
{qdb.util.mem_model1}, "qdb.util.mem_model1" | ||
{qdb.util.mem_model2}, "qdb.util.mem_model2" | ||
{qdb.util.mem_model3}, "qdb.util.mem_model3" | ||
{qdb.util.mem_model4}, "qdb.util.mem_model4" | ||
""") | ||
self.assertEqual(failures, 0, "Number of failures must be 0") | ||
|
||
# check that the algorithm chooses correct model for ElapsedRaw and | ||
# has 0 failures | ||
bm, options = qdb.util._resource_allocation_plot_helper( | ||
self.df, axs[1], self.cname, self.sname, 'ElapsedRaw', | ||
qdb.util.MODELS_TIME, self.col_name) | ||
bm_name, bm, options = qdb.util._resource_allocation_plot_helper( | ||
self.df, axs[1], 'ElapsedRaw', time_models, self.col_name) | ||
k, a, b = options.x | ||
failures_df = qdb.util._resource_allocation_failures( | ||
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw') | ||
failures_df = qdb.util._resource_allocation_success_failures( | ||
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1] | ||
failures = failures_df.shape[0] | ||
self.assertEqual(bm_name, 'time_model4', | ||
msg=f"""Best time model | ||
doesn't match | ||
{bm_name} != 'time_model4'""") | ||
|
||
self.assertEqual(bm, qdb.util.time_model1, | ||
self.assertEqual(bm, time_models[bm_name], | ||
msg=f"""Best time model | ||
doesn't match | ||
Coefficients:{k} {a} {b} | ||
{qdb.util.time_model1}, "qdb.util.time_model1" | ||
{qdb.util.time_model2}, "qdb.util.time_model2" | ||
{qdb.util.time_model3}, "qdb.util.time_model3" | ||
{qdb.util.time_model4}, "qdb.util.time_model4" | ||
""") | ||
self.assertEqual(failures, 1, "Number of failures must be 1") | ||
self.assertEqual(failures, 0, "Number of failures must be 0") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just wondering why this changed from 1 to 0? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The code now chooses the 4th model instead of the 1st, and the 4th model has 0 failures. |
||
|
||
def test_MaxRSS_helper(self): | ||
tests = [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this file? Can it be deleted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can delete this file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thank you; then please rm.