Skip to content

Commit

Permalink
Using qiita.allocation_equations table in util.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Gossty committed Jan 9, 2025
1 parent dd12143 commit 1c30fc8
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 55 deletions.
9 changes: 9 additions & 0 deletions qiita_db/support_files/patches/test_db_sql/93.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Seed qiita.allocation_equations with the candidate resource-allocation
-- models: four memory models (mem_model*) and four time models (time_model*).
-- Each expression is stored as Python source text written in terms of the
-- input x and the fitted constants k, a and b, and relies on numpy being
-- available as `np` wherever the expression is evaluated.
INSERT INTO qiita.allocation_equations(equation_name, expression)
VALUES ('mem_model1', 'k * np.log(x) + x * a + b'),
('mem_model2', 'k * np.log(x) + b * np.log(x)**2 + a'),
('mem_model3', 'k * np.log(x) + b * np.log(x)**2 + a * np.log(x)**3'),
('mem_model4', 'k * np.log(x) + b * np.log(x)**2 + a * np.log(x)**2.5'),
('time_model1', 'a + b + np.log(x) * k'),
('time_model2', 'a + b * x + np.log(x) * k'),
('time_model3', 'a + b * np.log(x)**2 + np.log(x) * k'),
('time_model4', 'a * np.log(x)**3 + b * np.log(x)**2 + np.log(x) * k');
29 changes: 16 additions & 13 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1343,42 +1343,45 @@ def test_minimize_const(self):
self.df[self.col_name] = self.df.samples * self.df['columns']
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)

bm, options = qdb.util._resource_allocation_plot_helper(
self.df, axs[0], 'MaxRSSRaw', qdb.util.MODELS_MEM, self.col_name)
mem_models, time_models = qdb.util._retrieve_equations()
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
# check that the algorithm chooses correct model for MaxRSSRaw and
# has 0 failures
k, a, b = options.x
failures_df = qdb.util._resource_allocation_success_failures(
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1]
failures = failures_df.shape[0]
self.assertEqual(bm, qdb.util.mem_model3,

self.assertEqual(bm_name, 'mem_model3',
msg=f"""Best memory model
doesn't match
{bm_name} != 'mem_model3'""")
self.assertEqual(bm, mem_models['mem_model3'],
msg=f"""Best memory model
doesn't match
Coefficients:{k} {a} {b}
{qdb.util.mem_model1}, "qdb.util.mem_model1"
{qdb.util.mem_model2}, "qdb.util.mem_model2"
{qdb.util.mem_model3}, "qdb.util.mem_model3"
{qdb.util.mem_model4}, "qdb.util.mem_model4"
""")
self.assertEqual(failures, 0, "Number of failures must be 0")

# check that the algorithm chooses correct model for ElapsedRaw and
# has 1 failure
bm, options = qdb.util._resource_allocation_plot_helper(
self.df, axs[1], 'ElapsedRaw', qdb.util.MODELS_TIME, self.col_name)
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
self.df, axs[1], 'ElapsedRaw', time_models, self.col_name)
k, a, b = options.x
failures_df = qdb.util._resource_allocation_success_failures(
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1]
failures = failures_df.shape[0]

self.assertEqual(bm_name, 'time_model1',
msg=f"""Best time model
doesn't match
{bm_name} != 'time_model1'""")

self.assertEqual(bm, qdb.util.time_model1,
msg=f"""Best time model
doesn't match
Coefficients:{k} {a} {b}
{qdb.util.time_model1}, "qdb.util.time_model1"
{qdb.util.time_model2}, "qdb.util.time_model2"
{qdb.util.time_model3}, "qdb.util.time_model3"
{qdb.util.time_model4}, "qdb.util.time_model4"
""")
self.assertEqual(failures, 1, "Number of failures must be 1")

Expand Down
116 changes: 74 additions & 42 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,41 +81,23 @@
from json import loads
from scipy.optimize import minimize

# memory constant functions defined for @resource_allocation_plot
mem_model1 = (lambda x, k, a, b: k * np.log(x) + x * a + b)
mem_model2 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 + a)
mem_model3 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 +
a * np.log(x)**3)
mem_model4 = (lambda x, k, a, b: k * np.log(x) + b * np.log(x)**2 +
a * np.log(x)**2.5)
MODELS_MEM = [mem_model1, mem_model2, mem_model3, mem_model4]

# time constant functions defined for @resource_allocation_plot
time_model1 = (lambda x, k, a, b: a + b + np.log(x) * k)
time_model2 = (lambda x, k, a, b: a + b * x + np.log(x) * k)
time_model3 = (lambda x, k, a, b: a + b * np.log(x)**2 + np.log(x) * k)
time_model4 = (lambda x, k, a, b: a * np.log(x)**3 + b * np.log(x)**2 +
np.log(x) * k)

MODELS_TIME = [time_model1, time_model2, time_model3, time_model4]


# Human-readable formula for every known allocation model, keyed by the
# model's name (the same names used as keys of the model dictionaries).
_MODEL_FORMULAS = {
    'mem_model1': "k * log(x) + x * a + b",
    'mem_model2': "k * log(x) + b * log(x)^2 + a",
    'mem_model3': "k * log(x) + b * log(x)^2 + a * log(x)^3",
    'mem_model4': "k * log(x) + b * log(x)^2 + a * log(x)^2.5",
    'time_model1': "a + b + log(x) * k",
    'time_model2': "a + b * x + log(x) * k",
    'time_model3': "a + b * log(x)^2 + log(x) * k",
    'time_model4': "a * log(x)^3 + b * log(x)^2 + log(x) * k",
}


def get_model_name(model):
    """Return the display formula for an allocation model.

    Parameters
    ----------
    model : str
        Name of the model, e.g. 'mem_model1' or 'time_model3'.

    Returns
    -------
    str
        The formula written with log(x) and ^ for powers, or
        "Unknown model" when the name is not recognised.
    """
    # Models are identified by name only; the former module-level lambda
    # objects no longer exist, so they must not be compared against here.
    return _MODEL_FORMULAS.get(model, "Unknown model")
Expand Down Expand Up @@ -2387,19 +2369,63 @@ def resource_allocation_plot(df, col_name):
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)

ax = axs[0]
mem_models, time_models = _retrieve_equations()

# models for memory
_resource_allocation_plot_helper(
df, ax, "MaxRSSRaw", MODELS_MEM, col_name)

df, ax, "MaxRSSRaw", mem_models, col_name)
ax = axs[1]
# models for time
_resource_allocation_plot_helper(
df, ax, "ElapsedRaw", MODELS_TIME, col_name)
df, ax, "ElapsedRaw", time_models, col_name)

return fig, axs


def _retrieve_equations():
    '''
    Helper function for resource_allocation_plot.
    Retrieves equations from db. Creates dictionary for memory and time models.

    Every model is a callable ``f(x, k, a, b)`` that evaluates the expression
    stored for that equation; the dictionaries are keyed by equation name.

    Returns
    -------
    tuple
        dict
            memory models - potential memory models for resource allocations
        dict
            time models - potential time models for resource allocations
    '''
    def _as_model(expression):
        # Build the callable in its own scope so it captures *this* row's
        # expression. A lambda created directly in the loop would look the
        # loop variable up at call time and every model would end up
        # evaluating the last row's expression.
        # NOTE(review): eval executes text stored in
        # qiita.allocation_equations; that table must only be writable by
        # trusted code. Names in the expression resolve against x, k, a, b
        # and this module's globals (e.g. np).
        return lambda x, k, a, b: eval(expression)

    memory_models = {}
    time_models = {}
    with qdb.sql_connection.TRN:
        sql = ''' SELECT * FROM qiita.allocation_equations; '''
        qdb.sql_connection.TRN.add(sql)
        res = qdb.sql_connection.TRN.execute_fetchindex()
        for row in res:
            # column 1 holds the equation name, column 2 its expression
            name, expression = row[1], row[2]
            target = memory_models if 'mem' in name else time_models
            # key by name for both kinds of model (time models used to be
            # keyed by the expression text by mistake)
            target[name] = _as_model(expression)
    return (memory_models, time_models)


def retrieve_resource_data(cname, sname, version, columns):
'''
Retrieves resource data from db and constructs a DataFrame with relevant
fields.
Parameters
----------
cname - command name for which we retrieve the resources
sname - software name for which we retrieve the resources
version - version of software for which we retrieve the resources
columns - column names for the DataFrame returned by this function
Returns
-------
pd.DataFrame
DataFrame with resources.
'''
with qdb.sql_connection.TRN:
sql = """
SELECT
Expand Down Expand Up @@ -2457,8 +2483,8 @@ def _resource_allocation_plot_helper(
Specifies x axis for the graph
curr: str, required
Either MaxRSSRaw or ElapsedRaw (y axis)
models: list, required
List of functions that will be used for visualization
models: dictionary, required
Dictionary of functions that will be used for visualization
"""

Expand Down Expand Up @@ -2494,7 +2520,7 @@ def _resource_allocation_plot_helper(
ax.set_xlabel(col_name)

# 50 - number of maximum iterations, 3 - number of failures we tolerate
best_model, options = _resource_allocation_calculate(
best_model_name, best_model, options = _resource_allocation_calculate(
df, x_data, y_data, models, curr, col_name, 50, 3)
k, a, b = options.x
x_plot = np.array(sorted(df[col_name].unique()))
Expand Down Expand Up @@ -2522,22 +2548,24 @@ def _resource_allocation_plot_helper(
label="failures")
success_df['node_name'] = success_df['node_name'].fillna('unknown')
slurm_hosts = set(success_df['node_name'].tolist())
cmap = colormaps.get_cmap('Accent').resampled(len(slurm_hosts))
colors = [cmap(
i / (len(slurm_hosts) - 1)) for i in range(len(slurm_hosts))]
cmap = colormaps.get_cmap('Accent')
if len(slurm_hosts) > len(cmap.colors):
raise ValueError(f"""'Accent' colormap only has {len(cmap.colors)}
colors, but {len(slurm_hosts)} hosts are provided.""")
colors = cmap.colors[:len(slurm_hosts)]

for i, host in enumerate(slurm_hosts):
host_df = success_df[success_df['node_name'] == host]
ax.scatter(host_df[col_name], host_df[curr], color=colors[i], s=3,
label=host)
ax.set_title(
f'k||a||b: {k}||{a}||{b}\n'
f'model: {get_model_name(best_model)}\n'
f'model: {get_model_name(best_model_name)}\n'
f'real: {mini} || {maxi}\n'
f'calculated: {cmin} || {cmax}\n'
f'failures: {failures}')
ax.legend(loc='upper left')
return best_model, options
return best_model_name, best_model, options


def _resource_allocation_calculate(
Expand All @@ -2555,27 +2583,30 @@ def _resource_allocation_calculate(
current type (e.g. MaxRSSRaw)
col_name: str, required
Specifies x axis for the graph
models: list, required
List of functions that will be used for visualization
models: dictionary, required
Dictionary of functions that will be used for visualization
depth: int, required
Maximum number of iterations in binary search
tolerance: int, required,
Tolerance to number of failures possible to be considered as a model
Returns
----------
best_model_name: string
the name of the best model from the table
best_model: function
best fitting function for the current list models
best fitting function for the current dictionary models
best_result: object
object containing constants for the best model (e.g. k, a, b in kx+b*a)
"""

init = [1, 1, 1]
best_model_name = None
best_model = None
best_result = None
best_failures = np.inf
best_max = np.inf
for model in models:
for model_name, model in models.items():
# start values for binary search, where sl is left, sr is right
# penalty weight must be positive & non-zero, hence, sl >= 1.
# the upper bound for error can be an arbitrary large number
Expand Down Expand Up @@ -2646,9 +2677,10 @@ def _resource_allocation_calculate(
if min_max <= best_max:
best_failures = prev_failures
best_max = min_max
best_model_name = model_name
best_model = model
best_result = res
return best_model, best_result
return best_model_name, best_model, best_result


def _resource_allocation_custom_loss(params, x, y, model, p):
Expand Down

0 comments on commit 1c30fc8

Please sign in to comment.