Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added column_name variability to resource_allocation #3457

Open
wants to merge 25 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
88dc031
Update to DB qiita.slurm_resource_allocations
Gossty Apr 30, 2024
447e01a
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty May 7, 2024
442e735
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Jun 5, 2024
43aa8bf
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Jun 26, 2024
3afba3a
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Jul 2, 2024
a36dc8a
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Jul 9, 2024
071eb5f
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Jul 12, 2024
2ecfa1e
Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev
Gossty Dec 16, 2024
d5996f1
added colormap, create equation table
Gossty Jan 7, 2025
dd12143
Fixed styling
Gossty Jan 7, 2025
1c30fc8
Using qiita.allocation_equations table in util.py
Gossty Jan 9, 2025
82c95e1
Debug
Gossty Jan 9, 2025
2cb3f37
Updates to @antgonza comments
Gossty Jan 12, 2025
2ea03e2
Changes to @antgonza comments
Gossty Jan 14, 2025
60ec9fd
Back to np.log
Gossty Jan 14, 2025
49b4a79
Added column_name variability
Gossty Feb 11, 2025
fbe01a2
Fix styling
Gossty Feb 11, 2025
7da3659
Merge branch 'dev' into resource-allocation-colormap
Gossty Feb 11, 2025
9717adb
Update description of software retrieval function
Gossty Feb 11, 2025
8d27f89
Remove the unnecessary test_db 94.sql file
Gossty Feb 11, 2025
7fb202a
Update test_user.py
Gossty Feb 11, 2025
c9ce88b
Updates to @antgonza comments
Gossty Feb 12, 2025
43e1463
Update meta_util.py
Gossty Feb 12, 2025
4032ed5
Update util.py
Gossty Feb 12, 2025
8e148b6
Time limit for resource_allocation_plot
Gossty Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 168 additions & 77 deletions qiita_db/meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
from hashlib import md5
from re import sub
from json import loads, dump, dumps
import signal
import traceback

from qiita_db.util import create_nested_path, retrieve_resource_data
from qiita_db.util import resource_allocation_plot
Expand Down Expand Up @@ -555,96 +557,185 @@ def generate_plugin_releases():
f(redis_key, v)


def get_software_commands(active):
    """Map each software name to its versions and their command names.

    Parameters
    ----------
    active : bool
        Forwarded as the ``active`` argument of
        ``qdb.software.Software.iter``.

    Returns
    -------
    dict of {str: dict of {str: list of str}}
        ``{software name: {software version: [command name, ...]}}``, made
        only of plain dicts and lists.
    """
    software_commands = {}

    for software in qdb.software.Software.iter(active=active):
        # Plain dicts + setdefault: the previous defaultdict was converted to
        # a dict inside this loop, so a second version of an already seen
        # software name failed with KeyError on the version lookup.
        versions = software_commands.setdefault(software.name, {})

        # A version only gets an entry once it has at least one command.
        for command in software.commands:
            versions.setdefault(software.version, []).append(command.name)

    return software_commands


def _multiply_columns(df, column_names):
    """Return the element-wise product of ``column_names`` taken from ``df``.

    The first column is copied before the in-place multiplications so the
    product is never written back into ``df`` through a shared buffer.
    Returns None when ``column_names`` is empty.
    """
    product = None
    for name in column_names:
        if product is None:
            product = df[name].copy()
        else:
            product *= df[name]
    return product


def _axis_to_png_data_uri(ax):
    """Redraw ``ax`` alone on a new figure and return it as a PNG data URI.

    Copies the first line of ``ax`` and its scatter collections (paired with
    the legend handles/labels), uses log scales on both axes, and always
    closes the temporary figure.
    """
    new_fig = plt.figure()
    try:
        new_ax = new_fig.add_subplot(111)
        line = ax.lines[0]
        new_ax.plot(line.get_xdata(), line.get_ydata(),
                    linewidth=1, color='orange')

        handles, labels = ax.get_legend_handles_labels()
        for handle, label, scatter_data in zip(handles, labels,
                                               ax.collections):
            offsets = scatter_data.get_offsets()
            new_ax.scatter(offsets[:, 0], offsets[:, 1],
                           s=scatter_data.get_sizes(),
                           label=label,
                           color=handle.get_facecolor())

        new_ax.set_xscale('log')
        new_ax.set_yscale('log')
        new_ax.set_xlabel(ax.get_xlabel())
        new_ax.set_ylabel(ax.get_ylabel())
        new_ax.legend(loc='upper left')

        new_fig.tight_layout()
        plot = BytesIO()
        new_fig.savefig(plot, format='png')
        return 'data:image/png;base64,' + quote(
            b64encode(plot.getvalue()).decode('ascii'))
    finally:
        plt.close(new_fig)


def update_resource_allocation_redis(active=True, verbose=False,
                                     time_limit=300):
    """Updates redis with plots and information about current software.

    For every software/version/command and every column name stored in
    ``qiita.resource_allocation_column_names``, builds the resource
    allocation plot, splits it into a memory image and a time image, and
    stores both (plus their titles and the current date) in redis. The
    commands/column names that produced a plot are stored under the
    ``resources:commands`` key.

    Parameters
    ----------
    active: boolean, optional
        Defaults to True. Should only be False when testing.

    verbose: boolean, optional
        Defaults to False. Prints status on what function is running.

    time_limit: integer, optional
        Defaults to 300, representing 5 minutes. This is the limit for how long
        resource_allocation_plot function will run.

    Notes
    -----
    The time limit is enforced with ``signal.SIGALRM`` / ``signal.alarm``.
    A plot that exceeds it is reported on stdout and skipped.
    """
    time = datetime.now().strftime('%m-%d-%y')

    # Retrieve available col_name for commands
    with qdb.sql_connection.TRN:
        sql = 'SELECT col_name FROM qiita.resource_allocation_column_names;'
        qdb.sql_connection.TRN.add(sql)
        col_names = qdb.sql_connection.TRN.execute_fetchflatten()

    # Retrieve available software
    software_list = list(qdb.software.Software.iter(active=active))
    scommands = {}
    for software in software_list:
        version_commands = scommands.setdefault(
            software.name, {}).setdefault(software.version, {})
        for command in software.commands:
            version_commands[command.name] = col_names

    # software commands for which resource allocations were successfully
    # calculated
    scommands_allocation = {}
    for sname, versions in scommands.items():
        for version, commands in versions.items():
            for cname, cmd_col_names in commands.items():
                df = retrieve_resource_data(cname, sname, version, COLUMNS)
                if verbose:
                    print(("\nRetrieving allocation resources for:\n" +
                           f" software: {sname}\n" +
                           f" version: {version}\n" +
                           f" command: {cname}"))
                if len(df) == 0:
                    if verbose:
                        print(("\nNo allocation resources available for" +
                               f" software: {sname}" +
                               f" version: {version}" +
                               f" command: {cname}\n"))
                    continue

                # each col_name looks like col1*col2*col3, etc
                for col_name in cmd_col_names:
                    col_name_split = col_name.split('*')
                    df_copy = df.dropna(subset=col_name_split)

                    # Create a column with the product of the desired columns
                    new_column = _multiply_columns(df_copy, col_name_split)
                    if verbose:
                        print(
                            ("\nBuilding resource allocation plot for:\n" +
                             f" software: {sname}\n" +
                             f" version: {version}\n" +
                             f" command: {cname}\n" +
                             f" column name: {col_name}\n" +
                             f" {datetime.now().strftime('%b %d %H:%M:%S')}"))

                    def timeout_handler(signum, frame):
                        raise TimeoutError((
                            "\nresource_allocation_plot " +
                            "execution exceeded time limit." +
                            "For:\n"
                            f" software: {sname}\n" +
                            f" version: {version}\n" +
                            f" command: {cname}\n" +
                            f" column name: {col_name}\n" +
                            f" {datetime.now().strftime('%b %d %H:%M:%S')}"))

                    signal.signal(signal.SIGALRM, timeout_handler)
                    signal.alarm(time_limit)
                    try:
                        fig, axs = resource_allocation_plot(df_copy,
                                                            col_name,
                                                            new_column,
                                                            verbose=verbose)
                    except TimeoutError:
                        print("Timeout reached!")
                        traceback.print_exc()
                        continue
                    finally:
                        # Disarm on every exit path; otherwise a pending
                        # alarm could fire later, outside of this try block.
                        signal.alarm(0)

                    titles = [0, 0]
                    images = [0, 0]

                    # Splitting 1 image plot into 2 separate for better
                    # layout: one with only memory, one with only time.
                    try:
                        for i, ax in enumerate(axs):
                            titles[i] = ax.get_title()
                            ax.set_title("")
                            images[i] = _axis_to_png_data_uri(ax)
                    finally:
                        plt.close(fig)

                    # SID, CID, col_name
                    values = {
                        "img_mem": images[0],
                        "img_time": images[1],
                        'time': time,
                        "title_mem": titles[0],
                        "title_time": titles[1],
                    }
                    if verbose:
                        print(
                            ("Saving resource allocation image for\n" +
                             f" software: {sname}\n" +
                             f" version: {version}\n" +
                             f" command: {cname}\n" +
                             f" column name: {col_name}\n" +
                             f" {datetime.now().strftime('%b %d %H:%M:%S')}"))

                    for k, v in values.items():
                        redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % (
                            cname, sname, version, col_name, k)
                        r_client.delete(redis_key)
                        r_client.set(redis_key, v)

                    scommands_allocation.setdefault(
                        sname, {}).setdefault(
                        version, {}).setdefault(
                        cname, []).append(col_name)

    redis_key = 'resources:commands'
    r_client.set(redis_key, str(scommands_allocation))
30 changes: 25 additions & 5 deletions qiita_db/support_files/patches/94.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
-- Jan 13, 2025
-- Adding a table for formulas for resource allocations
-- Table with an auto-incrementing id plus a required name and expression,
-- both TEXT.
-- NOTE(review): qiita.resource_allocation_equations, created right after
-- this statement, has the same columns; this looks like the pre-rename
-- definition -- confirm that only one of the two is meant to be created.
CREATE TABLE qiita.allocation_equations (
equation_id SERIAL PRIMARY KEY,
equation_name TEXT NOT NULL,
expression TEXT NOT NULL
);
-- One row per equation: an auto-incrementing id, a required name and the
-- required expression, stored as TEXT.
CREATE TABLE qiita.resource_allocation_equations (
equation_id SERIAL PRIMARY KEY,
equation_name TEXT NOT NULL,
expression TEXT NOT NULL
);

-- Seed four memory models and four time models. Each expression is stored as
-- text written in terms of x, the constants k, a and b, and np.log.
INSERT INTO qiita.resource_allocation_equations(equation_name, expression) VALUES
    ('mem_model1', '(k * (np.log(x))) + (x * a) + b'),
    ('mem_model2', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + a'),
    ('mem_model3', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**3))'),
    ('mem_model4', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**2.5))'),
    ('time_model1', 'a + b + ((np.log(x)) * k)'),
    ('time_model2', 'a + (b * x) + ((np.log(x)) * k)'),
    ('time_model3', 'a + (b * ((np.log(x))**2)) + ((np.log(x)) * k)'),
    ('time_model4', '(a * ((np.log(x))**3)) + (b * ((np.log(x))**2)) + ((np.log(x)) * k)');

-- One row per column name: an auto-incrementing id and the required name,
-- stored as TEXT.
CREATE TABLE qiita.resource_allocation_column_names (
col_name_id SERIAL PRIMARY KEY,
col_name TEXT NOT NULL
);

-- Seed the three single columns (samples, columns, input_size) and every
-- '*'-joined combination of two or three of them.
INSERT INTO qiita.resource_allocation_column_names(col_name) VALUES
('samples'), ('columns'), ('input_size'),
('samples*columns'), ('samples*input_size'),
('columns*input_size'), ('samples*columns*input_size');
10 changes: 0 additions & 10 deletions qiita_db/support_files/patches/test_db_sql/94.sql

This file was deleted.

2 changes: 1 addition & 1 deletion qiita_db/test/test_meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def test_generate_plugin_releases(self):
def test_update_resource_allocation_redis(self):
cname = "Split libraries FASTQ"
sname = "QIIMEq2"
col_name = "samples * columns"
col_name = "samples*columns"
version = "1.9.1"
qdb.meta_util.update_resource_allocation_redis(False)
title_mem_str = 'resources$#%s$#%s$#%s$#%s:%s' % (
Expand Down
2 changes: 1 addition & 1 deletion qiita_db/test/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ def test_mark_messages(self):
user.mark_messages([1, 2])
obs = user.messages()
exp = [True, True, False]
self.assertEqual([x[3] for x in obs], exp)
self.assertCountEqual([x[3] for x in obs], exp)

user.mark_messages([1], read=False)
obs = user.messages()
Expand Down
16 changes: 8 additions & 8 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,7 +1316,7 @@ def setUp(self):
self.cname = "Split libraries FASTQ"
self.sname = "QIIMEq2"
self.version = "1.9.1"
self.col_name = 'samples * columns'
self.col_name = 'samples*columns'
self.columns = [
"sName", "sVersion", "cID", "cName", "processing_job_id",
"parameters", "samples", "columns", "input_size", "extra_info",
Expand All @@ -1327,9 +1327,13 @@ def setUp(self):
self.df = qdb.util.retrieve_resource_data(
self.cname, self.sname, self.version, self.columns)

self.df.dropna(subset=['samples', 'columns'], inplace=True)
self.df[self.col_name] = self.df.samples * self.df['columns']

def test_plot_return(self):
# check the plot returns correct objects
fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name)
fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name,
self.df[self.col_name])
self.assertIsInstance(
fig1, Figure,
"Returned object fig1 is not a Matplotlib Figure")
Expand All @@ -1339,13 +1343,10 @@ def test_plot_return(self):
"Returned object axs1 is not a single Matplotlib Axes object")

def test_minimize_const(self):
self.df = self.df[
(self.df.cName == self.cname) & (self.df.sName == self.sname)]
self.df.dropna(subset=['samples', 'columns'], inplace=True)
self.df[self.col_name] = self.df.samples * self.df['columns']

fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)

mem_models, time_models = qdb.util.retrieve_equations()
mem_models, time_models = qdb.util._retrieve_equations()
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
# check that the algorithm chooses correct model for MaxRSSRaw and
Expand Down Expand Up @@ -1420,7 +1421,6 @@ def test_db_update(self):
'8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0'
]
}

qdb.util.update_resource_allocation_table(test=test_data)

for curr_cname, ids in types.items():
Expand Down
Loading