updated report with histogram
cehbrecht committed Sep 5, 2023
1 parent cbca5c2 commit 3f91565
Showing 5 changed files with 55 additions and 23 deletions.
37 changes: 25 additions & 12 deletions duck/data_stats.py
@@ -3,6 +3,11 @@
import numpy as np
import yaml
import pathlib
import io
import base64

# import matplotlib
# matplotlib.use('agg')

def get_stats(data):
return {
@@ -20,6 +25,7 @@ def __init__(self, output_dir):
else:
self.output_dir = pathlib.Path(output_dir)
self.info = None
self.histogram = None


def gen_data_stats(self, filename, var, nbins=100):
@@ -41,14 +47,28 @@ def gen_data_stats(self, filename, var, nbins=100):
mratio = 1 - mratio / (nlon * nlat)

# TODO: It would be great to store the distribution graph in a database
if False:
if True:
plt.close()
plt.imshow(hist, aspect="auto", origin='lower', extent=[vstats["min"], vstats["max"], 0, ntime], cmap="gist_ncar")
ax = plt.gca()
ax.grid(color='gray', linestyle='-.', linewidth=1)
plt.xlabel(var)
plt.ylabel("Timesteps")
outfile = self.output_dir / "histime.png"
plt.savefig(outfile.as_posix(), dpi=50)
# outfile = self.output_dir / "histogram.png"
# print(f"histogram: {outfile}")
# plt.savefig(outfile.as_posix(), dpi=50)
# store as base64
# Save the plot to a BytesIO object
buffer = io.BytesIO()
plt.savefig(buffer, format='png')
buffer.seek(0)

# Encode the BytesIO object as base64
base64_encoded_plot = base64.b64encode(buffer.read()).decode('utf-8')
print(f"{base64_encoded_plot}")
self.histogram = base64_encoded_plot
# close plot
plt.close()

# The following information should be stored in a database
attrs = {}
@@ -64,18 +84,11 @@ def gen_data_stats(self, filename, var, nbins=100):
self.info["Vars"] = list(dict(ds.variables).keys())
self.info["Vstats"] = vstats
self.info["Mstats"] = get_stats(mratio)
# self.info["Hist"] = hist
print(self.info)
# print(self.info)

def write_json(self):
outfile = self.output_dir / "info.txt"
with open(outfile.as_posix(), "w") as f:
yaml.dump(self.info, f)
return outfile

def write_png(self):
outfile = self.output_dir / "histime.png"
return outfile
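
A minimal, self-contained sketch of the encode step that gen_data_stats now performs, plus the matching decode (the toy data, the agg backend choice, and the roundtrip.png filename are illustrative, not part of the commit):

import base64
import io

import matplotlib
matplotlib.use("agg")  # headless backend, as hinted by the commented-out lines above
import matplotlib.pyplot as plt
import numpy as np

# Stand-in for the per-timestep histogram image built in gen_data_stats.
plt.imshow(np.random.rand(10, 100), aspect="auto", origin="lower", cmap="gist_ncar")
plt.xlabel("value")
plt.ylabel("Timesteps")

# Encode the current figure as a base64 PNG string (same pattern as the diff).
buffer = io.BytesIO()
plt.savefig(buffer, format="png", dpi=50)
plt.close()
buffer.seek(0)
encoded = base64.b64encode(buffer.read()).decode("utf-8")

# Decode it again, e.g. to check what will later be embedded in the HTML dashboard.
with open("roundtrip.png", "wb") as f:
    f.write(base64.b64decode(encoded))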




1 change: 1 addition & 0 deletions duck/processes/wps_clintai.py
@@ -182,6 +182,7 @@ def _handler(self, request, response):
"mean": stats["mean"],
"stddev": stats["std"],
"info": json.dumps(datastats.info, separators=(',', ':')),
"histogram": datastats.histogram,
},
[datasets[0].as_posix()],
[f"{datasets[0].as_posix()}_infilled.nc"],
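For context on the handler snippet above (the new "histogram" entry sits next to the existing compact-JSON info field): json.dumps(..., separators=(',', ':')) drops the default ", " and ": " separators so the serialized info stays small before it is attached to the provenance record. A small illustration (the example dict is made up):

import json

info = {"Shape": [12, 96, 192], "Vars": ["tas", "lat", "lon"]}
print(json.dumps(info))                         # {"Shape": [12, 96, 192], "Vars": ["tas", "lat", "lon"]}
print(json.dumps(info, separators=(',', ':')))  # {"Shape":[12,96,192],"Vars":["tas","lat","lon"]}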
2 changes: 1 addition & 1 deletion duck/processes/wps_dashboard.py
@@ -53,7 +53,7 @@ def _handler(self, request, response):

def write_html(self, df, workdir):
# Convert the DataFrame to an HTML table
html_table = df.to_html(index=False)
html_table = df.to_html(escape=False, index=False)

# Define the HTML template
html_template = f"""
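The escape=False added above matters because the dashboard cells now contain raw HTML: the <img> and <pre> fragments produced by display_image and display_json in duck/query.py. With pandas' default escape=True the tags would be rendered as literal text instead of an embedded image. A short sketch of the difference (the one-row DataFrame is illustrative):

import pandas as pd

df = pd.DataFrame([{"Histogram": '<img src="data:image/png;base64,..." width="200"/>'}])
print(df.to_html(index=False))                 # tags escaped to &lt;img ...&gt;, shown as literal text
print(df.to_html(escape=False, index=False))   # tags kept, so the browser displays the inline image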
1 change: 1 addition & 0 deletions duck/provenance.py
@@ -95,6 +95,7 @@ def add_operator(self, operator, parameters, collection, output, start, end):
"stddev",
"mean",
"info",
"histogram",
"dataset_name",
"variable_name",
]:
37 changes: 27 additions & 10 deletions duck/query.py
@@ -1,10 +1,22 @@
from duck.db import GraphDB
import pandas as pd
import base64
from PIL import Image
import io
import json

def display_image(base64_image):
img_data = base64.b64decode(base64_image)
img = Image.open(io.BytesIO(img_data))
return '<img src="data:image/png;base64,{}" width="200"/>'.format(base64_image)

def display_json(data):
content = json.loads(data)
return f"<pre>{content}</pre>"

def query():
query_str = """
SELECT ?process ?dataset ?variable ?startTime ?endTime ?input ?output ?min ?max ?mean ?stddev ?info
SELECT ?process ?dataset ?variable ?startTime ?endTime ?input ?output ?min ?max ?mean ?stddev ?info ?histogram
WHERE {
?exec rdf:type provone:Execution ;
rdfs:label ?process ;
@@ -16,7 +28,8 @@ def query():
clint:max ?max ;
clint:mean ?mean ;
clint:stddev ?stddev ;
clint:info ?info .
clint:info ?info ;
clint:histogram ?histogram .
?input rdf:type prov:Entity .
@@ -43,20 +56,24 @@ def query():
max = row.max.value
mean = row.mean.value
stddev = row.stddev.value
info = row.info.value
data.append({
info = json.loads(row.info.value)
histogram = row.histogram.value
entry = {
"Process": process,
"Dataset": dataset,
"Variable": variable,
"Start Time": start_time,
"End Time": end_time,
"Input": input,
"Output": output,
"Min": min,
"Max": max,
"Mean": mean,
"StdDev": stddev,
"Info": info,
})
# "Min": min,
# "Max": max,
# "Mean": mean,
# "StdDev": stddev,
"Histogram": display_image(histogram),
}
for key in info:
entry[key] = info[key]
data.append(entry)
df = pd.DataFrame(data)
return df
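
A hypothetical usage sketch tying the pieces together (it assumes the graph database behind GraphDB is already populated; the dashboard.html filename and the __main__ guard are made up for illustration):

if __name__ == "__main__":
    df = query()
    # Each row carries an inline <img> tag in the "Histogram" column plus one column per key of the
    # stored info JSON, so the table has to be written with escape=False, as wps_dashboard.py now does.
    df.to_html("dashboard.html", escape=False, index=False)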
