Skip to content

Commit

Permalink
Merge pull request #961 from djvreeman/dv-20240930-plot-builder-enhancement
Browse files Browse the repository at this point in the history

Update plot-ig-builder-auto.py
  • Loading branch information
grahamegrieve authored Oct 8, 2024
2 parents cd205fe + 8804a2e commit 8f8e49e
Showing 1 changed file with 34 additions and 87 deletions.
121 changes: 34 additions & 87 deletions .azure/plot-ig-builder-auto/plot-ig-builder-auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,166 +23,113 @@
import sys
import os
import numpy as np

import pandas as pd

def parse_version(version):
    """Turn a ``major.minor.patch`` string into a sortable tuple of ints.

    Args:
        version: Version string such as ``"1.6.12"``.

    Returns:
        ``(major, minor, patch)`` as integers, or ``(0, 0, 0)`` when the
        string does not consist of exactly three integer components, so
        that non-version keys sort first instead of raising.
    """
    try:
        # Unpacking into three names also rejects strings with too few or
        # too many dot-separated parts (both surface as ValueError).
        major, minor, patch = map(int, version.split('.'))
    except ValueError:
        return (0, 0, 0)  # Default value for non-version strings
    return major, minor, patch

def load_json_data(source, timeout=30):
    """Load JSON from an HTTP(S) URL or from a local file.

    Args:
        source: Either a URL starting with ``http://`` / ``https://`` or a
            path to a local JSON file.
        timeout: Seconds to wait for the HTTP server before giving up.
            Only used for URL sources.

    Returns:
        The decoded JSON document.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the local file cannot be opened.
        ValueError: If the content is not valid JSON.
    """
    if source.startswith(('http://', 'https://')):
        # Without a timeout a stalled connection would block forever.
        response = requests.get(source, timeout=timeout)
        response.raise_for_status()
        return response.json()

    # JSON files are UTF-8 by specification; do not rely on the platform
    # default encoding.
    with open(source, 'r', encoding='utf-8') as file:
        return json.load(file)

def calculate_dynamic_width(versions, base_width, additional_width_per_version, max_width):
    """Compute a figure width that grows with the number of versions.

    Args:
        versions: Sized collection of the versions shown on the x-axis.
        base_width: Width used for up to 10 versions.
        additional_width_per_version: Extra width added for every version
            beyond the first 10.
        max_width: Upper bound applied once extra width is being added.

    Returns:
        ``base_width`` unchanged for 10 or fewer versions, otherwise the
        grown width capped at ``max_width``.
    """
    extra_versions = len(versions) - 10
    if extra_versions <= 0:
        return base_width
    grown_width = base_width + extra_versions * additional_width_per_version
    return min(grown_width, max_width)

def main(source, output=None):
    """Plot build time per implementation guide across publisher versions.

    Loads the statistics JSON, collects each guide's ``time`` value per
    version (converted from milliseconds to seconds), draws one line per
    guide ordered by total build time, and saves the figure.

    Args:
        source: Path or URL of the JSON statistics, passed to
            ``load_json_data``.
        output: Filename the figure is written to. When omitted, the
            ``output`` attribute of the module-level ``args`` namespace
            (set by the script entry point) is used.
    """
    if output is None:
        # Preserve the script behaviour: the CLI entry point stores the
        # parsed options in the module-level ``args``.
        output = args.output

    data = load_json_data(source)

    # Only keys starting with a digit are versions; this also leaves out
    # the 'format-version' entry. ``key[:1]`` tolerates an empty key.
    version_keys = [key for key in data if key[:1].isdigit()]
    sorted_versions = sorted(version_keys, key=parse_version)

    # guide name -> {version: build time in seconds}
    build_times = {}
    for version in sorted_versions:
        for guide, stats in data[version].items():
            if guide in ('sync-date', 'date'):
                continue  # Skip non-guide entries
            seconds = stats.get('time', 0) / 1000.0  # Convert to seconds
            build_times.setdefault(guide, {})[version] = seconds

    # Rows are forced into sorted-version order so that row i always belongs
    # to tick label i. Zero times become NaN so the lines show gaps instead
    # of dropping to 0; np.nan keeps the columns as plain floats.
    build_times_df = (
        pd.DataFrame(build_times)
        .reindex(sorted_versions)
        .replace(0, np.nan)
    )

    # Define colormaps: 40 distinct colors, reused cyclically if needed
    cmap1 = plt.get_cmap('tab20', 20)
    cmap2 = plt.get_cmap('tab20b', 20)
    combined_colors = [cmap(i) for cmap in (cmap1, cmap2) for i in range(20)]

    # Plot the slowest guides first so the legend is ordered by total time.
    totals = {
        guide: build_times_df[guide].sum(skipna=True)
        for guide in build_times_df.columns
    }
    ordered_guides = sorted(totals, key=totals.get, reverse=True)

    x_positions = np.arange(len(sorted_versions))
    handles = []
    for color_index, guide in enumerate(ordered_guides):
        guide_times = build_times_df[guide].to_numpy(dtype=float)
        handle, = plt.plot(
            x_positions,
            guide_times,
            marker='o',
            label=guide,
            color=combined_colors[color_index % len(combined_colors)],
        )
        handles.append(handle)

    plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.ylabel('Build Time (seconds)')
    plt.xlabel('Version')
    plt.title('Build Time for each Implementation Guide by Version')

    # Set x-axis ticks to correspond to the actual versions present in the data
    plt.xticks(ticks=x_positions, labels=sorted_versions, rotation=90, fontsize=8)

    # Calculate dynamic width based on the number of versions
    dynamic_width = calculate_dynamic_width(
        sorted_versions,
        base_width=8,
        additional_width_per_version=0.2,
        max_width=30,
    )
    plt.gcf().set_size_inches(dynamic_width, 5)
    plt.tight_layout()

    # Save the plot
    plt.savefig(output)
    plt.close()

if __name__ == "__main__":
    # Set up the command-line argument parser
    parser = argparse.ArgumentParser(description='Visualize FHIR IG Publisher build times.')
    parser.add_argument('--source', type=str, help='The path or URL to the JSON data source')
    parser.add_argument('-o', '--output', type=str, help='Output filename with path', default='../data/publisher-build-time-trends/latest-version.png')

    # Parse the arguments; ``args`` stays module-level because main() reads
    # the output path from it.
    args = parser.parse_args()
    # Fall back to the published statistics when no (or an empty) source is given.
    args.source = args.source or 'https://raw.githubusercontent.com/HL7/fhir-ig-publisher/master/test-statistics.json'

    try:
        main(args.source)
    except Exception as e:
        # Top-level boundary: report the failure on stderr rather than
        # letting a traceback escape.
        print(f"Error: {str(e)}", file=sys.stderr)

0 comments on commit 8f8e49e

Please sign in to comment.