Merge pull request #961 from djvreeman/dv-20240930-plot-builder-enhancement

Update plot-ig-builder-auto.py
HL7 · Oct 8, 2024 · 8f8e49e · 8f8e49e
2 parents cd205fe + 8804a2e
commit 8f8e49e
Showing 1 changed file with 34 additions and 87 deletions.
diff --git a/.azure/plot-ig-builder-auto/plot-ig-builder-auto.py b/.azure/plot-ig-builder-auto/plot-ig-builder-auto.py
@@ -23,166 +23,113 @@
 import sys
 import os
 import numpy as np
-
+import pandas as pd
 
 # Function to parse and sort version numbers
 def parse_version(version):
-    # Split version into major, minor, and patch, and convert them to integers
     try:
-        # Original code is now in the 'try' block, indented.
         major, minor, patch = map(int, version.split('.'))
         return major, minor, patch
-    except ValueError:  # Handling non-integer splits
+    except ValueError:
         return (0, 0, 0)  # Default value for non-version strings
 
 def load_json_data(source):
     if source.startswith('http://') or source.startswith('https://'):
-        # Fetch the JSON data from a URL
         response = requests.get(source)
-        response.raise_for_status()  # Raise an exception if the request failed
+        response.raise_for_status()
         data = response.json()
     else:
-        # Load the JSON data from a local file
         with open(source, 'r') as file:
             data = json.load(file)
     return data
 
+def calculate_dynamic_width(versions, base_width, additional_width_per_version, max_width):
+    num_versions = len(versions)
+    if num_versions <= 10:
+        return base_width
+    else:
+        additional_width = (num_versions - 10) * additional_width_per_version
+        return min(base_width + additional_width, max_width)
+
 def main(source):
     data = load_json_data(source)
 
-    # Prepare data for visualization
-    build_times = {}  # Structure to hold the build times
-
-    # Extracting the keys, which represent version numbers
-    version_keys = list(data.keys())
-    version_keys = [key for key in version_keys if key[0].isdigit()]
+    build_times = {}
 
-    # Sorting the version numbers
+    version_keys = [key for key in data.keys() if key[0].isdigit()]
     sorted_versions = sorted(version_keys, key=parse_version)
-
-    # The latest version is the last one in the sorted list
     latest_version = sorted_versions[-1]
 
-    # Construct the filename using the version number
     filename = f"{latest_version}.png"
 
-    # Process the JSON data
     for version, guides in data.items():
         if version == 'format-version':
-            continue  # Skip the 'format-version' entry
-
+            continue
         for guide, stats in guides.items():
             if guide in ['sync-date', 'date']:
-                continue  # Skip non-guide entries
-
+                continue
             guide_name = guide
-            time = stats.get('time', 0) / 1000.0  # Convert milliseconds to seconds
+            time = stats.get('time', 0) / 1000.0  # Convert to seconds
 
             if guide_name not in build_times:
                 build_times[guide_name] = {}
             build_times[guide_name][version] = time
 
-    # Determine the number of unique guides to plot
-    num_guides = len(build_times)
-
-    # Define the colormaps
-    # More on colormaps: https://matplotlib.org/stable/gallery/color/colormap_reference.html
-    cmap1 = plt.get_cmap('tab20', 20) # This map has 20 distinct colors
-    cmap2 = plt.get_cmap('tab20b', 20) # This map has 20 distinct colors too
-    cmap3 = plt.get_cmap('tab20c', 20)
+    # Convert to DataFrame and replace 0 with NaN to show gaps instead of 0s
+    build_times_df = pd.DataFrame(build_times).replace(0, pd.NA)
 
-    # Initialize an empty list to store the colors
+    # Define colormaps
+    cmap1 = plt.get_cmap('tab20', 20)
+    cmap2 = plt.get_cmap('tab20b', 20)
     combined_colors = []
 
-    # Function to add colors to the list from a given colormap
     def add_colors_from_cmap(cmap, num_colors, color_list):
         for i in range(num_colors):
             color_list.append(cmap(i))
 
-    # Add colors from each colormap to the combined list
     add_colors_from_cmap(cmap1, 20, combined_colors)
     add_colors_from_cmap(cmap2, 20, combined_colors)
-    #add_colors_from_cmap(cmap3, 20, combined_colors)
 
-    # Create the visualization
     color_index = 0
-
-    # Assuming 'build_times' is a dictionary where keys are guide names and values are dictionaries
-    # of version: build_time pairs.
-    # Start by collecting all timings and labels
+    handles = []
     timing_label_pairs = []
 
-    for guide, times in build_times.items():
-        # Extract the total build time for the current guide
-        total_build_time = sum(times.values())
-        # Append the total build time and the guide label to the list as a tuple
+    for guide in build_times_df.columns:
+        total_build_time = build_times_df[guide].sum(skipna=True)
         timing_label_pairs.append((total_build_time, guide))
 
-    # Sort the list by timings in descending order
     timing_label_pairs.sort(reverse=True, key=lambda x: x[0])
 
-    # Now we plot in the sorted order and collect handles for the legend
-    handles = []
     for total_build_time, guide in timing_label_pairs:
-        times = build_times[guide]
-        sorted_items = sorted(times.items())
-        versions = [item[0] for item in sorted_items]
-        timings = [item[1] for item in sorted_items]
-
-        # Use the next color in the color list
-        handle, = plt.plot(versions, timings, marker='o', label=guide, color=combined_colors[color_index % len(combined_colors)])
+        guide_times = build_times_df[guide]
+        handle, = plt.plot(guide_times.index, guide_times, marker='o', label=guide, color=combined_colors[color_index % len(combined_colors)])
         handles.append(handle)
         color_index += 1
 
-    # Update the legend with the sorted handles
     plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left')
-
-    plt.ylabel('Build Time (seconds)')  # Update label to reflect new units
+    plt.ylabel('Build Time (seconds)')
     plt.xlabel('Version')
     plt.title('Build Time for each Implementation Guide by Version')
-
-    # Set x-axis ticks to correspond to the actual versions present in the data
     plt.xticks(ticks=np.arange(len(sorted_versions)), labels=sorted_versions, rotation=90, fontsize=8)
 
-    # Assume 'sorted_versions' is the list of version strings from the JSON data
-    base_width = 8  # Base width for up to 10 versions
-    additional_width_per_version = 0.2  # Additional width for each version above 10
-    max_reasonable_width = 30  # Maximum width to keep the plot reasonable
-    fixed_height = 5 # Fixed height in inches
-
-    # Calculate the dynamic width based on the number of versions
-    dynamic_width = calculate_dynamic_width(sorted_versions, base_width, additional_width_per_version, max_reasonable_width)
-
-    # Set the dynamic figure size
-    plt.gcf().set_size_inches(dynamic_width, fixed_height)
+    # Calculate dynamic width based on the number of versions
+    dynamic_width = calculate_dynamic_width(sorted_versions, base_width=8, additional_width_per_version=0.2, max_width=30)
+    plt.gcf().set_size_inches(dynamic_width, 5)
     plt.tight_layout()
 
-    # Save the figure
+    # Save the plot
     plt.savefig(args.output)
-    # plt.show()
-
-    plt.close(args.output)
-
-def calculate_dynamic_width(versions, base_width, additional_width_per_version, max_width):
-    num_versions = len(versions)
-    if num_versions <= 10:
-        return base_width
-    else:
-        additional_width = (num_versions - 10) * additional_width_per_version
-        return min(base_width + additional_width, max_width)
+    plt.close()
 
 if __name__ == "__main__":
-    # Set up the command-line argument parser
     parser = argparse.ArgumentParser(description='Visualize FHIR IG Publisher build times.')
     parser.add_argument('--source', type=str, help='The path or URL to the JSON data source')
-    parser.add_argument('-o', '--output', type=str, help='Output filename with path', default='../data/publisher-build-time-trends/latest-version.png')  # You can change the default to any relevant path or filename.
+    parser.add_argument('-o', '--output', type=str, help='Output filename with path', default='../data/publisher-build-time-trends/latest-version.png')
 
-    # Parse the arguments
     args = parser.parse_args()
     args.source = args.source if args.source else 'https://raw.githubusercontent.com/HL7/fhir-ig-publisher/master/test-statistics.json'
 
     try:
         main(args.source)
     except Exception as e:
-        print(f"Error: {str(e)}", file=sys.stderr)
-    args.source = args.source if (args.source is not None) else 'https://raw.githubusercontent.com/HL7/fhir-ig-publisher/master/test-statistics.json'
+        print(f"Error: {str(e)}", file=sys.stderr)