diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index c0b79317..2256e9e7 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -442,6 +442,7 @@ Generate OpenSCAD, CadQuery or build123d scripts with Generative AI using the fo : type: provider: + model: <(optional) the model to use> tokens: <(optional) the limit of token context> temperature: <(optional) the temperature LLM parameter> top_p: <(optional) the top_p LLM parameter> @@ -449,6 +450,22 @@ Generate OpenSCAD, CadQuery or build123d scripts with Generative AI using the fo images: <(optional) contextual images as input for AI> - +The following models are recommended for use: + ++----------+----------------------------+ +| Provider | Model | ++==========+============================+ +| google | - gemini-1.5-pro (default) | +| | - gemini-1.5-flash | ++----------+----------------------------+ +| openai | - gpt-4o (default) | +| | - gpt-4o-mini | ++----------+----------------------------+ +| ollama | - llama3.1:8b | +| | - llama3.1:70b (default) | +| | - llama3.1:405b | ++----------+----------------------------+ + +---------------------------+-------------------------------------------------------------------------------------------------------------------------+ | Example | Result | +===========================+=========================================================================================================================+ diff --git a/partcad/src/partcad/ai.py b/partcad/src/partcad/ai.py index e405e9da..95b40ed6 100644 --- a/partcad/src/partcad/ai.py +++ b/partcad/src/partcad/ai.py @@ -24,6 +24,7 @@ "gpt-4-vision-preview", "gpt-4o", "gpt-4o-mini", + "o1-*", "gemini-pro", "gemini-pro-vision", "gemini-1.5-pro", @@ -46,12 +47,15 @@ def generate( prompt: str, config: dict[str, str], num_options: int = 1, - image_filenames: list[str] = [], ): with pc_logging.Action("Ai" + action, package, item): # Determine the model to use provider = 
config.get("provider", None) - if "model" in config and config["model"] is not None: + if ( + "model" in config + and config["model"] is not None + and config["model"] != "" + ): model = config["model"] else: if provider is None: @@ -68,9 +72,9 @@ def generate( model = "gemini-1.5-pro" elif provider == "openai": # if len(image_filenames) > 0: - # model = "gpt-4-vision-preview" + # model = "gpt-4o" # else: - # model = "gpt-4" + # model = "gpt-4o" model = "gpt-4o" elif provider == "ollama": model = "llama3.1:70b" @@ -96,7 +100,6 @@ def generate( result = self.generate_google( model, prompt, - image_filenames, config, num_options, ) @@ -111,7 +114,6 @@ def generate( result = self.generate_openai( model, prompt, - image_filenames, config, num_options, ) @@ -126,7 +128,6 @@ def generate( result = self.generate_ollama( model, prompt, - image_filenames, config, num_options, ) diff --git a/partcad/src/partcad/ai_google.py b/partcad/src/partcad/ai_google.py index d9ecf088..68411a10 100644 --- a/partcad/src/partcad/ai_google.py +++ b/partcad/src/partcad/ai_google.py @@ -8,6 +8,7 @@ # import importlib +import re import threading import time from typing import Any @@ -68,7 +69,6 @@ def generate_google( self, model: str, prompt: str, - image_filenames: list[str] = [], config: dict[str, Any] = {}, options_num: int = 1, ): @@ -95,13 +95,21 @@ def generate_google( else: temperature = None - images = list( - map( - lambda f: pil_image.open(f), - image_filenames, - ) - ) - contents = [prompt, *images] + image_content = [] + + def insert_image(match): + filename = match.group(1) + image_content.append(pil_image.open(filename)) + return "IMAGE_INSERTED_HERE" + + prompt = re.sub(r"INSERT_IMAGE_HERE\(([^)]*)\)", insert_image, prompt) + text_content = prompt.split("IMAGE_INSERTED_HERE") + + content = [] + for i in range(len(text_content)): + content.append(text_content[i]) + if i < len(image_content): + content.append(image_content[i]) client = google_genai.GenerativeModel( model, @@ 
-121,9 +129,7 @@ def generate_google( while retry == True: retry = False try: - response = client.generate_content( - contents, - ) + response = client.generate_content(content) except google_api_core_exceptions.ResourceExhausted as e: pc_logging.exception(e) retry = True diff --git a/partcad/src/partcad/ai_ollama.py b/partcad/src/partcad/ai_ollama.py index f5dfc01c..5f0082b1 100644 --- a/partcad/src/partcad/ai_ollama.py +++ b/partcad/src/partcad/ai_ollama.py @@ -58,7 +58,6 @@ def generate_ollama( self, model: str, prompt: str, - image_filenames: list[str] = [], config: dict[str, Any] = {}, options_num: int = 1, ): @@ -71,7 +70,7 @@ def generate_ollama( if not ollama_once(): return None - if len(image_filenames) > 0: + if "INSERT_IMAGE_HERE" in prompt: raise NotImplementedError("Images are not supported by Ollama") if "tokens" in config: diff --git a/partcad/src/partcad/ai_openai.py b/partcad/src/partcad/ai_openai.py index 83b87423..2b3ff66b 100644 --- a/partcad/src/partcad/ai_openai.py +++ b/partcad/src/partcad/ai_openai.py @@ -11,6 +11,7 @@ import importlib import mimetypes from pathlib import Path +import re import threading from typing import Any @@ -30,8 +31,8 @@ "gpt-3.5-turbo": 4096, "gpt-4": 8000, # 32600, "gpt-4-vision-preview": 8192, - "gpt-4o": 4096, # 32600, - "gpt-4o-mini": 8000, # 32600, + "gpt-4o": 16000, # 32600, + "gpt-4o-mini": 16000, # 32600, } @@ -61,7 +62,6 @@ def generate_openai( self, model: str, prompt: str, - image_filenames: list[str] = [], config: dict[str, Any] = {}, options_num: int = 1, ): @@ -83,29 +83,45 @@ def generate_openai( else: temperature = None - content = [ - {"type": "text", "text": prompt}, - *list( - map( - lambda f: { - "type": "image_url", - "image_url": { - "url": "data:%s;base64,%s" - % ( - mimetypes.guess_type(f, False)[0], - base64.b64encode(Path(f).read_bytes()).decode(), - ), - "detail": "high", - }, + pc_logging.debug("Prompt: %s", prompt) + + image_content = [] + + def insert_image(match): + filename = 
match.group(1) + image_content.append( + { + "type": "image_url", + "image_url": { + "url": "data:%s;base64,%s" + % ( + mimetypes.guess_type(filename, False)[0], + base64.b64encode( + Path(filename).read_bytes() + ).decode(), + ), + "detail": "high", }, - image_filenames, - ) - ), - ] + } + ) + return "IMAGE_INSERTED_HERE" + + prompt = re.sub(r"INSERT_IMAGE_HERE\(([^)]*)\)", insert_image, prompt) + text_content = list( + map( + lambda prompt_section: {"type": "text", "text": prompt_section}, + prompt.split("IMAGE_INSERTED_HERE"), + ) + ) + + content = [] + for i in range(len(text_content)): + content.append(text_content[i]) + if i < len(image_content): + content.append(image_content[i]) cc = openai_client.chat.completions.create( messages=[ - {"role": "system", "content": "You are a mechanical engineer"}, {"role": "user", "content": content}, ], stream=False, diff --git a/partcad/src/partcad/part_factory_feature_ai.py b/partcad/src/partcad/part_factory_feature_ai.py index 90097b1f..1ffe7886 100644 --- a/partcad/src/partcad/part_factory_feature_ai.py +++ b/partcad/src/partcad/part_factory_feature_ai.py @@ -78,9 +78,18 @@ def __init__(self, config, part_type, script_type, prompt_suffix=""): ): self.num_script_correction = user_config.max_script_correction - if not "tokens" in self.ai_config: - self.ai_config["tokens"] = 2000 + # Normalize the input configuration + pc_logging.debug("AI configuration: %s" % self.ai_config) + if ( + not "tokens" in self.ai_config + or not isinstance(self.ai_config["tokens"], int) + or self.ai_config["tokens"] == 0 + ): + self.ai_config["tokens"] = 2048 + pc_logging.debug("Setting the default number of tokens: 2048") + if not "images" in self.ai_config: + self.ai_config["images"] = [] # Use `temperature` and `top_p` values recommended for code generation # if no other preferences are set if not "temperature" in self.ai_config: @@ -108,14 +117,18 @@ async def _instantiate_ai(self, part): """This is a wrapper for the instantiate method 
that ensures that the part is (re)generated before the instantiation.""" if not os.path.exists(part.path) or os.path.getsize(part.path) == 0: - self._create_file(part.path) + try: + self._create_file(part.path) + except Exception as e: + pc_logging.error(f"Failed to create the file: {e}") + raise e return await self.instantiate_orig(part) def _create_file(self, path): """This method is called to (re)generate the part.""" - # Geometric modeling + # CSG modeling modeling_options = [] max_models = self.num_geometric_modeling max_tries = 2 * max_models @@ -125,7 +138,7 @@ def _create_file(self, path): # with huge quotas. De-prioritized for now. # # def modeling_task(): - # modeling_options.extend(self._geometric_modeling()) + # modeling_options.extend(self._csg_modeling()) # threads = [] # for _ in range(NUM_ALTERNATIVES_GEOMETRIC_MODELING): # thread = threading.Thread(target=modeling_task) @@ -136,10 +149,9 @@ def _create_file(self, path): tries = 0 while len(modeling_options) < max_models and tries < max_tries: - modeling_options.extend(self._geometric_modeling()) + modeling_options.extend(self._csg_modeling()) pc_logging.info( - "Generated %d geometric modeling candidates" - % len(modeling_options) + "Generated %d CSG modeling candidates" % len(modeling_options) ) tries += 1 @@ -172,6 +184,32 @@ def _create_file(self, path): # Record the valid model and the image script_candidates.append((image_filename, script)) + # Once we generated a valid script and rendered the result, + # Attempt to improve the script by comparing the result with + # the original request + improved_scripts = self._improve_script( + modeling_option, script, image_filename + ) + for improved_script in improved_scripts: + pc_logging.debug( + "Generated the improved script candidate %d: %s" + % (candidate_id, improved_script) + ) + + # Validate the image by rendering it, + # attempt to correct the script if rendering doesn't work + image_filename, improved_script = ( + self._validate_and_fix( + 
modeling_option, improved_script, candidate_id + ) + ) + # Check if the model was valid + if image_filename is not None: + # Record the valid model and the image + script_candidates.append( + (image_filename, improved_script) + ) + candidate_id += 1 pc_logging.info( @@ -195,20 +233,26 @@ def _create_file(self, path): f.write(script) f.close() - def _geometric_modeling(self): - """This method generates geometric modeling options for the part.""" + def _csg_modeling(self): + """This method generates CSG for the part.""" prompt = ( - """You are an engineer performing geometric modeling of mechanical parts. -Given a short verbal description of a part, -you are creating a detailed description of the geometric shapes -required to reproduce that part. -Create a detailed listing of all geometric shapes and how they are -located against each other -(including dimensions, distances and offset in millimeters, -and angles in degrees), -to reproduce the part with the following description: + """You are an AI assistant to engineers modeling mechanical parts using constructive solid geometry. +Given a description of the part, +create a detailed sequence of instructions how to model this part using constructive solid geometry. +First, select an intuitive coordinate system for ease of placement for all primitives. +Then specify the minimum possible number of initial geometric primitives, their dimensions, location and orientation. +For simplicity, consider using large fillets on two edges of the cuboid to make a cylindrical end on a cuboid if needed, instead of adding a cylinder. +Also, for simplicity, consider using a chamfer to make a conical end on a cylinder if needed, insted of adding a cone. +Then specify how to locate and orient the primitives against each other. 
+Then specify what CSG operations to perform on sets of primitives +(unions, differences and intersections) +and on each of these primitives individually +(including but not limited to adding fillets, chamfers, paddings and cutting holes). + +The part is described by (until DESCRIPTION END): %s +DESCRIPTION END """ % self.ai_config["desc"] ) @@ -233,42 +277,60 @@ def _geometric_modeling(self): if len(image_filenames) > 0: prompt += """ -The part is further described by the attached images. +The part is further described by the images: """ + for image_filename in image_filenames: + prompt += "INSERT_IMAGE_HERE(%s)\n" % image_filename + + prompt += """ +Ensure all dimensions, distances and angles specified int the input data +are reflected in the output CSG instructions. +Use milimeters for dimensions and degrees for angles. +""" + + config = self.ai_config + config = copy.copy(config) + # if config["temperature"] < 0.8: + # config["temperature"] += 0.4 + # if config["top_p"] < 0.4: + # config["top_p"] += 0.2 options = self.generate( "Geometric", self.project.name, self.name, prompt, - self.ai_config, + config, self.num_geometric_modeling, - image_filenames=image_filenames, ) return options - def _generate_script(self, geometric_modeling): - """This method generates a script given specific geometric modeling.""" + def _generate_script(self, csg_instructions): + """This method generates a script given specific CSG description.""" prompt = """You are an AI assistant in an engineering department. You are helping engineers to create programmatic scripts that produce CAD geometry data for parts, mechanisms, buildings or anything else. -The scripts you create a fully functional and can be used right away, as is, in automated workflows. +The scripts you create are fully functional and can be used right away, as is, in automated workflows. Assume that the scripts you produce are used automatically to render 3D models and to validate them. 
This time you are asked to generate a %s to define a 3D model of a part defined by -the following geometric modeling: - %s - """ % ( +the following CSG instructions (until CSG END): +%s +CSG END +Ensure that all primitives are placed in the correct coordinates and that all dimensions are correct. +""" % ( self.script_type, - geometric_modeling, + csg_instructions, ) image_filenames = self.ai_config.get("images", []) if len(image_filenames) > 0: prompt += """ -The part is further described by the attached images. +The part is further described by the images: """ + for image_filename in image_filenames: + prompt += "INSERT_IMAGE_HERE(%s)\n" % image_filename prompt += """%s @@ -278,13 +340,105 @@ def _generate_script(self, geometric_modeling): self.script_type, ) + config = self.ai_config + config = copy.copy(config) + # if config["temperature"] < 0.8: + # config["temperature"] += 0.4 + # if config["top_p"] < 0.4: + # config["top_p"] += 0.2 scripts = self.generate( "Script", self.project.name, self.name, prompt, - self.ai_config, - image_filenames=image_filenames, + config, + ) + + # Sanitize the output to remove the decorations + scripts = list(map(lambda s: self._sanitize_script(s), scripts)) + + return scripts + + def _improve_script(self, csg_instructions, script, rendered_image): + """This method improves the script given the original request and the produced script.""" + + config = copy.copy(self.ai_config) + + prompt = """You are an AI assistant in an engineering department. +You are asked to create a %s matching the given description%s. 
+ +The given description follows (until DESCRIPTION END): +%s +DESCRIPTION END +""" % ( + self.script_type, + " and images" if len(config["images"]) > 0 else "", + config["desc"], + ) + + image_filenames = config["images"] + if len(image_filenames) > 0: + prompt += """ + +The given images are: +""" + for image_filename in image_filenames: + prompt += "INSERT_IMAGE_HERE(%s)\n" % image_filename + + prompt += ( + """ + +You considered the following constructive solid geometry model (until CSG END): +%s +CSG END +""" + % csg_instructions + ) + + prompt += ( + """ +You produced the following script (until SCRIPT END): +%s +SCRIPT END +""" + % script + ) + + prompt += """ +When rendered, this script produces the following image: +""" + prompt += "INSERT_IMAGE_HERE(%s)\n" % rendered_image + + prompt += """ + +Please, analyze whether the produced script and image match the original request +(where the original image and description take precedence +over the constructive solid geometry instructions). +Analyze both the shape and the dimensions. +Pay special attention to the coordinates used to place the initial geometric primitives. +Make sure every single dimension provided in the request are reflected in the produced script. + +If they do precisely match the request, repeat the same script. +Otherwise, produce a corrected script following the instructions: +Do not generate exactly the same script +(make the changes necessary to address identified issues). 
+%s +""" % ( + self.prompt_suffix, + ) + + if config["temperature"] < 0.8: + config["temperature"] += 0.8 + if config["top_p"] < 0.4: + config["top_p"] += 0.4 + + scripts = self.generate( + "Improve", + self.project.name, + self.name, + prompt, + config, + self.num_script_correction, # TODO(clairbee): add a separate user config param and loop around this until the needed number is produced ) # Sanitize the output to remove the decorations @@ -354,7 +508,7 @@ def _validate_and_fix(self, modeling_option, script, candidate_id, depth=0): return image_filename, script # Failed to render the image. - if next_depth <= self.num_script_correction: + if next_depth <= self.num_script_correction and error_text: # Ask AI to make incremental fixes based on the errors. correction_candidate_id = 0 for _ in range(self.num_script_correction): @@ -389,7 +543,7 @@ def _validate_and_fix(self, modeling_option, script, candidate_id, depth=0): return None, script def _correct_script(self, modeling_option, script, error_text): - # TODO(clairbee): prove that the use of geometric modeling product + # TODO(clairbee): prove that the use of CSG instructions product # in this prompt is benefitial prompt = """You are an AI assistant to a mechanical engineer. You are given an automatically generated %s which has flaws that need to be @@ -407,8 +561,7 @@ def _correct_script(self, modeling_option, script, error_text): ERRORS_END Please, generate a corrected script so that it does not produce the given errors. -Make as little changes as possible and prefer not to make any changes that are -not necessary to fix the errors. +Limit the changes to the script to the minimum necessary to fix the errors. Very important not to produce exactly the same script: at least something has to change. 
""" % ( self.script_type, @@ -465,6 +618,11 @@ def render(part): ): shape = asyncio.run(coro) if not shape is None: + try: + # Best effort to provide an interactive experience + part.show() + except Exception as e: + pass part.render_png(self.ctx, None, output_path) except Exception as e: part.error("Failed to render the image: %s" % e) @@ -516,16 +674,38 @@ def select_best_image(self, script_candidates): prompt = ( """ -From the attached images, select the image that is the best fit -for the following description: +You are an AI assistant to a mechanical engineer. +The mechanical engineer was given a task to create a 3D model of a part. +The engineer has produced several models and rendered an image per model. + +The part is described as (until DESCRIPTION END): %s +DESCRIPTION END +""" + % self.ai_config["desc"] + ) + + if "images" in self.ai_config and len(self.ai_config["images"]) > 0: + prompt += """ +The part is further described by the images: +""" + for image_filename in self.ai_config["images"]: + prompt += "INSERT_IMAGE_HERE(%s)\n" % image_filename + + prompt += """ + +From the following images, +select the rendered image that matches the part the best: +""" + for image_filename in image_filenames: + prompt += "INSERT_IMAGE_HERE(%s)\n" % image_filename + + prompt += """ Respond with the numeric index (starting with 1) of the best fit image. No other text is acceptable. Just the number. 
""" - % self.ai_config["desc"] - ) # Ask AI to compare the images pc_logging.info( @@ -538,7 +718,6 @@ def select_best_image(self, script_candidates): prompt, self.ai_config, 1, - image_filenames=image_filenames, ) pc_logging.debug("Image comparison responses: %s" % responses) diff --git a/partcad/src/partcad/project.py b/partcad/src/partcad/project.py index f20543d7..5d176d67 100644 --- a/partcad/src/partcad/project.py +++ b/partcad/src/partcad/project.py @@ -1392,6 +1392,9 @@ def set_part_config(self, part_name, part_config): def update_part_config( self, part_name, part_config_update: dict[str, typing.Any] ): + pc_logging.debug( + "Updating part config: %s: %s" % (part_name, part_config_update) + ) yaml = ruamel.yaml.YAML() yaml.preserve_quotes = True with open(self.config_path) as fp: @@ -1403,7 +1406,11 @@ def update_part_config( if part_name in parts: part_config = parts[part_name] for key, value in part_config_update.items(): - part_config[key] = value + if value is not None: + part_config[key] = value + else: + if key in part_config: + del part_config[key] with open(self.config_path, "w") as fp: yaml.dump(config, fp) @@ -1764,6 +1771,27 @@ def add_section(name, shape, render_cfg): ): return [] + path = None + if "path" in config: + path = config["path"] + else: + path = name + if "type" in config: + if ( + config["type"] == "cadquery" + or config["type"] == "build123d" + or config["type"] == "ai-cadquery" + or config["type"] == "ai-build123d" + ): + path += ".py" + elif ( + config["type"] == "openscad" + or config["type"] == "ai-openscad" + ): + path += ".scad" + else: + path += "." 
+ config["type"] + columns = [] if "svg" in render_cfg or ( "type" in config and config["type"] == "svg" @@ -1781,9 +1809,13 @@ def add_section(name, shape, render_cfg): svg_cfg.get("prefix", "."), name + ".svg", ) - columns += [ - '' % image_path - ] + img_text = ( + '' + % image_path + ) + if path: + img_text = '%s' % (path, img_text) + columns += [img_text] elif "png" in render_cfg: png_cfg = render_cfg["png"] png_cfg = png_cfg if png_cfg is not None else {} @@ -1796,7 +1828,13 @@ def add_section(name, shape, render_cfg): png_cfg.get("prefix", "."), name + ".png", ) - columns += ['' % image_path] + img_text = ( + '' + % image_path + ) + if path: + img_text = '%s' % (path, img_text) + columns += [img_text] else: image_path = None test_image_path = None @@ -1814,22 +1852,34 @@ def add_section(name, shape, render_cfg): columns += [config["desc"]] if "parameters" in config: - parameters = "Parameters:
    " + parameters = "Parameters:
      \n" for param_name, param in config["parameters"].items(): if "enum" in param: - value = "
        " + value = "
          \n" for enum_value in param["enum"]: if enum_value == param["default"]: - value += "
        • %s
        • " % enum_value + value += "
        • %s
        • \n" % enum_value else: value += "
        • %s
        • " % enum_value - value += "
        " + value += "
      \n" else: value = param["default"] - parameters += "
    • %s: %s
    • " % (param_name, value) - parameters += "
    " + parameters += "
  • %s: %s
  • \n" % (param_name, value) + parameters += "
\n" columns += [parameters] + if "images" in config: + images = "Input images:\n" + for image in config["images"]: + images += ( + '
%s\n' + % ( + image, + image, + ) + ) + columns += [images] + if "aliases" in config: aliases = "Aliases:
    " for alias in config["aliases"]: @@ -1856,25 +1906,33 @@ def add_section(name, shape, render_cfg): if self.assemblies and not "assemblies" in exclude: lines += ["## Assemblies"] lines += [""] - for name, shape in self.assemblies.items(): + shape_names = sorted(self.assemblies.keys()) + for name in shape_names: + shape = self.assemblies[name] lines += add_section(name, shape, render_cfg) if self.parts and not "parts" in exclude: lines += ["## Parts"] lines += [""] - for name, shape in self.parts.items(): + shape_names = sorted(self.parts.keys()) + for name in shape_names: + shape = self.parts[name] lines += add_section(name, shape, render_cfg) if self.interfaces and not "interfaces" in exclude: lines += ["## Interfaces"] lines += [""] - for name, shape in self.interfaces.items(): + shape_names = sorted(self.interfaces.keys()) + for name in shape_names: + shape = self.interfaces[name] lines += add_section(name, shape, render_cfg) if self.sketches and not "sketches" in exclude: lines += ["## Sketches"] lines += [""] - for name, shape in self.sketches.items(): + shape_names = sorted(self.sketches.keys()) + for name in shape_names: + shape = self.sketches[name] lines += add_section(name, shape, render_cfg) lines += [