Cumulative fixes for AI features (#187)
openvmp authored Sep 23, 2024
1 parent 26a0b85 commit c43e380
Showing 7 changed files with 374 additions and 98 deletions.
17 changes: 17 additions & 0 deletions docs/source/configuration.rst
@@ -442,13 +442,30 @@ Generate OpenSCAD, CadQuery or build123d scripts with Generative AI using the following configuration:
<part name>:
type: <ai-openscad|ai-cadquery|ai-build123d>
provider: <google|openai|ollama, the model provider to use>
model: <(optional) the model to use>
tokens: <(optional) the limit of token context>
temperature: <(optional) the temperature LLM parameter>
top_p: <(optional) the top_p LLM parameter>
top_k: <(optional, openai|ollama) the top_k LLM parameter>
images: <(optional) contextual images as input for AI>
- <image path>
The following models are recommended for use:

+----------+----------------------------+
| Provider | Model |
+==========+============================+
| google | - gemini-1.5-pro (default) |
| | - gemini-1.5-flash |
+----------+----------------------------+
| openai | - gpt-4o (default) |
| | - gpt-4o-mini |
+----------+----------------------------+
| ollama | - llama-3.1:8b |
| | - llama-3.1:70b (default) |
| | - llama-3.1:405b |
+----------+----------------------------+
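
As an illustration, a part entry that relies on the defaults above might look like the following (the part name and image path are invented for this example):

    bracket:
      type: ai-build123d
      provider: openai
      temperature: 0.5
      images:
        - ./images/bracket-sketch.png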

+---------------------------+-------------------------------------------------------------------------------------------------------------------------+
| Example | Result |
+===========================+=========================================================================================================================+
15 changes: 8 additions & 7 deletions partcad/src/partcad/ai.py
@@ -24,6 +24,7 @@
"gpt-4-vision-preview",
"gpt-4o",
"gpt-4o-mini",
"o1-*",
"gemini-pro",
"gemini-pro-vision",
"gemini-1.5-pro",
@@ -46,12 +47,15 @@ def generate(
prompt: str,
config: dict[str, str],
num_options: int = 1,
image_filenames: list[str] = [],
):
with pc_logging.Action("Ai" + action, package, item):
# Determine the model to use
provider = config.get("provider", None)
if "model" in config and config["model"] is not None:
if (
"model" in config
and config["model"] is not None
and config["model"] != ""
):
model = config["model"]
else:
if provider is None:
@@ -68,9 +72,9 @@
model = "gemini-1.5-pro"
elif provider == "openai":
# if len(image_filenames) > 0:
# model = "gpt-4-vision-preview"
# model = "gpt-4o"
# else:
# model = "gpt-4"
# model = "gpt-4o"
model = "gpt-4o"
elif provider == "ollama":
model = "llama3.1:70b"
@@ -96,7 +100,6 @@
result = self.generate_google(
model,
prompt,
image_filenames,
config,
num_options,
)
@@ -111,7 +114,6 @@
result = self.generate_openai(
model,
prompt,
image_filenames,
config,
num_options,
)
@@ -126,7 +128,6 @@
result = self.generate_ollama(
model,
prompt,
image_filenames,
config,
num_options,
)
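
With this change, an empty or missing `model` value falls back to the provider default (gemini-1.5-pro, gpt-4o, or llama3.1:70b), and the provider back-ends no longer receive a separate image_filenames list; images are referenced inline in the prompt through `INSERT_IMAGE_HERE(<path>)` markers. A hypothetical caller-side sketch of that convention (the part name, path, and the commented-out call are assumptions, not code from this commit):

    config = {
        "provider": "openai",
        "model": "",  # an empty string now falls back to the provider default (gpt-4o)
        "temperature": 0.4,
    }

    # Images are embedded in the prompt instead of being passed as a separate list:
    prompt = (
        "Write a build123d script for the bracket shown in "
        "INSERT_IMAGE_HERE(./images/bracket.png). Dimensions are in millimeters."
    )

    # scripts = self.generate("Build123d", package, item, prompt, config, num_options=2)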
28 changes: 17 additions & 11 deletions partcad/src/partcad/ai_google.py
@@ -8,6 +8,7 @@
#

import importlib
import re
import threading
import time
from typing import Any
@@ -68,7 +69,6 @@ def generate_google(
self,
model: str,
prompt: str,
image_filenames: list[str] = [],
config: dict[str, Any] = {},
options_num: int = 1,
):
@@ -95,13 +95,21 @@
else:
temperature = None

images = list(
map(
lambda f: pil_image.open(f),
image_filenames,
)
)
contents = [prompt, *images]
image_content = []

def insert_image(match):
filename = match.group(1)
image_content.append(pil_image.open(filename))
return "IMAGE_INSERTED_HERE"

prompt = re.sub(r"INSERT_IMAGE_HERE\(([^)]*)\)", insert_image, prompt)
text_content = prompt.split("IMAGE_INSERTED_HERE")

content = []
for i in range(len(text_content)):
content.append(text_content[i])
if i < len(image_content):
content.append(image_content[i])

client = google_genai.GenerativeModel(
model,
@@ -121,9 +129,7 @@
while retry == True:
retry = False
try:
response = client.generate_content(
contents,
)
response = client.generate_content(content)
except google_api_core_exceptions.ResourceExhausted as e:
pc_logging.exception(e)
retry = True
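
Each provider back-end now parses the prompt for `INSERT_IMAGE_HERE(<path>)` markers, splits the text around them, and interleaves the loaded images with the text fragments, as in the ai_google.py hunk above. A minimal standalone sketch of that parsing step (the helper name and return shape are invented; the real code loads images with PIL or base64-encodes them per provider):

    import re

    def split_prompt_with_images(prompt: str) -> list:
        """Interleave text fragments and image paths referenced in the prompt."""
        image_paths: list[str] = []

        def _capture(match: re.Match) -> str:
            image_paths.append(match.group(1))
            return "IMAGE_INSERTED_HERE"

        text = re.sub(r"INSERT_IMAGE_HERE\(([^)]*)\)", _capture, prompt)
        fragments = text.split("IMAGE_INSERTED_HERE")

        content: list = []
        for i, fragment in enumerate(fragments):
            content.append(fragment)
            if i < len(image_paths):
                content.append(("image", image_paths[i]))
        return content

    # split_prompt_with_images("Model this INSERT_IMAGE_HERE(bracket.png) part")
    # -> ["Model this ", ("image", "bracket.png"), " part"]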
3 changes: 1 addition & 2 deletions partcad/src/partcad/ai_ollama.py
@@ -58,7 +58,6 @@ def generate_ollama(
self,
model: str,
prompt: str,
image_filenames: list[str] = [],
config: dict[str, Any] = {},
options_num: int = 1,
):
@@ -71,7 +70,7 @@
if not ollama_once():
return None

if len(image_filenames) > 0:
if "INSERT_IMAGE_HERE" in prompt:
raise NotImplementedError("Images are not supported by Ollama")

if "tokens" in config:
60 changes: 38 additions & 22 deletions partcad/src/partcad/ai_openai.py
@@ -11,6 +11,7 @@
import importlib
import mimetypes
from pathlib import Path
import re
import threading
from typing import Any

Expand All @@ -30,8 +31,8 @@
"gpt-3.5-turbo": 4096,
"gpt-4": 8000, # 32600,
"gpt-4-vision-preview": 8192,
"gpt-4o": 4096, # 32600,
"gpt-4o-mini": 8000, # 32600,
"gpt-4o": 16000, # 32600,
"gpt-4o-mini": 16000, # 32600,
}


@@ -61,7 +62,6 @@ def generate_openai(
self,
model: str,
prompt: str,
image_filenames: list[str] = [],
config: dict[str, Any] = {},
options_num: int = 1,
):
@@ -83,29 +83,45 @@
else:
temperature = None

content = [
    {"type": "text", "text": prompt},
    *list(
        map(
            lambda f: {
                "type": "image_url",
                "image_url": {
                    "url": "data:%s;base64,%s"
                    % (
                        mimetypes.guess_type(f, False)[0],
                        base64.b64encode(Path(f).read_bytes()).decode(),
                    ),
                    "detail": "high",
                },
            },
            image_filenames,
        )
    ),
]
pc_logging.debug("Prompt: %s", prompt)

image_content = []

def insert_image(match):
    filename = match.group(1)
    image_content.append(
        {
            "type": "image_url",
            "image_url": {
                "url": "data:%s;base64,%s"
                % (
                    mimetypes.guess_type(filename, False)[0],
                    base64.b64encode(
                        Path(filename).read_bytes()
                    ).decode(),
                ),
                "detail": "high",
            },
        }
    )
    return "IMAGE_INSERTED_HERE"

prompt = re.sub(r"INSERT_IMAGE_HERE\(([^)]*)\)", insert_image, prompt)
text_content = list(
map(
lambda prompt_section: {"type": "text", "text": prompt_section},
prompt.split("IMAGE_INSERTED_HERE"),
)
)

content = []
for i in range(len(text_content)):
content.append(text_content[i])
if i < len(image_content):
content.append(image_content[i])

cc = openai_client.chat.completions.create(
messages=[
{"role": "system", "content": "You are a mechanical engineer"},
{"role": "user", "content": content},
],
stream=False,
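
After the interleaving loop, `content` is a flat list that alternates text items and base64-encoded image_url items, so the user message sent to the Chat Completions API ends up shaped roughly like this (the text and the truncated base64 payload are illustrative only):

    messages = [
        {"role": "system", "content": "You are a mechanical engineer"},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Write a script for the part shown here: "},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "data:image/png;base64,iVBORw0KGgoAAAANS...",
                        "detail": "high",
                    },
                },
                {"type": "text", "text": " Use millimeters."},
            ],
        },
    ]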