diff --git a/javascript-sdk/src/sdk.ts b/javascript-sdk/src/sdk.ts index ba407a5..2afbec7 100644 --- a/javascript-sdk/src/sdk.ts +++ b/javascript-sdk/src/sdk.ts @@ -88,6 +88,9 @@ export default class RebuffSdk implements Rebuff { } const threshold = tacticOverride?.threshold ?? tactic.defaultThreshold; const execution = await tactic.execute(userInput, threshold); + if (isNaN(execution.score)) { + throw new RebuffError(`Tactic ${tactic.name} returned NaN`); + } const result = { name: tactic.name, score: execution.score, diff --git a/javascript-sdk/src/tactics/OpenAI.ts b/javascript-sdk/src/tactics/OpenAI.ts index 7e473a3..e781727 100644 --- a/javascript-sdk/src/tactics/OpenAI.ts +++ b/javascript-sdk/src/tactics/OpenAI.ts @@ -29,8 +29,11 @@ export default class OpenAI implements Tactic { throw new Error("completion.data.choices[0].message is undefined"); } - // FIXME: Handle when parseFloat returns NaN. - const score = parseFloat(completion.data.choices[0].message.content || ""); + let score = parseFloat(completion.data.choices[0].message.content || ""); + if (isNaN(score)) { + // If the model doesn't return a number, assume an attacker is manipulating the Rebuff prompt. + score = 1; + } return { score }; } catch (error) { console.error("Error in callOpenAiToDetectPI:", error); diff --git a/python-sdk/rebuff/sdk.py b/python-sdk/rebuff/sdk.py index 2e0ba93..398892a 100644 --- a/python-sdk/rebuff/sdk.py +++ b/python-sdk/rebuff/sdk.py @@ -97,7 +97,11 @@ def detect_injection( rendered_input, self.openai_model, self.openai_apikey ) - rebuff_model_score = float(model_response.get("completion", 0)) + try: + rebuff_model_score = float(model_response.get("completion", 0)) + except ValueError: + # If the model doesn't return a number, assume an attacker is manipulating the Rebuff prompt. + rebuff_model_score = 1 else: rebuff_model_score = 0