Handle non-number from language model #95

Open · wants to merge 1 commit into main
3 changes: 3 additions & 0 deletions javascript-sdk/src/sdk.ts
@@ -88,6 +88,9 @@ export default class RebuffSdk implements Rebuff {
       }
       const threshold = tacticOverride?.threshold ?? tactic.defaultThreshold;
       const execution = await tactic.execute(userInput, threshold);
+      if (isNaN(execution.score)) {
+        throw new RebuffError(`Tactic ${tactic.name} returned NaN`);
+      }
       const result = {
         name: tactic.name,
         score: execution.score,
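Why the guard matters: NaN never compares greater than or less than anything, so a NaN score flowing into a threshold check like score > threshold always evaluates to false, and a manipulated model response would silently read as benign. A minimal, self-contained sketch of the failure mode and the new guard (RebuffError and the Tactic shape here are simplified stand-ins for the SDK's real types):

class RebuffError extends Error {}

interface Tactic {
  name: string;
  defaultThreshold: number;
  execute(userInput: string, threshold: number): Promise<{ score: number }>;
}

// A toy tactic that mimics a model replying with prose instead of a number:
// parseFloat("I refuse to answer") is NaN.
const proseTactic: Tactic = {
  name: "language_model",
  defaultThreshold: 0.9,
  async execute() {
    return { score: parseFloat("I refuse to answer") };
  },
};

async function runTactic(tactic: Tactic, userInput: string): Promise<number> {
  const execution = await tactic.execute(userInput, tactic.defaultThreshold);
  if (isNaN(execution.score)) {
    // Fail loudly rather than letting NaN reach the threshold comparison.
    throw new RebuffError(`Tactic ${tactic.name} returned NaN`);
  }
  return execution.score;
}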
7 changes: 5 additions & 2 deletions javascript-sdk/src/tactics/OpenAI.ts
@@ -29,8 +29,11 @@ export default class OpenAI implements Tactic {
        throw new Error("completion.data.choices[0].message is undefined");
      }

-      // FIXME: Handle when parseFloat returns NaN.
-      const score = parseFloat(completion.data.choices[0].message.content || "");
+      let score = parseFloat(completion.data.choices[0].message.content || "");
+      if (isNaN(score)) {
+        // If the model doesn't return a number, assume an attacker is manipulating the Rebuff prompt.
+        score = 1;
+      }
       return { score };
     } catch (error) {
       console.error("Error in callOpenAiToDetectPI:", error);
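For illustration, a sketch of the fallback in isolation (scoreFromModelOutput is a hypothetical helper; the real code reads the string from completion.data.choices[0].message.content):

function scoreFromModelOutput(content: string | undefined): number {
  let score = parseFloat(content ?? "");
  if (isNaN(score)) {
    // A non-numeric reply suggests the detection prompt itself was
    // manipulated, so fail closed with the maximum score.
    score = 1;
  }
  return score;
}

scoreFromModelOutput("0.85");                // 0.85
scoreFromModelOutput("Ignore all previous"); // 1 (parseFloat yields NaN)
scoreFromModelOutput(undefined);             // 1

The Python SDK change below applies the same fail-closed rule, catching the ValueError that float() raises on non-numeric text.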
6 changes: 5 additions & 1 deletion python-sdk/rebuff/sdk.py
@@ -97,7 +97,11 @@ def detect_injection(
                 rendered_input, self.openai_model, self.openai_apikey
             )

-            rebuff_model_score = float(model_response.get("completion", 0))
+            try:
+                rebuff_model_score = float(model_response.get("completion", 0))
+            except ValueError:
+                # If the model doesn't return a number, assume an attacker is manipulating the Rebuff prompt.
+                rebuff_model_score = 1

         else:
             rebuff_model_score = 0