Skip to content

Commit

Permalink
Improve synthetic data gen to follow human guidance for output genera…
Browse files Browse the repository at this point in the history
…tion, not just topic+input generation.

Add Dolphin, an uncensored model, to help with generating datasets for toxicity and bias.
  • Loading branch information
scosman committed Feb 28, 2025
1 parent cfb732c commit 7f19ffe
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 3 deletions.
11 changes: 11 additions & 0 deletions app/desktop/studio_server/data_gen_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
DataGenCategoriesTaskInput,
DataGenSampleTask,
DataGenSampleTaskInput,
wrap_task_with_guidance,
)
from kiln_ai.datamodel import DataSource, DataSourceType, PromptId, TaskRun
from kiln_server.run_api import model_provider_from_string
Expand Down Expand Up @@ -62,6 +63,10 @@ class DataGenSaveSamplesApiInput(BaseModel):
prompt_method: PromptId = Field(
description="The prompt method used to generate the output"
)
human_guidance: str | None = Field(
description="Optional human guidance for generation",
default=None,
)


def connect_data_gen_api(app: FastAPI):
Expand Down Expand Up @@ -121,6 +126,12 @@ async def save_sample(
) -> TaskRun:
task = task_from_id(project_id, task_id)

    # Wrap the task instructions with human guidance, if provided
if sample.human_guidance is not None and sample.human_guidance.strip() != "":
task.instruction = wrap_task_with_guidance(
task.instruction, sample.human_guidance
)

tags = ["synthetic"]
if session_id:
tags.append(f"synthetic_session_{session_id}")
Expand Down
5 changes: 5 additions & 0 deletions app/web_ui/src/lib/api_schema.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,11 @@ export interface components {
* @description The prompt method used to generate the output
*/
prompt_method: string;
/**
* Human Guidance
* @description Optional human guidance for generation
*/
human_guidance?: string | null;
};
/**
* DataSource
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,12 @@
<div class="text-xl font-bold">Compare Run Methods</div>

<div class="text-xs text-gray-500">
Compare to find the best method of running your task (various
prompts, models, fine-tunes, etc).
Find the best method of running your task including various
prompts, models, fine-tunes, and more.
</div>
<div class="text-xs text-gray-500 pt-2">
Scores are generated by running the 'run method' on each item of
your Eval Dataset, generatring task outputs, then evaluating those
your eval dataset, generating task outputs, then evaluating those
outputs with the selected evaluation method{current_eval_config
? ` (${current_eval_config.name})`
: ""}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import FormContainer from "$lib/utils/form_container.svelte"
import { type SampleData } from "./gen_model"
import FormElement from "$lib/utils/form_element.svelte"
import Warning from "$lib/ui/warning.svelte"
let session_id = Math.floor(Math.random() * 1000000000000).toString()
Expand Down Expand Up @@ -284,6 +285,10 @@
const formatted_input = task?.input_json_schema
? JSON.parse(sample.input)
: sample.input
const save_sample_guidance =
guidance_enabled && human_guidance.length > 0
? human_guidance
: undefined
const {
error: post_error,
data,
Expand All @@ -308,6 +313,7 @@
output_provider: provider,
prompt_method,
topic_path: topic_path || [],
human_guidance: save_sample_guidance,
},
},
)
Expand Down Expand Up @@ -485,6 +491,18 @@
{/if}
</div>
</div>
{#if guidance_enabled && human_guidance.length > 0}
{#if prompt_method.includes("::")}
<Warning
warning_message="Human guidance is enabled, but you've selected a custom prompt with a fixed string. Human guidance will not be applied."
/>
{:else}
<Warning
warning_message="Human guidance is enabled. Your guidance will be passed to the model and used to influence output."
warning_color="warning"
/>
{/if}
{/if}
<AvailableModelsDropdown
requires_structured_output={task?.output_json_schema ? true : false}
bind:model
Expand Down
18 changes: 18 additions & 0 deletions libs/core/kiln_ai/adapters/data_gen/data_gen_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,21 @@ def __init__(self, target_task: Task, num_samples: int = 8):
input_json_schema=json.dumps(DataGenSampleTaskInput.model_json_schema()),
output_json_schema=list_json_schema_for_task(target_task),
)


def wrap_task_with_guidance(original_instruction: str, guidance: str) -> str:
    """Wrap the original task instruction with additional human guidance.

    The guidance is appended as a "Special Instructions" section; the model
    is told to follow both sets of instructions, but to prioritize the
    additional (human) instructions when they conflict with the originals.

    Args:
        original_instruction: The original task instruction to wrap.
        guidance: The human-provided guidance to append.

    Returns:
        The combined prompt string: the original instruction, followed by
        the guidance wrapped in <additional_instructions> tags.
    """
    # NOTE: the exact literal is part of the prompt contract — do not reformat.
    return f"""{original_instruction}
# Special Instructions
The above instructions are the original instructions for this task. For this execution, we've been given additional instructions. Follow both, but prioritize the additional instructions when they conflict. The additional instructions are:
<additional_instructions>
{guidance}
</additional_instructions>
"""
24 changes: 24 additions & 0 deletions libs/core/kiln_ai/adapters/ml_model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class ModelFamily(str, Enum):
mixtral = "mixtral"
qwen = "qwen"
deepseek = "deepseek"
dolphin = "dolphin"


# Where models have instruct and raw versions, instruct is default and raw is specified
Expand Down Expand Up @@ -88,6 +89,7 @@ class ModelName(str, Enum):
deepseek_r1_distill_qwen_1p5b = "deepseek_r1_distill_qwen_1p5b"
deepseek_r1_distill_qwen_7b = "deepseek_r1_distill_qwen_7b"
deepseek_r1_distill_llama_8b = "deepseek_r1_distill_llama_8b"
dolphin_2_9_8x22b = "dolphin_2_9_8x22b"


class ModelParserID(str, Enum):
Expand Down Expand Up @@ -962,4 +964,26 @@ class KilnModel(BaseModel):
),
],
),
# Dolphin 2.9 Mixtral 8x22B
KilnModel(
family=ModelFamily.dolphin,
name=ModelName.dolphin_2_9_8x22b,
friendly_name="Dolphin 2.9 8x22B",
providers=[
KilnModelProvider(
name=ModelProviderName.ollama,
structured_output_mode=StructuredOutputMode.json_schema,
supports_data_gen=True,
provider_options={"model": "dolphin-mixtral:8x22b"},
),
KilnModelProvider(
name=ModelProviderName.openrouter,
provider_options={
"model": "cognitivecomputations/dolphin-mixtral-8x22b"
},
supports_data_gen=True,
structured_output_mode=StructuredOutputMode.json_instruction_and_object,
),
],
),
]

0 comments on commit 7f19ffe

Please sign in to comment.