diff --git a/.vscode/settings.json b/.vscode/settings.json
index 87f537da..7b48ec4c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -12,6 +12,7 @@
     "jsonv",
     "Karnali",
     "Kita",
+    "Kurunegala",
     "kwargs",
     "landuse",
     "langchain",
@@ -20,6 +21,7 @@
     "lightyellow",
     "llms",
     "Mandera",
+    "México",
     "NDRRMA",
     "NEOC",
     "OPENAI",
diff --git a/src/app/api/ai/deep/route.ts b/src/app/api/ai/deep/route.ts
index cd1fbaec..b1c08e91 100644
--- a/src/app/api/ai/deep/route.ts
+++ b/src/app/api/ai/deep/route.ts
@@ -1,14 +1,33 @@
 import { NextResponse } from "next/server";
 import { OpenAI, OpenAIChat } from "langchain/llms/openai";
 import { loadTridentDeepChain } from "@/utils/langchain/chains/deep";
+import { OpenAIEmbeddings } from "langchain/embeddings/openai";
 
 export async function POST(request: Request) {
   const res = await request.json();
   const query = res.query;
 
-  const model = new OpenAIChat({ temperature: 0 });
-  const chain = loadTridentDeepChain({ llm: model });
-  const result = await chain.call({ text: query });
+  let embeddings: OpenAIEmbeddings;
+  let llm: OpenAIChat;
+  if (process.env.CLOUDFLARE_AI_GATEWAY) {
+    embeddings = new OpenAIEmbeddings({
+      configuration: {
+        baseURL: process.env.CLOUDFLARE_AI_GATEWAY + "/openai",
+      },
+    });
+    llm = new OpenAIChat({
+      configuration: {
+        baseURL: process.env.CLOUDFLARE_AI_GATEWAY + "/openai",
+      },
+      temperature: 0,
+    });
+  } else {
+    embeddings = new OpenAIEmbeddings();
+    llm = new OpenAIChat({ temperature: 0 });
+  }
+
+  const chain = await loadTridentDeepChain({ embeddings, llm });
+  const result = await chain.call({ input: query });
 
   console.log("----- ----- -----");
   console.log("----- start deep -----");
diff --git a/src/app/api/ai/inner/route.ts b/src/app/api/ai/inner/route.ts
index 68bc35d3..8ecf6d7a 100644
--- a/src/app/api/ai/inner/route.ts
+++ b/src/app/api/ai/inner/route.ts
@@ -47,7 +47,6 @@ export async function POST(request: Request) {
 
   let embeddings: OpenAIEmbeddings;
   let llm: OpenAIChat;
-
   if (process.env.CLOUDFLARE_AI_GATEWAY) {
     embeddings = new OpenAIEmbeddings({
       configuration: {
diff --git a/src/utils/langchain/chains/deep/index.ts b/src/utils/langchain/chains/deep/index.ts
index 205eaba8..348b7e3f 100644
--- a/src/utils/langchain/chains/deep/index.ts
+++ b/src/utils/langchain/chains/deep/index.ts
@@ -1,15 +1,29 @@
 import { LLMChain } from "langchain/chains";
-import { TRIDENT_DEEP_PROMPT } from "./prompt";
+import { loadTridentDeepPrompt } from "./prompt";
 import { BaseLanguageModel } from "langchain/dist/base_language";
+import { Embeddings } from "langchain/embeddings/base";
 
-export const loadTridentDeepChain = ({
+/**
+ * Builds the LLMChain for the deep step.
+ *
+ * The prompt comes from `loadTridentDeepPrompt`, which is async and needs
+ * the embeddings client, so this loader is async as well.
+ *
+ * @param embeddings - Passed through to `loadTridentDeepPrompt`.
+ * @param llm - Language model the chain runs the prompt against.
+ * @returns The chain wiring `llm` to the loaded prompt.
+ */
+export const loadTridentDeepChain = async ({
+  embeddings,
   llm,
 }: {
+  embeddings: Embeddings;
   llm: BaseLanguageModel;
-}): LLMChain => {
+}): Promise<LLMChain> => {
+  const prompt = await loadTridentDeepPrompt(embeddings);
   const chain = new LLMChain({
     llm: llm,
-    prompt: TRIDENT_DEEP_PROMPT,
+    prompt: prompt,
   });
   return chain;
 };
diff --git a/src/utils/langchain/chains/deep/prompt.ts b/src/utils/langchain/chains/deep/prompt.ts
index 6ec0406d..b333d5fb 100644
--- a/src/utils/langchain/chains/deep/prompt.ts
+++ b/src/utils/langchain/chains/deep/prompt.ts
@@ -1,231 +1,142 @@
-import { PromptTemplate } from "langchain/prompts";
-
-const tridentDeepExamples = `
-`;
-
-const tridentDeepHints = `
-Embassies: nwr["office"="diplomatic"]
-Hotels: nwr["tourism"="hotel"]
-Church: nwr["building"="church"]
-Mosque: nwr["building"="mosque"]
-Shrine: nwr["amenity"="place_of_worship"]["religion"="shinto"]
-Temples: nwr["amenity"="place_of_worship"]["religion"="buddhist"]
-Important note: Never use "religion"="buddhism". It is wrong. Use "religion"="buddhist" instead.
-Izakaya: nwr["amenity"="bar"]
-Company: nwr["office"="company"]
-Factories: nwr["landuse"="industrial"]
-Important note: Never use "landuse"="factory". It is wrong. Use "landuse"="industrial" instead.
-National treasure castles: nwr["historic"="castle"]["heritage"]
-Pizza shops: nwr["amenity"="fast_food"]["cuisine"="pizza"]
-Important note: Pizza shops are fast food, not restaurants!
-Domino's Pizza: nwr["name"~"Domino"]["cuisine"="pizza"]
-Seven-Eleven: nwr["name"~"7-Eleven"]
-Soba noodle shops: nwr["amenity"="restaurant"]["cuisine"="soba"]
-Ramen shops: nwr["amenity"="restaurant"]["cuisine"="ramen"]
-Western-style confectionery stores: nwr["shop"="confectionery"]
-`;
-const bboxQueryExample = `
-Input text:
-BoundingBoxWithConcern: bbox[[35.7062,139.7596,35.7235,139.7853]], parks
-Output:
-\`\`\`
-[out:json][timeout:30000];
-nwr["leisure"="park"](35.7062,139.7596,35.7235,139.7853);
-out geom;
-\`\`\`
-`;
-export const TRIDENT_DEEP_PROMPT = new PromptTemplate({
-  template: `You are an expert OpenStreetMap and Overpass API. You output the best Overpass API query based on input text.
-
-You will always reply according to the following rules:
-- Output valid Overpass API query.
-- The query timeout MUST be 30000.
-- The query will utilize a area specifier as needed.
-- The query will search nwr as needed.
-- The query MUST be out geom.
-- The query MUST be enclosed by three backticks on new lines, denoting that it is a code block.
-
-Examples:
-===
-Input text:
-Area: Sudan
-Output:
-\`\`\`
-[out:json][timeout:30000];
+import { MemoryVectorStore } from "langchain/vectorstores/memory";
+import {
+  SemanticSimilarityExampleSelector,
+  PromptTemplate,
+  FewShotPromptTemplate,
+} from "langchain/prompts";
+import { Embeddings } from "langchain/embeddings/base";
+
+/**
+ * Few-shot examples pairing an input line (`Area: …` or
+ * `AreaWithConcern: …`) with the Overpass QL query expected for it.
+ */
+export const tridentDeepExampleList: Array<{
+  input: string;
+  output: string;
+}> = [
+  {
+    input: "Area: Sudan",
+    output: `[out:json][timeout:30000];
 relation["boundary"="administrative"]["name"="Sudan"];
-out geom;
-\`\`\`
-
-Input text:
-Area: Lebanon
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Lebanon",
+    output: `[out:json][timeout:30000];
 relation["boundary"="administrative"]["name"="Lebanon"];
-out geom;
-\`\`\`
-
-Input text:
-Area: New York City
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: New York City",
+    output: `[out:json][timeout:30000];
 relation["boundary"="administrative"]["name"="City of New York"];
-out geom;
-\`\`\`
-
-Input text:
-Area: Tokyo
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Tokyo",
+    output: `[out:json][timeout:30000];
 relation["boundary"="administrative"]["name"="Tokyo"];
-out geom;
-\`\`\`
-
-Input text:
-Area: Taito, Tokyo
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Taito, Tokyo",
+    output: `[out:json][timeout:30000];
 area["name"="Tokyo"]->.searchArea;
 (
   relation["boundary"="administrative"]["name"="Taito"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Kita, Tokyo
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Kita, Tokyo",
+    output: `[out:json][timeout:30000];
 area["name"="Tokyo"]->.searchArea;
 (
   relation["boundary"="administrative"]["name"="Kita"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Urayasu, Chiba
-Output:
-\`\`\`
-[out:json][timeout:30000];
-area["name"="Chiba Prefecture"]->.searchArea;
-(
-  relation["boundary"="administrative"]["name"="Urayasu"](area.searchArea);
-);
-out geom;
-\`\`\`
-
-Input text:
-Area: Prizren, Kosovo
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Prizren, Kosovo",
+    output: `[out:json][timeout:30000];
 area["name"="Kosovo"]->.searchArea;
 (
   relation["boundary"="administrative"]["name"="Municipality of Prizren"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Mandera County, Kenya
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Mandera County, Kenya",
+    output: `[out:json][timeout:30000];
 area["name"="Kenya"]->.searchArea;
 (
   relation["boundary"="administrative"]["name"="Mandera County"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-Western Province, Sri Lanka
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Western Province, Sri Lanka",
+    output: `[out:json][timeout:30000];
 area["name"="Sri Lanka"]->.searchArea;
 (
   relation["boundary"="administrative"]["name"="Western Province"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Jajarkot, Karnali Province, Nepal
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Jajarkot, Karnali Province, Nepal",
+    output: `[out:json][timeout:30000];
 area["name"="Nepal"]->.outer;
 area["name"="Karnali Province"]->.inner;
 (
   relation["boundary"="administrative"]["name"="Jajarkot"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Rukum District, Karnali Province, Nepal
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Rukum District, Karnali Province, Nepal",
+    output: `[out:json][timeout:30000];
 area["name:en"="Nepal"]->.outer;
 area["name:en"="Karnali Province"]->.inner;
 (
   relation["boundary"="administrative"]["name:en"="Western Rukum District"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-Area: Acapulco, Guerrero State, Mexico
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Acapulco, Guerrero State, Mexico",
+    output: `[out:json][timeout:30000];
 area["name"="México"]->.outer;
 area["name"="Guerrero"]->.inner;
 (
   relation["boundary"="administrative"]["name"="Acapulco de Juárez"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-Somali Region, Ethiopia, Genale River
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "Area: Somali Region, Ethiopia, Genale River",
+    output: `[out:json][timeout:30000];
 area["name"="Ethiopia"]->.outer;
 area["name"="Somali Region"]->.inner;
 (
   nwr["name"="Genale river"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-Somali Region, Ethiopia, Shelters
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Somali Region, Ethiopia, Shelters",
+    output: `[out:json][timeout:30000];
 area["name"="Ethiopia"]->.outer;
 area["name"="Somali Region"]->.inner;
 (
   nwr["amenity"="shelter"](area.inner)(area.outer);
   nwr["amenity"="refugee_site"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-Jajarkot, Karnali Province, Nepal, Shelters
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Jajarkot, Karnali Province, Nepal, Shelters",
+    output: `[out:json][timeout:30000];
 area["name"="Nepal"]->.outer;
 area["name"="Karnali Province"]->.inner;
 area["name"="Jajarkot"]->.inner2;
@@ -233,142 +144,289 @@ area["name"="Jajarkot"]->.inner2;
   nwr["amenity"="shelter"](area.inner2)(area.inner)(area.outer);
   nwr["amenity"="refugee_site"](area.inner2)(area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-
-Input text:
-AreaWithConcern: Sudan, Hospitals
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Sudan, Hospitals",
+    output: `[out:json][timeout:30000];
 area["name"="Sudan"]->.searchArea;
 (
   nwr["amenity"="hospital"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Sudan, Shelters
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Sudan, Shelters",
+    output: `[out:json][timeout:30000];
 area["name"="Sudan"]->.searchArea;
 (
   nwr["amenity"="shelter"](area.searchArea);
   nwr["amenity"="refugee_site"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Kurunegala District, Sri Lanka, Hospitals
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Kurunegala District, Sri Lanka, Hospitals",
+    output: `[out:json][timeout:30000];
 area["name"="Sri Lanka"]->.outer;
 area["name"="Kurunegala District"]->.inner;
 (
   nwr["amenity"="hospital"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Urayasu, Chiba, Hospitals
-Output:
-\`\`\`
-[out:json][timeout:30000];
-area["name"="Chiba Prefecture"]->.outer;
-area["name"="Urayasu"]->.inner;
-(
-  nwr["amenity"="hospital"](area.inner)(area.outer);
-);
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Taito, Tokyo, Hotels
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Hotels",
+    output: `[out:json][timeout:30000];
 area["name"="Tokyo"]->.outer;
 area["name"="Taito"]->.inner;
 (
   nwr["tourism"="hotel"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Tokyo, Tokyo University campuses
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Tokyo, Tokyo University campuses",
+    output: `[out:json][timeout:30000];
 area["name"="Tokyo"]->.searchArea;
 (
   nwr["name"~"University of Tokyo"]["amenity"="university"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Juba, South Sudan, Military facilities
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Juba, South Sudan, Military facilities",
+    output: `[out:json][timeout:30000];
 area["name"="South Sudan"]->.outer;
 area["name"="Juba"]->.inner;
 (
   nwr["landuse"="military"](area.inner)(area.outer);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: New York City, UN facilities
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: New York City, UN facilities",
+    output: `[out:json][timeout:30000];
 area["name"="City of New York"]->.searchArea;
 (
   nwr["name"~"United Nations"]["building"="yes"](area.searchArea);
   nwr["name"~"United Nations"]["building:part"="yes"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Gaza Strip, UN facilities
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Gaza Strip, UN facilities",
+    output: `[out:json][timeout:30000];
 area["name"="Gaza Strip"]->.searchArea;
 (
   nwr["name"~"UN"](area.searchArea);
   nwr["name"~"UN"](area.searchArea);
 );
-out geom;
-\`\`\`
-
-Input text:
-AreaWithConcern: Prizren, Bars
-Output:
-\`\`\`
-[out:json][timeout:30000];
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Prizren, Kosovo, Bars",
+    output: `[out:json][timeout:30000];
 area["name"="Municipality of Prizren"]->.searchArea;
 (
   nwr["amenity"="bar"](area.searchArea);
 );
-out geom;
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Kosovo, Embassies",
+    output: `[out:json][timeout:30000];
+area["name"="Kosovo"]->.searchArea;
+(
+  nwr["office"="diplomatic"](area.searchArea);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Ramen shops",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["amenity"="restaurant"]["cuisine"="ramen"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Soba noodle shops",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["amenity"="restaurant"]["cuisine"="soba"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Pizza shops",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["amenity"="fast_food"]["cuisine"="pizza"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Sushi shops",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["amenity"="fast_food"]["cuisine"="sushi"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Izakaya",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["amenity"="bar"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Western-style confectionery stores",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["shop"="confectionery"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Seven-Eleven",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["name"~"7-Eleven"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Company",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["office"="company"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+  {
+    input: "AreaWithConcern: Taito, Tokyo, Factories",
+    output: `[out:json][timeout:30000];
+area["name"="Tokyo"]->.outer;
+area["name"="Taito"]->.inner;
+(
+  nwr["landuse"="industrial"](area.inner)(area.outer);
+);
+out geom;`,
+  },
+];
+
+/** Tag-mapping hints interpolated into the prompt suffix after "Useful hints:". */
+const tridentDeepHints = `
+Church: nwr["building"="church"]
+Mosque: nwr["building"="mosque"]
+Shrine: nwr["amenity"="place_of_worship"]["religion"="shinto"]
+Temples: nwr["amenity"="place_of_worship"]["religion"="buddhist"]
+Important note: Never use "religion"="buddhism". It is wrong. Use "religion"="buddhist" instead.
+
+Factories: nwr["landuse"="industrial"]
+Important note: Never use "landuse"="factory". It is wrong. Use "landuse"="industrial" instead.
+
+Izakaya: nwr["amenity"="bar"]
+Important note: Izakaya is just a bar. there is no special tag for Izakaya.
+
+Pizza shops: nwr["amenity"="fast_food"]["cuisine"="pizza"]
+Important note: Pizza shops are fast food, not restaurants!
+
+Sushi shops: nwr["amenity"="fast_food"]["cuisine"="sushi"]
+Important note: Sushi shops are fast food, not restaurants!
+
+Domino's Pizza: nwr["name"~"Domino"]["cuisine"="pizza"]
+
+National treasure castles: nwr["historic"="castle"]["heritage"]
+`;
+
+/** Instruction text used as the few-shot prompt's prefix, ahead of the selected examples. */
+const tridentDeepPromptPrefix = `You are an expert OpenStreetMap and Overpass API. You output the best Overpass API query based on input text.
+
+You will always reply according to the following rules:
+- Output valid Overpass API query.
+- The query timeout MUST be 30000.
+- The query will utilize a area specifier as needed.
+- The query will search nwr as needed.
+- The query MUST be out geom.
+- The query MUST be enclosed by three backticks on new lines, denoting that it is a code block.
+
+### Examples: ###
+`;
+
+/**
+ * Builds the few-shot prompt for the deep chain.
+ *
+ * Every entry of `tridentDeepExampleList` is added to an in-memory vector
+ * store backed by `embeddings`; the selector is configured with `k: 3` and
+ * keyed on `input`, so only the closest examples are rendered between the
+ * prefix and the suffix.
+ *
+ * NOTE(review): the examples are added one `await` at a time on every call,
+ * and the deep route calls this per request — presumably each add triggers
+ * an embeddings request, so caching the result is worth considering.
+ *
+ * @param embeddings - Embeddings implementation handed to the vector store.
+ * @returns A `FewShotPromptTemplate` whose only input variable is `input`.
+ */
+export const loadTridentDeepPrompt = async (embeddings: Embeddings) => {
+  const memoryVectorStore = new MemoryVectorStore(embeddings);
+  const exampleSelector = new SemanticSimilarityExampleSelector({
+    vectorStore: memoryVectorStore,
+    k: 3,
+    inputKeys: ["input"],
+  });
+  const examplePrompt = PromptTemplate.fromTemplate(
+    `Input:
+{input}
+
+Output:
+\`\`\`
+{output}
 \`\`\`
+`
+  );
+
+  for (const example of tridentDeepExampleList) {
+    await exampleSelector.addExample(example);
+  }
+
+  const dynamicPrompt = new FewShotPromptTemplate({
+    exampleSelector: exampleSelector,
+    examplePrompt: examplePrompt,
+    prefix: tridentDeepPromptPrefix,
+    suffix: `
 
 ===
 Useful hints:${tridentDeepHints}
 
-Input text:
-{text}
-Output:`,
-  inputVariables: ["text"],
-});
+===
+
+Input:
+{input}
+
+Output:
+`,
+    inputVariables: ["input"],
+  });
+  return dynamicPrompt;
+};