diff --git a/tamingllms/_build/html/searchindex.js b/tamingllms/_build/html/searchindex.js
index 98d6dd9..3a0623f 100644
--- a/tamingllms/_build/html/searchindex.js
+++ b/tamingllms/_build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["
1. Introduction", "Taming Large Language Models", "
4. Challenges of Evaluating LLM-based Applications", "
2. Output Size Limitations", "
3. Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": [0, 2], "llm": [0, 1, 3], "have": [0, 2, 3, 4], "emerg": 0, "transform": [0, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3], "chatgpt": [0, 2], "github": [0, 4], "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": [0, 4], "replit": 0, "other": [0, 2, 3, 4], "system": [0, 1, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": 0, "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3], "navig": 0, "As": [0, 3, 4], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": [0, 4], "requir": [0, 2, 3, 4], "manag": [0, 1, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 4], "determinist": [0, 1], "output": [0, 2], "prevent": [0, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": [0, 4], "understand": [0, 1, 2, 3, 4], "hidden": [0, 1], "cost": [0, 2, 4], "reliabl": [0, 1, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 3], "har": [0, 3], "impact": [0, 2, 3, 4], "capabl": [0, 2, 3, 4], "ar": [0, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 2, 3, 4], "throughout": [0, 3, 4], "tackl": 0, "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3, 4], "behavior": [0, 1, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 1, 2, 3, 4], "make": [0, 2, 3, 4], "test": [0, 1, 2], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3, 4], "consist": [0, 1, 2, 3, 4], "format": [0, 1, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 3], "error": [0, 1, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1, 2], "plausibl": 0, "sound": 0, "entir": [0, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "base": [0, 4], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2, 4], "methodologi": [0, 4], "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 3, 4], "hand": [0, 3, 4], "provid": [0, 2, 3], "concret": 0, "exampl": 0, "you": [0, 2, 3, 4], "run": [0, 4], "modifi": 0, "scenario": [0, 2], "solut": [0, 1, 3], "strategi": [0, 1, 2, 3], "best": [0, 1], "techniqu": [0, 1, 2, 3], "pattern": [0, 1, 2, 4], "anti": 0, "look": 0, "limit": [0, 2, 4], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 4], 
"robust": [0, 3, 4], "implement": [0, 1, 2, 3, 4], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "equip": 0, "leverag": [0, 3, 4], "effect": [0, 1, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": 0, "toward": 0, "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": 0, "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3, 4], "both": [0, 2], "help": [0, 2, 3, 4], "reader": 0, "decis": [0, 4], "about": [0, 2, 3, 4], "design": [0, 3, 4], "lead": [0, 2, 3, 4], "initi": [0, 3], "technic": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "develop": [0, 2, 3, 4], "exist": 0, "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": 0, "desir": [0, 4], "overcom": [0, 3], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "abl": [0, 3, 4], "framework": [0, 2, 4], "deploi": [0, 3], "proper": 0, "safeguard": 0, "realist": 0, "estim": 0, "project": 0, "timelin": 0, "To": [0, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "experi": [0, 2, 3, 4], "access": [0, 4], "knowledg": [0, 2], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 4], "grade": 0, "befor": 0, "dive": 0, "here": [0, 2, 3, 4], "": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": 0, "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "sourc": [0, 2, 4], "bin": 0, "On": [0, 4], "window": [0, 1], "script": [0, 1], "instal": [0, 4], "packag": 0, "pip": [0, 4], "r": [0, 2, 3, 4], "txt": [0, 2, 3, 4], "file": [0, 2, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": 0, "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 4], "commit": 0, "version": [0, 4], "control": [0, 2, 4], "It": [0, 3, 4], "contain": [0, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2, 4], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 3, 4], "retri": [0, 4], "logic": [0, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "like": [0, 2, 3, 4], "poetri": 0, "check": 0, "page": 0, "known": [0, 2, 4], "now": [0, 3, 4], "let": [0, 2, 3, 4], "begin": 0, "practic": [1, 2, 3], "guid": [1, 4], "python": [1, 4], "challeng": [1, 3], "why": 1, "thi": [1, 2, 3, 4], "book": 1, "matter": [1, 2], "overview": [1, 4], "kei": [1, 4], "temperatur": [1, 3], "random": [1, 2], "evalu": [1, 3], "measur": [1, 2], "observ": [1, 2, 4], "log": 1, "monitor": 1, "debug": 1, "respons": [1, 2, 3, 4], "workflow": 1, "common": [1, 3, 4], "failur": 1, "mode": 1, "text": [1, 2, 3, 4], "inconsist": [1, 2, 4], "valid": [1, 2, 4], "recoveri": 1, "enforc": [1, 4], "type": [1, 2, 3, 4], "detect": [1, 4], "ground": [1, 2], "retriev": 1, "augment": [1, 2], "rag": 1, "context": [1, 2, 3, 4], "select": 1, "index": [1, 3], "vector": 1, "store": [1, 3], "chunk": 1, "method": [1, 2, 3, 4], "pipelin": 1, "token": [1, 2, 4], "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "issu": [1, 2, 3, 4], "guard": 1, "content": 1, "filter": 1, "sanit": 1, "alert": 1, "constraint": [1, 3], "long": 1, "form": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "llama": 1, "llamafil": 1, "setup": 
1, "usag": 1, "ollama": 1, "deploy": 1, "consider": [1, 4], "migrat": 1, "complet": [1, 2, 3, 4], "util": [1, 3], "function": [1, 2, 3, 4], "configur": [1, 2], "templat": [1, 2], "recommend": [1, 3, 4], "librari": [1, 3, 4], "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "One": 2, "i": [2, 3, 4], "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3, 4], "each": [2, 3], "time": [2, 3, 4], "thei": [2, 3, 4], "re": [2, 3, 4], "queri": 2, "even": [2, 3, 4], "prompt": 2, "data": [2, 3, 4], "characterist": 2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "multipl": [2, 3], "ll": 2, "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "paramet": [2, 4], "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "compani": [2, 3, 4], "invest": [2, 4], "advic": 2, "mean": [2, 3, 4], "market": [2, 3, 4], "could": [2, 3], "yield": 2, "conclus": 2, "exceedingli": 2, "compar": [2, 3], "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": [2, 4], "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": [2, 4], "calcul": 2, "probabl": [2, 4], "distribut": [2, 4], "next": [2, 4], "set": [2, 3, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "environ": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "rang": [2, 3, 4], "chat": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "choic": 2, "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "we": [2, 3, 4], "length": [2, 4], "open": [2, 3, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 3, 4], "write": 2, "singl": [2, 3, 4], "summari": [2, 4], "2": [2, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "sec": [2, 3, 4], "detail": [2, 3, 4], "busi": 2, "well": [2, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "report": [2, 4], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "outlin": 2, "factor": [2, 3], "futur": 2, "invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3, 4], "simpl": [2, 3], "reveal": 2, "dramat": [2, 4], "alter": 2, "wai": [2, 3, 4], "systemat": 2, "At": 2, "too": [2, 3], "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "approach": [2, 3, 4], "inadequ": 2, "implic": 2, "profound": 2, "one": [2, 3, 4], "an": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": [2, 4], "seem": [2, 4], "safer": 2, "don": [2, 
3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "scale": [2, 4], "up": [2, 3, 4], "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3, 4], "aren": 2, "explicitli": 2, "grow": [2, 4], "train": 2, "code": [2, 4], "against": 2, "specif": [2, 3], "wtb": 2, "22": 2, "fig": [2, 3, 4], "4": 2, "relationship": 2, "between": [2, 3], "linear": 2, "below": [2, 3], "certain": [2, 3, 4], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "task": [2, 3, 4], "much": 2, "coax": 2, "them": [2, 3, 4], "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "what": [2, 4], "research": [2, 3, 4], "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], "accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "dynam": 2, "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": 2, "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3, 4], "kind": 2, "account": 2, "uniqu": 2, "across": 2, "sever": [2, 3, 4], "dimens": 2, "necessirali": [2, 4], "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": [2, 4], "resist": 2, "straightforward": [2, 3], "quantif": 2, "numer": 2, "score": [2, 4], "judgment": 2, "inher": [2, 3, 4], "human": [2, 3, 4], "depend": 2, "contamin": 2, "carefulli": [2, 4], "craft": [2, 4], "case": [2, 3, 4], "expect": [2, 3, 4], "e": [2, 3, 4], "g": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": [2, 4], "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": [2, 4], "through": [2, 3, 4], "annot": 2, "review": 2, "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "former": 2, "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "includ": [2, 3, 4], "thing": [2, 4], "meet": 2, "close": 2, "ti": 2, "align": [2, 3, 4], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3, 4], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "outcom": 2, "truth": 2, "option": [2, 3, 4], "standard": 2, "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "appli": [2, 3], "note": [2, 3, 4], "further": [2, 3, 4], "see": [2, 4], "avail": [2, 3, 4], "addition": 2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "easi": [2, 3], "two": [2, 3, 4], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "discuss": 2, "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "launch": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 4], 
"success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, "priorit": 2, "normal": [2, 4], "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3, 4], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "jason": 2, "wei": 2, "yi": [2, 4], "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, "dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": [2, 4], "arxiv": [2, 4], "org": [2, 4], "ab": [2, 4], "2206": 2, "07682": 2, "onli": [3, 4], "those": [3, 4], "who": 3, "go": [3, 4], "far": 3, "possibli": 3, "find": [3, 4], "eliot": 3, "short": 3, "charact": 3, "word": [3, 4], "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "tabl": 3, "show": [3, 4], "4096": 3, "16384": 3, "contrari": 3, "might": [3, 4], "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "sentenc": [3, 4], "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "7": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "6": [3, 4], "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "4o": [3, 4], "mini": [3, 4], "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "8": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": [3, 4], "due": 3, "disrupt": 3, "flow": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "especi": [3, 4], "true": [3, 4], "educ": 3, "tool": 3, "creation": 3, "address": [3, 4], "feasibl": 3, "effici": [3, 4], "section": [3, 4], "split": 3, "focus": [3, 4], "previou": 3, "For": [3, 4], "analyz": 3, "10k": 3, "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "number": [3, 4], "whether": [3, 4], "overlap": 3, "want": 3, "some": [3, 4], "sure": 3, "semant": 3, "lost": 3, "path": 3, "mani": [3, 4], "computation": 3, "cheap": 3, "sinc": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "embed": 3, "level": [3, 4], "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "class": [3, 4], "extract": [3, 4], "your": [3, 4], "made": [3, 4], "talk": 3, "theme": 3, "topic": [3, 4], "langchain": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "specifi": [3, 4], "arg": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "background": 3, "action": 3, "input_text": 3, "actual": [3, 4], "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "instanc": 3, "name": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 3, 
"api_kei": 3, "get_dynamic_prompt_templ": 3, "dict": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": [3, 4], "part": [3, 4], "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "yet": 3, "elif": 3, "last": [3, 4], "second": 3, "main": [3, 4], "given": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "n": 3, "join": 3, "sampl": [3, 4], "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": [3, 4], "analyst": 3, "readabl": 3, "move": 3, "insight": [3, 4], "local": [3, 4], "apple_report": 3, "w": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "identifi": [3, 4], "luation": 3, "term": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "granular": 3, "assess": 3, "few": [3, 4], "interest": [3, 4], "high": 3, "smooth": 3, "upon": 3, "head": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "variou": [3, 4], "revenu": [3, 4], "segment": [3, 4], "profit": [3, 4], "liquid": 3, "capit": [3, 4], "resourc": 3, "inclus": 3, "despit": [3, 4], "depth": 3, "wide": [3, 4], "expert": [3, 4], "nuanc": [3, 4], "overlook": 3, "mitig": [3, 4], "fit": 3, "within": [3, 4], "altern": 3, "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "significantli": 3, "enhanc": 3, "own": 3, "introduc": [3, 4], "layer": [3, 4], "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "contribut": 3, "overal": [3, 4], "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "prepar": 3, "friendli": 3, "improv": [3, 4], "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "convei": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "essenti": 3, "condens": 3, "adapt": 3, "adjust": [3, 4], "flexibl": [3, 4], "constrain": [3, 4], "collect": 3, "versatil": 3, "also": [3, 4], "drive": 3, "grace": 3, "fallback": 3, "empow": 3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "b": 4, "cameron": 4, "excel": 4, "easili": 4, "databas": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "iphon": 4, "mac": 4, "ipad": 4, "impress": 4, "trend": 4, "notic": 4, "trillion": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "definit": 4, "figur": 4, "compli": 4, "regul": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "fals": 4, "clearli": 4, "obtain": 4, "deviat": 4, "lack": 4, "correct": 4, "emploi": 4, "schema": 4, "guidanc": 4, "blueprint": 4, "achiev": 4, "pars": 4, "nativ": 4, "regular": 4, "express": 4, "dedic": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "programmat": 4, "depict": 4, "thellm": 4, "conceptu": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "response_format": 4, "json_object": 4, "approxim": 4, "628": 4, "553": 4, "000": 4, "held": 4, "affili": 4, "sheer": 4, "mention": 4, "15": 4, "115": 4, "823": 4, "stock": 4, "outstand": 4, "octob": 4, "18": 4, "circul": 4, "plai": 4, "googl": 4, "vertex": 
4, "match": 4, "releas": 4, "suppli": 4, "so": 4, "worri": 4, "omit": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "basemodel": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "beta": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "convert": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "d": 4, "c": 4, "cupertino": 4, "wa": 4, "usabl": 4, "beg": 4, "simplifi": 4, "abstract": 4, "with_structured_output": 4, "directli": 4, "descript": 4, "runnabl": 4, "correspond": 4, "typeddict": 4, "dictionari": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "out24": 4, "under": 4, "hood": 4, "logit": 4, "raw": 4, "neural": 4, "network": 4, "prefer": 4, "fine": 4, "grain": 4, "regex": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "being": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "sentiment": 4, "label": 4, "assist": 4, "special": 4, "neg": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "quit": 4, "wrapper": 4, "fomer": 4, "wider": 4, "structuredoutputpars": 4, "overhead": 4, "infer": 4, "done": 4, "know": 4, "exactli": 4, "field": 4, "element": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "downstream": 4, "xml": 4, "area": 4, "twt": 4, "24": 4, "suggest": 4, "impos": 4, "intens": 4, "evid": 4, "aid24": 4, "degrad": 4, "decod": 4, "multi": 4, "step": 4, "thought": 4, "overli": 4, "strict": 4, "hinder": 4, "outweigh": 4, "team": 4, "paper": 4, "me": 4, "speak": 4, "freeli": 4, "studi": 4, "claim": 4, "rebutt": 4, "argu": 4, "reproduct": 4, "paint": 4, "pictur": 4, "publicli": 4, "independ": 4, "verif": 4, "dot24": 4, "flaw": 4, "believ": 4, "led": 4, "inaccur": 4, "classif": 4, "reconcil": 4, "uneven": 4, "didn": 4, "properli": 4, "conflat": 4, "perspect": 4, "trade": 4, "surround": 4, "ideal": 4, "drawback": 4, "unlock": 4, "valuabl": 4, "aider": 4, "14": 4, "html": 4, "dottxt": 4, "sai": 4, "demo": 4, "tree": 4, "safe": 4, "io": 4, "zhi": 4, "rui": 4, "tam": 4, "cheng": 4, "kuang": 4, "wu": 4, "lin": 4, "tsai": 4, "chieh": 4, "yen": 4, "hung": 4, "lee": 4, "yun": 4, "nung": 4, "chen": 4, "2408": 4, "02442": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "1": [0, 1, 3], "python": 0, "setup": 0, "2": [0, 1, 3], "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "3": [0, 1, 3], "code": [0, 1], "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 3, 4], "chapter": 1, "non": [1, 2], "determin": 1, "eval": [1, 2], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "4": [1, 3], "hallucin": 1, "The": [1, 2, 4], "realiti": 1, "gap": 1, "5": 1, "cost": [1, 3], "factor": 1, "6": 1, "safeti": 1, "concern": 1, 
"7": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "8": 1, "break": 1, "free": 1, "from": [1, 4], "cloud": 1, "provid": [1, 4], "appendix": 1, "exampl": [1, 2, 3, 4], "b": 1, "tool": [1, 2, 4], "resourc": 1, "evalu": 2, "llm": [2, 4], "base": [2, 3], "applic": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "tradit": 2, "softwar": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "dataset": 2, "metric": 2, "layer": 2, "assess": 2, "leaderboard": 2, "rank": 2, "refer": [2, 3, 4], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "gener": [3, 4], "long": 3, "form": 3, "step": 3, "write": 3, "prompt": [3, 4], "templat": 3, "construct": 3, "dynam": 3, "paramet": 3, "report": 3, "usag": 3, "discuss": [3, 4], "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4, "ongo": 4, "debat": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": [[0, "setting-up-your-environment"]], "1. Python Environment Setup": [[0, "python-environment-setup"]], "2. API Keys Configuration": [[0, "api-keys-configuration"]], "3. 
Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "Taming Large Language Models": [[1, "taming-large-language-models"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Non-determinism & Evals": [[1, "chapter-2-non-determinism-evals"]], "Chapter 3: Wrestling with Structured Output": [[1, "chapter-3-wrestling-with-structured-output"]], "Chapter 4: Hallucination: The Reality Gap": [[1, "chapter-4-hallucination-the-reality-gap"]], "Chapter 5: The Cost Factor": [[1, "chapter-5-the-cost-factor"]], "Chapter 6: Safety Concerns": [[1, "chapter-6-safety-concerns"]], "Chapter 7: Size and Length Limitations": [[1, "chapter-7-size-and-length-limitations"]], "Chapter 8: Breaking Free from Cloud Providers": [[1, "chapter-8-breaking-free-from-cloud-providers"]], "Appendix A: Code Examples": [[1, "appendix-a-code-examples"]], "Appendix B: Tools and Resources": [[1, "appendix-b-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, "challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, "design-considerations"]], "Key Components": [[2, "key-components"]], "Examples: The Dataset": [[2, "examples-the-dataset"]], "Metrics: The Metrics Layer": [[2, "metrics-the-metrics-layer"]], "Evaluator: The Assessment Layer": [[2, "evaluator-the-assessment-layer"]], "Leaderboard: The Ranking Layer": [[2, "leaderboard-the-ranking-layer"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"], [4, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Step 2: Writing the Base Prompt Template": [[3, "step-2-writing-the-base-prompt-template"]], "Step 3: Constructing Dynamic Prompt Parameters": [[3, "step-3-constructing-dynamic-prompt-parameters"]], "Step 4: Generating the Report": [[3, "step-4-generating-the-report"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"], [4, "discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, "structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, 
"outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]], "Ongoing Debate on Structured Output from LLMs": [[4, "ongoing-debate-on-structured-output-from-llms"]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["
1. Introduction", "Taming Large Language Models", "
4. Challenges of Evaluating LLM-based Applications", "
2. Output Size Limitations", "
3. Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": [0, 2], "llm": [0, 1, 3], "have": [0, 2, 3, 4], "emerg": 0, "transform": [0, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3, 4], "chatgpt": [0, 2], "github": [0, 4], "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": [0, 4], "replit": 0, "other": [0, 2, 3, 4], "system": [0, 1, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": 0, "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3], "navig": 0, "As": [0, 3, 4], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": [0, 4], "requir": [0, 2, 3, 4], "manag": [0, 1, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 4], "determinist": [0, 1], "output": [0, 2], "prevent": [0, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": [0, 4], "understand": [0, 1, 2, 3, 4], "hidden": [0, 1], "cost": [0, 2, 4], "reliabl": [0, 1, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 3], "har": [0, 3], "impact": [0, 2, 3, 4], "capabl": [0, 2, 3, 4], "ar": [0, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 2, 3, 4], "throughout": [0, 3, 4], "tackl": 0, "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3, 4], "behavior": [0, 1, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 1, 2, 3, 4], "make": [0, 2, 3, 4], "test": [0, 1, 2], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3, 4], "consist": [0, 1, 2, 3, 4], "format": [0, 1, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 3], "error": [0, 1, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1, 2], "plausibl": 0, "sound": 0, "entir": [0, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "base": [0, 4], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2, 4], "methodologi": [0, 4], "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 3, 4], "hand": [0, 3, 4], "provid": [0, 2, 3], "concret": 0, "exampl": 0, "you": [0, 2, 3, 4], "run": [0, 4], "modifi": 0, "scenario": [0, 2], "solut": [0, 1, 3], "strategi": [0, 1, 2, 3], "best": [0, 1], "techniqu": [0, 1, 2, 3], "pattern": [0, 1, 2, 4], "anti": 0, "look": 0, "limit": [0, 2, 4], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 
4], "robust": [0, 3, 4], "implement": [0, 1, 2, 3, 4], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "equip": 0, "leverag": [0, 3, 4], "effect": [0, 1, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": 0, "toward": 0, "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": 0, "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3, 4], "both": [0, 2], "help": [0, 2, 3, 4], "reader": 0, "decis": [0, 4], "about": [0, 2, 3, 4], "design": [0, 3, 4], "lead": [0, 2, 3, 4], "initi": [0, 3], "technic": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "develop": [0, 2, 3, 4], "exist": 0, "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": 0, "desir": [0, 4], "overcom": [0, 3], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "abl": [0, 3, 4], "framework": [0, 2, 4], "deploi": [0, 3], "proper": [0, 4], "safeguard": 0, "realist": 0, "estim": 0, "project": 0, "timelin": 0, "To": [0, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "experi": [0, 2, 3, 4], "access": [0, 4], "knowledg": [0, 2], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 4], "grade": 0, "befor": 0, "dive": 0, "here": [0, 2, 3, 4], "": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": 0, "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "sourc": [0, 2, 4], "bin": 0, "On": [0, 4], "window": [0, 1], "script": [0, 1], "instal": [0, 4], "packag": 0, "pip": [0, 4], "r": [0, 2, 3, 4], "txt": [0, 2, 3, 4], "file": [0, 2, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": 0, "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 4], "commit": 0, "version": [0, 4], "control": [0, 2, 4], "It": [0, 3, 4], "contain": [0, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2, 4], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 3, 4], "retri": [0, 4], "logic": [0, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "like": [0, 2, 3, 4], "poetri": 0, "check": 0, "page": 0, "known": [0, 2, 4], "now": [0, 3, 4], "let": [0, 2, 3, 4], "begin": 0, "practic": [1, 2, 3], "guid": [1, 4], "python": [1, 4], "challeng": [1, 3], "why": 1, "thi": [1, 2, 3, 4], "book": 1, "matter": [1, 2], "overview": [1, 4], "kei": [1, 4], "temperatur": [1, 3], "random": [1, 2], "evalu": [1, 3], "measur": [1, 2], "observ": [1, 2, 4], "log": 1, "monitor": 1, "debug": 1, "respons": [1, 2, 3, 4], "workflow": 1, "common": [1, 3, 4], "failur": 1, "mode": 1, "text": [1, 2, 3, 4], "inconsist": [1, 2, 4], "valid": [1, 2, 4], "recoveri": 1, "enforc": [1, 4], "type": [1, 2, 3, 4], "detect": [1, 4], "ground": [1, 2], "retriev": 1, "augment": [1, 2], "rag": 1, "context": [1, 2, 3, 4], "select": 1, "index": [1, 3], "vector": 1, "store": [1, 3], "chunk": 1, "method": [1, 2, 3, 4], "pipelin": 1, "token": [1, 2, 4], "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "issu": [1, 2, 3, 4], "guard": 1, "content": 1, "filter": 1, "sanit": 1, "alert": 1, "constraint": [1, 3], "long": 1, "form": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "llama": 1, "llamafil": 1, 
"setup": 1, "usag": 1, "ollama": 1, "deploy": 1, "consider": [1, 4], "migrat": 1, "complet": [1, 2, 3, 4], "util": [1, 3], "function": [1, 2, 3, 4], "configur": [1, 2], "templat": [1, 2], "recommend": [1, 3, 4], "librari": [1, 3, 4], "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "One": 2, "i": [2, 3, 4], "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3, 4], "each": [2, 3], "time": [2, 3, 4], "thei": [2, 3, 4], "re": [2, 3, 4], "queri": 2, "even": [2, 3, 4], "prompt": 2, "data": [2, 3, 4], "characterist": 2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "multipl": [2, 3], "ll": 2, "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "paramet": [2, 4], "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "compani": [2, 3, 4], "invest": [2, 4], "advic": 2, "mean": [2, 3, 4], "market": [2, 3, 4], "could": [2, 3], "yield": 2, "conclus": 2, "exceedingli": 2, "compar": [2, 3], "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": 2, "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": 2, "calcul": 2, "probabl": [2, 4], "distribut": [2, 4], "next": [2, 4], "set": [2, 3, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "environ": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "rang": [2, 3, 4], "chat": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "choic": 2, "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "we": [2, 3, 4], "length": [2, 4], "open": [2, 3, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 3, 4], "write": 2, "singl": [2, 3, 4], "summari": [2, 4], "2": [2, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "sec": [2, 3, 4], "detail": [2, 3, 4], "busi": 2, "well": [2, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "report": [2, 4], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "outlin": 2, "factor": [2, 3], "futur": 2, "invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3, 4], "simpl": [2, 3], "reveal": 2, "dramat": [2, 4], "alter": 2, "wai": [2, 3, 4], "systemat": 2, "At": 2, "too": [2, 3], "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "approach": [2, 3, 4], "inadequ": 2, "implic": 2, "profound": 2, "one": [2, 3, 4], "an": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": [2, 4], "seem": [2, 4], "safer": 2, "don": [2, 
3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "scale": [2, 4], "up": [2, 3, 4], "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3, 4], "aren": 2, "explicitli": 2, "grow": [2, 4], "train": 2, "code": [2, 4], "against": 2, "specif": [2, 3], "wtb": 2, "22": 2, "fig": [2, 3, 4], "4": 2, "relationship": 2, "between": [2, 3], "linear": 2, "below": [2, 3], "certain": [2, 3, 4], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "task": [2, 3, 4], "much": 2, "coax": 2, "them": [2, 3, 4], "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "what": [2, 4], "research": [2, 3, 4], "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], "accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "dynam": 2, "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3, 4], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": [2, 4], "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3, 4], "kind": 2, "account": 2, "uniqu": 2, "across": 2, "sever": [2, 3, 4], "dimens": 2, "necessirali": 2, "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": [2, 4], "resist": 2, "straightforward": [2, 3], "quantif": 2, "numer": 2, "score": [2, 4], "judgment": 2, "inher": [2, 3, 4], "human": [2, 3, 4], "depend": 2, "contamin": 2, "carefulli": [2, 4], "craft": [2, 4], "case": [2, 3, 4], "expect": [2, 3, 4], "e": [2, 3, 4], "g": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": [2, 4], "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": [2, 4], "through": [2, 3, 4], "annot": 2, "review": [2, 4], "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "former": 2, "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "includ": [2, 3, 4], "thing": [2, 4], "meet": 2, "close": 2, "ti": 2, "align": [2, 3, 4], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3, 4], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "outcom": 2, "truth": 2, "option": [2, 3, 4], "standard": 2, "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "appli": [2, 3], "note": [2, 3, 4], "further": [2, 3, 4], "see": [2, 4], "avail": [2, 3, 4], "addition": 2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "easi": [2, 3], "two": [2, 3, 4], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "discuss": 2, "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "launch": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 
4], "success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, "priorit": 2, "normal": [2, 4], "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3, 4], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "jason": 2, "wei": 2, "yi": [2, 4], "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, "dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": [2, 4], "arxiv": [2, 4], "org": [2, 4], "ab": [2, 4], "2206": 2, "07682": 2, "onli": [3, 4], "those": [3, 4], "who": 3, "go": [3, 4], "far": 3, "possibli": 3, "find": [3, 4], "eliot": 3, "short": 3, "charact": 3, "word": [3, 4], "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "tabl": 3, "show": [3, 4], "4096": 3, "16384": 3, "contrari": 3, "might": [3, 4], "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "sentenc": [3, 4], "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "7": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "6": [3, 4], "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "4o": [3, 4], "mini": [3, 4], "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "8": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": [3, 4], "due": 3, "disrupt": 3, "flow": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "especi": [3, 4], "true": [3, 4], "educ": 3, "tool": 3, "creation": 3, "address": [3, 4], "feasibl": 3, "effici": [3, 4], "section": [3, 4], "split": 3, "focus": [3, 4], "previou": 3, "For": [3, 4], "analyz": 3, "10k": 3, "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "number": [3, 4], "whether": [3, 4], "overlap": 3, "want": 3, "some": [3, 4], "sure": 3, "semant": 3, "lost": 3, "path": 3, "mani": [3, 4], "computation": 3, "cheap": 3, "sinc": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "embed": 3, "level": [3, 4], "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "class": [3, 4], "extract": [3, 4], "your": [3, 4], "made": [3, 4], "talk": 3, "theme": 3, "topic": [3, 4], "langchain": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "specifi": [3, 4], "arg": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "background": 3, "action": 3, "input_text": 3, "actual": [3, 4], "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "instanc": 3, "name": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 
3, "api_kei": 3, "get_dynamic_prompt_templ": 3, "dict": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": [3, 4], "part": [3, 4], "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "yet": 3, "elif": 3, "last": [3, 4], "second": 3, "main": [3, 4], "given": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "n": 3, "join": 3, "sampl": [3, 4], "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": [3, 4], "analyst": 3, "readabl": 3, "move": 3, "insight": [3, 4], "local": [3, 4], "apple_report": 3, "w": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "identifi": [3, 4], "luation": 3, "term": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "granular": 3, "assess": 3, "few": [3, 4], "interest": [3, 4], "high": 3, "smooth": 3, "upon": 3, "head": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "variou": [3, 4], "revenu": [3, 4], "segment": [3, 4], "profit": [3, 4], "liquid": 3, "capit": [3, 4], "resourc": 3, "inclus": 3, "despit": [3, 4], "depth": 3, "wide": [3, 4], "expert": [3, 4], "nuanc": [3, 4], "overlook": 3, "mitig": [3, 4], "fit": 3, "within": [3, 4], "altern": 3, "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "significantli": 3, "enhanc": 3, "own": 3, "introduc": [3, 4], "layer": [3, 4], "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "contribut": 3, "overal": [3, 4], "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "prepar": 3, "friendli": 3, "improv": [3, 4], "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "convei": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "essenti": 3, "condens": 3, "adapt": 3, "adjust": [3, 4], "flexibl": [3, 4], "constrain": [3, 4], "collect": 3, "versatil": 3, "also": [3, 4], "drive": 3, "grace": 3, "fallback": 3, "empow": 3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "b": 4, "cameron": 4, "excel": 4, "easili": 4, "databas": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "iphon": 4, "mac": 4, "ipad": 4, "impress": 4, "trend": 4, "notic": 4, "trillion": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "definit": 4, "figur": 4, "compli": 4, "regul": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "fals": 4, "clearli": 4, "obtain": 4, "deviat": 4, "lack": 4, "correct": 4, "emploi": 4, "schema": 4, "guidanc": 4, "blueprint": 4, "achiev": 4, "pars": 4, "nativ": 4, "regular": 4, "express": 4, "dedic": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "todai": 4, "programmat": 4, "depict": 4, "thellm": 4, "conceptu": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "response_format": 4, "json_object": 4, "approxim": 4, "628": 4, "553": 4, "000": 4, "held": 4, "affili": 4, "sheer": 4, "mention": 4, "15": 4, "115": 4, "823": 4, "stock": 4, "outstand": 4, "octob": 4, "18": 4, "circul": 4, "plai": 4, "googl": 
4, "vertex": 4, "match": 4, "releas": 4, "suppli": 4, "so": 4, "worri": 4, "omit": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "basemodel": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "beta": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "convert": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "d": 4, "c": 4, "cupertino": 4, "wa": 4, "usabl": 4, "beg": 4, "simplifi": 4, "abstract": 4, "with_structured_output": 4, "directli": 4, "descript": 4, "runnabl": 4, "correspond": 4, "typeddict": 4, "dictionari": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "out24": 4, "under": 4, "hood": 4, "logit": 4, "raw": 4, "neural": 4, "network": 4, "prefer": 4, "fine": 4, "grain": 4, "regex": 4, "major": 4, "proprietari": 4, "vllm": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "being": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "sentiment": 4, "label": 4, "assist": 4, "special": 4, "neg": 4, "unexpect": 4, "malform": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "quit": 4, "wrapper": 4, "wider": 4, "structuredoutputpars": 4, "done": 4, "know": 4, "exactli": 4, "field": 4, "element": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "downstream": 4, "xml": 4, "area": 4, "me": 4, "speak": 4, "freeli": 4, "studi": 4, "twt": 4, "24": 4, "suggest": 4, "impos": 4, "intens": 4, "evid": 4, "aid24": 4, "degrad": 4, "decod": 4, "multi": 4, "step": 4, "thought": 4, "overli": 4, "strict": 4, "hinder": 4, "outweigh": 4, "team": 4, "rebutt": 4, "argu": 4, "reproduct": 4, "paper": 4, "paint": 4, "pictur": 4, "publicli": 4, "independ": 4, "verif": 4, "dot24": 4, "flaw": 4, "believ": 4, "led": 4, "inaccur": 4, "classif": 4, "reconcil": 4, "uneven": 4, "didn": 4, "properli": 4, "conflat": 4, "argument": 4, "favor": 4, "trade": 4, "surround": 4, "ideal": 4, "drawback": 4, "unlock": 4, "valuabl": 4, "thank": 4, "pfiffer": 4, "hi": 4, "feedback": 4, "aider": 4, "14": 4, "html": 4, "dottxt": 4, "sai": 4, "demo": 4, "tree": 4, "safe": 4, "io": 4, "zhi": 4, "rui": 4, "tam": 4, "cheng": 4, "kuang": 4, "wu": 4, "lin": 4, "tsai": 4, "chieh": 4, "yen": 4, "hung": 4, "lee": 4, "yun": 4, "nung": 4, "chen": 4, "2408": 4, "02442": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "1": [0, 1, 3], "python": 0, "setup": 0, "2": [0, 1, 3], "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "3": [0, 1, 3], "code": [0, 1], "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 3, 4], "chapter": 1, "non": [1, 2], "determin": 1, "eval": [1, 2], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "4": [1, 3], "hallucin": 1, "The": [1, 2, 4], 
"realiti": 1, "gap": 1, "5": 1, "cost": [1, 3], "factor": 1, "6": 1, "safeti": 1, "concern": 1, "7": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "8": 1, "break": 1, "free": 1, "from": 1, "cloud": 1, "provid": [1, 4], "appendix": 1, "exampl": [1, 2, 3, 4], "b": 1, "tool": [1, 2, 4], "resourc": 1, "evalu": 2, "llm": [2, 4], "base": [2, 3], "applic": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "tradit": 2, "softwar": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "dataset": 2, "metric": 2, "layer": 2, "assess": 2, "leaderboard": 2, "rank": 2, "refer": [2, 3, 4], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "gener": [3, 4], "long": 3, "form": 3, "step": 3, "write": 3, "prompt": [3, 4], "templat": 3, "construct": 3, "dynam": 3, "paramet": 3, "report": 3, "usag": 3, "discuss": [3, 4], "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4, "ongo": 4, "debat": 4, "acknowledg": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": [[0, "setting-up-your-environment"]], "1. Python Environment Setup": [[0, "python-environment-setup"]], "2. API Keys Configuration": [[0, "api-keys-configuration"]], "3. 
Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "Taming Large Language Models": [[1, "taming-large-language-models"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Non-determinism & Evals": [[1, "chapter-2-non-determinism-evals"]], "Chapter 3: Wrestling with Structured Output": [[1, "chapter-3-wrestling-with-structured-output"]], "Chapter 4: Hallucination: The Reality Gap": [[1, "chapter-4-hallucination-the-reality-gap"]], "Chapter 5: The Cost Factor": [[1, "chapter-5-the-cost-factor"]], "Chapter 6: Safety Concerns": [[1, "chapter-6-safety-concerns"]], "Chapter 7: Size and Length Limitations": [[1, "chapter-7-size-and-length-limitations"]], "Chapter 8: Breaking Free from Cloud Providers": [[1, "chapter-8-breaking-free-from-cloud-providers"]], "Appendix A: Code Examples": [[1, "appendix-a-code-examples"]], "Appendix B: Tools and Resources": [[1, "appendix-b-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, "challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, "design-considerations"]], "Key Components": [[2, "key-components"]], "Examples: The Dataset": [[2, "examples-the-dataset"]], "Metrics: The Metrics Layer": [[2, "metrics-the-metrics-layer"]], "Evaluator: The Assessment Layer": [[2, "evaluator-the-assessment-layer"]], "Leaderboard: The Ranking Layer": [[2, "leaderboard-the-ranking-layer"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"], [4, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Step 2: Writing the Base Prompt Template": [[3, "step-2-writing-the-base-prompt-template"]], "Step 3: Constructing Dynamic Prompt Parameters": [[3, "step-3-constructing-dynamic-prompt-parameters"]], "Step 4: Generating the Report": [[3, "step-4-generating-the-report"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"], [4, "discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, "structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, 
"outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]], "Ongoing Debate on LLMs Structured Output": [[4, "ongoing-debate-on-llms-structured-output"]], "Acknowledgements": [[4, "acknowledgements"]]}, "indexentries": {}})
\ No newline at end of file
diff --git a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb
index 1bfecae..dd250f5 100644
--- a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb
+++ b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "ba9462dd",
+ "id": "d255ae16",
"metadata": {},
"source": [
"(intro)=\n",
diff --git a/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb b/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb
index 5acf0e2..5c15d06 100644
--- a/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb
+++ b/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb
@@ -290,12 +290,12 @@
"\n",
"Provider-specific APIs can offer ways to handle those challenges. We will explore two approaches here using OpenAI's API:\n",
"\n",
- "* **JSON Mode**: Some LLM APIs offer features specifically designed for generating JSON output.\n",
+ "* **JSON Mode**: Most LLM APIs today offer features specifically designed for generating JSON output.\n",
"* **Structured Outputs**: Some LLM APIs offer features specifically designed for generating structured outputs with type safety.\n",
"\n",
"#### JSON Mode\n",
"\n",
- "JSON mode is a feature provided by some LLM APIs, such as OpenAI's, that allows the model to generate output in JSON format. This is particularly useful when you need structured data as a result, such as when parsing the output programmatically or integrating it with other systems that require JSON input. As depicted in {numref}`json-mode`, JSON mode is implemented by instructing theLLM model to use JSON as response format and optionally defining a target schema.\n",
+ "JSON mode is a feature provided by most LLM API providers, such as OpenAI, that allows the model to generate output in JSON format. This is particularly useful when you need structured data as a result, such as when parsing the output programmatically or integrating it with other systems that require JSON input. As depicted in {numref}`json-mode`, JSON mode is implemented by instructing theLLM model to use JSON as response format and optionally defining a target schema.\n",
"\n",
"\n",
"```{figure} ../_static/structured_output/json.svg\n",
@@ -610,7 +610,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Outlines supports the OpenAI API. However, one of its key advantages is the ability to ensure structured output for Open Source models, which often lack such guarantees by default."
+ "Outlines can support major proprietary LLM APIs (e.g. OpenAI's via vLLM). However, one of its key advantages is the ability to ensure structured output for Open Source models, which often lack such guarantees by default."
]
},
{
@@ -674,6 +674,13 @@
"print(answer)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this simple example, we use Outlines' `choice` method to constrain the model output to a predefined set of options (\"Positive\" or \"Negative\"). This ensures the model can only return one of these values, avoiding any unexpected or malformed responses.\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -735,11 +742,9 @@
"\n",
"* **Simplicity vs. Control**: One-shot prompts are simple but offer limited control. `LangChain`, and Outlines provide greater control but might have a steeper learning curve though quite manageable.\n",
"\n",
- "* **Native LLM Support**: `with_structured_output` in LangChain relies on the underlying LLM having built-in support for structured output APIs, i.e. LangChain is just a wrapper around the LLM's structured output API. Other methods, like parsing or using Outlines, are more broadly applicable with the fomer enabling a wider range of Open Source models.\n",
- "\n",
- "* **Flexibility**: Outlines and LangChain's `StructuredOutputParser` offer the most flexibility for defining custom output structures.\n",
+ "* **Native LLM Support**: `with_structured_output` in LangChain relies on the underlying LLM having built-in support for structured output APIs, i.e. LangChain is a wrapper around the underlying LLM's structured output API. Outlines, on the other hand, is more broadly applicable enabling a wider range of Open Source models.\n",
"\n",
- "* **Performance**: Outlines' may offer performance benefits as it introduces no overhead during inference a feature not necessiraly shared by other methods."
+ "* **Flexibility**: Outlines and LangChain's `StructuredOutputParser` offer the most flexibility for defining custom output structures."
]
},
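+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the comparison above concrete, here is a rough, minimal sketch of the `with_structured_output` pattern, assuming a recent `langchain_openai` release, a placeholder model name, and a hypothetical `CompanyFacts` schema defined only for illustration:\n",
+ "\n",
+ "```python\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "from pydantic import BaseModel, Field\n",
+ "\n",
+ "class CompanyFacts(BaseModel):  # hypothetical schema, for illustration only\n",
+ "    name: str = Field(description=\"Company name mentioned in the text\")\n",
+ "    fiscal_year: int = Field(description=\"Fiscal year referenced in the text\")\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o-mini\")  # placeholder model name\n",
+ "structured_llm = llm.with_structured_output(CompanyFacts)  # wraps the provider's structured output API\n",
+ "\n",
+ "result = structured_llm.invoke(\"Apple Inc. filed its annual report for fiscal year 2024.\")\n",
+ "print(result.name, result.fiscal_year)\n",
+ "```\n"
+ ]
+ },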
{
@@ -763,11 +768,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Ongoing Debate on Structured Output from LLMs\n",
+ "### Ongoing Debate on LLMs Structured Output\n",
"\n",
"The use of structured output, like JSON or XML, for Large Language Models (LLMs) is a developing area. While structured output offers clear benefits in parsing, robustness, and integration, there is growing debate on whether it also potentially comes at the cost of reasoning abilities. \n",
"\n",
- "Recent research {cite}`tam2024letspeakfreelystudy` suggests that imposing format restrictions on LLMs might impact their performance, particularly in reasoning-intensive tasks. Further evidence {cite}`aider2024codejson` suggests LLMs may produce lower quality code if they’re asked to return it as part of a structured JSON response:\n",
+ "Recent research \"Let Me Speak Freely? A Study on the Impact of Format Restrictions on Performance of Large Language Models\" {cite}`tam2024letspeakfreelystudy` suggests that imposing format restrictions on LLMs might impact their performance, particularly in reasoning-intensive tasks. Further evidence {cite}`aider2024codejson` suggests LLMs may produce lower quality code if they’re asked to return it as part of a structured JSON response, in particular:\n",
"\n",
"* **Potential performance degradation:** Enforcing structured output, especially through constrained decoding methods like JSON-mode, can negatively impact an LLM's reasoning abilities. This is particularly evident in tasks that require multi-step reasoning or complex thought processes.\n",
"\n",
@@ -775,7 +780,7 @@
"\n",
"* **Increased complexity in prompt engineering:** Crafting prompts that effectively guide LLMs to generate structured outputs while maintaining performance can be challenging. It often requires careful consideration of the schema, the task instructions, and the desired level of detail in the response.\n",
"\n",
- "On the other hand, those findings are not without criticism. The .txt team challenges the findings of the paper \"Let Me Speak Freely? A Study on the Impact of Format Restrictions on Performance of Large Language Models\" {cite}`tam2024letspeakfreelystudy`. The paper claims that structured output formats, like JSON, negatively affect the performance of LLMs, particularly when it comes to reasoning. The rebuttal argues that **structured generation, when done correctly, actually *improves* performance**.\n",
+ "On the other hand, those findings are not without criticism. The .txt team challenges the work of {cite}`tam2024letspeakfreelystudy`. The rebuttal argues that **structured generation, when done correctly, actually *improves* performance**.\n",
"\n",
"\n",
"```{figure} ../_static/structured_output/rebuttal.png\n",
@@ -787,6 +792,7 @@
"---\n",
"Structured vs Unstructured Results by .txt team.\n",
"```\n",
+ "\n",
"The .txt team presents compelling evidence through their reproduction of the paper's experiments. While their unstructured results align with the original paper's findings, their structured results paint a dramatically different picture - demonstrating that structured generation actually improves performance (see {numref}`structured_vs_unstructured`). The team has made their experimental notebooks publicly available on GitHub for independent verification {cite}`dottxt2024demos`.\n",
"\n",
"\n",
@@ -797,11 +803,11 @@
"* The prompts used for structured generation, particularly in JSON-mode, didn't provide the LLM with sufficient information to properly complete the task. \n",
"* The paper conflates \"structured generation\" with \"JSON-mode\", when they are not the same thing. \n",
"\n",
- "It is important to note that, as with any rebuttal, .txt presents a specific perspective on the issue of structured output from LLMs. While their findings suggest potential benefits to structured generation, further research and exploration are needed to comprehensively understand the nuances and trade-offs involved in using structured output for various LLM tasks and applications.\n",
+ "It is important to note that while .txt provides a compelling and verifiable argument in favor of (proper) structured output generation in LLMs further research and exploration are needed to comprehensively understand the nuances and trade-offs involved in using structured output for various LLM tasks and applications.\n",
"\n",
"In summary, the debate surrounding structured output highlights the ongoing challenges in balancing LLM capabilities with real-world application requirements. While structured outputs offer clear benefits in parsing, robustness, and integration, their potential impact on performance, particularly in reasoning tasks is a topic of ongoing debate. \n",
"\n",
- "The ideal approach likely involves a nuanced strategy that considers the specific task, the desired level of structure, and the available LLM capabilities. Further research and development efforts are needed to mitigate the potential drawbacks of structured output and unlock the full potential of LLMs for a wider range of applications. \n"
+ "The ideal approach likely involves a nuanced strategy that considers the specific task, the desired level of structure, and the available LLM capabilities. Further research and development efforts are needed to mitigate potential drawbacks and unlock the full potential of LLMs for a wider range of applications. \n"
]
},
{
@@ -810,7 +816,11 @@
"source": [
"## Conclusion\n",
"\n",
- "Extracting structured output from LLMs is crucial for integrating them into real-world applications. By understanding the challenges and employing appropriate strategies and tools, developers can improve the reliability and usability of LLM-powered systems, unlocking their potential to automate complex tasks and generate valuable insights. \n"
+ "Extracting structured output from LLMs is crucial for integrating them into real-world applications. By understanding the challenges and employing appropriate strategies and tools, developers can improve the reliability and usability of LLM-powered systems, unlocking their potential to automate complex tasks and generate valuable insights. \n",
+ "\n",
+ "## Acknowledgements\n",
+ "\n",
+ "We would like to thank Cameron Pfiffer from the .txt team for his insightful review and feedback.\n"
]
},
{
diff --git a/tamingllms/notebooks/structured_output.ipynb b/tamingllms/notebooks/structured_output.ipynb
index a998abe..1e382c4 100644
--- a/tamingllms/notebooks/structured_output.ipynb
+++ b/tamingllms/notebooks/structured_output.ipynb
@@ -290,12 +290,12 @@
"\n",
"Provider-specific APIs can offer ways to handle those challenges. We will explore two approaches here using OpenAI's API:\n",
"\n",
- "* **JSON Mode**: Some LLM APIs offer features specifically designed for generating JSON output.\n",
+ "* **JSON Mode**: Most LLM APIs today offer features specifically designed for generating JSON output.\n",
"* **Structured Outputs**: Some LLM APIs offer features specifically designed for generating structured outputs with type safety.\n",
"\n",
"#### JSON Mode\n",
"\n",
- "JSON mode is a feature provided by some LLM APIs, such as OpenAI's, that allows the model to generate output in JSON format. This is particularly useful when you need structured data as a result, such as when parsing the output programmatically or integrating it with other systems that require JSON input. As depicted in {numref}`json-mode`, JSON mode is implemented by instructing theLLM model to use JSON as response format and optionally defining a target schema.\n",
+ "JSON mode is a feature provided by most LLM API providers, such as OpenAI, that allows the model to generate output in JSON format. This is particularly useful when you need structured data as a result, such as when parsing the output programmatically or integrating it with other systems that require JSON input. As depicted in {numref}`json-mode`, JSON mode is implemented by instructing theLLM model to use JSON as response format and optionally defining a target schema.\n",
"\n",
"\n",
"```{figure} ../_static/structured_output/json.svg\n",
@@ -610,7 +610,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Outlines supports the OpenAI API. However, one of its key advantages is the ability to ensure structured output for Open Source models, which often lack such guarantees by default."
+ "Outlines can support major proprietary LLM APIs (e.g. OpenAI's via vLLM). However, one of its key advantages is the ability to ensure structured output for Open Source models, which often lack such guarantees by default."
]
},
{
@@ -674,6 +674,13 @@
"print(answer)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this simple example, we use Outlines' `choice` method to constrain the model output to a predefined set of options (\"Positive\" or \"Negative\"). This ensures the model can only return one of these values, avoiding any unexpected or malformed responses.\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -735,11 +742,9 @@
"\n",
"* **Simplicity vs. Control**: One-shot prompts are simple but offer limited control. `LangChain`, and Outlines provide greater control but might have a steeper learning curve though quite manageable.\n",
"\n",
- "* **Native LLM Support**: `with_structured_output` in LangChain relies on the underlying LLM having built-in support for structured output APIs, i.e. LangChain is just a wrapper around the LLM's structured output API. Other methods, like parsing or using Outlines, are more broadly applicable with the fomer enabling a wider range of Open Source models.\n",
- "\n",
- "* **Flexibility**: Outlines and LangChain's `StructuredOutputParser` offer the most flexibility for defining custom output structures.\n",
+ "* **Native LLM Support**: `with_structured_output` in LangChain relies on the underlying LLM having built-in support for structured output APIs, i.e. LangChain is a wrapper around the underlying LLM's structured output API. Outlines, on the other hand, is more broadly applicable enabling a wider range of Open Source models.\n",
"\n",
- "* **Performance**: Outlines' may offer performance benefits as it introduces no overhead during inference a feature not necessiraly shared by other methods."
+ "* **Flexibility**: Outlines and LangChain's `StructuredOutputParser` offer the most flexibility for defining custom output structures."
]
},
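+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the comparison above concrete, here is a rough, minimal sketch of the `with_structured_output` pattern, assuming a recent `langchain_openai` release, a placeholder model name, and a hypothetical `CompanyFacts` schema defined only for illustration:\n",
+ "\n",
+ "```python\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "from pydantic import BaseModel, Field\n",
+ "\n",
+ "class CompanyFacts(BaseModel):  # hypothetical schema, for illustration only\n",
+ "    name: str = Field(description=\"Company name mentioned in the text\")\n",
+ "    fiscal_year: int = Field(description=\"Fiscal year referenced in the text\")\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o-mini\")  # placeholder model name\n",
+ "structured_llm = llm.with_structured_output(CompanyFacts)  # wraps the provider's structured output API\n",
+ "\n",
+ "result = structured_llm.invoke(\"Apple Inc. filed its annual report for fiscal year 2024.\")\n",
+ "print(result.name, result.fiscal_year)\n",
+ "```\n"
+ ]
+ },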
{
@@ -763,11 +768,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Ongoing Debate on Structured Output from LLMs\n",
+ "### Ongoing Debate on LLMs Structured Output\n",
"\n",
"The use of structured output, like JSON or XML, for Large Language Models (LLMs) is a developing area. While structured output offers clear benefits in parsing, robustness, and integration, there is growing debate on whether it also potentially comes at the cost of reasoning abilities. \n",
"\n",
- "Recent research {cite}`tam2024letspeakfreelystudy` suggests that imposing format restrictions on LLMs might impact their performance, particularly in reasoning-intensive tasks. Further evidence {cite}`aider2024codejson` suggests LLMs may produce lower quality code if they’re asked to return it as part of a structured JSON response:\n",
+ "Recent research \"Let Me Speak Freely? A Study on the Impact of Format Restrictions on Performance of Large Language Models\" {cite}`tam2024letspeakfreelystudy` suggests that imposing format restrictions on LLMs might impact their performance, particularly in reasoning-intensive tasks. Further evidence {cite}`aider2024codejson` suggests LLMs may produce lower quality code if they’re asked to return it as part of a structured JSON response, in particular:\n",
"\n",
"* **Potential performance degradation:** Enforcing structured output, especially through constrained decoding methods like JSON-mode, can negatively impact an LLM's reasoning abilities. This is particularly evident in tasks that require multi-step reasoning or complex thought processes.\n",
"\n",
@@ -775,7 +780,7 @@
"\n",
"* **Increased complexity in prompt engineering:** Crafting prompts that effectively guide LLMs to generate structured outputs while maintaining performance can be challenging. It often requires careful consideration of the schema, the task instructions, and the desired level of detail in the response.\n",
"\n",
- "On the other hand, those findings are not without criticism. The .txt team challenges the findings of the paper \"Let Me Speak Freely? A Study on the Impact of Format Restrictions on Performance of Large Language Models\" {cite}`tam2024letspeakfreelystudy`. The paper claims that structured output formats, like JSON, negatively affect the performance of LLMs, particularly when it comes to reasoning. The rebuttal argues that **structured generation, when done correctly, actually *improves* performance**.\n",
+ "On the other hand, those findings are not without criticism. The .txt team challenges the work of {cite}`tam2024letspeakfreelystudy`. The rebuttal argues that **structured generation, when done correctly, actually *improves* performance**.\n",
"\n",
"\n",
"```{figure} ../_static/structured_output/rebuttal.png\n",
@@ -787,6 +792,7 @@
"---\n",
"Structured vs Unstructured Results by .txt team.\n",
"```\n",
+ "\n",
"The .txt team presents compelling evidence through their reproduction of the paper's experiments. While their unstructured results align with the original paper's findings, their structured results paint a dramatically different picture - demonstrating that structured generation actually improves performance (see {numref}`structured_vs_unstructured`). The team has made their experimental notebooks publicly available on GitHub for independent verification {cite}`dottxt2024demos`.\n",
"\n",
"\n",
@@ -797,11 +803,11 @@
"* The prompts used for structured generation, particularly in JSON-mode, didn't provide the LLM with sufficient information to properly complete the task. \n",
"* The paper conflates \"structured generation\" with \"JSON-mode\", when they are not the same thing. \n",
"\n",
- "It is important to note that, as with any rebuttal, .txt presents a specific perspective on the issue of structured output from LLMs. While their findings suggest potential benefits to structured generation, further research and exploration are needed to comprehensively understand the nuances and trade-offs involved in using structured output for various LLM tasks and applications.\n",
+ "It is important to note that while .txt provides a compelling and verifiable argument in favor of (proper) structured output generation in LLMs further research and exploration are needed to comprehensively understand the nuances and trade-offs involved in using structured output for various LLM tasks and applications.\n",
"\n",
"In summary, the debate surrounding structured output highlights the ongoing challenges in balancing LLM capabilities with real-world application requirements. While structured outputs offer clear benefits in parsing, robustness, and integration, their potential impact on performance, particularly in reasoning tasks is a topic of ongoing debate. \n",
"\n",
- "The ideal approach likely involves a nuanced strategy that considers the specific task, the desired level of structure, and the available LLM capabilities. Further research and development efforts are needed to mitigate the potential drawbacks of structured output and unlock the full potential of LLMs for a wider range of applications. \n"
+ "The ideal approach likely involves a nuanced strategy that considers the specific task, the desired level of structure, and the available LLM capabilities. Further research and development efforts are needed to mitigate potential drawbacks and unlock the full potential of LLMs for a wider range of applications. \n"
]
},
{
@@ -810,7 +816,11 @@
"source": [
"## Conclusion\n",
"\n",
- "Extracting structured output from LLMs is crucial for integrating them into real-world applications. By understanding the challenges and employing appropriate strategies and tools, developers can improve the reliability and usability of LLM-powered systems, unlocking their potential to automate complex tasks and generate valuable insights. \n"
+ "Extracting structured output from LLMs is crucial for integrating them into real-world applications. By understanding the challenges and employing appropriate strategies and tools, developers can improve the reliability and usability of LLM-powered systems, unlocking their potential to automate complex tasks and generate valuable insights. \n",
+ "\n",
+ "## Acknowledgements\n",
+ "\n",
+ "We would like to thank Cameron Pfiffer from the .txt team for his insightful review and feedback.\n"
]
},
{