# import dotenv
#
# dotenv.load_dotenv()
import json

import dspy
from dspy import Retry
from dspy.primitives.assertions import assert_transform_module, backtrack_handler

from ouroboros import Ouroboros

turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=400)
dspy.settings.configure(lm=turbo)
# Example adapted from the DSPy assertions paper.


def format_checker(choice_string):
    # Valid if the choice string parses as JSON.
    try:
        json.loads(choice_string)
        return True
    except (ValueError, TypeError):
        return False


def is_correct_answer_included(answer, choice_string):
    return answer in choice_string


def is_plausibility_yes(assessment_answer):
    return "yes" in assessment_answer.lower()
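

# Quick sanity checks for the helpers above (plain asserts, no LM calls);
# the example strings are illustrative, not from the original script.
assert format_checker('["6 months", "12 months"]')
assert not format_checker('not valid json')
assert is_correct_answer_included("6 months", '["6 months", "12 months"]')
assert is_plausibility_yes("Yes, the distractors are plausible.")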

# QuizChoiceGenerationWithAssertions is implemented using DSPy's built-in retry
# mechanism. Internally, the program runs linearly (as normal); at each Suggest
# call, DSPy checks whether the suggestion fires (i.e. the condition is False).
# If it does, execution restarts at the forward function, and the feedback is
# silently incorporated by extending the signature that produces choice_string.
# There is an internal retry limit per module, and once that is exceeded DSPy
# basically gives up and proceeds with the last output.

class QuizChoiceGenerationWithAssertions(dspy.Module):
    def __init__(self):
        super().__init__()
        # The signature string carries the instruction guiding inputs -> outputs.
        self.generate_choices = dspy.ChainOfThought(
            "question, correct_answer, number_of_choices -> answer_choices")

    def forward(self, question, answer):
        choice_string = self.generate_choices(question=question, correct_answer=answer,
                                              number_of_choices="4").answer_choices
        dspy.Suggest(format_checker(choice_string),
                     "The format of the answer choices should be in JSON format. "
                     "Please revise accordingly.")
        dspy.Suggest(is_correct_answer_included(answer, choice_string),
                     "The answer choices do not include the correct answer to the "
                     "question. Please revise accordingly.")
        plausibility_question = ('Are the distractors in the answer choices plausible and not '
                                 'easily identifiable as incorrect? Reply with "Yes" or "No"')
        plausibility_assessment = dspy.Predict(
            "question, answer_choices, assessment_question -> assessment_answer"
        )(question=question, answer_choices=choice_string,
          assessment_question=plausibility_question)
        dspy.Suggest(is_plausibility_yes(plausibility_assessment.assessment_answer),
                     "The answer choices are not plausible distractors or are too easily "
                     "identifiable as incorrect. Please revise to provide more challenging "
                     "and plausible distractors.")
        return dspy.Prediction(choices=choice_string)

quiz_choice_with_assertion = assert_transform_module(
    QuizChoiceGenerationWithAssertions().map_named_predictors(Retry),
    backtrack_handler)
print(quiz_choice_with_assertion(
    question="How long does an FAA first-class medical certificate last for a 41-year-old?",
    answer="6 months"))

effect_our = Ouroboros()

# My implementation tries to replicate the behavior of DSPy's internal ad-hoc
# mechanism, and shows how effect handlers can give the caller more control
# over the error-handling process. In this example, we opt for more flexibility
# by first collecting the feedback and then deciding whether we really want to
# retry; if we do retry, we can upgrade the LM setting to a much more powerful
# model. One slight technical complication is that the control-flow mechanism
# powering the effect handlers doesn't play well with the temporary settings
# mechanism that DSPy currently employs.
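
# Minimal sketch of the effect-handler pattern used below. The Ouroboros
# semantics are inferred from this file's usage, not from documentation:
# raise_effect(name, obj, *args) invokes the handler registered for `name`,
# resume() returns control to just after the raise point, and restart()
# re-enters the decorated function from the top. The countdown example itself
# is illustrative and not part of the original program.
_sketch_effects = Ouroboros()


class _CountdownSketch:
    def __init__(self):
        self.remaining = 3

    def on_tick(self, value):
        # Runs when the "tick" effect is raised; resume() continues
        # execution just after the raise_effect call.
        print("tick:", value)
        _sketch_effects.resume()

    def on_done(self):
        self.remaining -= 1
        if self.remaining > 0:
            # Re-run count() from the top, like a retry.
            _sketch_effects.restart()
        else:
            _sketch_effects.resume()

    @_sketch_effects.handle(handlers=[("tick", on_tick), ("done", on_done)])
    def count(self):
        _sketch_effects.raise_effect("tick", self, self.remaining)
        _sketch_effects.raise_effect("done", self)
        return self.remaining

# Under the inferred semantics, _CountdownSketch().count() would print
# tick: 3, tick: 2, tick: 1 and return 1.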


class QuizChoiceGenerationWithEffect(dspy.Module):
    def __init__(self):
        super().__init__()
        # The signature string carries the instruction guiding inputs -> outputs.
        self.generate_choices = dspy.ChainOfThought(
            "question, correct_answer, number_of_choices -> answer_choices")
        self.retries = 0
        self.feedbacks = []
        self.old_output = None

    def handle_feedback(self, message):
        # Collect the feedback, then continue after the raise point.
        self.feedbacks.append(message)
        effect_our.resume()

    def handle_plausible(self, question, choice_string):
        plausibility_question = ('Are the distractors in the answer choices plausible and not '
                                 'easily identifiable as incorrect? Reply with "Yes" or "No"')
        plausibility_assessment = dspy.Predict(
            "question, answer_choices, assessment_question -> assessment_answer"
        )(question=question, answer_choices=choice_string,
          assessment_question=plausibility_question)
        # Record feedback only when the assessment says the distractors are
        # NOT plausible (i.e. the answer is not "yes").
        if "yes" not in plausibility_assessment.assessment_answer.lower():
            self.feedbacks.append("The answer choices are not plausible distractors or are "
                                  "too easily identifiable as incorrect. Please revise to "
                                  "provide more challenging and plausible distractors.")
        effect_our.resume()

    def handle_possible_retry(self, previous_answer):
        if len(self.feedbacks) == 0:
            # Nothing to fix: continue past the raise point and return the prediction.
            effect_our.resume()
        self.retries += 1
        if self.retries < 2:
            # TODO Add a way to reset the configs that might have been modified
            dspy.settings.configure(lm=dspy.OpenAI(model='gpt-4o', max_tokens=400))
            # Extend the signature so the retry sees the old output and feedback.
            self.generate_choices = dspy.ChainOfThought("question, correct_answer, number_of_choices,"
                                                        " old_output, feedback_on_old_output"
                                                        " -> answer_choices")
            self.old_output = previous_answer
            effect_our.restart()

    @effect_our.handle(handlers=[("feedback", handle_feedback),
                                 ("plausibility", handle_plausible),
                                 ("possible_retry", handle_possible_retry)])
    def forward(self, question, answer):
        choice_string = self.generate_choices(question=question, correct_answer=answer,
                                              number_of_choices="4", old_output=self.old_output,
                                              feedback_on_old_output=";".join(self.feedbacks)).answer_choices
        if not format_checker(choice_string):
            effect_our.raise_effect("feedback", self,
                                    "The format of the answer choices should be in JSON format. "
                                    "Please revise accordingly.")
        effect_our.raise_effect("plausibility", self, question, choice_string)
        effect_our.raise_effect("possible_retry", self, choice_string)
        return dspy.Prediction(choices=choice_string)


print(QuizChoiceGenerationWithEffect()(
    question="How long does an FAA first-class medical certificate last for a 41-year-old?",
    answer="6 months"))

# TODO: convert this into a Jupyter notebook (maybe?)
# TODO: how the integration with DSPy affects usability
# TODO: how could things hook up to this generically? It seems in DSPy this is
# done internally, but maybe the API could be made to "query" past effects.