haotian-liu · mandlinsarah · Sep 3, 2024
diff --git a/llava/eval/eval_gpt_review.py b/llava/eval/eval_gpt_review.py
@@ -27,9 +27,9 @@ def get_eval(content: str, max_tokens: int):
             )
             break
         except openai.error.RateLimitError:
-            pass
+            print('RateLimitError: Retrying after sleeping')
         except Exception as e:
-            print(e)
+            print(f'Exception occurred: {e}')
         time.sleep(NUM_SECONDS_TO_SLEEP)
 
     print('success!')
@@ -64,50 +64,51 @@ def parse_score(review):
 
     ray.init()
 
-    f_q = open(os.path.expanduser(args.question))
-    f_ans1 = open(os.path.expanduser(args.answer_list[0]))
-    f_ans2 = open(os.path.expanduser(args.answer_list[1]))
-    rule_dict = json.load(open(os.path.expanduser(args.rule), 'r'))
+    with open(os.path.expanduser(args.question)) as f_q, \
+         open(os.path.expanduser(args.answer_list[0])) as f_ans1, \
+         open(os.path.expanduser(args.answer_list[1])) as f_ans2, \
+         open(os.path.expanduser(args.rule), 'r') as f_rule, \
+         open(f'{args.output}', 'w') as review_file:
 
-    review_file = open(f'{args.output}', 'w')
+        rule_dict = json.load(f_rule)
 
-    js_list = []
-    handles = []
-    idx = 0
-    for ques_js, ans1_js, ans2_js in zip(f_q, f_ans1, f_ans2):
-        # if idx == 1:
-        #     break
+        js_list = []
+        handles = []
+        idx = 0
+        for ques_js, ans1_js, ans2_js in zip(f_q, f_ans1, f_ans2):
+            # if idx == 1:
+            #     break
 
-        ques = json.loads(ques_js)
-        ans1 = json.loads(ans1_js)
-        ans2 = json.loads(ans2_js)
+            ques = json.loads(ques_js)
+            ans1 = json.loads(ans1_js)
+            ans2 = json.loads(ans2_js)
 
-        category = json.loads(ques_js)['category']
-        if category in rule_dict:
-            rule = rule_dict[category]
-        else:
-            rule = rule_dict['default']
-        prompt = rule['prompt']
-        role = rule['role']
-        content = (f'[Question]\n{ques["text"]}\n\n'
-                   f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
-                   f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
-                   f'[System]\n{prompt}\n\n')
-        js_list.append({
-            'id': idx+1,
-            'question_id': ques['question_id'],
-            'answer1_id': ans1['answer_id'],
-            'answer2_id': ans2['answer_id'],
-            'category': category})
-        idx += 1
-        handles.append(get_eval.remote(content, args.max_tokens))
-        # To avoid the rate limit set by OpenAI
-        time.sleep(NUM_SECONDS_TO_SLEEP)
+            category = json.loads(ques_js)['category']
+            if category in rule_dict:
+                rule = rule_dict[category]
+            else:
+                rule = rule_dict['default']
+            prompt = rule['prompt']
+            role = rule['role']
+            content = (f'[Question]\n{ques["text"]}\n\n'
+                       f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
+                       f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
+                       f'[System]\n{prompt}\n\n')
+            js_list.append({
+                'id': idx+1,
+                'question_id': ques['question_id'],
+                'answer1_id': ans1['answer_id'],
+                'answer2_id': ans2['answer_id'],
+                'category': category})
+            idx += 1
+            handles.append(get_eval.remote(content, args.max_tokens))
+            # To avoid the rate limit set by OpenAI
+            time.sleep(NUM_SECONDS_TO_SLEEP)
+
+        reviews = ray.get(handles)
+        for idx, review in enumerate(reviews):
+            scores = parse_score(review)
+            js_list[idx]['content'] = review
+            js_list[idx]['tuple'] = scores
+            review_file.write(json.dumps(js_list[idx]) + '\n')
 
-    reviews = ray.get(handles)
-    for idx, review in enumerate(reviews):
-        scores = parse_score(review)
-        js_list[idx]['content'] = review
-        js_list[idx]['tuple'] = scores
-        review_file.write(json.dumps(js_list[idx]) + '\n')
-    review_file.close()