-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathexperiment.py
161 lines (128 loc) · 8.33 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
import groq
import os
import json
import time
# Respect a GROQ_API_KEY that is already exported in the environment; the
# empty placeholder is only installed when the variable is unset, so a real
# key is never clobbered. Prefer exporting the key over pasting it here:
# a key written into this file ends up in version control.
os.environ.setdefault("GROQ_API_KEY", "")

# Shared API client used by make_api_call. It is constructed without
# arguments, so credentials come from the GROQ_API_KEY environment variable.
client = groq.Groq()
def make_api_call(messages, max_tokens, is_final_answer=False, *, max_retries=3, retry_delay=1):
    """Request one JSON-formatted reply from the Groq chat completions API.

    The call is retried when the request fails or when the reply cannot be
    decoded into a JSON object. Errors are never raised to the caller; after
    the last failed attempt a fallback "Error" payload is returned instead.

    Args:
        messages: Chat history, a list of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Completion token limit forwarded to the API.
        is_final_answer: Selects which fallback payload is returned when
            every attempt fails.
        max_retries: Total number of attempts; values below 1 are treated as 1.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        dict: The decoded JSON object produced by the model. On failure, a
        dict with ``"title": "Error"`` and the last error text in
        ``"content"``; the non-final variant additionally carries
        ``"next_action": "final_answer"``.
    """
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.2,
                response_format={"type": "json_object"},
            )
            parsed = json.loads(response.choices[0].message.content)
            # Callers use dict.get() on the result, so a JSON array or scalar
            # is treated like any other failed attempt and retried.
            if not isinstance(parsed, dict):
                raise ValueError(f"Expected a JSON object, got {type(parsed).__name__}")
            return parsed
        except Exception as e:  # boundary: any API or decoding error triggers a retry
            if attempt == attempts - 1:
                if is_final_answer:
                    return {"title": "Error", "content": f"Failed to generate final answer after {attempts} attempts. Error: {str(e)}"}
                return {"title": "Error", "content": f"Failed to generate step after {attempts} attempts. Error: {str(e)}", "next_action": "final_answer"}
            time.sleep(retry_delay)  # Wait before retrying
def _coerce_confidence(value, default):
    """Return *value* as a float, or *default* when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _coerce_text(value, default):
    """Return *value* as a string, or *default* when it is missing (None)."""
    if value is None:
        return default
    if isinstance(value, str):
        return value
    # The model occasionally nests an object/list under a text key; serialise
    # it so downstream string handling keeps working.
    try:
        return json.dumps(value, ensure_ascii=False, indent=2)
    except (TypeError, ValueError):
        return str(value)


def _is_api_failure(step_data):
    """Tell whether *step_data* is the fallback payload make_api_call returns on failure."""
    content = step_data.get('content')
    return (
        step_data.get('title') == "Error"
        and isinstance(content, str)
        and content.startswith("Failed to generate")
    )


def generate_response(prompt, *, min_steps=15, max_steps=25):
    """Drive a multi-step reasoning conversation and stream the steps.

    Each loop iteration asks the model for one reasoning step via
    ``make_api_call`` and feeds the step back into the chat history, together
    with a follow-up user message when more analysis or a reflection is
    wanted. Once the loop ends, one more call requests the final answer.

    Args:
        prompt: The user's question.
        min_steps: A 'final_answer' request from the model is declined (and
            more analysis requested) while the step counter is below this.
        max_steps: Hard cap on reasoning steps so the loop always terminates,
            even if the model never asks for the final answer. Takes
            precedence over ``min_steps``.

    Yields:
        tuple: ``(steps, total_thinking_time)``. ``steps`` is the growing list
        of ``(title, content, thinking_time, confidence)`` tuples (the same
        list object on every yield). ``total_thinking_time`` is ``None`` for
        intermediate yields and the summed seconds on the last yield, which
        also includes the "Final Answer" entry.
    """
    # System prompt incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning
    messages = [
        {"role": "system", "content": """You are an AI assistant that explains your reasoning step by step, incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Follow these instructions:
1. Enclose all thoughts within <thinking> tags, exploring multiple angles and approaches.
2. Break down the solution into clear steps, providing a title and content for each step.
3. After each step, decide if you need another step or if you're ready to give the final answer.
4. Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.
5. Regularly evaluate your progress, being critical and honest about your reasoning process.
6. Assign a quality score between 0.0 and 1.0 to guide your approach:
- 0.8+: Continue current approach
- 0.5-0.7: Consider minor adjustments
- Below 0.5: Seriously consider backtracking and trying a different approach
7. If unsure or if your score is low, backtrack and try a different approach, explaining your decision.
8. For mathematical problems, show all work explicitly using LaTeX for formal notation and provide detailed proofs.
9. Explore multiple solutions individually if possible, comparing approaches in your reflections.
10. Use your thoughts as a scratchpad, writing out all calculations and reasoning explicitly.
11. Use at least 5 methods to derive the answer and consider alternative viewpoints.
12. Be aware of your limitations as an AI and what you can and cannot do.
After every 3 steps, perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints.
Respond in JSON format with 'title', 'content', 'next_action' (either 'continue', 'reflect', or 'final_answer'), and 'confidence' (a number between 0 and 1) keys.
Example of a valid JSON response:
```json
{
"title": "Identifying Key Information",
"content": "To begin solving this problem, we need to carefully examine the given information and identify the crucial elements that will guide our solution process. This involves...",
"next_action": "continue",
"confidence": 0.8
}```
Your goal is to demonstrate a thorough, adaptive, and self-reflective problem-solving process, emphasizing dynamic thinking and learning from your own reasoning.
"""},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Thank you! I will now think step by step following my instructions, starting at the beginning after decomposing the problem."},
    ]

    steps = []
    step_count = 1
    total_thinking_time = 0

    while True:
        start_time = time.time()
        step_data = make_api_call(messages, 750)
        thinking_time = time.time() - start_time
        total_thinking_time += thinking_time

        # Model output is untyped JSON: normalise the fields the UI formats.
        confidence = _coerce_confidence(step_data.get('confidence'), 0.5)
        steps.append((
            f"Step {step_count}: {step_data.get('title', 'Untitled Step')}",
            _coerce_text(step_data.get('content'), 'No content provided'),
            thinking_time,
            confidence,
        ))
        messages.append({"role": "assistant", "content": json.dumps(step_data)})

        # Stop immediately when the API call itself failed (otherwise the
        # minimum-step rule below would keep retrying a broken API), or when
        # the safety cap on the number of steps is reached.
        if _is_api_failure(step_data) or step_count >= max_steps:
            break

        next_action = step_data.get('next_action', 'continue')
        if next_action == 'final_answer' and step_count < min_steps:
            # Too early to conclude: ask the model to keep analysing.
            messages.append({"role": "user", "content": "Please continue your analysis with at least 5 more steps before providing the final answer."})
        elif next_action == 'final_answer':
            break
        elif next_action == 'reflect' or step_count % 3 == 0:
            messages.append({"role": "user", "content": "Please perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints."})

        step_count += 1
        # Yield after each step for Streamlit to update; the total time is
        # only reported with the final yield.
        yield steps, None

    # Generate final answer
    messages.append({"role": "user", "content": "Please provide a comprehensive final answer based on your reasoning above, summarizing key points and addressing any uncertainties."})
    start_time = time.time()
    final_data = make_api_call(messages, 750, is_final_answer=True)
    thinking_time = time.time() - start_time
    total_thinking_time += thinking_time

    final_confidence = _coerce_confidence(final_data.get('confidence'), 1.0)
    steps.append((
        "Final Answer",
        _coerce_text(final_data.get('content'), 'No final answer provided'),
        thinking_time,
        final_confidence,
    ))
    yield steps, total_thinking_time
def _render_step_body(content, confidence):
    """Write one step's content and confidence into the current Streamlit container."""
    # Model output is untyped JSON, so tolerate non-string content and
    # non-numeric confidence instead of crashing the page mid-render.
    text = content if isinstance(content, str) else str(content)
    try:
        confidence_text = f"{float(confidence):.2f}"
    except (TypeError, ValueError):
        confidence_text = str(confidence)
    # NOTE(review): model output is rendered as raw HTML (unsafe_allow_html)
    # so that '<br>' produces line breaks; any markup the model emits is
    # rendered too. Confirm this is acceptable for the deployment.
    st.markdown(text.replace('\n', '<br>'), unsafe_allow_html=True)
    st.markdown(f"**Confidence:** {confidence_text}")


def main():
    """Render the Streamlit page and stream the reasoning steps for a query.

    Reads a query from a text input. When one is present, iterates
    ``generate_response`` and redraws the full list of steps after every
    yield: reasoning steps go into expanders, the "Final Answer" entry is
    shown as a heading. The total thinking time is displayed once the
    generator reports it (i.e. it is not ``None``).
    """
    st.set_page_config(page_title="g1 prototype", page_icon="🧠", layout="wide")
    # NOTE(review): the title advertises Llama-3.1 70b, but make_api_call
    # requests "llama-3.1-8b-instant" — confirm which one is intended.
    st.title("g1: Using Llama-3.1 70b on Groq to create o1-like reasoning chains with extended self-reflection")
    st.markdown("""
This is an improved prototype of using prompting to create o1-like reasoning chains with extended self-reflection to improve output accuracy. It now thinks for longer periods and provides more detailed analysis. It is powered by Groq for fast reasoning steps!
Open source [repository here](https://github.com/bklieger-groq)
""")

    # Text input for user query
    user_query = st.text_input("Enter your query:", placeholder="e.g., What are the potential long-term effects of climate change on global agriculture?")
    if not user_query:
        return

    st.write("Generating response... This may take a while due to extended thinking time.")

    # Placeholders that are overwritten on every update from the generator.
    response_container = st.empty()
    time_container = st.empty()

    for steps, total_thinking_time in generate_response(user_query):
        with response_container.container():
            for title, content, thinking_time, confidence in steps:
                if title.startswith("Final Answer"):
                    st.markdown(f"### {title}")
                    _render_step_body(content, confidence)
                else:
                    with st.expander(title, expanded=True):
                        _render_step_body(content, confidence)
                        st.markdown(f"**Thinking time:** {thinking_time:.2f} seconds")

        # Only show total time when it's available at the end
        if total_thinking_time is not None:
            time_container.markdown(f"**Total thinking time: {total_thinking_time:.2f} seconds**")
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()