-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
233 lines (179 loc) · 9.85 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from openai import OpenAI
import json
import requests
import streamlit as st
import re
from datetime import date
import time
# The OpenAI key comes from Streamlit's secrets store (entry OPENAI_API_KEY).
openai_api_key = st.secrets['OPENAI_API_KEY']

# SPARQL endpoint that run_query() sends its requests to.
endpoint_url = "https://probe.stad.gent/sparql"

# Date at script start; interpolated into the answer prompt further down.
today = date.today()

# To test on localhost, uncomment the sidebar below and enter the key by hand:
# with st.sidebar:
#     openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
#     "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
#     "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
#     "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"

# Question / SPARQL pairs; each entry is read via its 'user_question' and
# 'sparql_query' keys when the query-generation prompt is built.
with open('questions_and_queries.json', encoding='utf-8') as examples_fh:
    example_data = json.load(examples_fh)

# Decision labels; each entry is read via its 'uri' and 'label' keys when the
# query-generation prompt is built.
with open('annotations.json', encoding='utf-8') as labels_fh:
    label_data = json.load(labels_fh)
def generate_sparql_query(user_question, label_data, examples_data):
    """Ask the chat model to turn a user question into a SPARQL query.

    The prompt lists every known label with its URI, then the example
    question/query pairs, and finally the instructions for building the query.

    Relies on the module-level ``client``, which the chat handler assigns
    before calling this function.

    Args:
        user_question: The question as typed by the user.
        label_data: Iterable of dicts with ``'uri'`` and ``'label'`` keys.
        examples_data: Iterable of dicts with ``'user_question'`` and
            ``'sparql_query'`` keys.

    Returns:
        The chat-completion response object as returned by the client; the
        generated query text is in ``.choices[0].message.content``.
    """
    # One "URI / Label" paragraph per label.
    label_paragraphs = [
        f"URI of Label: {entry['uri']}\nLabel: {entry['label']}\n"
        for entry in label_data
    ]
    concepts_and_labels = "\n".join(label_paragraphs)

    # One "question / query" paragraph per worked example.
    example_paragraphs = [
        f"User Question: {entry['user_question']}\nSPARQL Query: {entry['sparql_query']}\n"
        for entry in examples_data
    ]
    example_queries = "\n".join(example_paragraphs)

    # NOTE(review): {example_queries} is interpolated twice below (once as the
    # examples section and once inside the VALUES instruction), so the full
    # example list is sent twice per request -- confirm this is intended.
    prompt = f"""
GIVE ONLY THE QUERY AS AN ANSWER TO THE FOLLOWING PROMPT:
The following are all the labels for the decisions in the decisions dataset and their URIs:
{concepts_and_labels}
Based on the user's question: {user_question}, go through all the labels then choose all possible labels that best matches the question's theme and context.
NEXT STEP:
The following are examples of user questions in Dutch and their corresponding SPARQL queries:
{example_queries}
Based on the examples above and the URIs of the chosen labels, generate a SPARQL query for the following user question:
User Question: {user_question}
SPARQL Query:
But please make sure to use the URIs of the chosen labels in the "?annotation oa:hasBody" part of the query like in the examples. If there is more than one label chosen query for them using the same structure in the second example query in {example_queries}. This utilizes VALUES.
If the user doesn't set a limit to the number of decisions they want to see, limit them to 3.
Always choose the most recent decision (by ordering on publication date "eli:date_publication") unless prompted otherwise by the user.
Don't add '`' as you wish.
Then after filtering on label, add a filter for the title or description or motivering with keywords that you extract from the question. (NOTE: Do not use generic words that apply to all decisions about Gent, like 'Gent', as a keyword).
"""

    conversation = [
        {"role": "system", "content": "You are a helpful assistant that generates SPARQL queries to be run on a SPARQL endpoint containing data about decisions of the city of Gent based on user questions and labels of decisions related to their questions. Your language is Dutch."},
        {"role": "user", "content": prompt},
    ]

    # temperature=0 keeps the generated query as repeatable as possible.
    # (Alternative model previously tried here: "gpt-4o-mini".)
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=1000,
        temperature=0,
    )
def check_sparql_query(query):
    """Ask the chat model for the same SPARQL query without keyword filters.

    Used as a fallback when the first query returns nothing: the model is
    told to regenerate ``query`` with the keyword filtering removed.

    Relies on the module-level ``client``, which the chat handler assigns
    before calling this function.

    Args:
        query: The SPARQL query text produced by the first generation step.

    Returns:
        The chat-completion response object as returned by the client; the
        relaxed query text is in ``.choices[0].message.content``.
    """
    prompt = f"""
If the query generated {query} contains looking for keywords, generate the same SPARQL query but remove the keywords filtering.
Don't add '`' to the query as you wish.
"""

    conversation = [
        {"role": "system", "content": "You are a SPARQL query refiner."},
        {"role": "user", "content": prompt},
    ]

    # (Alternative model previously tried here: "gpt-4o-mini".)
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=1000,
        temperature=0,
    )
def run_query(query):
    """Run a SPARQL query against ``endpoint_url`` and return tidied rows.

    Args:
        query: SPARQL query text; sent as the ``query`` GET parameter.

    Returns:
        A list with one dict per result binding, mapping each variable name
        to its value with every run of whitespace collapsed to a single space
        and the ends trimmed. An empty list is returned when the request
        fails or times out, the endpoint answers with a non-200 status, the
        body is not valid JSON, or the JSON has no ``results.bindings``
        (for example an ASK result).
    """
    headers = {"Accept": "application/sparql-results+json"}
    params = {"query": query}

    try:
        # Without a timeout a stalled endpoint would block the app forever.
        response = requests.get(
            endpoint_url, headers=headers, params=params, timeout=30
        )
    except requests.RequestException:
        # Connection errors and timeouts are treated like "no results" so the
        # caller can fall back to its relaxed query.
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
        bindings = payload['results']['bindings']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: JSON without the
        # expected results/bindings structure.
        return []

    # Flatten each binding to {variable: value}, normalising whitespace
    # (newlines, tabs, repeated spaces) inside the values.
    return [
        {
            key: re.sub(r'\s+', ' ', detail['value']).strip()
            for key, detail in binding.items()
        }
        for binding in bindings
    ]
def _strip_to_prefix(text):
    """Return ``text`` from its first "PREFIX" onwards; unchanged if absent."""
    start = text.find("PREFIX")
    return text if start == -1 else text[start:]


st.title("💬 ChatGent")
st.caption("🚀 Vragen beantwoorden over besluiten van de stad Gent")

# Seed the conversation with a greeting the first time the script runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Hoe kan ik u helpen?"}]

# Replay the stored conversation on every run.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input("Stel hier uw vraag..."):
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    # Kept as a module-level name on purpose: generate_sparql_query() and
    # check_sparql_query() read ``client`` as a global.
    client = OpenAI(api_key=openai_api_key)

    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    user_question = prompt

    with st.spinner('Generating response...'):
        # Step 1: let the model write a SPARQL query and run it.
        sparql_query = generate_sparql_query(user_question, label_data=label_data, examples_data=example_data)
        print(sparql_query)
        # Drop anything the model wrote before the query itself.
        query_content = _strip_to_prefix(sparql_query.choices[0].message.content)
        cleaned_decisions = run_query(query_content)
        print(cleaned_decisions)

        # Step 2: nothing found -> retry with the keyword filters removed.
        if not cleaned_decisions:
            sparql_query_2 = check_sparql_query(query_content)
            # The PREFIX search must look at the *second* response; searching
            # the first query again would leave any preamble in place.
            query_content_2 = _strip_to_prefix(sparql_query_2.choices[0].message.content)
            print("SECOND SPARQL:", query_content_2)
            cleaned_decisions = run_query(query_content_2)
            print("AGAIN:", cleaned_decisions)

        # Step 3: collect source links and titles for the answer. The query is
        # model-generated, so a row may lack either variable; rows without a
        # source link are skipped and a missing title falls back to the link.
        resources = []
        resources_names = []
        for d in cleaned_decisions:
            print(d)
            source_url = d.get('derivedFrom')
            if not source_url:
                continue
            resources.append(source_url)
            resources_names.append(d.get('title', source_url))

        prompt_2 = f"""
ELEMENTS TO USE:
1. User's question: {user_question}
2. Data to use for answer: {cleaned_decisions}
YOUR ROLE:
- You answer the user's question. However, chat normally if the user's prompt is not a question.
- You answer in the language the user asked in. For example,if they ask in English, you answer in English, and if they ask in Dutch, you answer in Dutch.
INSTRUCTIONS:
- If it is a question, generate a response using ONLY the given data.
- The date today is {today}. Answer with that in mind.
- If you do not have data, the only external links you are allowed to refer to when looking for an answer are:
- https://stad.gent
- https://gentsefeesten.stad.gent
- If the question is asking about a date of an event and the data provided is from the past, say you do not know.
REQUIREMENTS YOU MUST FOLLOW:
1. If the retrieved decisions' dates do not match the current date's year: {today}, you must tell the user that it's not recent.
2. If you don't have an answer, you are only allowed to refer the user to https://stad.gent website.
3. You are only allowed to use the provided data to compile the answer.
4. Use the word "besluiten" when referring to decisions in the Dutch language. Don't use "beslissingen".
5. Be friendly and use plain language, avoiding bureaucratic terms.
LAST IMPORTANT STEPS YOU MUST FOLLOW: Before showing your answer, ensure it matches the user's question:
- If it relates but doesn't answer exactly, mention: "It might relate but isn't necessarily the answer you want."
- Provide the resources in bullet points: {resources}, formatted with the {resources_names} as links ({resources}).
"""

        # Step 4: have the model phrase the final answer from the retrieved data.
        completion_2 = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for the citizens of Gent when they ask a question on the city's website regarding the decisions made by the city."},
                {"role": "user", "content": prompt_2}
            ],
            temperature=0
        )
        msg_2 = completion_2.choices[0].message.content

    st.session_state.messages.append({"role": "assistant", "content": msg_2})
    st.chat_message("assistant").write(msg_2)

# Clear context button: replaces the stored conversation with a fresh greeting.
# NOTE(review): this greeting is English while the initial one above is Dutch
# -- confirm which language is intended.
if st.button("Clear Context"):
    st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]