-
Notifications
You must be signed in to change notification settings - Fork 0
/
image_inference.py
117 lines (98 loc) · 3.94 KB
/
image_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import base64
import imutils
import requests
import glob2
import json
import pandas as pd
import cv2
from langchain_openai import ChatOpenAI
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
def resize_image(img_path, factor, gray=False, save_path="./temp"):
    """Shrink an image by ``factor`` and save the copy under ``save_path``.

    The image is read with OpenCV, resized through ``imutils.resize`` (which
    is given only a target width of ``original_width // factor``), optionally
    converted from BGR to grayscale, and written into ``save_path`` using the
    same file name as the input.

    Args:
        img_path: Path of the image to read.
        factor: Positive divisor applied to the image width.
        gray: When true, convert the resized image to grayscale before saving.
        save_path: Directory receiving the resized copy; created if missing.

    Returns:
        The path of the written file, formatted as ``"{save_path}/{name}"``.

    Raises:
        ValueError: If ``factor`` is not positive, or the file cannot be
            decoded as an image.
        FileNotFoundError: If ``img_path`` does not point to a file.
        OSError: If the resized image could not be written.
    """
    # Validate cheap preconditions first so bad input leaves no directory behind.
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor!r}")
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"Image not found: {img_path}")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"OpenCV could not decode image: {img_path}")

    # Keep the width a positive int even for float factors or factors larger
    # than the image width.
    target_width = max(1, int(img.shape[1] // factor))
    resized = imutils.resize(img, width=target_width, inter=cv2.INTER_AREA)
    if gray:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)

    out_path = f"{save_path}/{os.path.basename(img_path)}"
    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(out_path, resized):
        raise OSError(f"Failed to write resized image: {out_path}")
    return out_path
def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64 text string.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8 so the result can be embedded in a data URL.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def _image_url_part(base64_image):
    """Build an ``image_url`` content part holding a base64 JPEG data URL."""
    return {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}",
            "detail": "auto",
        },
    }


def send_image(image_path, resize_factor=2, system_prompt="", user_prompt="", previous_image_path=None):
    """Send an image (and optionally the previous segment's image) to gpt-4o.

    Each image is downscaled with ``resize_image`` and base64-encoded with
    ``encode_image``, then sent through ``ChatOpenAI`` as one
    ``HumanMessage``. Its content parts are, in order: the system prompt as
    text, then (only when ``previous_image_path`` is given) a fixed note
    telling the model to ignore the previous segment followed by that image,
    then the user prompt and the current image.

    Note that ``system_prompt`` is sent as a text part of the human message,
    not as a separate system message.

    Args:
        image_path: Path of the current image.
        resize_factor: Factor passed to ``resize_image`` for every image.
        system_prompt: Text placed first in the message content.
        user_prompt: Text placed directly before the current image.
        previous_image_path: Optional path of the previous segment's image.

    Returns:
        Whatever ``ChatOpenAI.invoke`` returns for the message.
    """
    base64_image = encode_image(resize_image(img_path=image_path, factor=resize_factor))
    chat = ChatOpenAI(model="gpt-4o")

    # Build the content once; the previous-segment parts are the only
    # difference between the two call shapes.
    content = [{"type": "text", "text": system_prompt}]
    if previous_image_path is not None:
        base64_image_previous = encode_image(
            resize_image(img_path=previous_image_path, factor=resize_factor)
        )
        content.append(
            {
                "type": "text",
                "text": (
                    "This is the image of previous segment. Defects are already "
                    "identified and an attempt to solve was made for this segment. "
                    "Donot consider this part of the print to identify the defects. "
                    "Identify the defects only present in the current part of the print."
                ),
            }
        )
        content.append(_image_url_part(base64_image_previous))
    content.append({"type": "text", "text": user_prompt})
    content.append(_image_url_part(base64_image))

    return chat.invoke([HumanMessage(content=content)])
def send_image_to_openai(image_path, system_prompt, user_prompt, resize_factor, api_key, timeout=120):
    """Send an image to OpenAI's chat completions API and print the response.

    The image is downscaled with ``resize_image``, base64-encoded with
    ``encode_image`` and attached to the user message as an ``image_url``
    content part next to the prompt text. Embedding the data URL as markdown
    inside a plain string would be sent as ordinary text rather than as an
    image.

    Args:
        image_path: Path of the image to send.
        system_prompt: Content of the system message.
        user_prompt: Text sent with the image in the user message.
        resize_factor: Factor passed to ``resize_image``.
        api_key: OpenAI API key used as the bearer token.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(content, response)``: the text of the first choice's
        message and the ``requests`` response object.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    base64_image = encode_image(resize_image(img_path=image_path, factor=resize_factor))

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "auto",
                        },
                    },
                ],
            },
        ],
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    if not response.ok:
        # Show the API's error body before raising, instead of failing later
        # with a KeyError on the missing "choices" key.
        print(response.text)
        response.raise_for_status()

    # Parse the body once and reuse it for both the print and the return.
    body = response.json()
    print(body)
    return body["choices"][0]["message"]["content"], response