-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
75 lines (57 loc) · 1.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
import cv2
from masks_to_text import BlipImageCaptioning
from text_to_embeddings import TextToEmbeddings
from PIL import Image
import numpy as np
# Build the models once, outside the button handler, so that a button press
# (which re-executes this script) does not construct them again.
@st.cache_resource
def init_models():
    """Create the captioning and text-embedding helpers used by the app.

    Returns:
        A ``(blip, uae)`` tuple: a ``BlipImageCaptioning`` instance followed
        by a ``TextToEmbeddings`` instance. Wrapped in ``st.cache_resource``
        so the same objects are reused across reruns.
    """
    return BlipImageCaptioning(), TextToEmbeddings()
# Page configuration is issued before any other Streamlit element is drawn.
st.set_page_config(page_title="VODKA", page_icon="🥷", layout="wide")

# Captioning and embedding helpers (init_models is wrapped in st.cache_resource).
blip, uae = init_models()

# Global style override: pad every rendered <img> by 10%.
css = """
<style>
img {
padding: 10%;
}
</style>
"""
st.markdown(body=css, unsafe_allow_html=True)

st.header(body="Voting Over Distilled Knowledge Associations (VODKA)")

# Inputs: the candidate cutout images and the text prompt they are scored against.
files = st.file_uploader(
    "Upload the cutouts",
    accept_multiple_files=True,
    type=["jpg", "png"],
    help="Upload the top 5 cutouts.",
)
prompt = st.text_input(
    label="Enter the prompt for segmentation",
    help="Provide a descriptive prompt to guide the segmentation process.",
)
# On "Process": caption every uploaded cutout, score each caption against the
# prompt, and display the cutouts labelled with "<caption>. <similarity>".
if st.button(label="Process", help="Click to start the segmentation process."):
    # With accept_multiple_files=True the uploader yields a list that is empty
    # (not None) when nothing has been uploaded, so test truthiness; an
    # `is not None` check would never reach the error branch below.
    if files and prompt:
        with st.spinner("Processing image..."):
            # Force 3-channel RGB before the RGB->BGR swap: grayscale and
            # palette images would otherwise decode to 2-D arrays, which
            # cv2.cvtColor(..., COLOR_RGB2BGR) cannot convert.
            images_cv2 = [
                cv2.cvtColor(
                    np.array(Image.open(file).convert("RGB")),
                    cv2.COLOR_RGB2BGR,
                )
                for file in files
            ]
        with st.spinner("Generating descriptions..."):
            # One caption per cutout, in upload order.
            captions = [blip.generate_description(cutout) for cutout in images_cv2]
        with st.spinner("Calculating similarity..."):
            # One prompt-vs-caption score per cutout, aligned with `captions`.
            similarities = [uae.get_similarity(prompt, caption) for caption in captions]
        # Show the original uploads, each labelled with its caption and score.
        st.image(
            files,
            width=250,
            caption=[f"{c}. {s}" for c, s in zip(captions, similarities)],
        )
    else:
        st.error("Please upload an image and enter a prompt to proceed.")