Skip to content

Commit

Permalink
Search: try custom HTTPS implementation for Pinecone request
Browse files Browse the repository at this point in the history
  • Loading branch information
YuraLukashik committed Oct 29, 2023
1 parent a53820b commit 98846d8
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 5 deletions.
20 changes: 20 additions & 0 deletions src/semantic_search/semantic_search/external_services/pinecone.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging

import pinecone

from ..https_requests import send_https_request
from ..config import get_pinecone_key, get_pinecone_environment, get_pinecone_index_name

pinecone.init(api_key=get_pinecone_key(), environment=get_pinecone_environment(), log_level="debug")
Expand All @@ -12,3 +14,21 @@

def get_pinecone_index() -> 'pinecone.Index':
    """Return a ``pinecone.Index`` handle for the configured index name.

    The index name is read from ``get_pinecone_index_name()`` on every call.
    """
    index_name = get_pinecone_index_name()
    return pinecone.Index(index_name)


def query_index(query_vector, top_k, namespace, include_values, include_metadata):
    """Query the Pinecone index over a raw HTTPS request instead of the client library.

    Args:
        query_vector: Embedding vector to search with.
        top_k: Maximum number of matches to request.
        namespace: Pinecone namespace to search in.
        include_values: Whether matches should carry their vector values.
        include_metadata: Whether matches should carry their metadata.

    Returns:
        Whatever ``send_https_request`` returns for the ``/query`` call, i.e. a
        ``(status_code, status_line, response_headers, response_body)`` tuple.
    """
    # NOTE(review): "0ddc4d6" looks like a project-specific host suffix that is
    # hard-coded here -- confirm, and consider moving it into the configuration.
    host = f"{get_pinecone_index_name()}-0ddc4d6.svc.{get_pinecone_environment()}.pinecone.io"
    path = "/query"
    headers = {
        "content-type": "application/json",
        "api-key": get_pinecone_key(),
        "accept": "application/json",
    }
    data = {
        "vector": query_vector,
        # The REST API documents camelCase field names; "topK" matches the
        # sibling "includeMetadata" / "includeValues" keys.
        "topK": top_k,
        "includeMetadata": include_metadata,
        "includeValues": include_values,
        "namespace": namespace,
    }
    return send_https_request(host, path, data, headers)
53 changes: 53 additions & 0 deletions src/semantic_search/semantic_search/https_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import json
import socket
import ssl


def create_ssl_socket(host, port):
    """Connect to ``host:port`` over TCP and return the connection wrapped in TLS.

    The TLS context comes from ``ssl.create_default_context()`` and ``host`` is
    passed as ``server_hostname`` when wrapping the socket.
    """
    tls_context = ssl.create_default_context()
    address = (host, port)
    tcp_socket = socket.create_connection(address)
    tls_socket = tls_context.wrap_socket(tcp_socket, server_hostname=host)
    return tls_socket


def _decode_chunked_body(body):
    """Reassemble a ``Transfer-Encoding: chunked`` body; return it unchanged if malformed."""
    # Chunk sizes count bytes, not characters, so work on the encoded form.
    raw = body.encode()
    chunks = []
    position = 0
    while True:
        line_end = raw.find(b"\r\n", position)
        if line_end == -1:
            return body
        # Drop optional chunk extensions (";name=value") after the size.
        size_field = raw[position:line_end].split(b";", 1)[0].strip()
        try:
            chunk_size = int(size_field, 16)
        except ValueError:
            return body
        if chunk_size < 0:
            return body
        if chunk_size == 0:
            break
        chunk_start = line_end + 2
        chunk_end = chunk_start + chunk_size
        if chunk_end > len(raw):
            return body
        chunks.append(raw[chunk_start:chunk_end])
        position = chunk_end + 2  # skip the CRLF that terminates the chunk
    try:
        return b"".join(chunks).decode()
    except UnicodeDecodeError:
        return body


def parse_http_response(raw_response):
    """Split a raw HTTP response string into its parts.

    Args:
        raw_response: Full response text (status line, headers, blank line, body).

    Returns:
        A ``(status_code, status_line, header_dict, body)`` tuple. ``header_dict``
        maps header names (as sent by the server) to their values with
        surrounding whitespace removed; when a name repeats, the last value
        wins. If the response declares ``Transfer-Encoding: chunked``, the body
        is returned with the chunk framing removed.

    Raises:
        ValueError: If the status line has no status code or it is not an integer.
    """
    # Split response into headers and body
    head, _, body = raw_response.partition('\r\n\r\n')
    header_lines = head.split('\r\n')
    status_line = header_lines[0]
    status_parts = status_line.split()
    if len(status_parts) < 2:
        raise ValueError(f"Malformed HTTP status line: {status_line!r}")
    status_code = int(status_parts[1])

    header_dict = {}
    for line in header_lines[1:]:
        # Split on the first colon only: the space after it is optional and
        # values may be empty, so splitting on ': ' is too strict.
        name, separator, value = line.partition(':')
        if not separator:
            continue  # not a "name: value" line; ignore instead of crashing
        header_dict[name.strip()] = value.strip()

    transfer_encoding = ""
    for name, value in header_dict.items():
        if name.lower() == "transfer-encoding":
            transfer_encoding = value.lower()
    if "chunked" in transfer_encoding:
        body = _decode_chunked_body(body)

    return status_code, status_line, header_dict, body


def read_all_from_socket(ssock):
    """Read from ``ssock`` until the peer closes the connection and return the text.

    Args:
        ssock: A connected socket-like object exposing ``recv(bufsize)``.

    Returns:
        Everything received, decoded with the default (UTF-8) codec.
    """
    # Collect the pieces and join once; repeated ``bytes +=`` copies the whole
    # buffer on every iteration.
    chunks = []
    while True:
        data = ssock.recv(4096)
        if not data:  # empty read means the peer closed the connection
            break
        chunks.append(data)
    return b"".join(chunks).decode()


def send_https_request(host, path, data, headers=None):
    """POST ``data`` as JSON to ``https://host/path`` over a hand-built HTTP/1.1 request.

    Args:
        host: Server hostname; used for the connection (port 443) and ``Host`` header.
        path: Request path, e.g. ``"/query"``.
        data: JSON-serialisable payload for the request body.
        headers: Optional extra headers. A header whose name matches a default
            one (``Host``, ``Content-Type``, ``Content-Length``, ``Connection``)
            in any letter case replaces that default instead of being sent twice.

    Returns:
        The ``(status_code, status_line, response_headers, response_body)``
        tuple produced by ``parse_http_response``.
    """
    if headers is None:
        headers = {}
    body = json.dumps(data).encode()
    # Keyed by lower-cased name so that e.g. a caller's "content-type" overrides
    # the default "Content-Type" rather than producing a duplicate header.
    merged_headers = {
        "host": ("Host", host),
        "content-type": ("Content-Type", "application/json"),
        # Content-Length counts bytes of the encoded body, not characters.
        "content-length": ("Content-Length", str(len(body))),
        "connection": ("Connection", "close"),
    }
    for name, value in headers.items():
        merged_headers[name.lower()] = (name, value)

    head = f"POST {path} HTTP/1.1\r\n"
    head += "".join(f"{name}: {value}\r\n" for name, value in merged_headers.values())
    head += "\r\n"
    request = head.encode() + body

    with create_ssl_socket(host, 443) as ssock:
        # sendall() keeps writing until the whole request is out; send() may
        # transmit only part of it.
        ssock.sendall(request)
        # "Connection: close" makes the server close the socket after the
        # response, which is what ends the read loop.
        raw_response = read_all_from_socket(ssock)

    return parse_http_response(raw_response)
18 changes: 13 additions & 5 deletions src/semantic_search/semantic_search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import time
from datetime import date
from .external_services.pinecone import get_pinecone_index
from .external_services.pinecone import get_pinecone_index, query_index
from .external_services.openai import create_embedding, gpt_query


Expand Down Expand Up @@ -36,15 +36,23 @@ def smart_query(namespace, query, username: str):
logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s")

stage_start_time = time.perf_counter()
query_results = get_pinecone_index().query(
queries=[query_vector],
# query_results = get_pinecone_index().query(
# queries=[query_vector],
# top_k=50,
# namespace=namespace,
# include_values=False,
# includeMetadata=True
# )
query_results = query_index(
query_vector=query_vector,
top_k=50,
namespace=namespace,
include_values=False,
includeMetadata=True
include_metadata=True
)
logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s")
query_matches = query_results['results'][0]['matches']
# query_matches = query_results['results'][0]['matches']
query_matches = json.loads(query_results[3])['matches']

messages_for_gpt = [
{
Expand Down

0 comments on commit 98846d8

Please sign in to comment.