Skip to content

Commit

Permalink
Merge pull request #244 from amosproj/#214--Google-docs-2nd-Attempt
Browse files Browse the repository at this point in the history
Download Google Docs Content
  • Loading branch information
eloinoel authored Jul 6, 2024
2 parents c42267a + 24b536b commit 4e9d41d
Show file tree
Hide file tree
Showing 9 changed files with 215 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,8 @@ google-services.json
google-services.plist

venv

# google docs download
**credentials*.json
**token.pickle
**google_docs_content.txt
Binary file not shown.
10 changes: 8 additions & 2 deletions functions/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def initialize_vector_store(api_key, token):
- Title: The title of the recipe.
- SubTitle: The subtitle of the recipe.
- Rating: The rating of the recipe, if available.
- Recipe Details: Detailed information about the recipe, including preparation time, cooking time, and serving size.
- Recipe Details: Detailed information about the recipe, including preparation time,
cooking time, and serving size.
- Ingredients: A list of ingredients required for making recipe.
- Steps: Step-by-step instructions to prepare the dish.
- Nutrition Facts: Basic nutritional information about the recipe.
Expand Down Expand Up @@ -156,7 +157,12 @@ def get_health_ai_response(question):

# Prompt Template for Health AI Agent
health_ai_template = """
You are a health AI agent equipped with access to diverse sources of health data, including research articles, nutritional information, medical archives, and more. Your task is to provide informed answers to user queries based on the available data. If you cannot find relevant information, simply state that you do not have enough data to answer accurately. write your response in markdown form and also add reference url so user can know from which source you are answering the questions.
You are a health AI agent equipped with access to diverse sources of health data,
including research articles, nutritional information, medical archives, and more.
Your task is to provide informed answers to user queries based on the available data.
If you cannot find relevant information, simply state that you do not have enough data
to answer accurately. write your response in markdown form and also add reference url
so user can know from which source you are answering the questions.
CONTEXT:
{context}
Expand Down
122 changes: 118 additions & 4 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ dependencies = [
"requests>=2.31.0",
"langchain-astradb>=0.3.3",
"langchain-openai>=0.1.8",
"google-api-python-client>=2.136.0",
"google-auth>=2.31.0",
"google-auth-oauthlib>=1.2.0",
"google-auth-httplib2>=0.2.0",
"pathlib>=1.0.1",
]

[tool.pdm.dev-dependencies]
Expand All @@ -46,6 +51,7 @@ scrape-pubmed = "python -m src.backend.Scrapers.PubMed.main"
scrape-youtube = "python -m src.backend.Scrapers.YouTube.main"
scrape-archive = "python -m src.backend.Scrapers.Archive.main"
scrape-nutritionfacts = "python -m src.backend.Scrapers.Nutritionfacts.main"
google-docs = "python -m src.backend.RAG.LangChain_Implementation.get_google_docs"

[tool.pdm]
distribution = false
Empty file.
76 changes: 76 additions & 0 deletions src/backend/RAG/LangChain_Implementation/get_google_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import io
import os
import pickle
import re
from pathlib import Path

from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

def extract_document_id_from_url(url):
    """Return the Google Docs document ID contained in *url*.

    Document IDs may contain letters, digits, hyphens and underscores, and
    appear in the path segment that follows ``/d/``.

    Args:
        url: A Google Docs URL such as
            ``https://docs.google.com/document/d/<id>/edit``, or a bare ID.

    Returns:
        The document ID, or an empty string when *url* contains no
        ID-like token at all.
    """
    # Preferred: take the path segment directly after "/d/". This cannot be
    # fooled by a long query parameter or host name elsewhere in the URL.
    match = re.search(r'/d/([A-Za-z0-9_-]+)', url)
    if match:
        return match.group(1)

    # Fallback for bare IDs or unusual URLs: the longest ID-like token.
    candidates = re.findall(r'[A-Za-z0-9_-]+', url)
    return max(candidates, key=len, default='')

def authenticate(credentials, scopes, token_path='token.pickle'):
    """Return Google API credentials, reusing a cached token when possible.

    Args:
        credentials: Path to the OAuth client-secrets JSON file, used only
            when an interactive login is required.
        scopes: OAuth scopes to request during an interactive login.
        token_path: File in which the pickled credentials are cached.
            Defaults to ``'token.pickle'`` in the current working directory.

    Returns:
        The credentials object: loaded from the cache, refreshed, or newly
        obtained through the local-server login flow.
    """
    creds = None
    # The token file stores the user's access and refresh tokens and is
    # created automatically the first time the authorization flow completes.
    # NOTE(security): pickle.load can execute arbitrary code, so the token
    # file must only ever be one written by this function.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)

    # Cached credentials are still usable: nothing to refresh or store.
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        # Expired but refreshable: renew without user interaction.
        creds.refresh(Request())
    else:
        # No usable credentials: let the user log in through the browser.
        flow = InstalledAppFlow.from_client_secrets_file(credentials, scopes)
        creds = flow.run_local_server(port=0)

    # Save the credentials for the next run.
    with open(token_path, 'wb') as token:
        pickle.dump(creds, token)

    return creds


def download_file(file_id, credentials_path, file_name):
    """Export a Google Docs document as plain text, save it and return it.

    Args:
        file_id: Drive file ID of the document to export.
        credentials_path: Path to the OAuth client-secrets JSON file,
            forwarded to ``authenticate``.
        file_name: Destination path for the exported text. Its parent
            directory must already exist.

    Returns:
        The exported document content, read back from *file_name* as UTF-8.
    """
    scopes = ['https://www.googleapis.com/auth/drive.readonly']
    credentials = authenticate(credentials_path, scopes)
    drive_service = build('drive', 'v3', credentials=credentials)

    # Export the Google Docs file as plain text
    export_mime_type = 'text/plain'
    request = drive_service.files().export_media(fileId=file_id, mimeType=export_mime_type)

    # Write the exported content to disk. The context manager guarantees the
    # handle is closed (even if a chunk fails) before the file is read back.
    with io.FileIO(file_name, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f'Download {int(status.progress() * 100)}%.')

    # Read the content of the exported file
    with open(file_name, 'r', encoding='utf-8') as file:
        content = file.read()

    return content


# Example usage.
# NOTE(review): these statements sit at module level, so they run (and
# trigger a download) whenever this module is imported, not only when it is
# started with `python -m`.

# Destination of the exported text; pathlib keeps the path cross-platform.
file_name = Path('data', 'google_docs_content.txt')
file_name.parent.mkdir(parents=True, exist_ok=True)

# OAuth client-secrets file, given as a path relative to the working directory.
credentials_json = 'credentials.json'

# Sample document; its ID is taken from the sharing URL.
document_id = extract_document_id_from_url(
    "https://docs.google.com/document/d/1xrfrwyRCTrxiCupiKSSFgKUxiCTXgr45gPJYybnY23w/edit"
)

# TODO: make this callable from typescript with url

content = download_file(
    file_id=document_id,
    credentials_path=credentials_json,
    file_name=file_name,
)
print(content)
Empty file added src/backend/RAG/__init__.py
Empty file.
3 changes: 2 additions & 1 deletion src/frontend/screens/ChatUI/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {
useActiveChatId,
useCreateChat,
LLM_MODELS,
useLLMs
useLLMs,
} from 'src/frontend/hooks';
import { Timestamp } from 'firebase/firestore';
import { ActivityIndicator, IconButton, Button } from 'react-native-paper';
Expand All @@ -36,6 +36,7 @@ export type ChatUiProps = {
};

export function ChatUI(/*props: ChatUiProps*/) {

const { colors } = useTheme();
const scrollViewRef = useRef<ScrollView>(null);

Expand Down

0 comments on commit 4e9d41d

Please sign in to comment.