Merge pull request #244 from amosproj/#214--Google-docs-2nd-Attempt

Download Google Docs Content
amosproj · Jul 6, 2024 · 4e9d41d · 4e9d41d
2 parents c42267a + 24b536b
commit 4e9d41d
Show file tree

Hide file tree

Showing 9 changed files with 215 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -82,3 +82,8 @@ google-services.json
 google-services.plist
 
 venv
+
+# google docs download
+**credentials*.json
+**token.pickle
+**google_docs_content.txt
diff --git a/Documentation/Demo_Videos/sprint11/214_download_google_docs.m4v b/Documentation/Demo_Videos/sprint11/214_download_google_docs.m4v
diff --git a/functions/main.py b/functions/main.py
@@ -124,7 +124,8 @@ def initialize_vector_store(api_key, token):
 - Title: The title of the recipe.
 - SubTitle: The subtitle of the recipe.
 - Rating: The rating of the recipe, if available.
-- Recipe Details: Detailed information about the recipe, including preparation time, cooking time, and serving size.
+- Recipe Details: Detailed information about the recipe, including preparation time,
+cooking time, and serving size.
 - Ingredients: A list of ingredients required for making recipe.
 - Steps: Step-by-step instructions to prepare the dish.
 - Nutrition Facts: Basic nutritional information about the recipe.
@@ -156,7 +157,12 @@ def get_health_ai_response(question):
 
     # Prompt Template for Health AI Agent
     health_ai_template = """
-    You are a health AI agent equipped with access to diverse sources of health data, including research articles, nutritional information, medical archives, and more. Your task is to provide informed answers to user queries based on the available data. If you cannot find relevant information, simply state that you do not have enough data to answer accurately. write your response in markdown form and also add reference url so user can know from which source you are answering the questions.
+    You are a health AI agent equipped with access to diverse sources of health data,
+    including research articles, nutritional information, medical archives, and more.
+    Your task is to provide informed answers to user queries based on the available data.
+    If you cannot find relevant information, simply state that you do not have enough data
+    to answer accurately. write your response in markdown form and also add reference url
+    so user can know from which source you are answering the questions.
 
     CONTEXT:
     {context}

diff --git a/pdm.lock b/pdm.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,11 @@ dependencies = [
     "requests>=2.31.0",
     "langchain-astradb>=0.3.3",
     "langchain-openai>=0.1.8",
+    "google-api-python-client>=2.136.0",
+    "google-auth>=2.31.0",
+    "google-auth-oauthlib>=1.2.0",
+    "google-auth-httplib2>=0.2.0",
+    "pathlib>=1.0.1",
 ]
 
 [tool.pdm.dev-dependencies]
@@ -46,6 +51,7 @@ scrape-pubmed = "python -m src.backend.Scrapers.PubMed.main"
 scrape-youtube = "python -m src.backend.Scrapers.YouTube.main"
 scrape-archive = "python -m src.backend.Scrapers.Archive.main"
 scrape-nutritionfacts = "python -m src.backend.Scrapers.Nutritionfacts.main"
+google-docs = "python -m src.backend.RAG.LangChain_Implementation.get_google_docs"
 
 [tool.pdm]
 distribution = false
diff --git a/src/backend/RAG/LangChain_Implementation/__init__.py b/src/backend/RAG/LangChain_Implementation/__init__.py
diff --git a/src/backend/RAG/LangChain_Implementation/get_google_docs.py b/src/backend/RAG/LangChain_Implementation/get_google_docs.py
@@ -0,0 +1,76 @@
+import io
+import os
+import pickle
+import re
+from pathlib import Path
+
+from google.auth.transport.requests import Request
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseDownload
+
+def extract_document_id_from_url(url):
+    """Return the Google Docs document ID contained in ``url``.
+
+    The ID is taken from the ``/d/<id>`` path segment when present. If the
+    input has no such segment (for example, a bare ID was passed), the
+    longest run of ID characters in the string is returned instead.
+
+    Args:
+        url: A Google Docs URL, or a string containing a document ID.
+
+    Returns:
+        The extracted document ID, or an empty string if ``url`` contains
+        no ID characters at all.
+    """
+    # Document IDs may contain '-' and '_' in addition to letters and digits;
+    # a purely alphanumeric pattern would cut such IDs into fragments.
+    match = re.search(r'/d/([A-Za-z0-9_-]+)', url)
+    if match:
+        return match.group(1)
+
+    # Fallback heuristic: the ID is assumed to be the longest token.
+    # findall with '*' always yields at least one (possibly empty) match,
+    # so max() never sees an empty sequence.
+    candidates = re.findall(r'[A-Za-z0-9_-]*', url)
+    return max(candidates, key=len)
+
+def authenticate(credentials, scopes):
+    """Return Google API credentials, reusing a cached token when possible.
+
+    Credentials are cached in ``token.pickle`` (relative to the current
+    working directory). A valid cached token is returned as-is; an expired
+    one with a refresh token is refreshed; otherwise the installed-app OAuth
+    flow is run. Refreshed or newly obtained credentials are written back
+    to ``token.pickle``.
+
+    Args:
+        credentials: Path to the OAuth client secrets file.
+        scopes: OAuth scopes requested when a new login flow is started.
+            Not consulted when a cached token is reused.
+
+    Returns:
+        The credentials object, loaded from cache or freshly obtained.
+    """
+    token_path = 'token.pickle'
+
+    cached = None
+    if os.path.exists(token_path):
+        # NOTE: pickle.load can execute arbitrary code; token.pickle must
+        # only ever be a file written by this function.
+        with open(token_path, 'rb') as token_file:
+            cached = pickle.load(token_file)
+
+    # Nothing more to do when the cached credentials are still valid.
+    if cached and cached.valid:
+        return cached
+
+    if cached and cached.expired and cached.refresh_token:
+        cached.refresh(Request())
+        fresh = cached
+    else:
+        # No usable cached credentials: let the user log in.
+        login_flow = InstalledAppFlow.from_client_secrets_file(credentials, scopes)
+        fresh = login_flow.run_local_server(port=0)
+
+    # Persist the credentials for the next run.
+    with open(token_path, 'wb') as token_file:
+        pickle.dump(fresh, token_file)
+
+    return fresh
+
+
+def download_file(file_id, credentials_path, file_name):
+    """Export a Google Docs file as plain text, save it, and return its text.
+
+    Authenticates with read-only Drive scope, exports the document as
+    ``text/plain`` into ``file_name`` (overwriting it), prints download
+    progress, then reads the saved file back as UTF-8.
+
+    Args:
+        file_id: ID of the Google Docs document to export.
+        credentials_path: Path to the OAuth client secrets file.
+        file_name: Destination path for the exported text.
+
+    Returns:
+        The exported document content as a string.
+    """
+    scopes = ['https://www.googleapis.com/auth/drive.readonly']
+    credentials = authenticate(credentials_path, scopes)
+    drive_service = build('drive', 'v3', credentials=credentials)
+
+    # Export the Google Docs file as plain text
+    export_mime_type = 'text/plain'
+    request = drive_service.files().export_media(fileId=file_id, mimeType=export_mime_type)
+
+    # Write the exported content to disk. The context manager closes the
+    # handle even if a chunk download raises, and guarantees the file is
+    # closed before it is reopened for reading below.
+    with io.FileIO(file_name, 'wb') as fh:
+        downloader = MediaIoBaseDownload(fh, request)
+        done = False
+        while not done:
+            status, done = downloader.next_chunk()
+            print(f'Download {int(status.progress() * 100)}%.')
+
+    # Read the content of the exported file
+    with open(file_name, 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    return content
+
+
+# Example usage. Guarded so that merely importing this module does not start
+# the OAuth flow or download anything; running it as a script or with
+# `python -m` still executes the example.
+if __name__ == '__main__':
+    document_id = extract_document_id_from_url(
+        "https://docs.google.com/document/d/1xrfrwyRCTrxiCupiKSSFgKUxiCTXgr45gPJYybnY23w/edit"
+    )
+    credentials_json = 'credentials.json'
+
+    # Define the file path in a cross-platform manner
+    file_name = Path('data') / 'google_docs_content.txt'
+    file_name.parent.mkdir(parents=True, exist_ok=True)
+
+    # TODO: make this callable from typescript with url
+
+    content = download_file(document_id, credentials_json, file_name)
+    print(content)
diff --git a/src/backend/RAG/__init__.py b/src/backend/RAG/__init__.py
diff --git a/src/frontend/screens/ChatUI/index.tsx b/src/frontend/screens/ChatUI/index.tsx
@@ -23,7 +23,7 @@ import {
   useActiveChatId,
   useCreateChat,
   LLM_MODELS,
-  useLLMs
+  useLLMs,
 } from 'src/frontend/hooks';
 import { Timestamp } from 'firebase/firestore';
 import { ActivityIndicator, IconButton, Button } from 'react-native-paper';
@@ -36,6 +36,7 @@ export type ChatUiProps = {
 };
 
 export function ChatUI(/*props: ChatUiProps*/) {
+
   const { colors } = useTheme();
   const scrollViewRef = useRef<ScrollView>(null);