diff --git a/.gitignore b/.gitignore index ee525a1..e3bfca8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # password files: +experiments/ mysql_password.txt # Byte-compiled / optimized / DLL files diff --git a/experiments/experiments.ipynb b/experiments/experiments.ipynb index 66d706e..6d92b8c 100644 --- a/experiments/experiments.ipynb +++ b/experiments/experiments.ipynb @@ -344,6 +344,471 @@ "df5" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# New version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CSV" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pandas as pd\n", + "# connect to csv:\n", + "# Connect to csv:\n", + "class CSV:\n", + "\n", + " # load csv file:\n", + " def load_csv(self, filepath: Path, delimiter: str) -> pd.DataFrame:\n", + " filepath = Path(filepath)\n", + " return pd.read_csv(filepath, delimiter=delimiter)\n", + " \n", + " # convert dataframe to csv file:\n", + " def to_csv(self, data:pd.DataFrame, filepath: Path):\n", + " filepath = Path(filepath)\n", + " data.to_csv(filepath, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AnimalsType
0TigerWild
1LionWild
2CowDomestic
3DogDomestic
\n", + "
" + ], + "text/plain": [ + " Animals Type\n", + "0 Tiger Wild\n", + "1 Lion Wild\n", + "2 Cow Domestic\n", + "3 Dog Domestic" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obj =CSV()\n", + "data = obj.load_csv(\"sample.csv\", \",\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "obj.to_csv(data, \"sample2.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Connect to Excel Sheet:\n", + "class Excel:\n", + " # load excel sheet:\n", + " def load_excelsheet(self, filepath:Path, sheet_name:str) -> pd.DataFrame:\n", + " filepath = Path(filepath)\n", + " return pd.read_excel(filepath, sheet_name=sheet_name)\n", + " \n", + " # convert dataframe to excel sheet:\n", + " def to_excel(self, data:pd.DataFrame, filepath:Path, sheet_name:str):\n", + " filepath = Path(filepath)\n", + " data.to_excel(filepath, sheet_name, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AnimalsType
0TigerWild
1LionWild
2CowDomestic
3DogDomestic
\n", + "
" + ], + "text/plain": [ + " Animals Type\n", + "0 Tiger Wild\n", + "1 Lion Wild\n", + "2 Cow Domestic\n", + "3 Dog Domestic" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obj = Excel()\n", + "data = obj.load_excelsheet(\"sample.xlsx\", \"sample_sheet\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ADMIN\\AppData\\Local\\Temp\\ipykernel_41452\\3802458429.py:11: FutureWarning: Starting with pandas version 3.0 all arguments of to_excel except for the argument 'excel_writer' will be keyword-only.\n", + " data.to_excel(filepath, sheet_name, index=False)\n" + ] + } + ], + "source": [ + "obj.to_excel(data, \"yuv.xlsx\", \"yuv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Connect to Google Sheet:\n", + "class GSheet:\n", + " # load google sheet\n", + " def load_gsheet(self, gsheet_id:str, sheet_name:str) -> pd.DataFrame:\n", + " base_url = 'https://docs.google.com/spreadsheets/d'\n", + " sheet_csv = 'gviz/tq?tqx=out:csv&sheet='\n", + " url = f'{base_url}/{gsheet_id}/{sheet_csv}{sheet_name}'\n", + " return pd.read_csv(url)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDept
0YuvaneshCSBS
1AsgarCSBS
2AakashCSBS
\n", + "
" + ], + "text/plain": [ + " Name Dept\n", + "0 Yuvanesh CSBS\n", + "1 Asgar CSBS\n", + "2 Aakash CSBS" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "obj = GSheet()\n", + "x = obj.load_gsheet(\"1Brw0GNaeiI46RQejNOGnkjDdNHw62ppU8vFRYm1iJUM\", \"Sheet1\")\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Dict\n", + "from pymongo import MongoClient\n", + "\n", + "# Connect to MOngoDB:\n", + "class MongoDB:\n", + " # initialize the mongo client:\n", + " def __init__(self, host_url:str):\n", + " self.mongoclinet = MongoClient(host_url)\n", + "\n", + " # load data from mongodb:\n", + " def load_data(self, database:str, collection_name:str) -> pd.DataFrame:\n", + " database_connect = self.mongoclinet[database]\n", + " collection = database_connect[collection_name]\n", + " records = collection.find()\n", + " data = list(records)\n", + " df = pd.DataFrame(data)\n", + " df.drop(\"_id\", axis=1, inplace=True)\n", + " return df\n", + " \n", + " # upload data to mongodb:\n", + " def upload_data(self, database:str, collection_name:str):\n", + " database_connect = self.mongoclinet[database]\n", + " collection = database_connect[collection_name]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeCity
0Alice25New York
1Bob30Los Angeles
2Charlie35Chicago
3Alice25New York
4Bob30Los Angeles
5Charlie35Chicago
\n", + "
" + ], + "text/plain": [ + " Name Age City\n", + "0 Alice 25 New York\n", + "1 Bob 30 Los Angeles\n", + "2 Charlie 35 Chicago\n", + "3 Alice 25 New York\n", + "4 Bob 30 Los Angeles\n", + "5 Charlie 35 Chicago" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = MongoDB(\"mongodb://localhost:27017\").load_data(\"dmy001\", \"people\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'Name': 'Alice', 'Age': 25, 'City': 'New York'},\n", + " {'Name': 'Bob', 'Age': 30, 'City': 'Los Angeles'},\n", + " {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago'},\n", + " {'Name': 'Alice', 'Age': 25, 'City': 'New York'},\n", + " {'Name': 'Bob', 'Age': 30, 'City': 'Los Angeles'},\n", + " {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago'}]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dic = data.to_dict(orient=\"records\")\n", + "dic" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/dbsconnector/databases.py b/src/dbsconnector/databases.py index dbf3232..f798800 100644 --- a/src/dbsconnector/databases.py +++ b/src/dbsconnector/databases.py @@ -1,33 +1,92 @@ # importing necessary libraries import pandas as pd -from pymongo.mongo_client import MongoClient -from typing import List, Dict +from pymongo import MongoClient from pathlib import Path +import warnings -# load csv file: -def load_csv(filepath:Path, delimiter:str): - filepath = Path(filepath) - return pd.read_csv(str(filepath), delimiter=delimiter) - -# load excel sheet -def load_excelsheet(filepath:Path, sheet_name:str): - filepath = Path(filepath) - return pd.read_excel(str(filepath), sheet_name=sheet_name) - -# load google sheet -def load_gsheet(gsheet_id:str, sheet_name:str): - base_url = 'https://docs.google.com/spreadsheets/d' - sheet_csv = 'gviz/tq?tqx=out:csv&sheet=' - url = f'{base_url}/{gsheet_id}/{sheet_csv}{sheet_name}' - return pd.read_csv(url) - -# load mongodb data: -def load_mongodbdata(host:str, database:str, collection:str): - client: MongoClient = MongoClient(host) - db = client[database] - col = db[collection] - records = col.find() - data: List[Dict] = list(records) - df = pd.DataFrame(data) - df.drop('_id', axis=1, inplace=True) - return df +warnings.filterwarnings("ignore") + + +# Connect to csv: +class CSV: + def __init__(self): + pass + + # load csv file: + def load_csv(self, filepath: Path, delimiter: str) -> pd.DataFrame: + filepath = Path(filepath) + return pd.read_csv(filepath, delimiter=delimiter) + + # convert dataframe to csv file: + def to_csv(self, data: pd.DataFrame, filepath: Path): + filepath = Path(filepath) + data.to_csv(filepath, index=False) + + +# Connect to Excel Sheet: +class Excel: + def __init__(self): + pass + + # load excel sheet: + def load_excelsheet(self, filepath: Path, sheet_name: str) -> pd.DataFrame: + filepath = Path(filepath) + return pd.read_excel(filepath, sheet_name=sheet_name) + + # convert dataframe to excel sheet: + def to_excel(self, data: pd.DataFrame, filepath: Path, sheet_name: str): + filepath = Path(filepath) + data.to_excel(filepath, sheet_name, index=False) + + +# Connect to Google Sheet: +class GSheet: + def __init__(self): + pass + + # load google sheet + def load_gsheet(self, gsheet_id: str, sheet_name: str) -> pd.DataFrame: + base_url = "https://docs.google.com/spreadsheets/d" + sheet_csv = "gviz/tq?tqx=out:csv&sheet=" + url = f"{base_url}/{gsheet_id}/{sheet_csv}{sheet_name}" + return pd.read_csv(url) + + +# Connect to MOngoDB: +class MongoDB: + # initialize the mongo client: + def __init__(self, host_url: str): + self.mongoclinet: MongoClient = MongoClient(host_url) + + # load data from mongodb: + def load_data(self, database: str, collection_name: str) -> pd.DataFrame: + database_connect = self.mongoclinet[database] + collection = database_connect[collection_name] + records = collection.find() + data = list(records) + df = pd.DataFrame(data) + df.drop("_id", axis=1, inplace=True) + return df + + # upload data to mongodb: + def upload_data(self, database: str, collection_name: str, data: pd.DataFrame): + database_connect = self.mongoclinet[database] + collection = database_connect[collection_name] + data_dict = data.to_dict(orient="records") + collection.insert_many(data_dict) + + # upload object to mongodb: + def upload_object( + self, database: str, collection_name: str, object_name: str, object_ + ): + database_connect = self.mongoclinet[database] + collection = database_connect[collection_name] + record = {"object_name": object_name, "object": object_} + collection.insert_one(record) + + # load object from mongodb: + def load_object(self, database: str, collection_name: str, object_name: str): + database_connect = self.mongoclinet[database] + collection = database_connect[collection_name] + object_ = collection.find_one({"obajct_name": object_name}) + return object_