diff --git a/.gitignore b/.gitignore
index ee525a1..e3bfca8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
# password files:
+experiments/
mysql_password.txt
# Byte-compiled / optimized / DLL files
diff --git a/experiments/experiments.ipynb b/experiments/experiments.ipynb
index 66d706e..6d92b8c 100644
--- a/experiments/experiments.ipynb
+++ b/experiments/experiments.ipynb
@@ -344,6 +344,471 @@
"df5"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# New version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "import pandas as pd\n",
+ "# connect to csv:\n",
+ "# Connect to csv:\n",
+ "class CSV:\n",
+ "\n",
+ " # load csv file:\n",
+ " def load_csv(self, filepath: Path, delimiter: str) -> pd.DataFrame:\n",
+ " filepath = Path(filepath)\n",
+ " return pd.read_csv(filepath, delimiter=delimiter)\n",
+ " \n",
+ " # convert dataframe to csv file:\n",
+ " def to_csv(self, data:pd.DataFrame, filepath: Path):\n",
+ " filepath = Path(filepath)\n",
+ " data.to_csv(filepath, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Animals | \n",
+ " Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Tiger | \n",
+ " Wild | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Lion | \n",
+ " Wild | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Cow | \n",
+ " Domestic | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Dog | \n",
+ " Domestic | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Animals Type\n",
+ "0 Tiger Wild\n",
+ "1 Lion Wild\n",
+ "2 Cow Domestic\n",
+ "3 Dog Domestic"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obj =CSV()\n",
+ "data = obj.load_csv(\"sample.csv\", \",\")\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "obj.to_csv(data, \"sample2.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to Excel Sheet:\n",
+ "class Excel:\n",
+ " # load excel sheet:\n",
+ " def load_excelsheet(self, filepath:Path, sheet_name:str) -> pd.DataFrame:\n",
+ " filepath = Path(filepath)\n",
+ " return pd.read_excel(filepath, sheet_name=sheet_name)\n",
+ " \n",
+ " # convert dataframe to excel sheet:\n",
+ " def to_excel(self, data:pd.DataFrame, filepath:Path, sheet_name:str):\n",
+ " filepath = Path(filepath)\n",
+ " data.to_excel(filepath, sheet_name, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Animals | \n",
+ " Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Tiger | \n",
+ " Wild | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Lion | \n",
+ " Wild | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Cow | \n",
+ " Domestic | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Dog | \n",
+ " Domestic | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Animals Type\n",
+ "0 Tiger Wild\n",
+ "1 Lion Wild\n",
+ "2 Cow Domestic\n",
+ "3 Dog Domestic"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obj = Excel()\n",
+ "data = obj.load_excelsheet(\"sample.xlsx\", \"sample_sheet\")\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\ADMIN\\AppData\\Local\\Temp\\ipykernel_41452\\3802458429.py:11: FutureWarning: Starting with pandas version 3.0 all arguments of to_excel except for the argument 'excel_writer' will be keyword-only.\n",
+ " data.to_excel(filepath, sheet_name, index=False)\n"
+ ]
+ }
+ ],
+ "source": [
+ "obj.to_excel(data, \"yuv.xlsx\", \"yuv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to Google Sheet:\n",
+ "class GSheet:\n",
+ " # load google sheet\n",
+ " def load_gsheet(self, gsheet_id:str, sheet_name:str) -> pd.DataFrame:\n",
+ " base_url = 'https://docs.google.com/spreadsheets/d'\n",
+ " sheet_csv = 'gviz/tq?tqx=out:csv&sheet='\n",
+ " url = f'{base_url}/{gsheet_id}/{sheet_csv}{sheet_name}'\n",
+ " return pd.read_csv(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Dept | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yuvanesh | \n",
+ " CSBS | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Asgar | \n",
+ " CSBS | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Aakash | \n",
+ " CSBS | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Dept\n",
+ "0 Yuvanesh CSBS\n",
+ "1 Asgar CSBS\n",
+ "2 Aakash CSBS"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obj = GSheet()\n",
+ "x = obj.load_gsheet(\"1Brw0GNaeiI46RQejNOGnkjDdNHw62ppU8vFRYm1iJUM\", \"Sheet1\")\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List, Dict\n",
+ "from pymongo import MongoClient\n",
+ "\n",
+ "# Connect to MOngoDB:\n",
+ "class MongoDB:\n",
+ " # initialize the mongo client:\n",
+ " def __init__(self, host_url:str):\n",
+ " self.mongoclinet = MongoClient(host_url)\n",
+ "\n",
+ " # load data from mongodb:\n",
+ " def load_data(self, database:str, collection_name:str) -> pd.DataFrame:\n",
+ " database_connect = self.mongoclinet[database]\n",
+ " collection = database_connect[collection_name]\n",
+ " records = collection.find()\n",
+ " data = list(records)\n",
+ " df = pd.DataFrame(data)\n",
+ " df.drop(\"_id\", axis=1, inplace=True)\n",
+ " return df\n",
+ " \n",
+ " # upload data to mongodb:\n",
+ " def upload_data(self, database:str, collection_name:str):\n",
+ " database_connect = self.mongoclinet[database]\n",
+ " collection = database_connect[collection_name]\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Age | \n",
+ " City | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Alice | \n",
+ " 25 | \n",
+ " New York | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Bob | \n",
+ " 30 | \n",
+ " Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Charlie | \n",
+ " 35 | \n",
+ " Chicago | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Alice | \n",
+ " 25 | \n",
+ " New York | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Bob | \n",
+ " 30 | \n",
+ " Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Charlie | \n",
+ " 35 | \n",
+ " Chicago | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Age City\n",
+ "0 Alice 25 New York\n",
+ "1 Bob 30 Los Angeles\n",
+ "2 Charlie 35 Chicago\n",
+ "3 Alice 25 New York\n",
+ "4 Bob 30 Los Angeles\n",
+ "5 Charlie 35 Chicago"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = MongoDB(\"mongodb://localhost:27017\").load_data(\"dmy001\", \"people\")\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'Name': 'Alice', 'Age': 25, 'City': 'New York'},\n",
+ " {'Name': 'Bob', 'Age': 30, 'City': 'Los Angeles'},\n",
+ " {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago'},\n",
+ " {'Name': 'Alice', 'Age': 25, 'City': 'New York'},\n",
+ " {'Name': 'Bob', 'Age': 30, 'City': 'Los Angeles'},\n",
+ " {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago'}]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dic = data.to_dict(orient=\"records\")\n",
+ "dic"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/src/dbsconnector/databases.py b/src/dbsconnector/databases.py
index dbf3232..f798800 100644
--- a/src/dbsconnector/databases.py
+++ b/src/dbsconnector/databases.py
@@ -1,33 +1,92 @@
# importing necessary libraries
import pandas as pd
-from pymongo.mongo_client import MongoClient
-from typing import List, Dict
+from pymongo import MongoClient
from pathlib import Path
+import warnings
-# load csv file:
-def load_csv(filepath:Path, delimiter:str):
- filepath = Path(filepath)
- return pd.read_csv(str(filepath), delimiter=delimiter)
-
-# load excel sheet
-def load_excelsheet(filepath:Path, sheet_name:str):
- filepath = Path(filepath)
- return pd.read_excel(str(filepath), sheet_name=sheet_name)
-
-# load google sheet
-def load_gsheet(gsheet_id:str, sheet_name:str):
- base_url = 'https://docs.google.com/spreadsheets/d'
- sheet_csv = 'gviz/tq?tqx=out:csv&sheet='
- url = f'{base_url}/{gsheet_id}/{sheet_csv}{sheet_name}'
- return pd.read_csv(url)
-
-# load mongodb data:
-def load_mongodbdata(host:str, database:str, collection:str):
- client: MongoClient = MongoClient(host)
- db = client[database]
- col = db[collection]
- records = col.find()
- data: List[Dict] = list(records)
- df = pd.DataFrame(data)
- df.drop('_id', axis=1, inplace=True)
- return df
+# NOTE(review): called with no category/module arguments, this filter hides
+# every warning, process-wide, for any program that imports this module
+# (deprecation notices from pandas/pymongo included) — confirm this is
+# intended, or scope it to the specific call that needs it.
+warnings.filterwarnings("ignore")
+
+
+# Connect to csv:
+class CSV:
+    """Read and write delimiter-separated text files through pandas."""
+
+    def __init__(self):
+        """Create the connector; it keeps no state."""
+
+    def load_csv(self, filepath: Path, delimiter: str) -> pd.DataFrame:
+        """Load the delimited file at ``filepath`` into a DataFrame.
+
+        ``delimiter`` is forwarded unchanged to ``pd.read_csv``.
+        """
+        source = Path(filepath)
+        frame = pd.read_csv(source, delimiter=delimiter)
+        return frame
+
+    def to_csv(self, data: pd.DataFrame, filepath: Path):
+        """Write ``data`` to ``filepath`` as CSV, leaving out the index column."""
+        target = Path(filepath)
+        data.to_csv(target, index=False)
+
+
+# Connect to Excel Sheet:
+class Excel:
+    """Read and write Excel worksheets through pandas."""
+
+    def __init__(self):
+        pass
+
+    def load_excelsheet(self, filepath: Path, sheet_name: str) -> pd.DataFrame:
+        """Load worksheet ``sheet_name`` of the workbook at ``filepath``.
+
+        Returns the worksheet contents as a DataFrame.
+        """
+        filepath = Path(filepath)
+        return pd.read_excel(filepath, sheet_name=sheet_name)
+
+    def to_excel(self, data: pd.DataFrame, filepath: Path, sheet_name: str):
+        """Write ``data`` to worksheet ``sheet_name`` of ``filepath``.
+
+        The DataFrame index is not written.
+        """
+        filepath = Path(filepath)
+        # sheet_name is passed by keyword: pandas has deprecated positional
+        # arguments after ``excel_writer`` and makes them keyword-only in 3.0.
+        data.to_excel(filepath, sheet_name=sheet_name, index=False)
+
+
+# Connect to Google Sheet:
+class GSheet:
+    """Read Google Sheets worksheets through the gviz CSV export URL."""
+
+    def __init__(self):
+        pass
+
+    def load_gsheet(self, gsheet_id: str, sheet_name: str) -> pd.DataFrame:
+        """Load worksheet ``sheet_name`` of spreadsheet ``gsheet_id``.
+
+        The worksheet is requested as CSV from docs.google.com and parsed
+        with ``pd.read_csv``. Returns the parsed DataFrame.
+        """
+        from urllib.parse import quote
+
+        base_url = "https://docs.google.com/spreadsheets/d"
+        sheet_csv = "gviz/tq?tqx=out:csv&sheet="
+        # Percent-encode the sheet name so spaces, "&", "#" etc. cannot break
+        # the query string. "%" is left alone so names that callers already
+        # percent-encoded keep producing exactly the same URL as before.
+        encoded_sheet = quote(sheet_name, safe="%")
+        url = f"{base_url}/{gsheet_id}/{sheet_csv}{encoded_sheet}"
+        return pd.read_csv(url)
+
+
+# Connect to MongoDB:
+class MongoDB:
+    """Move DataFrames and single records in and out of MongoDB collections."""
+
+    def __init__(self, host_url: str):
+        """Create a ``MongoClient`` for the server at ``host_url``."""
+        # The attribute spelling ("mongoclinet") is kept as-is so code that
+        # already reads it keeps working.
+        self.mongoclinet: MongoClient = MongoClient(host_url)
+
+    def _collection(self, database: str, collection_name: str):
+        """Return the handle for ``collection_name`` inside ``database``."""
+        return self.mongoclinet[database][collection_name]
+
+    def load_data(self, database: str, collection_name: str) -> pd.DataFrame:
+        """Load every document of a collection into a DataFrame.
+
+        The ``_id`` column is removed from the result. An empty collection
+        yields an empty DataFrame.
+        """
+        records = self._collection(database, collection_name).find()
+        df = pd.DataFrame(list(records))
+        # An empty collection produces a frame without an "_id" column;
+        # errors="ignore" keeps that case from raising KeyError.
+        df.drop(columns="_id", errors="ignore", inplace=True)
+        return df
+
+    def upload_data(self, database: str, collection_name: str, data: pd.DataFrame):
+        """Insert each row of ``data`` as one document into the collection.
+
+        Nothing is inserted when ``data`` has no rows.
+        """
+        data_dict = data.to_dict(orient="records")
+        if not data_dict:
+            # insert_many rejects an empty document list, so skip the call.
+            return
+        self._collection(database, collection_name).insert_many(data_dict)
+
+    def upload_object(
+        self, database: str, collection_name: str, object_name: str, object_
+    ):
+        """Store ``object_`` in the collection under the name ``object_name``.
+
+        The record is written as
+        ``{"object_name": object_name, "object": object_}``.
+        """
+        record = {"object_name": object_name, "object": object_}
+        self._collection(database, collection_name).insert_one(record)
+
+    def load_object(self, database: str, collection_name: str, object_name: str):
+        """Fetch the record stored under ``object_name``.
+
+        Returns the result of ``find_one`` on the ``"object_name"`` field,
+        i.e. the whole stored document rather than only its ``"object"``
+        value.
+        """
+        # The filter key must match the one written by upload_object
+        # ("object_name"); the previous misspelling never matched anything.
+        object_ = self._collection(database, collection_name).find_one(
+            {"object_name": object_name}
+        )
+        return object_