#!/Users/proffapt/.pyenv/shims/python
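"""
CDC companies fetcher: syncs the company list from the IIT KGP ERP Training &
Placement (CDC) portal into a local companies.json, then lists companies by
status depending on the flags passed: -o (open for CV submission), -a (applied),
-aa (applied and still open / interviews remaining).
"""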
import os
import json
import erpcreds
import requests
import argparse
import webbrowser
from tabulate import tabulate
from datetime import datetime
import iitkgp_erp_login.erp as erp
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as bs
# Global variables and objects
companies_file = f"{os.path.dirname(__file__)}/companies.json"
TPSTUDENT_URL = "https://erp.iitkgp.ac.in/TrainingPlacementSSO/TPStudent.jsp"
COMPANIES_URL = "https://erp.iitkgp.ac.in/TrainingPlacementSSO/ERPMonitoring.htm?action=fetchData&jqqueryid=37&_search=false&nd=1448725351715&rows=20&page=1&sidx=&sord=asc&totalrows=50"
HEADERS = {
    "timeout": "20",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/51.0.2704.79 Chrome/51.0.2704.79 Safari/537.36",
}
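# One shared Session, so cookies set during the ERP login can be reused by every request below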
session = requests.Session()


def parse_args():
    parser = argparse.ArgumentParser(
        description="Fetch companies from the CDC portal, filtered by their status"
    )
    parser.add_argument(
        "-o",
        "--open",
        action="store_true",
        help="Query for companies currently open for CV submission",
    )
    parser.add_argument(
        "-a",
        "--applied",
        action="store_true",
        help="Query for companies you have applied to",
    )
    parser.add_argument(
        "-aa",
        "--applied-available",
        action="store_true",
        help="Query for companies you have applied to that are still open",
    )
    return parser.parse_args()


# Maximum length for each field
MAX_NAME_LENGTH = 30
MAX_ROLE_LENGTH = 40
MAX_SALARY_LENGTH = 12
MAX_DATE_LENGTH = 20


def limit_word_length(text, max_length):
    if text and len(text) > max_length:
        return text[:max_length] + "..."  # Truncate and add "..."
    return text
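

# create_hyperlink wraps a label in OSC 8 escape sequences so terminals that
# support them render it as a clickable hyperlink pointing at the given URL.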
def create_hyperlink(label, url):
    return f"\033]8;;{url}\033\\{label}\033]8;;\033\\"


def applied(companies):
    print(">> [Listing Applied Companies]")
    table_data = [
        [
            limit_word_length(company["Name"], MAX_NAME_LENGTH),
            create_hyperlink(
                limit_word_length(company["Role"], MAX_ROLE_LENGTH),
                company["Job_Description"],
            ),
            limit_word_length(company["CTC"], MAX_SALARY_LENGTH),
            limit_word_length(company["Interview_Date"], MAX_DATE_LENGTH),
        ]
        for company in companies
        if company["Application_Status"] == "Y"
    ]
    headers = ["Company Name", "Role", "Salary", "Interview Date"]
    print(tabulate(table_data, headers=headers, tablefmt="fancy_grid"))


def exam(companies):
    print(">> [Listing Applied Companies with Remaining Interviews]")
    current_time = datetime.now()
    table_data = []
    for company in companies:
        interview_date = None
        if company["Interview_Date"]:
            try:
                interview_date = datetime.strptime(
                    company["Interview_Date"], "%d-%m-%Y %H:%M"
                )
            except ValueError:
                # Skip entries with an unexpected date format; log or raise here if needed
                continue
        # Keep rows where the interview date is unknown or still in the future
        if company["Application_Status"] == "Y" and (
            interview_date is None or current_time < interview_date
        ):
            table_data.append([
                limit_word_length(company["Name"], MAX_NAME_LENGTH),
                create_hyperlink(
                    limit_word_length(company["Role"], MAX_ROLE_LENGTH),
                    company.get("Job_Description", ""),
                ),
                limit_word_length(company["CTC"], MAX_SALARY_LENGTH),
                limit_word_length(company["Interview_Date"], MAX_DATE_LENGTH),
            ])
    headers = ["Company Name", "Role", "Salary", "Interview Date"]
    if table_data:
        print(tabulate(table_data, headers=headers, tablefmt="fancy_grid"))
    else:
        print("No companies with remaining interviews.")
def _open(companies):
    print(">> [Listing Open Companies]")
    current_time = datetime.now()
    table_data = []
    links = []  # To store full URLs with their indices
    for i, company in enumerate(companies):
        try:
            deadline = datetime.strptime(company["End_Date"], "%d-%m-%Y %H:%M")
        except ValueError:
            continue
        if current_time < deadline:
            name = limit_word_length(company["Name"], MAX_NAME_LENGTH)
            role = limit_word_length(company["Role"], MAX_ROLE_LENGTH)
            apply_link = "APPLIED"
            if company["Application_Status"] == "N":
                apply_link = f"[Link {i + 1}]"
            table_data.append([
                name,
                create_hyperlink(role, company["Job_Description"]),
                apply_link,
            ])
            if apply_link != "APPLIED":
                links.append(f"{apply_link}: {company['Apply_Link']}")
    headers = ["Company Name", "Role", "Apply Link"]
    if table_data:
        print(tabulate(table_data, headers=headers, tablefmt="fancy_grid"))
        if links:
            print("\nFull Links:")
            for link in links:
                print(link)
            print()
    else:
        print("No companies with active deadlines.")
def get_ctc_with_currency(jd_url):
    jd_response = session.get(jd_url, headers=HEADERS)
    html_content = jd_response.text.strip()
    soup = bs(html_content, "html.parser")
    row = soup.find_all("tr")[-1]
    column = row.find_all("td")[-1]
    ctc = column.text
    return ctc


# Downloads the PDF content in bytes and checks whether it is empty;
# returns the link only if the attachment has content.
## Not used currently
def parse_link(link):
    stream = session.get(link, stream=True)
    attachment = b''
    for chunk in stream.iter_content(4096):
        attachment += chunk
    return link if attachment else None


def main():
    args = parse_args()  # Arguments parser

    # Getting existing companies
    companies = []
    if os.path.exists(companies_file):
        with open(companies_file, "r") as json_file:
            companies = json.load(json_file)

    print(">> [Syncing with ERP]")
    _, ssoToken = erp.login(
        HEADERS,
        session,
        ERPCREDS=erpcreds,
        OTP_CHECK_INTERVAL=2,
        SESSION_STORAGE_FILE=".erp_session",
    )
    session.post(
        TPSTUDENT_URL,
        data=dict(ssoToken=ssoToken, menu_id=11, module_id=26),
        headers=HEADERS,
    )

    # Open CDC Portal
    cdc_url = f"{TPSTUDENT_URL}?ssoToken={ssoToken}"
    webbrowser.open(cdc_url)

    r = session.get(COMPANIES_URL, headers=HEADERS)
    soup = bs(r.text, features="xml")
    xml_string = soup.prettify()
    xml_encoded = xml_string.encode("utf-8")
    root = ET.fromstring(xml_encoded)

    table_data = []
    links = []  # To store full URLs with their indices
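
    # Each <row> in the returned XML is one company/JNF; cell[4] holds markup whose
    # quoted arguments carry the jnf_id, com_id and year used to build the links below.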
    for i, row in enumerate(root.findall("row")):
        jd_args = row.find("cell[4]").text.split("'")[5].split('"')
        jnf_id, com_id, year = jd_args[1], jd_args[3], jd_args[5]

        # Links
        company_details = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/TPComView.jsp?yop={year}&com_id={com_id}&user_type=SU"
        company_additional_details = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/AdmFilePDF.htm?type=COM&year={year}&com_id={com_id}"
        ppt = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/AdmFilePDF.htm?type=PPT&year={year}&com_id={com_id}"
        jd = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/TPJNFView.jsp?jnf_id={jnf_id}&com_id={com_id}&yop={year}&user_type=SU&rollno={erpcreds.ROLL_NUMBER}"
        apply_link = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/TPJNFViewAction.jsp?jnf_id={jnf_id}&com_id={com_id}&year={year}&rollno={erpcreds.ROLL_NUMBER}&mode=ApplyCV1"
        additional_jd = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/JnfMoreDet.jsp?mode=jnfMoreDet&rollno={erpcreds.ROLL_NUMBER}&year={year}&com_id={com_id}&jnf_id={jnf_id}"
        form_additional_details = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/AdmFilePDF.htm?type=JNF&year={year}&jnf_id={jnf_id}&com_id={com_id}"

        company_info = {
            "Name": row.find("cell[1]").text.split(">")[1].split("<")[0].strip(),
            "Company_Details": company_details,
            "Company_Additional_Details": company_additional_details,
            "PPT": ppt,
            "Role": row.find("cell[4]").text.split("'")[1].strip(),
            "Job_Description": jd,
            "Apply_Link": apply_link,
            "Additional_Job_Description": additional_jd,
            "CTC": get_ctc_with_currency(additional_jd),
            "Form_Additional_Details": form_additional_details,
            "Application_Status": row.find("cell[9]").text.strip() if row.find("cell[9]").text.strip() else "N",
            "Start_Date": row.find("cell[10]").text.strip(),
            "End_Date": row.find("cell[11]").text.strip(),
            "Interview_Date": row.find("cell[12]").text.strip() if row.find("cell[12]").text.strip() else None,
        }
        job_desc_exists = False
        # Update Application Status and End Date with the latest data in the stored database
        # These are dynamic and might change from one run to another based on what I do
        for comp in companies:
            if comp["Job_Description"] == company_info["Job_Description"]:
                comp["Application_Status"] = company_info["Application_Status"]
                comp["End_Date"] = company_info["End_Date"]
                comp["Interview_Date"] = company_info["Interview_Date"]
                job_desc_exists = True
                break

        if not job_desc_exists:
            name = limit_word_length(company_info["Name"], MAX_NAME_LENGTH)
            role = limit_word_length(company_info["Role"], MAX_ROLE_LENGTH)
            apply_link = f"[Link {i + 1}]"
            table_data.append([
                name,
                create_hyperlink(role, company_info["Job_Description"]),
                apply_link,
            ])
            links.append(f"{apply_link}: {company_info['Apply_Link']}")
            companies.append(company_info)

    print(">> [Listing Newly Opened Companies]")
    headers = ["Company Name", "Role", "Apply Link"]
    if table_data:
        print(tabulate(table_data, headers=headers, tablefmt="fancy_grid"))
        if links:
            print("\nFull Links:")
            for link in links:
                print(link)
            print()
    else:
        print("No new companies since the last run.")

    # Update / Sync Interview Date with latest data in the stored database
    for company in companies:
        if not company["Interview_Date"]:
            for c in companies:
                if c["Name"] == company["Name"] and c["Interview_Date"]:
                    company["Interview_Date"] = c["Interview_Date"]
                    break

    # Open and Applied queries
    if args.open:
        _open(companies)
    if args.applied:
        applied(companies)
    if args.applied_available:
        exam(companies)

    print(">> [Updating localDB]")
    with open(companies_file, "w") as json_file:
        json.dump(companies, json_file, indent=2)


if __name__ == "__main__":
    main()