-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1_top_paying_companies.py
49 lines (43 loc) · 1.46 KB
/
1_top_paying_companies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
# Load environment variables
load_dotenv(dotenv_path='e:/VS Code Projects/SQL_Project_Data_Jobs_Analysis/project_python/.env')
# Get the environment variables
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASS')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')
# Establish a connection to the database
engine = create_engine(f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}")
# Define your SQL query
sql_query = """
SELECT job_id,
job_title,
job_location,
job_schedule_type,
salary_year_avg,
job_posted_date,
name AS company_name
FROM job_postings_fact
LEFT JOIN company_dim ON job_postings_fact.company_id = company_dim.company_id
WHERE job_title_short = 'Data Analyst'
AND job_location = 'Canada'
AND salary_year_avg IS NOT NULL
ORDER BY salary_year_avg DESC
LIMIT 10;
"""
# Execute the query and fetch the result into a pandas DataFrame
df = pd.read_sql_query(sql_query, engine)
# Plot the data
plt.figure(figsize=(10, 6))
colors = ['blue', 'green', 'red', 'orange', 'purple', 'yellow', 'pink', 'brown', 'gray', 'cyan']
plt.barh(df['company_name'], df['salary_year_avg'], color=colors, edgecolor='black')
plt.xlabel('Average Salary')
plt.ylabel('Company')
plt.title('Top Paying Companies for Data Analysts in Canada')
plt.gca().invert_yaxis()
plt.show()