Skip to content

Commit

Permalink
Added DB integration
Browse files (browse the repository at this point in the history)
  • Loading branch information
mkovalyshev committed Nov 5, 2021
1 parent d622aa8 commit c78f09b
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 57 deletions.
3 changes: 1 addition & 2 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 16 additions & 17 deletions functions.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
# from typing import Optional
import requests
import datetime as dt
from bs4 import BeautifulSoup
import json
import os
import yaml
from sqlalchemy import create_engine
import pandas as pd

with open('config.yaml') as f:
CONFIG = yaml.load(f, Loader=yaml.FullLoader)

# YESTERDAY = (dt.datetime.today() - dt.timedelta(days=1)).strftime('%Y-%m-%d')
YESTERDAY = '2021-10-26'
YESTERDAY = (dt.datetime.today() - dt.timedelta(days=1)).strftime('%Y-%m-%d')
HOST = 'https://www.bustime.ru'
HEADERS = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
Expand All @@ -31,7 +27,9 @@ def get_cities(url: str = 'https://www.bustime.ru') -> dict:
returns dict or None
"""

cities = pg_engine.execute("select * from bustime.cities").fetchall()
cities = pg_engine.execute("""select * from bustime.cities
where id in (7,21,28,46,54,5573,80,101,109,120,123,132,136,143)""").\
fetchall()

if len(cities) != 0:
return dict(tuple([tuple([x[1], x[0]]) for x in cities]))
Expand Down Expand Up @@ -70,7 +68,7 @@ def get_routes(city: str, cities_dict: dict) -> list:
return [routes_dict[i] for i in routes_dict.keys()]

else:
soup = BeautifulSoup(requests.get(HOST + '/' + city + '/' + 'transport/' + TODAY).text, features="html.parser")
soup = BeautifulSoup(requests.get(HOST + '/' + city + '/' + 'transport/' + YESTERDAY).text, features="html.parser")
routes = {int(x.get('value')): x.text for x in soup.find('select', {'name': 'bus_id'}). \
find_all('option') if x.get('value') != '0'}

Expand All @@ -88,7 +86,7 @@ def get_routes(city: str, cities_dict: dict) -> list:
return [routes_dict[i] for i in routes_dict.keys()]


def post_ajax(city: str, bus_id: str = 0, date: str = YESTERDAY) -> None:
def get_telemetry(city: str, bus_id: str = 0, date: str = YESTERDAY) -> None:
"""
gets telemetry data from bustime.ru
loads to database
Expand All @@ -104,12 +102,13 @@ def post_ajax(city: str, bus_id: str = 0, date: str = YESTERDAY) -> None:
'day': date}

response_df = pd.DataFrame(requests.post(HOST + '/ajax/transport/', data=data).json())
response_df['timestamp'] = date + ' ' + response_df['timestamp']
response_df['timestamp'] = pd.to_datetime(response_df['timestamp'])
response_df['upload_date'] = datetime.datetime.today()

response_df.to_sql('telemetry',
pg_engine,
schema='bustime',
if_exists='append',
index=False)
if len(response_df)!=0:
response_df['timestamp'] = date + ' ' + response_df['timestamp']
response_df['timestamp'] = pd.to_datetime(response_df['timestamp'])
response_df['upload_date'] = dt.datetime.today()

response_df.to_sql(f'telemetry_{YESTERDAY.replace("-", "_")}',
pg_engine,
schema='bustime',
if_exists='append',
index=False)
79 changes: 41 additions & 38 deletions parser.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,45 @@
import os

from functions import *
from tqdm import tqdm

print("Start") # LOG
print(TODAY)

print("Loading cities.txt")
if os.path.exists('resources/cities.txt'):
print('LOG: found local file')
with open('resources/cities.txt') as f:
cities = f.read().split(';')
else:
print('LOG: fetching from bustime.ru')
get_cities()
with open('resources/cities.txt') as f:
cities = f.read().split(';')

print(f'LOG: fetched {len(cities)} cities')


print('Getting route lists')
for city in tqdm(cities):
if not os.path.exists('resources'+city+'routes.json'):
get_routes(city)
print('Success\n\n')

print('Getting telemetry')
for city in cities:
with open('resources'+city+'routes.json', encoding='utf-8') as f:
routes = json.load(f)

print('\t', city)

for route in routes.keys(): # Move with clause to function?
directory = 'resources' + city + TODAY
if not os.path.exists(directory):
os.mkdir(directory)
with open(directory+'/'+route+'.json', 'w', encoding='utf-8') as file:
response = post_ajax(city, route, TODAY)
json.dump(response, file, ensure_ascii=False)
print(YESTERDAY)

# Telemetry is stored in one table per day. The suffix is the date with dashes
# replaced by underscores, the same name get_telemetry() writes to.
table_name = f"telemetry_{YESTERDAY.replace('-', '_')}"

print("Creating table")

# IF NOT EXISTS plus an inline foreign key keeps this step safe to re-run for
# the same date. A separate CREATE TABLE / ALTER TABLE ADD CONSTRAINT pair
# raises on the second run because both objects already exist.
# To rebuild from scratch, drop the table manually first:
#   DROP TABLE bustime.telemetry_<yyyy_mm_dd>;
pg_engine.execute(f"""
    CREATE TABLE IF NOT EXISTS bustime.{table_name} (
        uniqueid varchar(8) NOT NULL,
        "timestamp" timestamp NOT NULL,
        bus_id int4 NOT NULL,
        heading int4 NULL,
        speed int4 NULL,
        lon float8 NOT NULL,
        lat float8 NOT NULL,
        direction int4 NULL,
        gosnum varchar(64) NULL,
        bortnum varchar(64) NULL,
        probeg int4 NULL,
        upload_date timestamp NOT NULL,
        CONSTRAINT telemetry_fk FOREIGN KEY (bus_id) REFERENCES bustime.routes(id)
    );
""")

print('Loading cities')

cities = get_cities()

print(f'LOG: fetched {len(cities)} cities')

# For every city: resolve its routes, then pull telemetry route by route.
# get_telemetry() loads each response into the per-day table created above.
for city in cities:
    print(city)
    routes = get_routes(city, cities)

    for route in tqdm(routes):
        get_telemetry(city, route['id'])

0 comments on commit c78f09b

Please sign in to comment.