import pickle
from ftplib import FTP

import pandas as pd

def get_station_names():
    """Return a dictionary with the station IDs as keys and the list
    [von_datum, bis_datum, Stationshoehe, geoBreite, geoLaenge,
    Stationsname, Bundesland] as values, together with a list of the
    column names read from the file header.
    """
    class Station:
        """Container for one row of the DWD station description file."""
        def __init__(self, Stations_id, von_datum, bis_datum, Stationshoehe,
                     geoBreite, geoLaenge, Stationsname, Bundesland):
            self.Stations_id = Stations_id
            self.von_datum = von_datum
            self.bis_datum = bis_datum
            self.Stationshoehe = Stationshoehe
            self.geoBreite = geoBreite
            self.geoLaenge = geoLaenge
            self.Stationsname = Stationsname
            self.Bundesland = Bundesland
    # Download the station description file from the DWD FTP server
    server = "ftp-cdc.dwd.de"
    ftp = FTP(server)
    ftp.login()
    # Save it locally as stations.txt
    filename = ("pub/CDC/observations_germany/climate/daily/kl/historical/"
                "KL_Tageswerte_Beschreibung_Stationen.txt")
    with open("stations.txt", 'wb') as outfile:
        ftp.retrbinary('RETR ' + filename, outfile.write)
    ftp.quit()
    # Parse the downloaded file: line 0 is the header, line 1 a separator row,
    # and every following line describes one station.
    Stations = {}
    stations_dict = {}
    with open("stations.txt", 'r', encoding="cp1250") as fileorigin:
        for lineid, line in enumerate(fileorigin):
            if lineid > 1:
                line_vec = list(filter(None, line.split(' ')))
                # The station name may consist of several words, so it is kept
                # as a list of tokens here and joined into one string below.
                Stations[line_vec[0]] = Station(line_vec[0], line_vec[1],
                                                line_vec[2], line_vec[3],
                                                line_vec[4], line_vec[5],
                                                line_vec[6:-1], line_vec[-1])
                stations_dict[Stations[line_vec[0]].Stations_id] = \
                    [Stations[line_vec[0]].von_datum,
                     Stations[line_vec[0]].bis_datum,
                     Stations[line_vec[0]].Stationshoehe,
                     Stations[line_vec[0]].geoBreite,
                     Stations[line_vec[0]].geoLaenge,
                     Stations[line_vec[0]].Stationsname,
                     Stations[line_vec[0]].Bundesland]
            elif lineid == 0:
                column_names = list(filter(None, line.split(' ')))
                column_names = [c.strip().lower() for c in column_names]
    print("number of stations loaded: ", len(Stations))
    # The whitespace split leaves the station name as a list of tokens whose
    # last entry is the Bundesland; join the name tokens back into a single
    # string and move the Bundesland into its own field.
    for key in stations_dict:
        if len(stations_dict[key][5]) > 2:
            # Station name made up of multiple words
            town_name = stations_dict[key][5][0:-1]
            land_name = stations_dict[key][5][-1]
            joint_name = " ".join(town_name)
            stations_dict[key][5] = joint_name
            stations_dict[key][6] = land_name
        else:
            # Single-word station name
            stations_dict[key][6] = stations_dict[key][5][1]
            stations_dict[key][5] = stations_dict[key][5][0]
    return stations_dict, column_names


def get_stations_dataframe():
    """Return the station information as a pandas DataFrame."""
    stations_dict, column_names = get_station_names()
    stations_pd = pd.DataFrame.from_dict(stations_dict, orient='index')
    stations_pd = stations_pd.reset_index()
    # Use the column names taken from the header of the description file
    stations_pd.columns = column_names
    stations_pd['stations_id'] = stations_pd['stations_id'].apply(int)
    stations_pd = stations_pd.set_index('stations_id')
    stations_pd['von_datum'] = pd.to_datetime(stations_pd['von_datum'])
    stations_pd['bis_datum'] = pd.to_datetime(stations_pd['bis_datum'])
    return stations_pd


if __name__ == '__main__':
    # Cache only the station dictionary; the column names are not needed here.
    stations_dict, column_names = get_station_names()
    with open("stations.p", "wb") as picklefile:
        pickle.dump(stations_dict, picklefile)
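
# Usage sketch (illustrative only, not part of the module): build the DataFrame
# and filter it by Bundesland. This assumes the DWD header lowercases to column
# names such as 'stationsname', 'stationshoehe' and 'bundesland', as produced by
# get_station_names() above; str.strip() guards against trailing whitespace.
#
#     stations = get_stations_dataframe()
#     bavarian = stations[stations['bundesland'].str.strip() == 'Bayern']
#     print(bavarian[['stationsname', 'stationshoehe']].head())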