-
Notifications
You must be signed in to change notification settings - Fork 0
/
0node.py
162 lines (126 loc) · 6.51 KB
/
0node.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import pandas as pd
import numpy as np
import networkx as nx
import os
import re
import logging
logging.basicConfig(level=logging.INFO)
def parse_linestring(linestring):
    """Parse a WKT ``LINESTRING`` string into a list of coordinate tuples.

    Args:
        linestring: Text containing a ``LINESTRING (x y, x y, ...)`` entry.

    Returns:
        A list with one tuple of floats per vertex of the first LINESTRING
        found in the text, or an empty list when no LINESTRING is present
        or it contains no vertices.

    Raises:
        ValueError: If a vertex component cannot be converted to float.
    """
    # Tolerate "LINESTRING(" as well as "LINESTRING (".
    match = re.search(r'LINESTRING\s*\((.*?)\)', linestring)
    if match is None:
        return []  # Return an empty list if no match is found
    # Split on the comma alone so "1 2,3 4" and "1 2, 3 4" parse the same;
    # the per-vertex str.split() already discards surrounding whitespace.
    vertices = (
        tuple(map(float, vertex.split()))
        for vertex in match.group(1).split(',')
    )
    # Drop empty tuples produced by "LINESTRING ()" or a stray comma, so
    # callers can safely index every returned vertex.
    return [vertex for vertex in vertices if vertex]
def create_graph_from_linestrings(df):
    """Build an undirected graph joining consecutive LINESTRING vertices.

    Args:
        df: DataFrame with a 'geometry' column of WKT LINESTRING strings.

    Returns:
        A ``networkx.Graph`` whose nodes are coordinate tuples and whose
        edges connect each pair of consecutive vertices of every LINESTRING.
    """
    G = nx.Graph()
    for geometry in df['geometry']:
        # Reuse the shared parser so the graph and the node feature matrix
        # read geometries the same way. It returns [] for a row without a
        # LINESTRING, so such a row contributes no edges instead of raising
        # IndexError.
        coordinates = parse_linestring(geometry)
        # Pair each vertex with its successor; fewer than two vertices
        # yields no pairs and therefore no edges.
        G.add_edges_from(zip(coordinates, coordinates[1:]))
    return G
def create_node_feature_matrix(df):
    """Collect the unique LINESTRING vertices of ``df`` into a DataFrame.

    Args:
        df: DataFrame with a 'geometry' column of WKT LINESTRING strings.

    Returns:
        A DataFrame with float columns 'latitude' and 'longitude', one row
        per distinct vertex in first-seen order, with a fresh 0..n-1 index.
        The two columns are present even when no vertex could be parsed.
    """
    # The first component of each vertex is stored as 'latitude' and the
    # second as 'longitude'.
    # NOTE(review): WKT conventionally orders x (longitude) before y
    # (latitude); confirm the labels match the data before relying on them.
    records = [
        {'latitude': coord[0], 'longitude': coord[1]}
        for geometry in df['geometry']
        for coord in parse_linestring(geometry)
    ]
    # Naming the columns explicitly keeps the schema stable for empty input;
    # a DataFrame built from an empty list would otherwise have no columns.
    node_features = pd.DataFrame(
        records, columns=['latitude', 'longitude'], dtype=float
    )
    # Drop duplicate rows to ensure each node is unique
    return node_features.drop_duplicates().reset_index(drop=True)
def center_coordinates(node_features):
    """Shift node coordinates so that their mean sits at the origin.

    Adds 'centered_latitude' and 'centered_longitude' columns to
    ``node_features`` in place, each being the source column minus its
    mean, and returns the same DataFrame object.
    """
    column_pairs = (
        ('latitude', 'centered_latitude'),
        ('longitude', 'centered_longitude'),
    )
    for source, target in column_pairs:
        values = node_features[source]
        # Subtracting the column mean moves the geometric center to zero.
        node_features[target] = values - values.mean()
    return node_features
def normalize_coordinates(centered_node_features):
    """Scale centered coordinates by the diagonal of their bounding box.

    Adds 'normalized_latitude' and 'normalized_longitude' columns to
    ``centered_node_features`` in place and returns the same DataFrame.

    Args:
        centered_node_features: DataFrame with 'centered_latitude' and
            'centered_longitude' columns.

    Returns:
        The input DataFrame with the two normalized columns added. When the
        bounding box has zero extent (for example a single node), both
        normalized columns are set to 0.0.
    """
    lat = centered_node_features['centered_latitude']
    lon = centered_node_features['centered_longitude']
    diagonal_length = (
        (lat.max() - lat.min())**2 + (lon.max() - lon.min())**2
    )**0.5
    if diagonal_length == 0:
        # Every node shares one position; dividing by the zero diagonal
        # would produce NaN, so place all nodes at the origin instead.
        centered_node_features['normalized_latitude'] = 0.0
        centered_node_features['normalized_longitude'] = 0.0
    else:
        centered_node_features['normalized_latitude'] = lat / diagonal_length
        centered_node_features['normalized_longitude'] = lon / diagonal_length
    return centered_node_features
def quantize_coordinates(normalized_node_features):
    """Map normalized coordinates onto integer values.

    Each normalized value v becomes int(v * 127.5 + 127.5), with the
    fractional part discarded by the integer cast. The result is written to
    'quantized_latitude' / 'quantized_longitude' on a copy of the input;
    the input DataFrame itself is left unchanged.
    """
    result = normalized_node_features.copy()
    column_pairs = (
        ('normalized_latitude', 'quantized_latitude'),
        ('normalized_longitude', 'quantized_longitude'),
    )
    for source, target in column_pairs:
        scaled = normalized_node_features[source] * 127.5
        shifted = scaled + 127.5
        result[target] = shifted.astype(int)
    return result
def order_and_flatten_nodes(node_features, cseq_file):
    """Sort the quantized nodes, flatten them, and save the sequence.

    Rows are ordered by 'quantized_longitude' and then by
    'quantized_latitude'. The sorted (latitude, longitude) pairs are
    flattened into a single 1-D array, which is printed, written to
    ``cseq_file`` as one space-separated line, and returned.
    """
    # Keep only the two quantized columns, latitude first.
    pairs = node_features[['quantized_latitude', 'quantized_longitude']]
    # Primary sort key is longitude, secondary key is latitude.
    pairs = pairs.sort_values(by=['quantized_longitude', 'quantized_latitude'])
    Cseq = pairs.to_numpy().flatten()
    print("Totoal Cseq: ", Cseq)
    with open(cseq_file, 'w') as handle:
        # print() emits the values separated by single spaces and ends the
        # line with a newline.
        print(*Cseq, file=handle)
    return Cseq
def create_adjacency_matrix(G):
    """Build a dense symmetric 0/1 adjacency matrix for graph ``G``.

    Args:
        G: Graph object exposing ``nodes`` and ``edges()``.

    Returns:
        A float NumPy array of shape (n, n), where n is the number of nodes.
        Rows and columns follow the iteration order of ``G.nodes``; an entry
        is 1 where an edge joins the two nodes and 0 elsewhere.
    """
    # Map each node to its row/column once, instead of rebuilding and
    # scanning the node list for both endpoints of every edge.
    index_of = {node: position for position, node in enumerate(G.nodes)}
    size = len(index_of)
    adjacency_matrix = np.zeros((size, size))
    for node1, node2 in G.edges():
        index1 = index_of[node1]
        index2 = index_of[node2]
        adjacency_matrix[index1, index2] = 1
        adjacency_matrix[index2, index1] = 1  # For undirected graph
    return adjacency_matrix
# Driver script: for every city CSV under each country folder, build the
# street graph, derive the quantized coordinate sequence (Cseq), and save it
# under a mirrored folder structure.

# New base directory for Cseq data
cseq_base_directory = 'Cseq_Data_Master/'
os.makedirs(cseq_base_directory, exist_ok=True)  # Create base directory if it doesn't exist

# Base directory containing the country folders with city CSVs
base_directory = 'Street_Network_Data_Raw/'

# Iterate over each country folder
for country_folder_name in os.listdir(base_directory):
    country_folder_path = os.path.join(base_directory, country_folder_name)

    # Skip if it's not a directory
    if not os.path.isdir(country_folder_path):
        continue

    # The folder name is reused verbatim for the output folder. It is not
    # split into name and code, because that result was unused and the
    # unpacking raised ValueError for any folder name without an underscore.
    country_cseq_directory = os.path.join(cseq_base_directory, country_folder_name)
    os.makedirs(country_cseq_directory, exist_ok=True)

    # Iterate over each CSV file in the country folder
    for filename in os.listdir(country_folder_path):
        if not filename.endswith(".csv"):
            continue

        print(f"Processing file: {filename}")
        filepath = os.path.join(country_folder_path, filename)
        df = pd.read_csv(filepath)

        # Create graph from linestrings
        G = create_graph_from_linestrings(df)
        print(f"Number of nodes in the graph: {G.number_of_nodes()}")

        # Node features: raw -> centered -> normalized -> quantized
        X = create_node_feature_matrix(df)
        X_centered = center_coordinates(X)
        X_normalized = normalize_coordinates(X_centered)
        X_quantized = quantize_coordinates(X_normalized)

        # File path for saving the Cseq
        city_name = filename.replace('_street_network.csv', '')
        cseq_file = os.path.join(country_cseq_directory, f"{city_name}_cseq.txt")

        # The output path is passed explicitly rather than read from a global.
        Cseq = order_and_flatten_nodes(X_quantized, cseq_file)
        print("Length of Cseq of " + filename + ": ", len(Cseq))
        print("Cseq first 100: \n", Cseq[:100])

        # Create adjacency matrix
        # NOTE(review): A is a dense N x N array and is not used again in
        # the visible script; confirm whether it still needs to be built.
        A = create_adjacency_matrix(G)