-
Notifications
You must be signed in to change notification settings - Fork 0
/
operations_dash.py
293 lines (242 loc) · 9.82 KB
/
operations_dash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
import time
import base64
# Load the dataset
df = pd.read_csv('operations.csv')

# Data preprocessing:
#   * drop the identifier columns, which are not used by any plot,
#   * discard rows without a country (every tab filters or colours by it),
#   * parse the mission date and derive a 'Year' column for the sliders.
df.drop(columns=['Target ID', 'Source ID', 'Unit ID'], inplace=True)
df.dropna(subset=['Country'], inplace=True)
df['Mission Date'] = pd.to_datetime(df['Mission Date'])
df['Year'] = df['Mission Date'].dt.year

# Keep only the columns that hold more than 50000 non-null values.
# `df.count()` is the per-column number of non-null entries, i.e. the same
# quantity as `len(df) - df.isnull().sum()`.
print('Filtering the DataFrame to include only columns with more than 50000 values...')
useful_columns = df.columns[df.count() > 50000]
df = df[useful_columns]

# Configure image: read the banner picture once and keep it base64-encoded so
# it can be embedded in the page as a data URI. The context manager closes the
# file handle as soon as the bytes have been read.
image_filename = 'PhotoofdevastatedDresden.jpeg'
with open(image_filename, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read())
def remove_outliers(df, column_names, iqr_multiplier=1.5):
    """Return a copy of *df* without rows that are IQR outliers.

    The columns are processed one after another. For each column the first
    and third quartiles are computed on the rows that survived the previous
    columns, and only rows whose value lies inside
    ``[Q1 - iqr_multiplier * IQR, Q3 + iqr_multiplier * IQR]`` are kept.

    Rows holding NaN in any of the listed columns are removed as well,
    because a missing value never satisfies the range check.

    Args:
        df: Source DataFrame. It is not modified.
        column_names: Iterable of numeric column labels to filter on.
        iqr_multiplier: Width of the accepted band, in IQR units, on each
            side of the quartiles. Defaults to the conventional 1.5.

    Returns:
        A new DataFrame containing only the rows that pass every filter.
    """
    df_outlier_removed = df.copy()
    for column_name in column_names:
        column = df_outlier_removed[column_name]
        q1 = column.quantile(0.25)
        q3 = column.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - iqr_multiplier * iqr
        upper_bound = q3 + iqr_multiplier * iqr
        # `between` is inclusive on both ends and treats NaN as False.
        df_outlier_removed = df_outlier_removed[column.between(lower_bound, upper_bound)]
    return df_outlier_removed
# Every float64 column of the filtered dataset counts as a numerical feature.
# The same Index also feeds the X/Y feature dropdowns of the scatter-plot tab.
numerical_features = df.select_dtypes(include=['float64']).columns
# Outlier-free copy of the dataset, built once at start-up and read by the
# plotting callbacks.
df_all_outlier_removed = remove_outliers(df, column_names=numerical_features)
# Initialize the Dash app
app = dash.Dash(__name__)

# Countries offered by the selectors on the "Trends" and "Scatter Plots" tabs.
# The default selections are taken from this list so that the initially
# selected value is always one of the offered options.
TOP_COUNTRIES = ['USA', 'GREAT BRITAIN', 'NEW ZEALAND']

# Year bounds shared by both sliders, computed once and stored as plain ints.
MIN_YEAR = int(df['Year'].min())
MAX_YEAR = int(df['Year'].max())
YEAR_MARKS = {str(year): str(year) for year in range(MIN_YEAR, MAX_YEAR + 1)}

# Define the layout of the dashboard
app.layout = html.Div([
    html.H1("World War II Aerial Bombing Operations Dashboard"),
    # The banner file is a JPEG, so the data URI declares image/jpeg.
    html.Img(
        src='data:image/jpeg;base64,{}'.format(encoded_image.decode()),
        style={'width': '50%'},
    ),
    dcc.Tabs([
        dcc.Tab(label='Count Plots', children=[
            html.H2("Count Plots for Categorical Features"),
            html.H3("Select Feature for Count Plot"),
            # Dropdown for selecting features for count plots
            dcc.Dropdown(
                id='count-plot-features-dropdown',
                options=[{'label': feature, 'value': feature}
                         for feature in df.select_dtypes(include='object').columns],
                value=df.select_dtypes(include='object').columns[2],
                multi=False,
                style={'width': '50%'},
            ),
            html.Br(),
            html.Label("Filter by Year Range"),
            dcc.RangeSlider(
                id='year-slider',
                min=MIN_YEAR,
                max=MAX_YEAR,
                step=1,
                marks=YEAR_MARKS,
                value=[MIN_YEAR, MAX_YEAR],
            ),
            html.Label(id='selected-year-label'),
            html.Br(),
            dcc.Loading(
                id="loading-1",
                type="default",
                children=[
                    # Graph components to display the count plots
                    html.Div(id="loading-output-1"),
                    dcc.Graph(id='count-plots'),
                    dcc.Graph(id='pie-plot'),
                ],
            ),
        ]),
        dcc.Tab(label='Trends', children=[
            html.H2("Trends in Explosives Weights Over Time for Top Countries"),
            html.H3("Select Country"),
            # Radio buttons for selecting a country. The id keeps its
            # 'country-dropdown' name because the line-plot callback uses it.
            dcc.RadioItems(
                id='country-dropdown',
                options=[{'label': country, 'value': country}
                         for country in TOP_COUNTRIES],
                value=TOP_COUNTRIES[0],
                style={'width': '50%'},
            ),
            html.H3("Select Features"),
            # Checklist for selecting features to display
            dcc.Checklist(
                id='feature-checklist',
                options=[
                    {'label': 'Total Weight (Tons)',
                     'value': 'Total Weight (Tons)'},
                    {'label': 'High Explosives Weight (Tons)',
                     'value': 'High Explosives Weight (Tons)'},
                ],
                value=['Total Weight (Tons)'],
                inline=True,
            ),
            html.Br(),
            # Graph component to display the selected data
            dcc.Graph(id='line-plot'),
            # Text area for displaying additional information
            dcc.Textarea(
                id='info-text',
                value='',
                readOnly=True,
                style={'width': '100%', 'height': '100px'},
            ),
        ]),
        dcc.Tab(label='Scatter Plots', children=[
            html.H2("Regression Plots for Top Countries"),
            html.H3("Select Countries"),
            dcc.Dropdown(
                id='country-dropdown-2',
                options=[{'label': country, 'value': country}
                         for country in TOP_COUNTRIES],
                # A plain list (not a numpy array) of offered options.
                value=TOP_COUNTRIES[:2],
                multi=True,
                style={'width': '50%'},
            ),
            html.H3("Select Features for Scatter Plot"),
            # Dropdown for selecting X feature
            dcc.Dropdown(
                id='x-feature-dropdown',
                options=[{'label': feature, 'value': feature}
                         for feature in numerical_features],
                value='Target Latitude',
                style={'width': '50%'},
            ),
            # Dropdown for selecting Y feature
            dcc.Dropdown(
                id='y-feature-dropdown',
                options=[{'label': feature, 'value': feature}
                         for feature in numerical_features],
                value='Target Longitude',
                style={'width': '50%'},
            ),
            html.Br(),
            # Add a Slider for picking a specific year
            html.Label("Filter by a Specific Year"),
            dcc.Slider(
                id='year-picker-slider',
                min=MIN_YEAR,
                max=MAX_YEAR,
                step=1,
                marks=YEAR_MARKS,
                # Two years before the last one, but never below the minimum.
                value=max(MIN_YEAR, MAX_YEAR - 2),
            ),
            html.Br(),
            # Graph component to display the selected data
            dcc.Graph(id='scatter-plot'),
        ]),
    ]),
])
# Define callback to update the count plots based on user inputs
@app.callback(
    [
        Output('count-plots', 'figure'),
        Output('pie-plot', 'figure'),  # Output for the pie plot
    ],
    [
        Input('count-plot-features-dropdown', 'value'),
        Input('year-slider', 'value'),  # Include year slider as an input
    ],
)
def update_count_plots(selected_count_plot_feature, selected_years):
    """Rebuild the histogram and pie chart for the chosen categorical feature.

    Args:
        selected_count_plot_feature: Column name picked in the feature
            dropdown, or None when the dropdown has been cleared.
        selected_years: Two-element ``[first_year, last_year]`` value of the
            year range slider (both ends inclusive).

    Returns:
        A ``(histogram_figure, pie_figure)`` pair, or a pair of
        ``dash.no_update`` markers when there is no usable selection.
    """
    # A cleared dropdown delivers None, which is not a usable column name for
    # either figure; keep the figures that are already on screen.
    if not selected_count_plot_feature or not selected_years:
        return dash.no_update, dash.no_update

    first_year, last_year = selected_years[0], selected_years[1]
    filtered_df = df_all_outlier_removed[
        df_all_outlier_removed['Year'].between(first_year, last_year)
    ]

    # Create count plots using Plotly Express
    fig_count_plots = px.histogram(
        filtered_df,
        x=selected_count_plot_feature,
        title=f'Count Plots for {selected_count_plot_feature} ({first_year} - {last_year})',
        labels={'value': 'Count'},
        template='plotly_dark',
    )

    # Create pie plot
    fig_pie_plot = px.pie(
        filtered_df,
        names=selected_count_plot_feature,
        title=f'Pie Plot for {selected_count_plot_feature} ({first_year} - {last_year})',
        template='plotly_dark',
    )
    return fig_count_plots, fig_pie_plot
# Define callback to update the graph based on user inputs
@app.callback(
    [Output('line-plot', 'figure'),
     Output('info-text', 'value')],
    [Input('country-dropdown', 'value'),
     Input('feature-checklist', 'value')],
)
def update_line_plot(selected_country, selected_features):
    """Plot the yearly mean of the selected features for one country.

    Args:
        selected_country: Country chosen with the radio buttons.
        selected_features: List of column names ticked in the checklist. It
            may be empty or None when every box is unticked.

    Returns:
        A ``(figure, info_text)`` pair. When no feature is selected the
        figure slot is ``dash.no_update`` (the previous chart stays in place)
        and only the info text is refreshed.
    """
    selected_features = list(selected_features or [])

    # Prepare information text
    info_text = f"Selected Country: {selected_country}\nSelected Features: {', '.join(selected_features)}"

    # Without any feature there is nothing to aggregate or draw.
    if not selected_features:
        return dash.no_update, info_text

    # Filter data based on selected country
    filtered_df = df_all_outlier_removed[df_all_outlier_removed['Country'] == selected_country]

    # Group by 'Year' and calculate the mean for selected features
    grouped_df = filtered_df.groupby('Year')[selected_features].mean().reset_index()

    # Create line plot using Plotly Express
    feature_list = ", ".join(selected_features)
    fig = px.line(
        grouped_df,
        x='Year',
        y=selected_features,
        title=f'Mean {feature_list} for {selected_country}',
        labels={'Year': 'Year', 'value': f'Mean {feature_list}'},
        template='plotly_dark',
    )
    return fig, info_text
# Define callback to update the regression plot based on user inputs
@app.callback(
    Output('scatter-plot', 'figure'),
    [
        Input('country-dropdown-2', 'value'),
        Input('x-feature-dropdown', 'value'),
        Input('y-feature-dropdown', 'value'),
        Input('year-picker-slider', 'value'),  # Add the new slider input
    ],
)
def update_scatter_plot(selected_countries, x_feature, y_feature, selected_year):
    """Draw a per-country scatter plot with an OLS trendline for one year.

    Args:
        selected_countries: List of countries from the multi-select dropdown.
            It may be empty or None when the selection has been cleared.
        x_feature: Column name for the X axis, or None when cleared.
        y_feature: Column name for the Y axis, or None when cleared.
        selected_year: Year chosen on the single-year slider.

    Returns:
        The scatter figure, or ``dash.no_update`` when the country selection
        or one of the axis features is missing.

    Note:
        ``trendline="ols"`` relies on Plotly Express' trendline support,
        which is documented to require the ``statsmodels`` package.
    """
    # `Series.isin(None)` raises and a missing axis cannot be plotted, so keep
    # the current figure until the selection is complete again.
    if not selected_countries or x_feature is None or y_feature is None:
        return dash.no_update

    # Filter data based on selected countries and year
    country_matches = df_all_outlier_removed['Country'].isin(selected_countries)
    year_matches = df_all_outlier_removed['Year'] == selected_year
    filtered_df_scatter = df_all_outlier_removed[country_matches & year_matches]

    # Create scatter plot using Plotly Express
    fig_scatter = px.scatter(
        filtered_df_scatter,
        x=x_feature,
        y=y_feature,
        color='Country',
        trendline="ols",
        title=f'Scatter Plot for {y_feature} vs {x_feature} for Selected Countries in {selected_year}',
        template='plotly_dark',
    )
    return fig_scatter
@app.callback(Output("loading-output-1", "children"), Input("year-slider", "value"))
def input_triggers_spinner(value):
    """Hold the callback open for one second after a year-slider change.

    The output is the placeholder Div inside the ``loading-1`` wrapper. The
    slider value itself is ignored and the Div's children are always set to
    None.
    """
    delay_seconds = 1
    time.sleep(delay_seconds)
    return None
# Run the app
if __name__ == '__main__':
    # Prefer `app.run` where the installed Dash provides it and fall back to
    # the legacy `run_server` name otherwise, so the script starts on either.
    # NOTE(review): debug=True enables the Dash dev tools; confirm this script
    # is only used for local development.
    run_app = getattr(app, 'run', None) or app.run_server
    run_app(debug=True)