-
Notifications
You must be signed in to change notification settings - Fork 1
/
ab_testing_challenge.py
196 lines (181 loc) · 8.13 KB
/
ab_testing_challenge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
from pywebio.platform.flask import webio_view
from pywebio import STATIC_PATH
from flask import Flask, send_from_directory
from pywebio.input import *
from pywebio.output import *
import argparse
from pywebio import start_server
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
from scipy.stats import beta
# Flask application object; the PyWebIO view for run_experiment is registered
# on it via add_url_rule further down in this file.
ab_testing_challenge = Flask(__name__)
# Multi-armed bandit setup used by the A/B testing challenge
class ABBandit:
    """Four-armed Bernoulli bandit with a fixed budget of plays.

    Each arm ('A'..'D') pays ``reward_per_win`` with a hidden probability
    that is scaled by the chosen difficulty. Per-arm and overall statistics
    are tracked, together with a Beta(a, b) belief per arm (starting at
    Beta(1, 1)) that is updated after every play and drives Thompson
    sampling.
    """

    def __init__(self, number_of_trials=6000, difficulty=5):
        """Create a fresh bandit.

        Args:
            number_of_trials: Total number of plays available across all arms.
            difficulty: Difficulty level; values outside 1..10 are clamped.
                A higher difficulty yields a smaller win-rate multiplier
                (``11 - difficulty``), i.e. rarer wins.
        """
        # general setup
        self.reward_per_win = 50
        self.bandits = ['A', 'B', 'C', 'D']
        # Clamp the difficulty to 1..10, then invert it into a multiplier.
        self.difficulty_rate = 11 - min(max(difficulty, 1), 10)
        self.bandit_indices = {arm: idx for idx, arm in enumerate(self.bandits)}
        self.played = dict.fromkeys(self.bandits, 0)
        self.wins = dict.fromkeys(self.bandits, 0)
        # Base win rates, scaled by the chosen difficulty.
        base_win_rate = {'A': 0.03,
                         'B': 0.02,
                         'C': 0.035,
                         'D': 0.027}
        self.actual_win_rate = {arm: rate * self.difficulty_rate
                                for arm, rate in base_win_rate.items()}
        self.observed_win_rate = dict.fromkeys(self.bandits, 0.0)
        self.money_won = dict.fromkeys(self.bandits, 0.0)
        self.overall_played = 0
        self.overall_wins = 0
        self.overall_winrate = 0
        self.overall_money_won = 0
        self.number_of_trials = number_of_trials
        self.games_left = self.number_of_trials
        # prior/posterior beliefs: Beta(a, b) parameters per arm
        self.pri_post_a = dict.fromkeys(self.bandits, 1)
        self.pri_post_b = dict.fromkeys(self.bandits, 1)

    def sample(self, bandit):
        """Draw one win-rate sample from the current Beta belief of ``bandit``."""
        return np.random.beta(self.pri_post_a[bandit], self.pri_post_b[bandit])

    def plot_posteriors(self):
        """Return an HTML fragment plotting the Beta belief density of every arm."""
        x = np.linspace(0, 1, 200)
        fig = go.Figure()
        for bandit in self.bandits:
            y = beta.pdf(x, self.pri_post_a[bandit], self.pri_post_b[bandit])
            fig.add_trace(go.Scatter(x=x, y=y,
                                     mode='lines',
                                     name=bandit))
        fig.update_layout(title_text='Posterior distributions of bandit win rates')
        return fig.to_html(include_plotlyjs="require", full_html=False)

    def pull_arm(self, bandit, rounds, mode='Human'):
        """Play an arm up to ``rounds`` times and return the overview table.

        Args:
            bandit: Arm to play. Ignored when ``mode`` is
                ``'Thompson sampling'``, where the arm is chosen by sampling
                every arm's belief and taking the largest sample.
            rounds: Requested number of plays; capped by the games left.
            mode: ``'Thompson sampling'`` to let the bandit choose the arm;
                any other value plays the arm passed in.

        Returns:
            pandas.DataFrame with columns ``Bandits``, ``Played``, ``Wins``
            and ``Winrate``, one row per arm in ``self.bandits`` order.
        """
        if mode == 'Thompson sampling':
            # One sample per arm; the same arm is then used for all rounds.
            sampled_rates = [self.sample(arm) for arm in self.bandits]
            bandit = self.bandits[int(np.argmax(sampled_rates))]

        # Never play more than the remaining budget. A non-positive value
        # makes the range empty, so nothing is played or updated.
        plays = min(rounds, self.games_left)
        for _ in range(plays):
            self.played[bandit] += 1
            self.overall_played += 1
            if np.random.rand() < self.actual_win_rate[bandit]:
                self.wins[bandit] += 1
                self.overall_wins += 1
                self.pri_post_a[bandit] += 1
            else:
                self.pri_post_b[bandit] += 1

        if plays > 0:
            # Derived statistics only need refreshing once per call.
            self.observed_win_rate[bandit] = self.wins[bandit] / self.played[bandit]
            self.money_won[bandit] = self.wins[bandit] * self.reward_per_win
            self.overall_winrate = self.overall_wins / self.overall_played
            self.overall_money_won = self.overall_wins * self.reward_per_win
            self.games_left = self.number_of_trials - self.overall_played

        # Build the overview from the bandit list so every column is aligned
        # with the 'Bandits' column regardless of dict ordering.
        core_data = {
            'Bandits': list(self.bandits),
            'Played': [self.played[arm] for arm in self.bandits],
            'Wins': [self.wins[arm] for arm in self.bandits],
            'Winrate': [self.observed_win_rate[arm] for arm in self.bandits],
        }
        return pd.DataFrame(core_data, columns=['Bandits', 'Played', 'Wins', 'Winrate'])
def generate_bar_charts(df, x_axis, y_axis, title):
    """Render ``df`` as a titled bar chart and send it to the PyWebIO page.

    The bars are coloured by the ``y_axis`` column and the chart is emitted
    as an HTML fragment via ``put_html``.
    """
    chart = px.bar(df, x=x_axis, y=y_axis, color=y_axis, height=400)
    chart.update_layout(title_text=title)
    fragment = chart.to_html(include_plotlyjs="require", full_html=False)
    output = put_html(fragment)
    return output.send()
def run_experiment():
    """Interactive PyWebIO session for the bandit challenge.

    Asks for a random seed, a difficulty and the maximum number of rounds,
    then loops: the user picks an arm (or lets Thompson sampling play all
    remaining games), the current results are rendered as plotly charts,
    and the user decides whether to continue.
    """

    def _check_seed(seed):
        # np.random.seed only accepts integers in [0, 2**32 - 1].
        if seed is None or not 0 <= seed < 2 ** 32:
            return "Seed must be an integer between 0 and 4294967295"
        return None

    def _show(figure):
        # Render a plotly figure as an HTML fragment and push it to the page.
        put_html(figure.to_html(include_plotlyjs="require", full_html=False)).send()

    # instantiate bandits
    # required=True stops an empty field from coming back as None and
    # crashing the seeding / arithmetic below.
    random_seed = input("Set random seed (any integer)", value='1', type=NUMBER,
                        required=True, validate=_check_seed)
    chosen_difficulty = input("Set a difficulty (easy:1, hard: 10)", value='1',
                              type=NUMBER, required=True)
    np.random.seed(random_seed)
    number_of_trials = input("What shall be the max. no. of rounds?", value='6000',
                             type=NUMBER, required=True)
    bandit_challenge = ABBandit(number_of_trials=number_of_trials,
                                difficulty=chosen_difficulty)

    choices = ['A', 'B', 'C', 'D', 'Thompson sampling']
    while True:
        choice = actions(label="Which bandit do you chose?",
                         buttons=[{'label': name, 'value': name} for name in choices])
        if choice == 'Thompson sampling':
            # Let Thompson sampling spend every remaining game, one at a time,
            # so the arm is re-sampled before each play.
            for _ in range(bandit_challenge.games_left):
                bandit_challenge.pull_arm(choice, 1, mode='Thompson sampling')
            # A zero-round pull plays nothing and only returns the current
            # overview; this keeps df_overview defined even when no games
            # were left to play.
            df_overview = bandit_challenge.pull_arm(bandit_challenge.bandits[0], 0,
                                                    mode='Human')
            put_html(bandit_challenge.plot_posteriors()).send()
        else:
            rounds = input("How many rounds do you want to play?", value='1',
                           type=NUMBER, required=True)
            df_overview = bandit_challenge.pull_arm(choice, rounds, mode='Human')

        # show total money won
        _show(go.Figure(go.Indicator(
            mode="number",
            value=bandit_challenge.overall_money_won,
            domain={'x': [0.1, 1], 'y': [0.2, 0.9]},
            title={'text': "Money won so far:"})))

        # show progress and rounds left
        _show(go.Figure(go.Indicator(
            mode="number+gauge+delta",
            gauge={'shape': "bullet"},
            delta={'reference': number_of_trials},
            value=df_overview['Played'].sum(),
            domain={'x': [0.1, 1], 'y': [0.2, 0.9]},
            title={'text': "No. played"})))

        generate_bar_charts(df_overview, 'Bandits', 'Played', 'Rounds played')
        generate_bar_charts(df_overview, 'Bandits', 'Winrate', 'Overview of winrates')

        continue_button = actions(label="Do you want to continue",
                                  buttons=[{'label': 'Yes', 'value': True},
                                           {'label': 'No', 'value': False}])
        # The page is wiped either way; only "No" ends the session.
        clear()
        if not continue_button:
            break
# Register run_experiment, wrapped by PyWebIO's Flask adapter, on the Flask
# app under the /tool URL for GET, POST and OPTIONS requests.
ab_testing_challenge.add_url_rule('/tool', 'webio_view', webio_view(run_experiment),
                                  methods=['GET', 'POST', 'OPTIONS'])
if __name__ == '__main__':
    # Script entry point: serve run_experiment with PyWebIO's own server on
    # the port given by -p/--port (8080 when omitted).
    cli = argparse.ArgumentParser()
    cli.add_argument("-p", "--port", type=int, default=8080)
    options = cli.parse_args()
    start_server(run_experiment, port=options.port)