-
Notifications
You must be signed in to change notification settings - Fork 1
/
fake-data-gen-with-availability.R
128 lines (93 loc) · 3.9 KB
/
fake-data-gen-with-availability.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
##########################################################################
##
## Aggregate latenat classs logit
## By Minha Hwang
## Synthetic data generation (to test estimation code)
## 8/25, 2016
##
##########################################################################
# install.packages("maxLik")
# library(maxLik)
# Step 1: Set up a list of product attributes and levels
attrib <- list(brand = c("1", "2", "3", "4"))
#pack = c("6pack", "12pack", "24pack"),
pricelevels = c(2, 3, 4, 5, 6)
availability = matrix(c(1,1,1,0,
1,1,0,1,
1,0,1,1,
0,1,1,1), nrow=4,ncol=4,byrow=TRUE)
profiles <- expand.grid(attrib)
#nrow(profiles)
profiles.coded <- model.matrix(~ brand,data=profiles)[,-1]
# Step 2: Assign demand parameters
coef.names <- NULL
for (a in 1) {
coef.names <- c(coef.names,
paste(names(attrib)[a], attrib[[a]][-1], sep=""))
}
coef.names <- c(coef.names,"price")
beta1 <- c(1.5,0.5,0.1,-0.5)
beta2 <- c(0.5,0.5,0.5,-3.0)
phi <- 0.3
names(beta1) <-coef.names
names(beta2) <-coef.names
names(phi) <- "seg1size"
# Step 3: Initialize required parameters
nWk <- 500
nProd <- 4
nPar <- nProd + 1 - 1
nInd <- 500
Ind.id <- 1:nInd # Generate Individual IDs
Ind.seg <- sample(c(1,2),size=length(Ind.id), replace = TRUE, prob = c(phi,(1 - phi))) # Assign segment membership
# Assign demand parameters for each individual based on segment membership
coefs <- matrix(,nrow=nInd,ncol=nPar)
for (i in Ind.id) {
if (Ind.seg[i] == 1) coefs[i,] <- beta1
else coefs[i,] <- beta2
}
# coefsf <- matrix(rep(coefs,each=nProd),nrow=nInd*nProd,ncol=nPar)
# Set product availability
# ava <- ones(nProd,nWk)
# Step 4: Generate required keys
wk.id <- 1:nWk
#Ind.idf <- cbind(rep(Ind.id,nWk))
#seg <- cbind(rep(Ind.seg,nWk))
#productd <- cbind(rep())
# Step 5: Generate simulated dataset - in a "long" format
lcl.df <- data.frame(NULL)
for (m in seq_along(wk.id)) {
price.m <- sample(pricelevels, size=nProd)
availability.m <- availability[sample(nrow(availability),size=1),]
profiles.f <- cbind(profiles.coded,price.m)
utility.1 <- coefs %*% profiles.f[1,]
utility.2 <- coefs %*% profiles.f[2,]
utility.3 <- coefs %*% profiles.f[3,]
utility.4 <- coefs %*% profiles.f[4,]
wide.util <- cbind(utility.1, utility.2, utility.3, utility.4)
availability.e <- matrix(rep(t(availability.m),nInd),ncol=nProd,byrow=TRUE)
probs <- availability.e *exp(wide.util) / rowSums(availability.e * exp(wide.util))
choice <- apply(probs, 1, function(x) sample(1:nProd, size=1, prob=x))
choice.expand <- rep(choice,each=nProd)
wkkey = rep(m,nInd*nProd)
indkey = rep(Ind.id,each=nProd)
prodkey = rep(1:nProd,nInd)
avaind = matrix(rep(availability.m),nInd,byrow=TRUE)
profiles.expand = matrix(rep(t(profiles.f),nInd),ncol=ncol(profiles.f),byrow=TRUE)
choicef = as.numeric(choice.expand==prodkey)
data.i <- data.frame(wkkey,
indkey,
prodkey,
avaind,
profiles.expand,
choicef)
lcl.df <- rbind(lcl.df, data.i)
}
# Step 6: Store individual-level dataset
names(lcl.df) <- c("wkkey","indkey","prodkey","avaind","brand2","brand3","brand4","price","choice")
write.csv(lcl.df,file="lcl_ind_fake_data-with-availability-v2.csv")
# Step 7: Aggregate across individuals and store dataset
lcl.agg <- aggregate(choice ~ prodkey + wkkey,data=lcl.df, FUN=sum)
lcl.agg2 <- aggregate(cbind(avaind,brand2,brand3,brand4,price) ~ prodkey + wkkey,data=lcl.df,FUN=mean)
lcl.aggf <- merge(lcl.agg,lcl.agg2,by=c("wkkey","prodkey"))
lcl.aggrf <- lcl.aggf[with(lcl.aggf, order(wkkey,prodkey)),]
write.csv(lcl.aggrf,file="lcl_agg_fake_data-with-availability-v2.csv")