-
Notifications
You must be signed in to change notification settings - Fork 0
/
project_sunroof_analysis.R
271 lines (243 loc) · 14.9 KB
/
project_sunroof_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# Analyze Google Project Sunroof data
# Jeff Hubbs 2021
# SETUP
library("rjson")
setwd("~/Desktop/drawdown/misc/rooftop")
rm(list=ls())
# CONTROLS
# This boolean controls the behavior of the estipower() and fullpower() functions.
# If TRUE, use the given power value of each bucket. If FALSE, use the midpoint
# between the given power value of each bucket and that of the bucket above
# except for the highest-value bucket, in which case use its given value
floors <- FALSE
# Set a lower limit, irrespective of the setting of the "floors" variable above,
# for the nameplate power of a rooftop solar installation. This is because
# regardless of whether 0 or the midpoint between 0 and the next bin up is used
# as the value of the lower bin for weighted averaging, there is a practical
# minimum for how small an installation will ever go onto a rooftop.
minpower <- 3.0 # Units: kW
# FUNCTIONS
# Function to return the total power (i.e., integral) represented by any of those JSON strings
fullpower <- function(jsonstring){
prelim <- do.call(cbind, fromJSON(jsonstring, method="C"))
# Assert lower bound on pre_powers using constant "minpower"
pre_powers <- pmax(minpower, prelim[1,])
n <- length(prelim)/2
if (floors==TRUE){
powers <- pre_powers
}else{
powers <- c((pre_powers[2:n] + pre_powers[1:(n-1)])/2, pre_powers[n])
}
bucketcounts <- prelim[2,]
return(sum(powers * bucketcounts))
}
# Function to calculate the weighted average of the density distribution described
# by the JSON string
wavgpower <- function(jsonstring){
prelim <- do.call(cbind, fromJSON(jsonstring, method="C"))
# Assert lower bound on pre_powers using constant "minpower"
pre_powers <- pmax(minpower, prelim[1,])
n <- length(prelim)/2
if (floors==TRUE){
powers <- pre_powers
}else{
powers <- c((pre_powers[2:n] + pre_powers[1:(n-1)])/2, pre_powers[n])
# This is a kluge to deal with the case where the JSON string coming in is just one pair long. What
# can we do?? Just overwrite the "powers" structure with something sensible.
if (length(pre_powers)==1){powers <- pre_powers}
}
return(weighted.mean(powers, prelim[2,])) # prelim[2,] are the counts and become the weights
}
# Function to replace the power of the lowest bin (generally will be 0) with the maximum of it and minpower
lowlimit <- function(somejay){
blink <- fromJSON(somejay, method="C")
blink[[1]][1] <- max(blink[[1]][1], minpower)
return(toJSON(blink))
}
# Function to take a bucket collection and rewrite it with a bucket for every integer power value covered
deconstruct <- function(injay){
jlist <- fromJSON(injay, method="C") # Make a list of 2-element vectors (power, count) from the JSON
jmat <- do.call(cbind, jlist) # Make a matrix from that list (top row is powers, bottom row = counts)
newlist <- vector("list", 1 + max(jmat[1,])) # Make a new list of length equal to the largest power plus one
for (q in 1:length(newlist)){newlist[[q]] <- c(q-1, 0)}
for (q in 1:(length(jlist)-1)){ # Iterate through the bucket list up to the next to last bucket
thispower <- jmat[1,q]
nextpower <- jmat[1,q+1]
newcount <- jmat[2,q]/(nextpower-thispower)
for (r in (thispower+1):nextpower){newlist[[r]][2]<-newcount}
}
newlist[[length(newlist)]][2] <- jmat[2,ncol(jmat)]
return(toJSON(newlist))
}
# A function to effectively sum two power distribution JSON strings.
powersum <- function(ajson, bjson){
decon_a <- fromJSON(deconstruct(ajson), method="C")
decon_b <- fromJSON(deconstruct(bjson), method="C")
writeLines("Creating newlist...")
longest_list_length <- max(decon_a[[length(decon_a)]][1], decon_b[[length(decon_b)]][1])
newlist <- vector("list", 1 + longest_list_length)
writeLines("Prewriting newlist...")
for (q in 1:length(newlist)){newlist[[q]] <- c(q-1, 0)}
writeLines("Pasting decon_a into newlist...")
newlist[1:length(decon_a)] <- decon_a
writeLines("Adding bucket set b to newlist...")
for (q in 1:length(decon_b)){
put_it_here <- which(sapply(newlist, function(e) is.element(c(decon_b[[q]][1],0), e))[1,])[1]
newlist[[put_it_here]][2] <- newlist[[put_it_here]][2] + decon_b[[q]][2]
}
# newlist is complete and correct here but let's consolidate the now-one-wide buckets where the
# count values match
writeLines(paste0("Consolidating down from ", length(newlist), "..."))
i <- 1
while (i < length(newlist)){
accum <- newlist[[i]][2]
while (newlist[[i]][2]==newlist[[i+1]][2]){
accum <- accum + newlist[[i+1]][2]
newlist <- newlist[-(i+1)]
}
newlist[[i]][2] <- accum
i <- i + 1
}
writeLines(paste0("Consolidated to ",length(newlist)))
writeLines("")
return(toJSON(newlist))
}
# When we use the buckets below 10kW for residential, we need to trick the fullpower() function's treatment
# of the highest bin into not just using the floor value (the first of the pair in the last bucket) like it's
# written but instead think there's another bucket up at 10kW with a zero count. This function takes the list
# coming out of the fromJSON() function and appends an element corresponding to a bucket "[10,0]".
tackonten <- function(resjson){return(toJSON(c(fromJSON(resjson),list(c(10,0)))))}
# CONSTANTS
# Project sunroof gives 1.650m x 0.992m = 1.6368m^2 panels
panel_area_sqm <- 1.6368
# READ PROJECT SUNROOF DATA
allsunroof.df <- read.csv("project-sunroof-county.csv", header=TRUE)
# Select for just the Georgia counties
gasunroof.df <- allsunroof.df[allsunroof.df$state_name=="Georgia",]
# Just obliterate Twiggs County; it's negligible and it causes problems
gasunroof.df <- gasunroof.df[!gasunroof.df$region_name=="Twiggs County",]
rm(allsunroof.df)
# INITIIALIZE DATAFRAME
# Google Sunroof metadata says:
# install_size_kw_buckets_json - [Number] of buildings with potential for
# various installation size buckets. Format is a JSON array, where each element
# is a tuple containing (1) lower bound of bucket, in kW, and (2) number of
# buildings in that bucket.
#
# existing_installs_count - [Number] of buildings estimated to have a solar
# installation, at time of data collection
#
# yearly_sunlight_kwh_total - total solar energy generation potential for all roof space in that region
#
# yearly_sunlight_kwh_s & yearly_sunlight_kwh_f - same, but for south-facing and flat roof space
rm(rooftop.df)
# Convert "Xxxxx County" to "Xxxxx" on the fly
# gen_potl column is the all-surface "total solar energy generation potential" in kilowatts
# sfspace_gen column is the south-facing and flat "total solar energy generation potential" in kilowatts
rooftop.df <- data.frame(name=sub(" County", "", gasunroof.df$region_name),
buckets=gasunroof.df$install_size_kw_buckets_json,
allspace_gen=gasunroof.df$yearly_sunlight_kwh_total / (365.25 * 24),
sfspace_gen=(gasunroof.df$yearly_sunlight_kwh_s + gasunroof.df$yearly_sunlight_kwh_f) / (365.25 * 24),
south_panels=gasunroof.df$number_of_panels_s,
flat_panels=gasunroof.df$number_of_panels_f,
total_panels=gasunroof.df$number_of_panels_total,
existing_count=gasunroof.df$existing_installs_count,
kw_total=gasunroof.df$kw_total)
# EXTEND DATAFRAME
# We're going to create a new field in the dataframe that is just the first two buckets of the potential
# rooftop power distribution. We assume that any system under 10kW is residential. First: a function to
#convert the whole JSON bucket string to a new one with just the first two buckets.
firsttwo <- function(longjson){return(tackonten(toJSON(fromJSON(longjson)[1:2])))}
# And now a function to return *all but* the first two buckets.
allbutfirsttwo <- function(longjson){
fj <- fromJSON(longjson)
return(toJSON(fj[3:length(fj)]))
}
# Use those functions to create new JSON strings for each of residential and commercial
rooftop.df$res_buckets <- unlist(lapply(rooftop.df$buckets, firsttwo), use.names=FALSE)
rooftop.df$com_buckets <- unlist(lapply(rooftop.df$buckets, allbutfirsttwo), use.names=FALSE)
# We need a capacity factor to be able to connect system nameplate powers to their effective generated power.
# We have two columns in the dataframe now that can give aggregate nameplate (buckets) and aggregate generation
# (allspace_gen) that are both predicated on the use of all roof surfaces. We won't actually use all-surface
# powers for anything because we don't believe that people will realistically install rooftop systems that way
# but we need nameplate and effective powers to be on the same terms if we are to obtain a capacity factor from
# them.
the_fullpowers <- unlist(lapply(rooftop.df$buckets,fullpower), use.names=FALSE)
sf_panel_ratios <- (rooftop.df$south_panels + rooftop.df$flat_panels) / rooftop.df$total_panels
# OLD/WRONG WAY rooftop.df$cap_fac <- rooftop.df$allspace_gen / the_fullpowers
rooftop.df$cap_fac <- rooftop.df$sfspace_gen / (the_fullpowers * sf_panel_ratios)
# That gives us each county's rooftop solar capacity factor but we also want a single number to characterize
# the whole state's capacity factor. We can't just average the values giving them equal weight so first let's
# get counts for the number of systems shown in the buckets and we'll use those to weight the average of
# capacity factors.
howmany <- function(thejson){return(sum(do.call(cbind, fromJSON(thejson, method="C"))[2,]))}
# NOT USING THIS ANYMORE - SEE PART AT BOTTOM THAT AGGREGATES BUCKETS FROM ALL COUNTIES
# statewide_cap_fac <- weighted.mean(rooftop.df$cap_fac,
# unlist(lapply(rooftop.df$buckets,howmany), use.names=FALSE))
# To get total powers for residential and commercial generation separately, we've split the buckets up into two new
# sets, one of 0-10kW and another >=1-kW. But those are all-surface nameplate powers so we need to multiply
# the integrated powers we get by a south-flat-surface/all-surface ratio. We already have that by county. We
# also need to use the by-county capacity factors because the res and com buckets have nameplate powers,
# The numbers below are the maximum reasonable effective generation powers for all available residential
# and commercial buildings, using only the south-facing and flat surfaces.
rooftop.df$sf_all_ratios <- (rooftop.df$sfspace_gen/rooftop.df$allspace_gen)
rooftop.df$max_res_gen <- unlist(lapply(rooftop.df$res_buckets,fullpower)) * rooftop.df$sf_all_ratios * rooftop.df$cap_fac
rooftop.df$max_com_gen <- unlist(lapply(rooftop.df$com_buckets,fullpower)) * rooftop.df$sf_all_ratios * rooftop.df$cap_fac
rooftop.df$max_all_sf_capacity <- unlist(lapply(rooftop.df$buckets,fullpower)) * rooftop.df$sf_all_ratios
# This next one is not necessary but it will give you numbers very close to rooftop.df$allspace_gen
# rooftop.df$max_gen <- unlist(lapply(rooftop.df$buckets,fullpower)) * rooftop.df$sf_all_ratios * rooftop.df$cap_fac
# PANEL AREA
# Determine the total amount of maximal panel area on south-facing and flat roofs
total_panel_area_sqkm <- sum(rooftop.df$south_panels,rooftop.df$flat_panels) * panel_area_sqm * 1E-06
# WEIGHTED AVERAGES
# New columns for weighted averages of the bucket sets: all, residential, commercial
rooftop.df$wavg_buck_all <- unlist(lapply(rooftop.df$buckets, wavgpower))
rooftop.df$wavg_buck_res <- unlist(lapply(rooftop.df$res_buckets, wavgpower))
rooftop.df$wavg_buck_com <- unlist(lapply(rooftop.df$com_buckets, wavgpower))
# SYSTEM COUNTS & EXISTING RESIDENTIAL POWER
allpotcounts <- unlist(lapply(rooftop.df$buckets, howmany), use.names=FALSE) # All potential systems
residpotcounts <- unlist(lapply(rooftop.df$res_buckets, howmany), use.names=FALSE) # All residential systems
# Use ratio between those two to estimate how many residential systems exist
rooftop.df$exist_res_count <- (residpotcounts/allpotcounts) * rooftop.df$existing_count
# Use that to estimate existing residential system power
rooftop.df$exist_res_cap_power <- rooftop.df$exist_res_count *
unlist(lapply(rooftop.df$res_buckets, wavgpower), use.names=FALSE) *
rooftop.df$sf_all_ratios
rooftop.df$exist_com_cap_power <- (rooftop.df$existing_count - rooftop.df$exist_res_count) *
unlist(lapply(rooftop.df$com_buckets, wavgpower), use.names=FALSE) *
rooftop.df$sf_all_ratios
rooftop.df$exist_all_cap_power <- rooftop.df$existing_count *
unlist(lapply(rooftop.df$buckets, wavgpower), use.names=FALSE) *
rooftop.df$sf_all_ratios
# OUTPUT
{
#writeLines(sprintf("Statewide capacity factor weighted average: %.4f", statewide_cap_fac))
writeLines(sprintf("Maximal (tech-pot) nameplate capacity for south-facing & flat surfaces: %.2fGW",
c(sum(rooftop.df$max_all_sf_capacity)/1E06)))
writeLines(sprintf("Maximal south-facing and flat surface generated power, residential: %.3fGW = %.2fTWh/yr",
c(sum(rooftop.df$max_res_gen)/1E06), sum(rooftop.df$max_res_gen)* 8766* 1E-09))
writeLines(sprintf("Maximal south-facing and flat surface generated power, commercial: %.2fGW = %.2fTWh/yr",
c(sum(rooftop.df$max_com_gen)/1E06), sum(rooftop.df$max_com_gen) * 8766*1E-09))
writeLines(sprintf("Total number of existing systems: %d", sum(rooftop.df$existing_count)))
writeLines(sprintf("Total number of existing residential systems: %.0f", sum(rooftop.df$exist_res_count)))
writeLines(sprintf("Total capacity power of existing residential systems: %.0fkW", sum(rooftop.df$exist_res_cap_power)))
writeLines(sprintf("Total capacity power of existing commercial systems: %.0fkW", sum(rooftop.df$exist_com_cap_power)))
writeLines(sprintf("Total capacity power of all existing systems: %.0fkW", sum(rooftop.df$existing_count *
+ unlist(lapply(rooftop.df$buckets,
wavgpower),
use.names=FALSE) *
+ rooftop.df$sf_all_ratios)))
}
# Write out a new CSV
write.csv(rooftop.df, file="google_sunroof_based_estimates.csv", row.names=FALSE)
###################################################################################################
# This code will aggregate the capacity power distributions for all counties. Careful; it's slow.
agg <- lowlimit(rooftop.df$buckets[1])
for (k in 2:nrow(rooftop.df)){
writeLines(paste0("County=", rooftop.df$name[k], " k=", k))
agg <- powersum(agg, lowlimit(rooftop.df$buckets[k]))
}
# That being done, calculate the statewide capacity factor
all_state_cap_fac <- sum(rooftop.df$sfspace_gen)/
(fullpower(agg)*(((sum(rooftop.df$south_panels+rooftop.df$flat_panels)/sum(rooftop.df$total_panels)))))
writeLines(sprintf("Statewide capacity factor weighted average: %.4f", all_state_cap_fac))