-
Notifications
You must be signed in to change notification settings - Fork 3
/
DATA_COVID_JHU.m
247 lines (216 loc) · 9.79 KB
/
DATA_COVID_JHU.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
function data = DATA_COVID_JHU(n)
% Data retrieval function for COVID modelling
% FORMAT data = DATA_COVID_JHU(n)
%
% n - number of countries to retain [default: n]
%
% This auxiliary routine retrieves data from comma separated data files
% that can be downloaded from:
% https://github.com/CSSEGISandData/COVID-19/
%
% time_series_covid19_confirmed_global.csv
% time_series_covid19_deaths_global.csv
% time_series_covid19_recovered_global.csv
%
% It augments these data with population sizes from the United Nations,
% returning the following data structure:
%
% Data(k).country - country
% Data(k).pop - population size
% Data(k).lat - latitude
% Data(k).long - longitude
% Data(k).date - date when more than one case was reported
% Data(k).cases - number of cases, from eight days prior to first cases
% Data(k).death - number of deaths, from eight days prior to first cases
% Data(k).recov - number recovered, from eight days prior to first cases
% Data(k).days - number of days in timeseries
% Data(k).cum - cumulative number of deaths
%
% Population data from (cite as):
% United Nations, Department of Economic and Social Affairs, Population
% Division (2019). World Population Prospects 2019, Online Edition. Rev. 1.
%
% Please see the main body of the script for a description of the graphical
% outputs provided when the routine is called with at an output argument.
%__________________________________________________________________________
% Copyright (C) 2020 Wellcome Centre for Human Neuroimaging
% Karl Friston
% $Id: DATA_COVID_JHU.m 8001 2020-11-03 19:05:40Z karl $
# SPDX-License-Identifier: GPL-2.0
%--------------------------------------------------------------------------
% defaults
%--------------------------------------------------------------------------
url = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/';
if nargin < 1, n = 16; end
% load data from https://github.com/CSSEGISandData/COVID-19/
% Data import in octave isn't as well supported as it is in MATLAB, use local
% functions for this which may be slower
%--------------------------------------------------------------------------
if exist ('OCTAVE_VERSION', 'builtin');
octave_websave('time_series_covid19_confirmed_global.csv',[url,'time_series_covid19_confirmed_global.csv']);
octave_websave('time_series_covid19_deaths_global.csv',[url,'time_series_covid19_deaths_global.csv']);
try
C = octave_importglobal('time_series_covid19_confirmed_global.csv');
D = octave_importglobal('time_series_covid19_deaths_global.csv' );
catch
clc, warning('Please load csv files into the current working directory')
help DATA_COVID_JHU
return;
end
N = octave_importpop('population.csv');
else
websave('time_series_covid19_confirmed_global.csv',[url,'time_series_covid19_confirmed_global.csv']);
websave('time_series_covid19_deaths_global.csv',[url,'time_series_covid19_deaths_global.csv']);
try
C = importdata('time_series_covid19_confirmed_global.csv');
D = importdata('time_series_covid19_deaths_global.csv' );
catch
clc, warning('Please load csv files into the current working directory')
help DATA_COVID_JHU
return;
end
N = importdata('population.csv');
end
date = D.textdata(1,5:end); % date
Location = D.textdata(2:end,2); % location by country
State = unique(Location); % common countries
Npop = N.data;
Country = N.textdata;
% ensure consistency between covid timeseries and population data
%--------------------------------------------------------------------------
i = logical(ismember(Country,{'United States of America','US'}));
Country{i} = 'US';
i = logical(ismember(Country,{'Republic of Korea'}));
Country{i} = 'Korea, South';
i = logical(ismember(Country,{'Russian Federation'}));
Country{i} = 'Russia';
i = logical(ismember(Country,{'Venezuela (Bolivarian Republic of)'}));
Country{i} = 'Venezuela';
i = logical(ismember(Country,{'Bolivia (Plurinational State of)'}));
Country{i} = 'Bolivia';
i = logical(ismember(Country,{'Iran (Islamic Republic of)'}));
Country{i} = 'Iran';
% i = find(ismember(State,'Spain'));
% assemble data structure
%==========================================================================
Data = struct([]);
s = 7; % data smoothing (days)
k = 1;
for i = 1:numel(State)
j = find(ismember(Country,State{i}));
if numel(j) && ~ismember(State(i),'China')
% confirmed cases
%------------------------------------------------------------------
Ci = logical(ismember(C.textdata(2:end,2),State{i}));
CY = sum(C.data(Ci,3:end),1)';
% confirmed deaths
%------------------------------------------------------------------
Di = logical(ismember(D.textdata(2:end,2),State{i}));
DY = sum(D.data(Di,3:end),1)';
% save from first reported case
%------------------------------------------------------------------
d = find(cumsum(CY) > 1,1);
if (isnull(d)|isempty(d))
continue;
end
l = find(ismember(C.textdata(2:end,2),State{i}),1);
Data(k).country = State{i};
Data(k).pop = Npop(j)*1e3;
Data(k).lat = C.data(l,1);
Data(k).long = C.data(l,2);
try
Data(k).date = date{d};
catch
qwerty = 1;
disp(d);
end
Data(k).cases = spm_hist_smooth(gradient([zeros(8,1); CY(d:end)]),s);
Data(k).death = spm_hist_smooth(gradient([zeros(8,1); DY(d:end)]),s);
Data(k).days = numel(Data(k).cases);
Data(k).cum = sum(Data(k).death);
% check
%------------------------------------------------------------------
if ismember(State(i),'United Kingdom')
% keyboard;
end
% population of Wuhan
%------------------------------------------------------------------
if ismember(State(i),'China')
Data(k).pop = 11.08e6;
end
k = k + 1;
end
end
% Illustrate some features of the data, focusing on early cases
%==========================================================================
% This figure provides a brief survey of the timeseries used for subsequent
% modelling, with a focus on the early trajectories of mortality. The upper
% left panel shows the distribution, over countries, of the number of days
% after more than one case was reported. At the time of writing, a
% substantial number of countries witnessed an outbreak lasting for more
% than 60 days. The upper left panel plots the total number of deaths
% against the durations in the left panel. Those countries whose outbreak
% started earlier have greater cumulative deaths; however, within this
% group, there is no clear correlation between population size and total.
% The middle left panel plots the new deaths reported (per day) over a
% 48-day period following the first report of more than one case. The
% colours of the lines denote different countries. These countries are
% listed in the lower left panel, which plots the cumulative death rate.
% China is clearly the first country to be severely affected, with
% remaining countries evincing an accumulation of deaths some 30 days after
% China. Interestingly, there is little correlation between the total
% number of deaths and population size. The middle right panel is a
% logarithmic plot of the total deaths against population size in the
% initial (64-day) period. However, there is a stronger correlation between
% the total number of cases reported (within the first 64 days) and the
% total number of deaths.
% duration of pandemic
%--------------------------------------------------------------------------
T = spm_cat({Data.days});
N = spm_cat({Data.cum});
% rank the number of cumulative cases
%--------------------------------------------------------------------------
[N,i] = sort(N,'descend');
i = i(1:n);
data = Data(i);
t = min(T(i));
for i = 1:numel(data)
death(:,i) = data(i).death(1:t);
cases(:,i) = data(i).cases(1:t);
end
% plot sorting, unless an outcome argument is specified
%--------------------------------------------------------------------------
if nargout, return, end
figure();
subplot(3,2,1), hist(T,32,'Edgecolor','none')
title('Duration (days)','Fontsize',16)
xlabel('days'),ylabel('number of countries'), axis square
subplot(3,2,2), plot(T,N,'.','MarkerSize',32,'Color',[0.5 0.5 1])
title('Total deaths','Fontsize',16)
xlabel('days'),ylabel('total deaths'), axis square
% compare initial statistics
%--------------------------------------------------------------------------
subplot(3,2,3), plot(1:t,death)
title('Death rate','Fontsize',16)
xlabel('days'),ylabel('deaths per day')
axis square
pop = [data.pop];
subplot(3,2,4), loglog(pop,sum(death),'.','MarkerSize',32,'Color',[0.8 0.8 1])
title('Population and deaths','Fontsize',16)
xlabel('population'),ylabel('deaths (within 64 days)')
axis square
subplot(3,2,5), plot(1:t,cumsum(death,1))
title('Cumulative deaths','Fontsize',16)
xlabel('days'),ylabel('deaths (within 64 days)')
axis square, legend({data(1:min(end,14)).country}), legend('boxoff')
subplot(3,2,6), loglog(sum(death),sum(cases),'.','MarkerSize',32,'Color',[0.8 0.8 1])
title('Cases and deaths','Fontsize',16)
xlabel('cumulative deaths'),ylabel('total cases (within 64 days)')
axis square
% label countries
%--------------------------------------------------------------------------
for i = 1:numel(data)
subplot(3,2,6), text(sum(death(:,i)),sum(cases(:,i)),data(i).country,'FontSize',9)
subplot(3,2,4), text(pop(i),sum(death(:,i)), data(i).country,'FontSize',9)
end
return