-
Notifications
You must be signed in to change notification settings - Fork 0
/
syn_dataset_common.m
113 lines (105 loc) · 3.04 KB
/
syn_dataset_common.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
function [ dataset, realLabels ] = syn_dataset_common(alpha, isSaveToFiles, path)
% Synthetic dataset #1
% Conserved modules have the same size and are common to a given set of
% networks.
%
% 30 weighted, symmetric networks on 500 nodes are generated. Five modules
% of 80 consecutive nodes each (1-80, 81-160, ..., 321-400) are planted
% progressively: module k is present in networks 5*(k-1)+1 .. 25. Each of
% the last 5 networks instead contains a single module made of 80 randomly
% chosen nodes. Binary edges are then turned into weights in [0, 1] by
% Gaussian noise (mean 0.25, sigma 0.1): an edge gets weight 1-e, a
% non-edge gets weight e, both clipped to [0, 1].
%
% INPUT:
%   alpha: The probability of the edge connected inside a module
%   isSaveToFiles: a flag for deciding whether to store the results to
%       files (only for sparse matrix format). Optional, default false.
%   path: output file path prefix; file names are appended to it verbatim,
%       so include a trailing file separator when it names a directory
%       (it must be denoted if 'isSaveToFiles' is true)
%
% OUTPUT:
%   dataset: the generated dataset, an M-by-1 cell array of N-by-N
%       symmetric weight matrices with zero diagonal
%   realLabels: labels indicating the conserved module to which each point
%       is allocated (1-by-5 cell array of node index vectors).
%
% FILES WRITTEN (when isSaveToFiles is true):
%   [path 'network_<m>']     one "i<TAB>j<TAB>weight" line per kept edge (i < j)
%   [path 'labels.txt']      one tab-separated line of node indices per module
%   [path 'networklist.txt'] the names of the network files, one per line
%
%   Peizhuo Wang (wangpeizhuo_37@163.com)

    % ---- Argument handling ------------------------------------------------
    if nargin < 2 || isempty(isSaveToFiles)
        isSaveToFiles = false;
    end
    if isSaveToFiles && nargin < 3
        error('syn_dataset_common:missingPath', ...
              'An output path must be given when isSaveToFiles is true.');
    end

    % ---- Fixed experiment layout ------------------------------------------
    N = 500;                 % Number of nodes
    M = 30;                  % Number of networks
    C_size = 80;             % Size of each cluster
    N_MODULES = 5;           % Number of conserved modules
    NETS_PER_STAGE = 5;      % A new module is introduced every 5 networks
    EDGE_MIN_WEIGHT = 0.25;  % Edges lighter than this are not written to file

    dataset = cell(M, 1);
    C = cell(1, N_MODULES);
    for k = 1:N_MODULES
        C{k} = (1:C_size) + (k - 1) * C_size;
    end
    realLabels = C;
    M_C = numel(C) * NETS_PER_STAGE; % Number of networks with defined patterns

    % ---- Edge probabilities -----------------------------------------------
    p_in = alpha;
    p_out = 0.05;
    % Cap the background density so that the expected number of outside
    % neighbours does not exceed the expected number of inside neighbours:
    % (N-n)*p_out <= (n-1)*p_in
    if (p_out * (N - C_size)) > (p_in * (C_size - 1))
        p_out = (p_in * (C_size - 1)) / (N - C_size);
    end

    for m = 1:M
        % Background network. The same uniform draws W are reused for the
        % module blocks, only with the denser threshold p_in.
        W = rand(N);
        WW = double(W < p_out);

        if m <= M_C
            % Modules 1..nActive are present in this network.
            nActive = ceil(m / NETS_PER_STAGE);
            for k = 1:nActive
                C_in = C{k};
                WW(C_in, C_in) = double(W(C_in, C_in) < p_in);
            end
        else
            % One random module in each of the last networks
            s = randperm(N);
            C_in = s(1:C_size);
            WW(C_in, C_in) = double(W(C_in, C_in) < p_in);
        end

        % Gaussian noise, sigma=0.1 or 0.15
        % Only the lower triangle is used and then mirrored, so the result
        % is symmetric. Non-edges become e, edges become 1-e.
        isEdge = tril(WW) == 1;
        E = tril(0.25 + 0.1 * randn(N));
        noisy = zeros(N);
        noisy(~isEdge) = E(~isEdge);
        noisy(isEdge) = 1 - E(isEdge);
        noisy = min(max(noisy, 0), 1);   % clip to [0, 1]
        noisy = tril(noisy, -1);         % drop the diagonal (no self-loops)
        WW = noisy + noisy';
        dataset{m} = WW;

        % Save to file. Sparse matrix. Delete the edges with weight less
        % than EDGE_MIN_WEIGHT.
        if isSaveToFiles
            write_edge_list([path, 'network_', num2str(m)], WW, EDGE_MIN_WEIGHT);
        end
    end

    if isSaveToFiles
        % The labels do not depend on the network, so write them only once.
        write_labels([path, 'labels.txt'], realLabels);

        flist = open_for_writing([path, 'networklist.txt']);
        closeList = onCleanup(@() fclose(flist)); %#ok<NASGU>
        fprintf(flist, 'network_%d\n', 1:M);
    end
end

function fid = open_for_writing(fileName)
% Open a text file for writing and fail with a clear message if that is
% not possible.
    [fid, msg] = fopen(fileName, 'wt');
    if fid < 0
        error('syn_dataset_common:fileOpen', ...
              'Cannot open "%s" for writing: %s', fileName, msg);
    end
end

function write_edge_list(fileName, WW, minWeight)
% Write every edge (i < j) of the symmetric matrix WW whose weight is at
% least minWeight as an "i<TAB>j<TAB>weight" line, ordered by i and then j.
    fid = open_for_writing(fileName);
    closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

    % WW is symmetric, so scanning the strict lower triangle column by
    % column visits the pairs (i, j), i < j, in the order i-major, j-minor.
    keep = tril(true(size(WW)), -1) & (WW >= minWeight);
    [jj, ii] = find(keep);
    if ~isempty(ii)
        fprintf(fid, '%d\t%d\t%f\n', [ii, jj, WW(keep)].');
    end
end

function write_labels(fileName, labels)
% Write one line per module: its node indices, each followed by a tab.
    fid = open_for_writing(fileName);
    closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

    for k = 1:numel(labels)
        theLabel = labels{k};
        if ~isempty(theLabel)
            fprintf(fid, '%d\t', theLabel);
        end
        fprintf(fid, '\n');
    end
end