-
Notifications
You must be signed in to change notification settings - Fork 0
/
x2p.m
100 lines (84 loc) · 3.22 KB
/
x2p.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
function [P, beta] = x2p(X, u, tol)
%X2P Identifies appropriate sigma's to get kk NNs up to some tolerance
%
% [P, beta] = x2p(xx, kk, tol)
%
% Identifies the required precision (= 1 / variance^2) to obtain a Gaussian
% kernel with a certain uncertainty for every datapoint. The desired
% uncertainty can be specified through the perplexity u (default = 15). The
% desired perplexity is obtained up to some tolerance that can be specified
% by tol (default = 1e-4).
% The function returns the final Gaussian kernel in P, as well as the
% employed precisions per instance in beta.
%
%
% (C) Laurens van der Maaten, 2008
% Maastricht University
if ~exist('u', 'var') || isempty(u)
u = 15;
end
if ~exist('tol', 'var') || isempty(tol)
tol = 1e-4;
end
% Initialize some variables
n = size(X, 1); % number of instances
P = zeros(n, n); % empty probability matrix
beta = ones(n, 1); % empty precision vector
logU = log(u); % log of perplexity (= entropy)
% Compute pairwise distances
disp('Computing pairwise distances...');
sum_X = sum(X .^ 2, 2);
D = bsxfun(@plus, sum_X, bsxfun(@plus, sum_X', -2 * X * X'));
% Run over all datapoints
disp('Computing P-values...');
for i=1:n
if ~rem(i, 500)
disp(['Computed P-values ' num2str(i) ' of ' num2str(n) ' datapoints...']);
end
% Set minimum and maximum values for precision
betamin = -Inf;
betamax = Inf;
% Compute the Gaussian kernel and entropy for the current precision
Di = D(i, [1:i-1 i+1:end]);
[H, thisP] = Hbeta(Di, beta(i));
% Evaluate whether the perplexity is within tolerance
Hdiff = H - logU;
tries = 0;
while abs(Hdiff) > tol && tries < 50
% If not, increase or decrease precision
if Hdiff > 0
betamin = beta(i);
if isinf(betamax)
beta(i) = beta(i) * 2;
else
beta(i) = (beta(i) + betamax) / 2;
end
else
betamax = beta(i);
if isinf(betamin)
beta(i) = beta(i) / 2;
else
beta(i) = (beta(i) + betamin) / 2;
end
end
% Recompute the values
[H, thisP] = Hbeta(Di, beta(i));
Hdiff = H - logU;
tries = tries + 1;
end
% Set the final row of P
P(i, [1:i - 1, i + 1:end]) = thisP;
end
disp(['Mean value of sigma: ' num2str(mean(sqrt(1 ./ beta)))]);
disp(['Minimum value of sigma: ' num2str(min(sqrt(1 ./ beta)))]);
disp(['Maximum value of sigma: ' num2str(max(sqrt(1 ./ beta)))]);
end
% Function that computes the Gaussian kernel values given a vector of
% squared Euclidean distances, and the precision of the Gaussian kernel.
% The function also computes the perplexity of the distribution.
function [H, P] = Hbeta(D, beta)
P = exp(-D * beta);
sumP = sum(P);
H = log(sumP) + beta * sum(D .* P) / sumP;
P = P / sumP;
end