-
Notifications
You must be signed in to change notification settings - Fork 2
/
helper.py
116 lines (101 loc) · 3.53 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from scipy.sparse import csr_matrix, csc_matrix, linalg
from sklearn.decomposition import TruncatedSVD
from fbpca import pca
# noinspection PyUnresolvedReferences
import time
def save_sparse_csr(filename,array, label=[]):
np.savez(filename,data = array.data ,indices=array.indices,
indptr =array.indptr, shape=array.shape, label = label )
def load_sparse_csr(filename):
loader = np.load(filename)
if 'label' in loader.keys():
label = loader['label']
else:
label = []
return csr_matrix(( np.log10(1.0+loader['data']), loader['indices'], loader['indptr']),
shape = loader['shape']), label
def NE_dn(A, type='ave'):
m , n = A.shape
diags = np.ones(m)
diags[:] = abs(A).sum(axis=1).flatten()
if type == 'ave':
D = 1/(diags+np.finfo(float).eps)
return A*D[:,np.newaxis]
elif type == 'gph':
D = 1/np.sqrt(diags+np.finfo(float).eps)
return (A*D[:,np.newaxis])*D
def Hbeta(D,beta):
D = (D-D.min())/(D.max() - D.min() + np.finfo(float).eps)
P = np.exp(-D*beta)
sumP = P.sum()
H = np.log(sumP) + beta*sum(D*P)/sumP
P = P / sumP
return H, P
def umkl_bo(D, beta):
tol = 1e-4
u = 20
logU = np.log(u)
H, P = Hbeta(D,beta)
betamin = -np.inf
betamax = np.inf
Hdiff = H - logU
tries = 0
while(abs(Hdiff)>tol)&(tries < 30):
if Hdiff>0:
betamin = beta
if np.isinf(betamax):
beta *= 2.0
else:
beta = .5*(beta + betamax)
else:
betamax = beta
if np.isinf(betamin):
beta /= 2.0
else:
beta = .5*(beta + betamin)
H, P = Hbeta(D,beta)
Hdiff = H - logU
tries +=1
return P
def euclidean_proj_simplex(v, s=1):
""" Compute the Euclidean projection on a positive simplex
Solves the optimisation problem (using the algorithm from [1]):
min_w 0.5 * || w - v ||_2^2 , s.t. \sum_i w_i = s, w_i >= 0
Parameters
----------
v: (n,) numpy array,
n-dimensional vector to project
s: int, optional, default: 1,
radius of the simplex
Returns
-------
w: (n,) numpy array,
Euclidean projection of v on the simplex
"""
assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
n,d = v.shape # will raise ValueError if v is not 1-D
# get the array of cumulative sums of a sorted (decreasing) copy of v
v -= (v.mean(axis = 1)[:,np.newaxis]-1.0/d)
u = -np.sort(-v)
cssv = np.cumsum(u,axis = 1)
# get the number of > 0 components of the optimal solution
temp = u * np.arange(1,d+1) - cssv +s
temp[temp<0] = 'nan'
rho = np.nanargmin(temp,axis = 1)
#rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
# compute the Lagrange multiplier associated to the simplex constraint
theta = (cssv[np.arange(n), rho] - s) / (rho + 1.0)
# compute the projection by thresholding v using theta
w = (v - theta[:,np.newaxis]).clip(min=0)
return w
def fast_pca(in_X, no_dim):
(U, s, Va) = pca(in_X, no_dim, False, 8)
del Va
U[:] = U*np.sqrt(np.abs(s))
D = 1/(np.sqrt(np.sum(U*U,axis = 1)+np.finfo(float).eps)+np.finfo(float).eps)
return U*D[:,np.newaxis]