Updated benchmark to remove NAs in activities #62

Merged
merged 1 commit on Aug 2, 2023
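In short, this change makes the benchmark tolerate methods that return NaN activity scores: NaN entries are masked out of both the activity scores and the ground truth before any metric is computed, and the class imbalance is recomputed on the filtered values. A minimal sketch of that masking pattern, using plain NumPy with illustrative arrays rather than the decoupler API:

import numpy as np

# Illustrative activity scores for one method, with missing values
act = np.array([0.9, np.nan, 0.2, np.nan, 0.7])
grt = np.array([1., 1., 0., 0., 1.])  # binary ground truth

# Drop NaN activities from both arrays before scoring
nan_mask = np.isnan(act)
act_f, grt_f = act[~nan_mask], grt[~nan_mask]

# Class imbalance is recomputed on the filtered ground truth
ci = np.sum(grt_f) / len(grt_f)  # 2/3 here, versus 3/5 before filtering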
49 changes: 47 additions & 2 deletions decoupler/tests/test_utilsbenchmark.py
@@ -182,8 +182,21 @@ def test_append_by_experiment():

    append_by_experiment(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                         mthds=mthds, metrics=metrics, min_exp=1)

    act_na = act.astype(float)
    act_na[1, 0, 0] = np.nan
    act_na[1, 1, 0] = np.nan
    act_na[1, 2, 0] = np.nan

    df_na = []

    append_by_experiment(df_na, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
                         mthds=mthds, metrics=metrics, min_exp=1)

    assert len(df) == 2
    assert df[0][5] < df[1][5]
    assert df[0][5] < df_na[0][5]  # performance improves once NAs are removed
    assert df[0][6] < df_na[0][6]  # class imbalance increases once NAs are removed


def test_append_by_source():
@@ -216,7 +229,39 @@ def test_append_by_source():
    append_by_source(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                     mthds=mthds, metrics=metrics, min_exp=1)
    assert len(df) == 4
    assert df[0][5] < df[1][5]
    assert df[0][5] < df[2][5]

    act_na = act.astype(float)
    act_na[1, 4, 0] = np.nan

    df_na = []

    append_by_source(df_na, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
                     mthds=mthds, metrics=metrics, min_exp=1)

    assert len(df_na) == 3
    assert df_na[0][2] == 'T1'

    act_na[1, 0, 0] = np.nan

    df_na_2 = []
    append_by_source(df_na_2, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
                     mthds=mthds, metrics=metrics, min_exp=1)

    assert len(df_na_2) == 2

    act_na_3 = act.astype(float)
    act_na_3[1, 0, 0] = np.nan

    df_na_3 = []

    append_by_source(df_na_3, grpby_i=None, grp=None, act=act_na_3, grt=grt, srcs=srcs,
                     mthds=mthds, metrics=metrics, min_exp=1)

    assert len(df_na_3) == 3
    assert df_na_3[0][2] == 'T5'




def test_append_metrics_scores():
@@ -260,7 +305,7 @@ def test_append_metrics_scores():
    append_metrics_scores(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                          mthds=mthds, metrics=metrics, by='source', min_exp=1)
    assert len(df) == 4
    assert df[0][5] < df[1][5]
    assert df[0][5] < df[2][5]


def test_check_groupby():
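The new test cases exercise two behaviours of the reworked append_by_source: a source can drop below min_exp once its NaN-marked experiments no longer count towards the ground truth, and a source whose remaining ground truth contains only one class (class imbalance of exactly 0 or 1) is skipped. A small sketch of that single-class guard, assuming grt_source is a per-source ground-truth vector already filtered for NaNs (keep_source is a hypothetical helper, not part of decoupler):

import numpy as np

def keep_source(grt_source):
    # Skip sources whose filtered ground truth has only one class,
    # mirroring the ci != 0. and ci != 1. guard in append_by_source
    ci = np.sum(grt_source) / len(grt_source)
    return ci != 0. and ci != 1.

keep_source(np.array([1., 0., 1.]))  # True: both classes remain
keep_source(np.array([1., 1.]))      # False: ci == 1, source is skipped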
67 changes: 41 additions & 26 deletions decoupler/utils_benchmark.py
@@ -123,43 +123,58 @@ def append_by_experiment(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_e
    # Flatten act by method
    act, grt = act.reshape(-1, act.shape[-1]).T, grt.flatten()

    # Compute Class Imbalance
    ci = np.sum(grt) / len(grt)

    # Compute per method and metric
    for m in range(len(mthds)):
        mth = mthds[m]
        for metric in metrics:
            scores = compute_metric(act[m], grt, metric, pi0=pi0, n_iter=n_iter, seed=seed)
            # Identify activity scores with NAs in each method
            act_i = act[m]
            nan_mask = np.isnan(act_i)
            # Remove NAs from activity matrix and ground truth
            act_i = act_i[~nan_mask]
            grt_i = grt[~nan_mask]
            # Compute Class Imbalance
            ci = np.sum(grt_i) / len(grt_i)
            # Compute metrics
            scores = compute_metric(act_i, grt_i, metric, pi0=pi0, n_iter=n_iter, seed=seed)
            for score in scores:
                row = [grpby_i, grp, None, mth, metric, score, ci]
                df.append(row)


def append_by_source(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_exp=5, pi0=0.5,
                     n_iter=1000, seed=42):

    # Remove sources with less than min_exp
    src_msk = np.sum(grt > 0., axis=0) >= min_exp
    act, grt = act[:, src_msk, :], grt[:, src_msk]
    srcs = srcs[src_msk]

    # Compute per source, method and metric
    for s in range(len(srcs)):
        src = srcs[s]
        tmp_grt = grt[:, s]

        # Compute Class Imbalance
        ci = np.sum(tmp_grt) / len(tmp_grt)

        for m in range(len(mthds)):
            mth = mthds[m]
            tmp_act = act[:, s, m]
            for metric in metrics:
                scores = compute_metric(tmp_act, tmp_grt, metric, pi0=pi0, n_iter=n_iter, seed=seed)
                for score in scores:
                    row = [grpby_i, grp, src, mth, metric, score, ci]
                    df.append(row)

    for m in range(len(mthds)):
        mth = mthds[m]
        act_i = act[:, :, m]
        nan_mask = np.isnan(act_i)

        grt_i = grt.copy()
        grt_i[nan_mask] = np.nan

        # Remove sources with less than min_exp
        src_msk = np.sum(grt_i > 0., axis=0) >= min_exp
        act_i, grt_i = act[:, src_msk, :], grt_i[:, src_msk]
        srcs_method = srcs[src_msk]

        # Compute per source, method and metric
        for s in range(len(srcs_method)):
            src = srcs_method[s]
            tmp_grt = grt_i[:, s]
            nan_mask = np.isnan(tmp_grt)

            grt_source = tmp_grt[~nan_mask]
            act_source = act_i[:, s, m][~nan_mask]

            # Compute Class Imbalance
            ci = np.sum(grt_source) / len(grt_source)
            if ci != 0. and ci != 1.:
                for metric in metrics:
                    scores = compute_metric(act_source, grt_source, metric, pi0=pi0, n_iter=n_iter, seed=seed)
                    for score in scores:
                        row = [grpby_i, grp, src, mth, metric, score, ci]
                        df.append(row)


def append_metrics_scores(df, grpby_i, grp, act, grt, srcs, mthds, metrics, by, min_exp=5, pi0=0.5,
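In append_by_experiment the activities are first flattened to one score vector per method, so after this change each method gets its own NaN mask and its own class imbalance whenever the missing entries differ between methods. A compact sketch of that flatten-then-mask step, with assumed shapes (act is experiments × sources × methods) and random illustrative data:

import numpy as np

rng = np.random.default_rng(42)
act = rng.random((4, 3, 2))                    # experiments x sources x methods
act[1, 0, 0] = np.nan                          # method 0 is missing one score
grt = (rng.random((4, 3)) > 0.5).astype(float)

# Flatten to one score vector per method, as in the updated code
act_flat, grt_flat = act.reshape(-1, act.shape[-1]).T, grt.flatten()

for m in range(act_flat.shape[0]):
    nan_mask = np.isnan(act_flat[m])
    act_m, grt_m = act_flat[m][~nan_mask], grt_flat[~nan_mask]
    ci = np.sum(grt_m) / len(grt_m)            # per-method class imbalance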