-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
upload code for additional experiments
- Loading branch information
1 parent
7d5b65f
commit 480e354
Showing
9 changed files
with
716 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Experiment 5: Binary Search vs Density Improvement on ordinary graphs | ||
Run | ||
``` | ||
julia --project di-vs-bs.jl | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Experiment 5 driver: run the density-improvement and binary-search flow solvers on
# each dataset and print their timing, iteration count and objective value.
include("../header.jl")

# Tolerance forwarded to both solvers through their `flowtol` keyword.
flowtol = 1e-8

for dataset in [
    "ca-AstroPh",
    "ca-HepPh",
    "email-Enron",
    "com-amazon",
    "com-youtube",
]
    preprocess_graph(dataset)
    data = load_graph(dataset)
    A = data["A"]
    @show dataset
    # NOTE(review): presumably (#vertices, #undirected edges) for a symmetric
    # adjacency matrix -- confirm against `load_graph`.
    @show size(A, 1), div(nnz(A), 2)

    # The `@show` expressions are kept verbatim: their text is part of the output.
    di_res = DSG_flow_density_improvement(A; flowtol=flowtol)
    @show di_res["total_dt"], di_res["niter"], di_res["optval"]
    bs_res = DSG_flow_binary_search(A; flowtol=flowtol)
    @show bs_res["total_dt"], bs_res["niter"], bs_res["optval"]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Experiment 6: Comparison with Greedy Peeling on Detecting Planted Dense Structures | ||
Run | ||
``` | ||
julia -p X --project exp-synthetic.jl | ||
``` | ||
where ```X``` is the number of workers you want to spawn, e.g. 10. | ||
|
||
The generated hypergraph data are saved in ```local-DHSG/data/HSBM/```, | ||
the results are saved in ```local-DHSG/results/HSBM/```, and | ||
the plot is saved in ```local-DHSG/figs/HSBM/```. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
""" | ||
make_jobs(params, jobs) | ||
Create jobs to be done in parallel. | ||
Compute the anchored densest subhypergraph for each ϵ and R. | ||
""" | ||
function make_jobs( | ||
params::Vector{Tuple{Ti, Tf, Vector{Ti}, String}}, | ||
jobs::RemoteChannel, | ||
) where {Ti <: Integer, Tf} | ||
for t in params | ||
put!(jobs, t) | ||
end | ||
for i = 1:length(workers()) | ||
put!(jobs, (-1, -1, -1, "")) | ||
end | ||
end | ||
|
||
|
||
""" | ||
do_jobs(jobs, results, H, Ht) | ||
Worker's job function, solving the anchored densest subhypergraph problem. | ||
""" | ||
function do_jobs( | ||
jobs::RemoteChannel, | ||
results::RemoteChannel, | ||
H::SparseMatrixCSC{Tf, Ti}, | ||
Ht::SparseMatrixCSC{Tf, Ti}, | ||
) where {Ti <: Integer, Tf} | ||
while true | ||
# i-th cluster, j-th trial | ||
i, ϵ, R, penaltyType = take!(jobs) | ||
if i == -1 | ||
break | ||
end | ||
type = "global" | ||
if ϵ >= 1.0 | ||
type = "local" | ||
end | ||
if penaltyType in ["fracvol", "vol"] | ||
flow_res = ADHSG_flow_density_improvement(H, Ht, R, ϵ, penaltyType;flowtol=1e-8, type=type) | ||
_, n = size(H) | ||
p = frac_volume_penalty(H, Ht, R, n, ϵ) | ||
greedy_res = greedy_peeling(H, Ht, p) | ||
put!(results, (i, flow_res, greedy_res)) | ||
else | ||
error("For this experiment, we use fracvol/vol penalty only.") | ||
end | ||
end | ||
end | ||
|
||
|
||
""" | ||
bulkeval_ADHSG(H, Ht, ϵ, cluster, Rs, penaltyType) | ||
Solve the anchored densest subhypergraph problem for a bunch of Rs in parallel. | ||
The cluster is the ground truth planted dense structure. | ||
""" | ||
function bulkeval_ADHSG( | ||
H::SparseMatrixCSC{Tf, Ti}, | ||
Ht::SparseMatrixCSC{Tf, Ti}, | ||
ϵ::Tf, | ||
cluster::Vector{Ti}, | ||
Rs::Vector{Vector{Ti}}, | ||
penaltyType::String, | ||
) where {Ti <: Integer, Tf} | ||
ntrial = length(Rs) | ||
jobs = RemoteChannel(() -> Channel{Tuple}(ntrials + length(workers()))) | ||
results = RemoteChannel(() -> Channel{Tuple}(ntrials)) | ||
tasks = Tuple{Ti, Tf, Vector{Ti}, String}[] | ||
for j = 1:length(Rs) | ||
push!(tasks, (j, ϵ, Rs[j], penaltyType)) | ||
end | ||
make_jobs(tasks, jobs) | ||
for p in workers() | ||
remote_do(do_jobs, p, jobs, results, H, Ht) | ||
end | ||
njob = length(tasks) | ||
flow_objs = zeros(Tf, ntrial) | ||
flow_sizes = zeros(Ti, ntrial) | ||
flow_Rprecisions = zeros(Tf, ntrial) | ||
flow_F1scores = zeros(Tf, ntrial) | ||
flow_dts = zeros(Tf, ntrial) | ||
greedy_objs = zeros(Tf, ntrial) | ||
greedy_sizes = zeros(Ti, ntrial) | ||
greedy_Rprecisions = zeros(Tf, ntrial) | ||
greedy_F1scores = zeros(Tf, ntrial) | ||
greedy_dts = zeros(Tf, ntrial) | ||
while njob > 0 | ||
i, flow_res, greedy_res = take!(results) | ||
njob -= 1 | ||
println("$njob left.") | ||
flow_objs[i] = flow_res["optval"] | ||
flow_sizes[i] = length(flow_res["optsol"]) | ||
flow_Rprecisions[i] = precision(flow_res["optsol"], Rs[i]) | ||
flow_F1scores[i] = F1score(cluster, flow_res["optsol"]) | ||
flow_dts[i] = flow_res["total_dt"] | ||
greedy_objs[i] = greedy_res["optval"] | ||
greedy_sizes[i] = length(greedy_res["optsol"]) | ||
greedy_Rprecisions[i] = precision(greedy_res["optsol"], Rs[i]) | ||
greedy_F1scores[i] = F1score(cluster, greedy_res["optsol"]) | ||
greedy_dts[i] = greedy_res["total_dt"] | ||
end | ||
return Dict( | ||
"flow_objs" => flow_objs, | ||
"flow_sizes" => flow_sizes, | ||
"flow_Rprecisions" => flow_Rprecisions, | ||
"flow_F1scores" => flow_F1scores, | ||
"flow_dts" => flow_dts, | ||
"greedy_objs" => greedy_objs, | ||
"greedy_sizes" => greedy_sizes, | ||
"greedy_Rprecisions" => greedy_Rprecisions, | ||
"greedy_F1scores" => greedy_F1scores, | ||
"greedy_dts" => greedy_dts, | ||
) | ||
end | ||
|
||
|
||
""" | ||
plot_ADHSG!(axis, ϵs, result, label, col; yscale = identity) | ||
Plot the F1 score for each ϵ. Show the mean and standard error. | ||
""" | ||
function plot_ADHSG!( | ||
axis, | ||
ϵs::Vector{Tf}, | ||
result, | ||
label, | ||
col; | ||
yscale = identity, | ||
) where{Tf} | ||
result_mean = mean(result, dims=2)[:, 1] | ||
lines!(axis, ϵs, result_mean, label=label, color=(col, 1), yscale=yscale) | ||
n = size(result, 1) | ||
lowcurve = zeros(Tf, n) | ||
upcurve = zeros(Tf, n) | ||
for i = 1:n | ||
stderr = sem(result[i, :]) | ||
low = max(result_mean[i] - stderr, 0.0) | ||
up = min(result_mean[i] + stderr, 1.0) | ||
lowcurve[i] = low | ||
upcurve[i] = up | ||
end | ||
band!(axis, ϵs, upcurve, lowcurve, color = (col, 0.4), yscale=yscale) | ||
scatter!(axis, ϵs, result_mean, color=(col, 1), yscale=yscale) | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# Experiment 6 driver: generate synthetic hypergraphs with planted clusters, run
# the flow-based and greedy solvers on every cluster, save the results, and plot
# the F1 scores.
using Distributed
@everywhere begin
    include("../header.jl")
    include("gen-synthetic.jl")
    include("bulkeval-synthetic.jl")
end

n = 1000
ncluster = 30
m2 = 50000
hep1 = 0.8
hep2 = 0.8
seedratio = 0.05
Rratio = 1.5
ntrials = 10
genType = "RW"

# Values of m1 swept by the experiment. Defined once so the generation stage and
# the plotting stage cannot drift apart.
m1s = 5000:5000:70000

# The choice of epsilon is based on non-exhaustive observations, i.e. we
# observed a small set of hypergraphs and decided which epsilon is best for
# each method.
ϵs = [1.0, 0.3]
penaltyTypes = ["fracvol", "vol"]

# Output locations; created up front so that writing does not fail on a fresh
# checkout.
datadir = homedir()*"/local-DHSG/data/HSBM/"
savepath = homedir()*"/local-DHSG/results/HSBM/"
figdir = homedir()*"/local-DHSG/figs/HSBM/"
foreach(mkpath, (datadir, savepath, figdir))

for m1 in m1s

    # generate the hypergraph and save it
    cluster_vertices, cluster_edges, vlabel = gen_cluster(n, m2, ncluster)
    H_edges = hypergraph_SBM(n, ncluster, vlabel, m1, cluster_edges, hep1, hep2)
    clusters = Vector{Vector{Int64}}()
    for i in 1:ncluster
        push!(clusters, findall(x -> vlabel[x] == i, 1:n))
    end

    H = elist2inc(H_edges, n)
    Ht = sparse(H')
    stats(H, Ht)
    dataset = "HSBM-$n-$ncluster-$m1-$m2-$hep1-$hep2"
    matwrite(
        datadir*dataset*".mat",
        Dict(
            "H" => H,
            "clusters" => clusters,
        ),
    )

    # generate the Rs and save
    Rs_list = Vector{Vector{Int64}}[]
    for C in clusters
        Csz = length(C)
        Rs = Vector{Int64}[]
        for _ in 1:ntrials
            seed = sample(C, round(Int64, seedratio*Csz), replace=false)
            R = generate_R(H, Ht, seed, round(Int64, Rratio*Csz), genType)
            push!(Rs, R)
        end
        push!(Rs_list, Rs)
    end

    matwrite(
        datadir*dataset*"-Rs-$seedratio-$Rratio-$genType.mat",
        Dict(
            "Rs_list" => Rs_list,
        ),
    )

    # Each ϵ is paired with the penalty type at the same position.
    for (ϵ, penaltyType) in zip(ϵs, penaltyTypes)
        for j in 1:ncluster
            res = bulkeval_ADHSG(H, Ht, ϵ, clusters[j], Rs_list[j], penaltyType)
            matwrite(savepath*dataset*"-$penaltyType-ϵ-$ϵ-cluster-$j.mat", res)
        end
    end
end

# Plot
ϵs = [1.0, 0.3, 1.0, 0.3]
penaltyTypes = ["fracvol", "vol", "fracvol", "vol"]
penaltyLabels = ["FracVol", "Vol", "FracVol(Greedy)", "Vol(Greedy)"]
colors = [:red, :blue, :green, :orange]
type_prefix = ["flow_", "flow_", "greedy_", "greedy_"]
fig = Figure(figure_padding=1.5, resolution=(530,265))
axis = Axis(fig[1,1],
    xtickalign=1, ytickalign=1, ygridvisible=false, xgridvisible=false,
    xlabelpadding=10, ylabelpadding=10, ylabel="F1 Score",
    topspinevisible=false, rightspinevisible=false)

# x-coordinates, one per value of m1.
# NOTE(review): these look like m1 / m2 -- confirm before changing either sweep.
ratio = Vector(0.1:0.1:1.4)
length(ratio) == length(m1s) ||
    error("ratio has $(length(ratio)) entries but m1s has $(length(m1s))")

for i in eachindex(colors)
    # One row per m1 value; columns hold all trials of all clusters.
    result = zeros(Float64, length(ratio), ncluster * ntrials)
    penaltyType = penaltyTypes[i]
    penaltyLabel = penaltyLabels[i]
    ϵ = ϵs[i]
    for (j, m1) in enumerate(m1s)
        dataset = "HSBM-$n-$ncluster-$m1-$m2-$hep1-$hep2"
        for clusterid in 1:ncluster
            res = matread(savepath*dataset*"-$penaltyType-ϵ-$ϵ-cluster-$clusterid.mat")
            f1score = res[type_prefix[i]*"F1scores"]
            result[j, (clusterid-1)*ntrials+1:clusterid*ntrials] .= f1score
        end
    end
    plot_ADHSG!(axis, ratio, result, penaltyLabel, colors[i]; yscale=identity)
end
axis.xticks = 0.0:0.25:1.5
axis.yticks = 0.0:0.2:0.8
axislegend(axis, position=(0.02, 0.7), merge = true, unique = true)
save(figdir*"flow_vs_greedy_f1score-comparison.pdf", fig, pt_per_unit=1)
Oops, something went wrong.