Skip to content

Commit

Permalink
Merge pull request #44 from katelynqueen98/master
Browse files Browse the repository at this point in the history
Fix bug when first cluster is of size one
  • Loading branch information
malcolmbarrett authored Nov 10, 2024
2 parents 4c3b323 + d14c3f1 commit bf5863a
Showing 1 changed file with 32 additions and 7 deletions.
39 changes: 32 additions & 7 deletions R/super_partition.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,31 @@ super_partition <- function(full_data,
}

# ensure 0 < threshold < 1
if (0 > threshold | 1 < threshold) stop("Threshold must be between 0 and 1.")
if (0 > threshold | 1 < threshold) {
stop("Threshold must be between 0 and 1.")
}

# ensure no column names contain x
if (any(grepl(x, colnames(full_data)))) stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
if (any(grepl(x, colnames(full_data)))) {
stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
}

# ensure data frame structure
full_data <- as.data.frame(full_data)

# if < cluster_size features, call regular partition
if (ncol(full_data) < cluster_size) {
message(paste0("Using `partition()` since there are < ", cluster_size, "features."))
return(partition(full_data, threshold = threshold))
prt <- partition(
full_data,
threshold = threshold,
partitioner = partitioner,
tolerance = tolerance,
niter = niter,
x = x,
.sep = .sep
)
return(prt)
}

# iteration counters
Expand Down Expand Up @@ -183,18 +196,27 @@ super_partition <- function(full_data,
)
}

## first cluster
# if no dimension reduction, use partition instead
if (length(unique(master_cluster$cluster)) == ncol(full_data)) {
if (verbose) message("No dimension reduction occured using Super Partition. Using Partition instead.")
return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
}

## first cluster - always use largest cluster
clust_sizes <- as.data.frame(table(master_cluster$cluster))
first_clust <- which(unique(master_cluster$cluster) == clust_sizes[which.max(clust_sizes$Freq), 1])

# get initial partition to build off
part_master <- partition(
full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
threshold, partitioner, tolerance, niter, x, .sep
)

# update indices for each module
mod_rows <- grep(x, part_master$mapping_key$variable)
part_master$mapping_key$indices <- full_data_col_numbers(
full_data = full_data,
small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
modules = part_master$mapping_key$indices
)

Expand All @@ -208,7 +230,10 @@ super_partition <- function(full_data,
if (progress_bar) pb$tick()

# for each cluster...
for (i in 2:n_iter) {
for (i in seq_len(n_iter)) {
# skip if first cluster
if (i == first_clust) next()

# what to do if cluster is of size one
if (sum(master_cluster$cluster == unique(master_cluster$cluster)[i]) == 1) {
# cbind data to master partition reduced data
Expand Down

0 comments on commit bf5863a

Please sign in to comment.