cluster_cutoff_at_first_empty_binNEW <-
function(heights, diam, num_bins_when_clustering) {
# if there are only two points (one height value), then we have a single cluster
if (length(heights) == 1) {
if (heights == diam) {
cutoff <- Inf
return(cutoff)
}
}
bin_breaks <- seq(
from = min(heights),
to = diam,
by = (diam - min(heights)) / num_bins_when_clustering
)
if (length(bin_breaks) == 1) {
bin_breaks <- 1
}
myhist <-
hist(c(heights, diam), breaks = bin_breaks, plot = FALSE)
z <- (myhist$counts == 0)
if (sum(z) == 0) {
cutoff <- Inf
return(cutoff)
} else {
# which returns the indices of the logical vector (z == TRUE), min gives the smallest index
cutoff <- myhist$mids[min(which(z == TRUE))]
return(cutoff)
}
}
mapper1Dnew3 <- function(distance_matrix = dist(data.frame(x = 2 * cos(0.5 * (1:100)), y = sin(1:100))),
filter_values = 2 * cos(0.5 * (1:100)),
num_intervals = 10,
percent_overlap = 50,
hclustering = "single",
num_bins_when_clustering = 10) {
# initialize variables
vertex_index <- 0
# indexed from 1 to the number of vertices
level_of_vertex <- c()
points_in_vertex <- list()
# filter_values_in_vertex <- list() # just use filter_values[points_in_vertex[[i]]]
# indexed from 1 to the number of levels
points_in_level <- list()
vertices_in_level <- list()
# filter_values_in_level <- list() # just use filter_values[points_in_level[[i]]]
filter_min <- min(filter_values)
filter_max <- max(filter_values)
interval_length <-
(filter_max - filter_min) / (num_intervals - (num_intervals - 1) * percent_overlap /
100)
step_size <- interval_length * (1 - percent_overlap / 100)
level <- 1
# begin mapper main loop
for (level in 1:num_intervals) {
min_value_in_level <- filter_min + (level - 1) * step_size
max_value_in_level <- min_value_in_level + interval_length
# use & (not &&) for elementwise comparison
# create a logical vector of indices
points_in_level_logical <-
(min_value_in_level <= filter_values) &
(filter_values <= max_value_in_level)
num_points_in_level <- sum(points_in_level_logical)
points_in_level[[level]] <- which(points_in_level_logical == TRUE)
if (num_points_in_level == 0) {
print('Level set is empty')
next
}
if (num_points_in_level == 1) {
print('Level set has only one point')
num_vertices_in_level <- 1
cluster_indices_within_level <- c(1)
}
if (num_points_in_level > 1) {
# use as.matrix() to put the distance matrix in square form,
# and as.dist() to put it in vector form
# This could probably use some optimization...
level_distance_matrix <-
as.dist(as.matrix(distance_matrix)[points_in_level_logical, points_in_level_logical])
level_max_distance <- max(level_distance_matrix)
# use R's hclust (provided by stats or fastcluster)
# in place of Matlab's linkage function.
level_hcluster_ouput <-
hclust(level_distance_matrix, method = hclustering)
heights <- level_hcluster_ouput$height
cutoff <-
cluster_cutoff_at_first_empty_binNEW(heights, level_max_distance, num_bins_when_clustering)
# use as.vector() to get rid fo the names for the vector entries
cluster_indices_within_level <-
as.vector(cutree(level_hcluster_ouput, h = cutoff))
num_vertices_in_level <- max(cluster_indices_within_level)
# points_in_level[[level]] and cluster_indices_within_level have the same length.
# heights has length 1 less than points_in_level[[level]] and cluster_indices_within_level
# print(heights)
# print(points_in_level[[level]])
# print(cluster_indices_within_level)
}
vertices_in_level[[level]] <-
vertex_index + (1:num_vertices_in_level)
for (j in 1:num_vertices_in_level) {
vertex_index <- vertex_index + 1
# points_in_level_logical is a logical vector, so use which(points_in_level_logical==TRUE) to convert it to a numerical vector of indices
#nodeset <- which(points_in_level_logical==TRUE)[cluster_indices_within_level == j]
level_of_vertex[vertex_index] <- level
points_in_vertex[[vertex_index]] <-
which(points_in_level_logical == TRUE)[cluster_indices_within_level == j]
#points_in_vertex[[vertex_index]] <- nodeset
#filter_values_in_vertex[[vertex_index]] <- filter_values[nodeset]
}
} # end mapper main loop
# Note: num_vertices = vertex index.
# Create the adjacency matrix for the graph, starting with a matrix of zeros
adja <- mat.or.vec(vertex_index, vertex_index)
# for (j in 2:length(vertices_in_level)) {
for (j in 2:num_intervals) {
# check that both level sets are nonemtpy
if ((length(vertices_in_level[[j - 1]]) > 0) &
(length(vertices_in_level[[j]]) > 0)) {
for (v1 in vertices_in_level[[j - 1]]) {
for (v2 in vertices_in_level[[j]]) {
# return 1 if the intersection is nonempty
adja[v1, v2] <- (length(intersect(
points_in_vertex[[v1]],
points_in_vertex[[v2]]
)) > 0)
adja[v2, v1] <- adja[v1, v2]
}
}
}
} # end constructing adjacency matrix
mapperoutput <- list(
adjacency = adja,
num_vertices = vertex_index,
level_of_vertex = level_of_vertex,
points_in_vertex = points_in_vertex,
#filter_values_in_vertex = filter_values_in_vertex,
points_in_level = points_in_level,
vertices_in_level = vertices_in_level
)
class(mapperoutput) <- "TDAmapper"
return(mapperoutput)
}