# Author: Isabel Darcy
# Date: Jan 29, 2018
# r script used to generate slides
# describing m1 data structure.
# Note changing num_intervals to 10
# results in 6 vertices.

# mapper1D() comes from TDAmapper; the graph construction and layout
# helpers come from igraph.
library(TDAmapper)
library(igraph)

# Toy data set: four points in the plane, (1,1), (2,1), (2,9), (3,9).
data11 <- c(1, 2, 2, 3)
data22 <- c(1, 1, 9, 9)
dd <- t(rbind(data11, data22))
DataSet <- dd
plot(DataSet, asp = 1, pch = 19)

filter <- DataSet[, 1]  # projection to first coordinate.
# You can choose any filter function you like.
# Filter function is a vector with same
# number of entries as your dataset.
# See below for some examples of filter functions.

num_intervals <- 1  # Number of intervals.
# For mapper1D, number of overlapping bins = num_intervals
# For mapper2D, number of overlapping bins = num_intervals^2

percent_overlap <- 10  # The percent that these intervals overlap.

num_bins_when_clustering <- 10  # A parameter for determining clusters.
# We will discuss this parameter later.

# Pairwise Euclidean distances between the rows of DataSet.
# The mapper1D() call below needs this object, which was previously
# never defined in the script.
distance_matrix <- dist(DataSet)

# Call the mapper1D to apply TDAmapper to dataset
m1 <- mapper1D(
  distance_matrix,
  filter,
  num_intervals,
  percent_overlap,
  num_bins_when_clustering
)

# create and plot mapper graph
# (graph_from_adjacency_matrix / layout_nicely are the current igraph names
# for the deprecated graph.adjacency / layout.auto.)
g1 <- graph_from_adjacency_matrix(m1$adjacency, mode = "undirected")
plot(g1, layout = layout_nicely(g1))
title(
  "Filter = projection to x-axis",
  sub = paste(
    "num_intervals = ", num_intervals,
    "\n percent_overlap = ", percent_overlap,
    "\n num_bins_when_clustering =", num_bins_when_clustering
  )
)

# hierarchical clustering of middle bin
# (data11, data22 and dd are deliberately overwritten here; DataSet keeps
# the original four points.)
data11 <- c(2, 2)
data22 <- c(1, 9)
dd <- t(rbind(data11, data22))
hcbin <- hclust(dist(dd), method = "single")
hcbin$height

# hierarchical clustering of DataSet
hc <- hclust(dist(DataSet), method = "single")
heights <- hc$height

# Largest pairwise distance in DataSet: between (1, 1) and (3, 9),
# i.e. sqrt(2^2 + 8^2).
diam <- sqrt(4 + 64)

# Histogram breaks: num_bins_when_clustering equal-width bins from 1 to diam.
bin_breaks <- seq(
  from = 1,
  to = diam,
  by = (diam - 1) / num_bins_when_clustering
)
bin_breaks
if (length(bin_breaks) == 1) {
  bin_breaks <- 1
}

# Histogram of the merge heights together with the diameter.
c(heights, diam)
myhist <- hist(c(heights, diam), breaks = bin_breaks, plot = TRUE)

# TRUE for each histogram bin that contains no value.
z <- (myhist$counts == 0)
z
sum(z)
myhist$mids