<>= ################################################## #### Create data set ############################# ################################################## #set.seed(43) x <- rnorm(1000) y <- rnorm(1000) mydata <- data.frame(x,y) ################################################## numInt1 <- c(1) # Defining PCA for filter function pca <- prcomp(mydata) # pca as a fulter function pca1 <- (as.matrix(mydata) %*% pca$rotation)[,1] #PCA 1 pca2 <- (as.matrix(mydata) %*% pca$rotation)[,2] #PCA 2 # Calculate length vecNorm<- apply(mydata, 1, function(x){sqrt(sum(x^2))}) # run mapper over the following parameters filter <- list(pca1, pca2, mydata[,1], mydata[,2], vecNorm ) filter_choice <- list( "pca1", "pca2", "proj1", "proj2", "norm") numInt2 <- c(5, 7, 10, 20, 30) percent_overlap <- c(50) hierclustering <- c("single") clust <- c(10) # Calculate distance matrix d <- dist(mydata, method = 'euclidean') # Defining eccentricity for filter function d_mat <- as.matrix(d) ecc <- c() n <- length(mydata[,1]) for (i in 1:n) { ecc <- append(ecc, (1/n)^(1/2) * sum( (d_mat[,i] * d_mat[,i])^2 )^(1/2) ) } numInt <- append(numInt1, numInt2) ################################################## ################################################## options(error=function() dump.frames(to.file=TRUE)) #' @author Paul Pearson, \email{pearsonp@@hope.edu} # slightly modified by Isabel Darcy #' @references \url{https://github.com/paultpearson/TDAmapper} #' @seealso \code{\link{mapper1D}}, \code{\link{mapper2D}} #' cluster_cutoff_at_first_empty_binNEW <- function(heights, diam, num_bins_when_clustering) { # if there are only two points (one height value), then we have a single cluster if (length(heights) == 1) { if (heights == diam) { cutoff <- Inf return(cutoff) } } bin_breaks <- seq( from = min(heights), to = diam, by = (diam - min(heights)) / num_bins_when_clustering ) if (length(bin_breaks) == 1) { bin_breaks <- 1 } myhist <- hist(c(heights, diam), breaks = bin_breaks, plot = FALSE) z <- (myhist$counts == 0) if (sum(z) == 0) { cutoff <- Inf return(cutoff) } else { # which returns the indices of the logical vector (z == TRUE), min gives the smallest index cutoff <- myhist$mids[min(which(z == TRUE))] return(cutoff) } } mapper1Dnew3 <- function(distance_matrix = dist(data.frame(x = 2 * cos(0.5 * (1:100)), y = sin(1:100))), filter_values = 2 * cos(0.5 * (1:100)), num_intervals = 10, percent_overlap = 50, hclustering = "single", num_bins_when_clustering = 10) { # initialize variables vertex_index <- 0 # indexed from 1 to the number of vertices level_of_vertex <- c() points_in_vertex <- list() # filter_values_in_vertex <- list() # just use filter_values[points_in_vertex[[i]]] # indexed from 1 to the number of levels points_in_level <- list() vertices_in_level <- list() # filter_values_in_level <- list() # just use filter_values[points_in_level[[i]]] filter_min <- min(filter_values) filter_max <- max(filter_values) interval_length <- (filter_max - filter_min) / (num_intervals - (num_intervals - 1) * percent_overlap / 100) step_size <- interval_length * (1 - percent_overlap / 100) level <- 1 # begin mapper main loop for (level in 1:num_intervals) { min_value_in_level <- filter_min + (level - 1) * step_size max_value_in_level <- min_value_in_level + interval_length # use & (not &&) for elementwise comparison # create a logical vector of indices points_in_level_logical <- (min_value_in_level <= filter_values) & (filter_values <= max_value_in_level) num_points_in_level <- sum(points_in_level_logical) points_in_level[[level]] <- which(points_in_level_logical == TRUE) if (num_points_in_level == 0) { print('Level set is empty') next } if (num_points_in_level == 1) { print('Level set has only one point') num_vertices_in_level <- 1 cluster_indices_within_level <- c(1) } if (num_points_in_level > 1) { # use as.matrix() to put the distance matrix in square form, # and as.dist() to put it in vector form # This could probably use some optimization... level_distance_matrix <- as.dist(as.matrix(distance_matrix)[points_in_level_logical, points_in_level_logical]) level_max_distance <- max(level_distance_matrix) # use R's hclust (provided by stats or fastcluster) # in place of Matlab's linkage function. level_hcluster_ouput <- hclust(level_distance_matrix, method = hclustering) heights <- level_hcluster_ouput$height cutoff <- cluster_cutoff_at_first_empty_binNEW(heights, level_max_distance, num_bins_when_clustering) # use as.vector() to get rid fo the names for the vector entries cluster_indices_within_level <- as.vector(cutree(level_hcluster_ouput, h = cutoff)) num_vertices_in_level <- max(cluster_indices_within_level) # points_in_level[[level]] and cluster_indices_within_level have the same length. # heights has length 1 less than points_in_level[[level]] and cluster_indices_within_level # print(heights) # print(points_in_level[[level]]) # print(cluster_indices_within_level) } vertices_in_level[[level]] <- vertex_index + (1:num_vertices_in_level) for (j in 1:num_vertices_in_level) { vertex_index <- vertex_index + 1 # points_in_level_logical is a logical vector, so use which(points_in_level_logical==TRUE) to convert it to a numerical vector of indices #nodeset <- which(points_in_level_logical==TRUE)[cluster_indices_within_level == j] level_of_vertex[vertex_index] <- level points_in_vertex[[vertex_index]] <- which(points_in_level_logical == TRUE)[cluster_indices_within_level == j] #points_in_vertex[[vertex_index]] <- nodeset #filter_values_in_vertex[[vertex_index]] <- filter_values[nodeset] } } # end mapper main loop # Note: num_vertices = vertex index. # Create the adjacency matrix for the graph, starting with a matrix of zeros adja <- mat.or.vec(vertex_index, vertex_index) # for (j in 2:length(vertices_in_level)) { for (j in 2:num_intervals) { # check that both level sets are nonemtpy if ((length(vertices_in_level[[j - 1]]) > 0) & (length(vertices_in_level[[j]]) > 0)) { for (v1 in vertices_in_level[[j - 1]]) { for (v2 in vertices_in_level[[j]]) { # return 1 if the intersection is nonempty adja[v1, v2] <- (length(intersect( points_in_vertex[[v1]], points_in_vertex[[v2]] )) > 0) adja[v2, v1] <- adja[v1, v2] } } } } # end constructing adjacency matrix mapperoutput <- list( adjacency = adja, num_vertices = vertex_index, level_of_vertex = level_of_vertex, points_in_vertex = points_in_vertex, #filter_values_in_vertex = filter_values_in_vertex, points_in_level = points_in_level, vertices_in_level = vertices_in_level ) class(mapperoutput) <- "TDAmapper" return(mapperoutput) } ################################################## @ %{r echo=FALSE} opts_chunk$set(echo=FALSE) \documentclass{beamer} \usepackage{graphicx} \usepackage{pdfpages} \usepackage{lipsum} \begin{document} %\SweaveOpts{concordance=TRUE} \title{your title} \author{your name} %\maketitle % very important to use option [fragile] for frames containing code output! % Sweave2knitr <>= library(igraph) mapOut = list() ii <- 1 for (hh in 1:length(filter)){ for (cc in 1:length(hierclustering)){ # for (ii in 1:length(clust)){ for (kk in 1:length(percent_overlap)){ for (jj in 1:length(numInt)) { if (jj == 1){} else{ m <- mapper1Dnew3(distance_matrix = d, filter_values = unlist(filter[hh]), num_intervals = numInt[jj], percent_overlap = percent_overlap[kk], hclustering = hierclustering[cc], num_bins_when_clustering = clust[ii]) g <- graph.adjacency(m\$adjacency, mode = "undirected") mapOut[[jj -1 + (ii - 1)*(length(numInt)-1) + (hh-1)*(length(numInt)-1)*(length(percent_overlap)) + (hh-1)*(length(numInt)-1)*(length(percent_overlap)) *(length(clust))]] <- g } } } } } @ <>= for (hh in 1:length(filter)){ for (cc in 1:length(hierclustering)){ # for (ii in 1:length(clust)){ for (kk in 1:length(percent_overlap)){ cat("\\begin{frame}\n") cat("\\begin{columns}\n") cat("\\centering\n") cat("\\column{.25\\textwidth}\n") cat("\\begin{figure}\n") plot(x,y) title(paste0("filter=", filter_choice[hh]), cex.main=5, sub=paste("hclust = ", hierclustering[cc]), cex.sub=3.5) cat("\\end{figure}\n\n") for (jj in 2:3) { cat("\\column{.25\\textwidth}\n") cat("\\begin{figure}") MM <- jj -1 + (ii - 1)*(length(numInt)-1) + (hh-1)*(length(numInt)-1)*(length(clust)) plot(mapOut[[MM]], layout = layout.auto(mapOut[[MM]])) title(paste("~\n", numInt[jj], "intervals, ", percent_overlap[kk],"% overlap\n"), cex.main=3, sub=paste("clust = ",clust[ii]), cex.sub=3.5) cat("\\end{figure}\n") } cat("\\end{columns}\n\n") cat("\\begin{columns}\n") cat("\\centering\n") for (jj in 4:6) { cat("\\column{.25\\textwidth}\n") cat("\\begin{figure}") MM <- jj -1 + (ii - 1)*(length(numInt)-1) + (hh-1)*(length(numInt)-1)*(length(clust)) plot(mapOut[[MM]], layout = layout.auto(mapOut[[MM]])) title(paste("~\n", numInt[jj], "intervals, ", percent_overlap[kk],"% overlap\n"), cex.main=3, sub=paste("clust = ",clust[ii]), cex.sub=3.5) cat("\\end{figure}\n") } cat("\\end{columns}\n") cat("\\end{frame}\n\n") } } } @ \end{document}