data() datairis <- iris ?iris library(dplyr) data1 <- tbl_df(datairis) data1 <- datairis ?select # remove columns data2 <- select(data1, -Species) ?filter # remove rows data3 <- filter(data2, Petal.Length > 6.5) data3 select(data3, Sepal.Length:Petal.Length) select(data3, -(Sepal.Length:Petal.Length)) select(data3, Sepal.Length, Petal.Length) dataTemp <- rbind(data3, c(1, 2, NA, 3)) dataTemp na.omit(dataTemp) # remove all rows with NAs # one way to normalize data scaledata2 <- scale(data2) colMeans(scaledata2) # faster version of apply(scaled.dat, 2, mean) apply(scaledata2, 2, sd) # save data to current working directory # as a text file write.table(scaledata2, "data.txt", sep=" ", row.names = FALSE, col.names = FALSE) # install.packages("TDAmapper") library("TDAmapper") # The fastcluster package is not necessary. By loading the # fastcluster package, the fastcluster::hclust() function # automatically replaces the slower stats::hclust() function # whenever hclust() is called. # install.packages("fastcluster") require(fastcluster) # The igraph package is necessary to view simplicial complexes # (undirected graph) resulting from mapper1D(). library(igraph) P<- select(tbl_df(scaledata2), Petal.Length) # Apply mapper m1 <- mapper1D( distance_matrix = dist(data.frame(scaledata2)), filter_values = P, num_intervals = 10, percent_overlap = 50, num_bins_when_clustering = 10) # create and plot mapper graph g1 <- graph.adjacency(m1$adjacency, mode="undirected") plot(g1, layout = layout.auto(g1) ) ## load package TDA library("TDA") ?ripsDiag ## ripsDiag computes the persistence diagram of the Rips filtration built on ## top of a point cloud. ## First argument = data set (matrix of data points or distances, see dist). ## Second argument = max dimension of the homological features to be computed. ## Third argument = maximum value of the rips filtration (largest threshold). ## Optional arguments: ## dist ## dist = "euclidean" : The euclidean distance is used to calculate ## distances between points in your data set ## dist = "arbitrary" : Assumes your data set is NOT a matrix of data points ## assumes your data set is a matrix of distances ## between your data points. Note only distances ## between data points are used, not the actual ## data points ## library ## library = "GUDHI" : The GUDHI program is used to calculate Rips PD. ## library = "Dionysis" : The Dionysus program is used to calculate Rips PD. ## GUDHI is default unless distance matrix used (ie, dist = "arbitrary"). ## printProgress ## printProgress = TRUE : progress bar printed ## printProgress = FALSE : progress bar not printed ## The output is a list containing the object of class diagram, a P by 3 matrix, ## where P is the number of cycles in the barcode and ## the columns correspond to dimension, birth, death of the cycles. ## To access the diagram in the list, use $diagram maxscale=2 # max radius = 2 maxdimension=2 # to compute H0, H1, H2 Diag=ripsDiag(scaledata2,maxdimension,maxscale, printProgress=TRUE)$diagram Diag # View birth and death times of each cycle ## Plot barcode plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Barcode for data set consisting of 60 points from circle plus 20 points from noise.") ## Plot persistence diagram plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Persistence Diagram for data set consisting of 60 points from circle plus 20 points from noise.") ######################################## ############################################################################ ## The TDA package has several commands for generating data sets ## with known topology: # choose 300 points randomly (with uniform distribution)from a circle of radius 1. Circle = circleUnif(300, r = 1) # choose 300 points randomly (with uniform distribution)from a 2-dimensional # sphere of radius 1. Note the 2nd parameter determines the dimension of the # sphere. # 2nd parameter = 1: 1-d sphere in R^2 = circle in R^2 # 2nd parameter = 2: 2-d sphere in R^3 # 2nd parameter = 3: 3-d sphere in R^4 Sphere = sphereUnif(300, 2, r = 1) # choose 300 points randomly (with uniform distribution)from a torus with tube # radius 1 where the center of the tube is a circle of radius 2 centered at the # origin. Note the center of the tube is not part of the torus. Torus = torusUnif(300, 1, 2) ############################################################################ ## To plot your points in a new window of size 4x5 windows(4,5);plot(torus) ## To save your data points to the csv file, circle.csv ## Note ncolumns = dimension of your data set (unless you also have a column ## containing the names of your rows). write(Circle, file = "circle.csv", ncolumns = 2, sep = ",") # To determine the directory where your file was saved getwd() # Recall you can specify where you would like to save your file, by giving the # path to the directory. For example, write(torus, file = "C:/users/YourUserName/Downloads/torus.csv", ncolumns = 3, sep = ",") ############################################################################ ## We will first generate a data set where we know the topology ## circleUnif(30, r = 1) chooses 30 points randomly (with uniform distribution) ## from a circle of radius 1. ## A set of data points is called a point cloud. ## Thus XX is a point cloud. XX = circleUnif(30, r = 1) # plot XX with title at bottom and aspect ratio = 1 plot (XX, sub = "30 points from circle.", asp = 1) ?ripsDiag ## ripsDiag computes the persistence diagram of the Rips filtration built on ## top of a point cloud. ## First argument = data set (matrix of data points or distances, see dist). ## Second argument = max dimension of the homological features to be computed. ## Third argument = maximum value of the rips filtration (largest threshold). ## Optional arguments: ## dist ## dist = "euclidean" : The euclidean distance is used to calculate ## distances between points in your data set ## dist = "arbitrary" : Assumes your data set is NOT a matrix of data points ## assumes your data set is a matrix of distances ## between your data points. Note only distances ## between data points are used, not the actual ## data points ## library ## library = "GUDHI" : The GUDHI program is used to calculate Rips PD. ## library = "Dionysis" : The Dionysus program is used to calculate Rips PD. ## GUDHI is default unless distance matrix used (ie, dist = "arbitrary"). ## printProgress ## printProgress = TRUE : progress bar printed ## printProgress = FALSE : progress bar not printed ## The output is a list containing the object of class diagram, a P by 3 matrix, ## where P is the number of cycles in the barcode and ## the columns correspond to dimension, birth, death of the cycles. ## To access the diagram in the list, use $diagram maxscale=2.5 # max radius = 2.5 maxdimension=2 # to compute H0, H1, H2 Diag=ripsDiag(XX,maxdimension,maxscale, printProgress=TRUE)$diagram Diag # View birth and death times of each cycle ## Plot barcode plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Barcode for data set consisting of 30 points from circle.") ## Plot persistence diagram plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Persistence Diagram for data set consisting of 30 points from circle.") ## Let's now add noise to our data set: ?runif noise = cbind(runif(20, -2,2),runif(20, -2,2)) plot(noise) noise ## Add 20 more data points from noise to 30 point data set XX ## for a total of 50 data points in XXnoisy XXnoisy=rbind(XX, noise) plot(XXnoisy, asp = 1) # Caculate ripsDiag DiagN=ripsDiag(XXnoisy,maxdimension,maxscale, printProgress=TRUE)$diagram ## Plot barcode plot(DiagN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ## Plot persistence diagram plot(DiagN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ?bottleneck print( bottleneck(Diag, DiagN, dimension=0) ) ? wasserstein print( wasserstein(Diag, DiagN, p=2, dimension=0) ) print( bottleneck(Diag, DiagN, dimension=1) ) print( wasserstein(Diag, DiagN, p=2, dimension=1) ) print( bottleneck(Diag, DiagN, dimension=2) ) print( wasserstein(Diag, DiagN, p=2, dimension=2) ) ## Add even more noise noise=cbind(runif(40, -2,2),runif(40, -2,2)) ## Add 40 more data points from noise to 30 point data set XX ## for a total of 70 data points in XXnoisy XXnoisy2=rbind(XX, noise) plot(XXnoisy2, asp = 1) # Caculate ripsDiag DiagNN=ripsDiag(XXnoisy2,maxdimension,maxscale, printProgress=TRUE)$diagram ## Plot barcode plot(DiagNN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ## Plot persistence diagram plot(DiagNN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ?bottleneck print( bottleneck(Diag, DiagNN, dimension=0) ) ? wasserstein print( wasserstein(Diag, DiagNN, p=2, dimension=0) ) print( bottleneck(Diag, DiagNN, dimension=1) ) print( wasserstein(Diag, DiagNN, p=2, dimension=1) ) print( bottleneck(Diag, DiagNN, dimension=2) ) print( wasserstein(Diag, DiagNN, p=2, dimension=2) ) ############################################################################ ############################################################################ ############################################################################ ## You can write for loops in R ## To get help for some special characters and words, one must use quotes ## NOTE sapply is much more efficient than using for loops. help("for") # Create Noise data set containing 20 pts randomly chosen such that -2 < x,y < 2 Noise =cbind(runif(20, -2,2),runif(20, -2,2)) # Create a list where first element on the list is the Noise data set with 20pts. NoiseList <- list(Noise) # For each i = 2, 3, 4, 5, we create a new data set of noisy points by adding # 20 more points to the previously created noisy data set. for (i in 2:2) { MoreNoise <- cbind(runif(20, -2,2),runif(20, -2,2)) NoiseList[i] <- list(rbind(Noise, MoreNoise)) Noise <- NoiseList[[i]] windows(5,5); plot(Noise, sub = "20i points from noise.", asp = 1) filename <- "Noise.csv" write(Noise, file = filename, ncolumns = 2, sep = ",") } ##############################################################################