## NOTE: As part of your project, you should create well-documented R file(s). ## As part of your project, you can illustrate that TDA works by analyzing ## data sets whose topology are known. ## Optional: Your project could include (or focus on) creating a swirl project(s). ########################################################################### ## To install the pre-compiled package TDA for PC and possibly linux. ## Note a pre-compiled version is not available for the Mac. install.packages("TDA") ## To install and compile the package TDA. ## Note you need to have compilers installed on your computer. install.packages("TDA", type = "source") ############################################################################ ## load package TDA library("TDA") ############################################################################ ## The TDA package has several commands for generating data sets ## with known topology: # choose 300 points randomly (with uniform distribution)from a circle of radius 1. Circle = circleUnif(300, r = 1) # choose 300 points randomly (with uniform distribution)from a 2-dimensional # sphere of radius 1. Note the 2nd parameter determines the dimension of the # sphere. # 2nd parameter = 1: 1-d sphere in R^2 = circle in R^2 # 2nd parameter = 2: 2-d sphere in R^3 # 2nd parameter = 3: 3-d sphere in R^4 Sphere = sphereUnif(300, 2, r = 1) # choose 300 points randomly (with uniform distribution)from a torus with tube # radius 1 where the center of the tube is a circle of radius 2 centered at the # origin. Note the center of the tube is not part of the torus. Torus = torusUnif(300, 1, 2) ############################################################################ ## To plot your points in a new window of size 4x5 windows(4,5);plot(torus) ## To save your data points to the csv file, circle.csv ## Note ncolumns = dimension of your data set (unless you also have a column ## containing the names of your rows). write(Circle, file = "circle.csv", ncolumns = 2, sep = ",") # Recall you can specify where you would like to save your file, by giving the # path to the directory. For example, write(torus, file = "C:/users/YourUserName/Downloads/torus.csv", ncolumns = 3, sep = ",") # To determine the directory where your file was saved getwd() ############################################################################ ## We will first generate a data set where we know the topology ## circleUnif(30, r = 1) chooses 30 points randomly (with uniform distribution) ## from a circle of radius 1. ## A set of data points is called a point cloud. ## Thus XX is a point cloud. XX = circleUnif(30, r = 1) # plot XX with title at bottom and aspect ratio = 1 plot (XX, sub = "60 points from circle plus 20 points from noise.", asp = 1) ?ripsDiag ## ripsDiag computes the persistence diagram of the Rips filtration built on ## top of a point cloud. ## First argument = data set (matrix of data points or distances, see dist). ## Second argument = max dimension of the homological features to be computed. ## Third argument = maximum value of the rips filtration (largest threshold). ## Optional arguments: ## dist ## dist = "euclidean" : The euclidean distance is used to calculate ## distances between points in your data set ## dist = "arbitrary" : Assumes your data set is NOT a matrix of data points ## assumes your data set is a matrix of distances ## between your data points. Note only distances ## between data points are used, not the actual ## data points ## library ## library = "GUDHI" : The GUDHI program is used to calculate Rips PD. ## library = "Dionysis" : The Dionysus program is used to calculate Rips PD. ## GUDHI is default unless distance matrix used (ie, dist = "arbitrary"). ## printProgress ## printProgress = TRUE : progress bar printed ## printProgress = FALSE : progress bar not printed ## The output is a list containing the object of class diagram, a P by 3 matrix, ## where P is the number of cycles in the barcode and ## the columns correspond to dimension, birth, death of the cycles. ## To access the diagram in the list, use $diagram maxscale=2.5 # max radius = 2.5 maxdimension=2 # to compute H0, H1, H2 Diag=ripsDiag(XX,maxdimension,maxscale, printProgress=TRUE)$diagram Diag # View birth and death times of each cycle ## Plot barcode plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Barcode for data set consisting of 60 points from circle plus 20 points from noise.") ## Plot persistence diagram plot(Diag, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE, sub = "Persistence Diagram for data set consisting of 60 points from circle plus 20 points from noise.") ## Let's now add noise to our data set: ?runif noise = cbind(runif(20, -2,2),runif(20, -2,2)) plot(noise) noise ## Add 20 more data points from noise to 30 point data set XX ## for a total of 50 data points in XXnoisy XXnoisy=rbind(XX, noise) plot(XXnoisy, asp = 1) # Caculate ripsDiag DiagN=ripsDiag(XXnoisy,maxdimension,maxscale, printProgress=TRUE)$diagram ## Plot barcode plot(DiagN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ## Plot persistence diagram plot(DiagN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ?bottleneck print( bottleneck(Diag, DiagN, dimension=0) ) ? wasserstein print( wasserstein(Diag, DiagN, p=2, dimension=0) ) print( bottleneck(Diag, DiagN, dimension=1) ) print( wasserstein(Diag, DiagN, p=2, dimension=1) ) print( bottleneck(Diag, DiagN, dimension=2) ) print( wasserstein(Diag, DiagN, p=2, dimension=2) ) ## Add even more noise noise=cbind(runif(40, -2,2),runif(40, -2,2)) ## Add 40 more data points from noise to 30 point data set XX ## for a total of 70 data points in XXnoisy XXnoisy2=rbind(XX, noise) plot(XXnoisy2, asp = 1) # Caculate ripsDiag DiagNN=ripsDiag(XXnoisy2,maxdimension,maxscale, printProgress=TRUE)$diagram ## Plot barcode plot(DiagNN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=TRUE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ## Plot persistence diagram plot(DiagNN, diagLim=NULL, dimension=NULL, col=NULL, rotated=FALSE, barcode=FALSE, band=NULL, lab.line=2.2, colorBand="pink", colorBorder=NA, add = FALSE) ?bottleneck print( bottleneck(Diag, DiagNN, dimension=0) ) ? wasserstein print( wasserstein(Diag, DiagNN, p=2, dimension=0) ) print( bottleneck(Diag, DiagNN, dimension=1) ) print( wasserstein(Diag, DiagNN, p=2, dimension=1) ) print( bottleneck(Diag, DiagNN, dimension=2) ) print( wasserstein(Diag, DiagNN, p=2, dimension=2) ) ############################################################################ ############################################################################ ############################################################################ ## You can write for loops in R ## To get help for some special characters and words, one must use quotes help("for") # Create Noise data set containing 20 pts randomly chosen such that -2 < x,y < 2 Noise =cbind(runif(20, -2,2),runif(20, -2,2)) # Create a list where first element on the list is the Noise data set with 20pts. NoiseList <- list(Noise) # For each i = 2, 3, 4, 5, we create a new data set of noisy points by adding # 20 more points to the previously created noisy data set. for (i in 2:2) { MoreNoise <- cbind(runif(20, -2,2),runif(20, -2,2)) NoiseList[i] <- list(rbind(Noise, MoreNoise)) Noise <- NoiseList[[i]] windows(5,5); plot(Noise, sub = "20i points from noise.", asp = 1) filename <- "Noise.csv" write(Noise, file = filename, ncolumns = 2, sep = ",") } ############################################################################## # Note you can also use Sweave to write part of your paper (compatible with latex). ?Sweave ######################################################## QUERY REPLACE plot with windows(5,5);plot