# Author: Isabel Darcy
# Create a variety of artificial data sets
# Date: Jan 15, 2018
# Note TDAmapper README contains 3 examples: figure 8, oval, 2 intertwined
# spirals.
# For the TDAmapper README, see
# https://cran.r-project.org/web/packages/TDAmapper/README.html

#####################################################################################
# Create data set with 7 flares.
# Noise5 is a uniformly sampled square around the origin; Noise1 through Noise4
# are thin, uniformly sampled strips that extend beyond that square.

Noise1 <- cbind(runif(200, -30, 30), runif(200, 5, 6))
Noise2 <- cbind(runif(200, -6, 30), runif(200, -6, -5))
Noise3 <- cbind(runif(200, -6, -5), runif(200, -20, 20))
Noise4 <- cbind(runif(200, 5, 6), runif(200, -20, 20))
Noise5 <- cbind(runif(200, -6, 6), runif(200, -6, 6))

# Stack the five pieces into a single 1000 x 2 matrix of points.
flares <- rbind(Noise1, Noise2, Noise3, Noise4, Noise5)

plot(flares, asp = 1)

## Apply a linear transformation to flares data

# Create 2x2 matrix (matrix() fills column by column).
A <- matrix(c(1, 2, 2, 0), nrow = 2, ncol = 2)

C <- t(flares)  # take the transpose of flares data (one point per column)
M <- A %*% C    # multiply A and C
E <- eigen(A)   # calculate eigenvalues and eigenvectors of A

plot(t(M), asp = 1)  # note M is the image of flares under map A

##############################################################
## You can write for loops in R
## Note you can speed up a for loop by vectorizing it.

## To get help for some special characters and words, one must use quotes
help("for")

# Create Noise data set containing 20 pts randomly chosen such that
# -2 < x,y < 2
Noise <- cbind(runif(20, -2, 2), runif(20, -2, 2))

# Create a list where first element on the list is the Noise data set with
# 20pts.
NoiseList <- list(Noise)

# For each i = 2, 3, 4, 5, we create a new data set of noisy points by adding
# 20 more points to the previously created noisy data set.
# Start at 2 so that NoiseList[[1]] keeps the original 20-point data set;
# after pass i the current data set holds 20 * i points.
for (i in 2:5) {
  MoreNoise <- cbind(runif(20, -2, 2), runif(20, -2, 2))
  NoiseList[[i]] <- rbind(Noise, MoreNoise)
  Noise <- NoiseList[[i]]

  # dev.new() opens a fresh graphics device on any platform
  # (windows() is only available on Windows).
  dev.new(width = 5, height = 5)
  plot(Noise, sub = paste(20 * i, "points from noise."), asp = 1)

  # NOTE: the same file name is used on every pass, so Noise.csv is
  # overwritten and ends up holding the last data set written.
  filename <- "Noise.csv"
  # write() emits values in column-major order; transpose so that each line
  # of the file is one (x, y) point.
  write(t(Noise), file = filename, ncolumns = 2, sep = ",")
}

##############################################################
# Load trefoil knot data
# This data was created using the software KnotPlot.
# KnotPlot is available at KnotPlot.com

knotdata <- read.csv("../Data/trefoilknot.txt", sep = " ", header = FALSE)

# You may need to change the path or working directory to load
# trefoilknot.txt
# Note ../ moves up a directory
# getwd()
# setwd("put path here")

# for 3d plot:
if (!require(package = "scatterplot3d")) {
  install.packages(pkgs = "scatterplot3d")
}
library("scatterplot3d")

# col.grid sets the colour of the grid drawn on the base of the plot.
scatterplot3d(knotdata, highlight.3d = TRUE, col.axis = "blue",
              col.grid = "lightblue", main = "Title", pch = 20)

# for 3d plot that you can rotate:
if (!require(package = "rgl")) {
  install.packages(pkgs = "rgl")
}
library("rgl")

plot3d(knotdata, col = rainbow(1000))

##############################################################
## The TDA package has several commands for generating data sets
## with known topology.

if (!require(package = "TDA")) {
  install.packages(pkgs = "TDA")
}
library("TDA")

# choose 300 points randomly (with uniform distribution) from a circle of
# radius 1.
Circle <- circleUnif(300, r = 1)

# choose 300 points randomly (with uniform distribution) from a 2-dimensional
# sphere of radius 1. Note the 2nd parameter determines the dimension of the
# sphere.
# 2nd parameter = 1: 1-d sphere in R^2 = circle in R^2
# 2nd parameter = 2: 2-d sphere in R^3
# 2nd parameter = 3: 3-d sphere in R^4
Sphere <- sphereUnif(300, 2, r = 1)

# choose 300 points randomly (with uniform distribution) from a torus with tube
# radius 1 where the center of the tube is a circle of radius 2 centered at the
# origin. Note the center of the tube is not part of the torus.
Torus <- torusUnif(300, 1, 2)

############################################################################
## To save your data points to the csv file, circle.csv
## Note ncolumns = dimension of your data set (unless you also have a column
## containing the names of your rows).
## write() emits values in column-major order, so the matrix is transposed
## first; that puts one point on each line of the file.

write(t(Circle), file = "circle.csv", ncolumns = 2, sep = ",")

# To determine the directory where your file was saved
getwd()

# Recall you can specify where you would like to save your file, by giving the
# path to the directory. For example (edit the placeholder path below so that
# it points at a directory that exists on your machine),

write(t(Torus), file = "C:/users/YourUserName/Downloads/torus.csv",
      ncolumns = 3, sep = ",")

##############################################################
# Downloading and cleaning a dataset from R

data()  # list the available data sets
?iris

library(dplyr)

data1 <- iris

?select  # remove columns
data2 <- select(data1, -Species)

# ?filter # remove rows
# data3 <- filter(data2, Petal.Length > 6.5)
# data3

# select(data3, Sepal.Length:Petal.Length)
# select(data3, -(Sepal.Length:Petal.Length))
# select(data3, Sepal.Length, Petal.Length)

# dataTemp <- rbind(data3, c(1, 2, NA, 3))
# dataTemp
# na.omit(dataTemp) # remove all rows with NAs

# one way to normalize data
scaledata2 <- scale(data2)

colMeans(scaledata2)  # faster version of apply(scaledata2, 2, mean)
apply(scaledata2, 2, sd)

# save data to current working directory
# as a text file
write.table(scaledata2, "data.txt", sep = " ",
            row.names = FALSE, col.names = FALSE)

################################################
## Download data from web

footballdata <- read.csv("http://www.repole.com/sun4cast/stats/cfb20140906.csv")

# NOTE: this variable shares its name with the base function data(); R still
# resolves the call data() to the function, but the name is easy to misread.
data <- cbind(footballdata$ScoreOff, footballdata$RushAttOff)
plot(data, asp = 1)

## digits ##
digits <- read.table(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes",
  sep = ","
)
# Drop column 65 and keep the first 64 columns.
digits64 <- select(digits, -65)

######### WARNING #########
## you should clean your data before analyzing it