# Author:  Isabel Darcy 
# Create a variety of artificial data sets
# Date:  Jan 15, 2018

# Note TDAmapper README contains 3 examples: figure 8, oval, 2 intertwined spirals
# for TDAmapper README, see https://cran.r-project.org/web/packages/TDAmapper/README.html
#####################################################################################

# Build a planar point cloud with 7 flares.
# Every piece is a 200 x 2 matrix (column 1 = x, column 2 = y) whose points are
# drawn uniformly from an axis-aligned rectangle.

# Horizontal strip -30 <= x <= 30, 5 <= y <= 6
Noise1 <- matrix(
  c(runif(n = 200, min = -30, max = 30),
    runif(n = 200, min = 5, max = 6)),
  ncol = 2
)

# Horizontal strip -6 <= x <= 30, -6 <= y <= -5
Noise2 <- matrix(
  c(runif(n = 200, min = -6, max = 30),
    runif(n = 200, min = -6, max = -5)),
  ncol = 2
)

# Vertical strip -6 <= x <= -5, -20 <= y <= 20
Noise3 <- matrix(
  c(runif(n = 200, min = -6, max = -5),
    runif(n = 200, min = -20, max = 20)),
  ncol = 2
)

# Vertical strip 5 <= x <= 6, -20 <= y <= 20
Noise4 <- matrix(
  c(runif(n = 200, min = 5, max = 6),
    runif(n = 200, min = -20, max = 20)),
  ncol = 2
)

# Central square -6 <= x, y <= 6 that the strips stick out of
Noise5 <- matrix(
  c(runif(n = 200, min = -6, max = 6),
    runif(n = 200, min = -6, max = 6)),
  ncol = 2
)

# Stack the five pieces into one 1000 x 2 data set and look at it
flares <- do.call(rbind, list(Noise1, Noise2, Noise3, Noise4, Noise5))
plot(flares, asp = 1)


## Push the flares data through a linear map

# The 2x2 matrix of the map, written out row by row
A <- rbind(c(1, 2),
           c(2, 0))

E <- eigen(A)    # eigenvalues and eigenvectors of A

C <- t(flares)   # transpose of flares: one column per data point
M <- A %*% C     # matrix product: column j of M is A applied to point j


plot(t(M), asp = 1)  # transpose back so each row is a point; M is the image of flares under A

##############################################################

## You can write for loops in R
## Note you can speed up a for loop by vectorizing it.
## To get help for some special characters and words, one must use quotes

help("for")

# Create Noise data set containing 20 pts randomly chosen such that -2 < x,y < 2

Noise <- cbind(runif(20, -2, 2), runif(20, -2, 2))

# Create a list where first element on the list is the Noise data set with 20pts.

NoiseList <- list(Noise)

# For each i = 2, 3, 4, 5, we create a new data set of noisy points by adding
# 20 more points to the previously created noisy data set, so that
# NoiseList[[i]] holds 20 * i points.  The loop starts at 2 so that
# NoiseList[[1]] keeps the original 20 points instead of being overwritten.

for (i in 2:5) {
  MoreNoise <- cbind(runif(20, -2, 2), runif(20, -2, 2))
  Noise <- rbind(Noise, MoreNoise)
  NoiseList[[i]] <- Noise

  # dev.new() opens a fresh graphics device on every platform
  # (windows() only exists on Windows).
  dev.new(width = 5, height = 5)
  plot(Noise, sub = paste(nrow(Noise), "points from noise."), asp = 1)

  # Save each data set to its own file (Noise2.csv, ..., Noise5.csv) rather
  # than overwriting a single file on every pass.
  # write() emits the values of a matrix column by column, so we transpose
  # first: that way every line of the file is one (x, y) point.
  filename <- paste0("Noise", i, ".csv")
  write(t(Noise), file = filename, ncolumns = 2, sep = ",")
}



##############################################################

# Load trefoil knot data
# This data was created using the software KnotPlot.
# KnotPlot is available at KnotPlot.com
# The file is read as space-separated values with no header row.

knotdata <- read.csv("../Data/trefoilknot.txt", sep = " ", header = FALSE)

# You may need to change the path or working directory to load trefoilknot.txt
# Note ../ moves up a directory
# getwd()
# setwd("put path here")

# for 3d plot:
# requireNamespace() only checks whether the package is installed; library()
# then attaches it and stops with an error if the installation did not work.
if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
  install.packages(pkgs = "scatterplot3d")
}
library("scatterplot3d")

# col.grid sets the colour of the grid drawn on the base of the plot
scatterplot3d(knotdata, highlight.3d = TRUE, col.axis = "blue",
              col.grid = "lightblue", main = "Title", pch = 20)

# for 3d plot that you can rotate:
# requireNamespace() only checks whether rgl is installed; library() then
# attaches it and stops with an error if the installation did not work.
if (!requireNamespace("rgl", quietly = TRUE)) {
  install.packages(pkgs = "rgl")
}
library("rgl")

# rainbow(1000) supplies a palette of 1000 colours for the points
plot3d(knotdata, col = rainbow(1000))


##############################################################

##  The TDA package has several commands for generating data sets
##  with known topology.

# requireNamespace() only checks whether TDA is installed; library() then
# attaches it and stops with an error if the installation did not work.
if (!requireNamespace("TDA", quietly = TRUE)) {
  install.packages(pkgs = "TDA")
}
library("TDA")

# choose 300 points randomly (with uniform distribution) from a circle of
# radius 1.

Circle <- circleUnif(300, r = 1)

# choose 300 points randomly (with uniform distribution) from a 2-dimensional
# sphere of radius 1.  Note the 2nd parameter determines the dimension of the
# sphere.
# 2nd parameter = 1: 1-d sphere in R^2 = circle in R^2
# 2nd parameter = 2: 2-d sphere in R^3
# 2nd parameter = 3: 3-d sphere in R^4

Sphere <- sphereUnif(300, 2, r = 1)

# choose 300 points randomly (with uniform distribution) from a torus with tube
# radius 1 where the center of the tube is a circle of radius 2 centered at the
# origin.  Note the center of the tube is not part of the torus.

Torus <- torusUnif(300, 1, 2)

############################################################################


## To save your data points to the csv file, circle.csv
## Note ncolumns = dimension of your data set (unless you also have a column
## containing the names of your rows).
## write() emits the values of a matrix column by column, so the data must be
## transposed first: with t(Circle) every line of the file is one data point.

write(t(Circle), file = "circle.csv", ncolumns = 2, sep = ",")

# To determine the directory where your file was saved

getwd()

# Recall you can specify where you would like to save your file, by giving the
# path to the directory.  For example (replace YourUserName with your own
# user name; the directory must already exist),

write(t(Torus), file = "C:/users/YourUserName/Downloads/torus.csv",
      ncolumns = 3, sep = ",")


##############################################################

#  Loading and cleaning a data set that ships with R

data()          # list the available data sets

help("iris")    # documentation for the iris data set

library(dplyr)
data1 <- iris
help("select")  # select() keeps or drops columns
data2 <- select(.data = data1, -Species)   # drop the Species column
#  ?filter  # remove rows
#  data3 <- filter(data2, Petal.Length > 6.5)
#  data3
#  select(data3, Sepal.Length:Petal.Length)
#  select(data3, -(Sepal.Length:Petal.Length))
#  select(data3, Sepal.Length, Petal.Length)
#  dataTemp <- rbind(data3, c(1, 2, NA, 3))
#  dataTemp
#  na.omit(dataTemp) # remove all rows with NAs

# One way to normalize data: center every column and divide it by its
# standard deviation.
scaledata2 <- scale(x = data2, center = TRUE, scale = TRUE)
colMeans(scaledata2)      # faster version of apply(scaledata2, 2, mean)
apply(scaledata2, 2, sd)  # standard deviation of every column

# Save the scaled data to the current working directory as a
# space-separated text file without row or column names.
write.table(
  x = scaledata2,
  file = "data.txt",
  sep = " ",
  row.names = FALSE,
  col.names = FALSE
)

################################################
## Read data straight from the web

footballdata <- read.csv(
  file = "http://www.repole.com/sun4cast/stats/cfb20140906.csv"
)

# Two-column matrix built from the ScoreOff and RushAttOff columns
data <- cbind(footballdata$ScoreOff, footballdata$RushAttOff)
plot(data, asp = 1)

## digits ##

# Comma-separated file read without a header row
digits <- read.table(
  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes",
  sep = ","
)
# Drop column 65, keeping the remaining columns
digits64 <- select(.data = digits, -65)

######### WARNING #########
## you should clean your data before analyzing it