# Author:  Isabel Darcy 
# Date:  Jan 29, 2018
#  R script used to generate slides
#     describing the m1 data structure.
#  Note: changing num_intervals to 10
#     results in 6 vertices.


# Toy data set: four points in the plane, one point per row.
# Column 1 (data11) holds the first coordinate, column 2 (data22) the second.
data11 <- c(1, 2, 2, 3)
data22 <- c(1, 1, 9, 9)
dd <- cbind(data11, data22)
DataSet <- dd

# Scatter plot of the points with a 1:1 aspect ratio and solid dots.
plot(DataSet, asp = 1, pch = 19)

# Filter function: a vector with one entry per data point (same number of
# entries as the dataset).  Any such filter may be chosen; here it is the
# projection onto the first coordinate.
# See below for some examples of filter functions.
filter <- DataSet[, 1]

# Number of intervals.
#   For mapper1D, number of overlapping bins = num_intervals
#   For mapper2D, number of overlapping bins = num_intervals^2
num_intervals <- 1

# The percent by which these intervals overlap.
percent_overlap <- 10

# A parameter for determining clusters (discussed later).
num_bins_when_clustering <- 10

# Call mapper1D to apply TDAmapper to the dataset.
# mapper1D comes from the TDAmapper package; attach the package only when
# the function is not already available in the session.
if (!exists("mapper1D", mode = "function")) {
  library(TDAmapper)
}

# Pairwise Euclidean distances between the rows of DataSet.  The script
# previously passed `distance_matrix` to mapper1D without ever defining it.
distance_matrix <- dist(DataSet)

m1 <- mapper1D(
  distance_matrix = distance_matrix,
  filter_values = filter,
  num_intervals = num_intervals,
  percent_overlap = percent_overlap,
  num_bins_when_clustering = num_bins_when_clustering
)

# Create and plot the mapper graph.
# The graph functions come from igraph; attach the package only when they
# are not already available in the session.
if (!exists("graph_from_adjacency_matrix", mode = "function")) {
  library(igraph)
}

# Build an undirected graph from the adjacency matrix stored in m1.
# graph_from_adjacency_matrix() / layout_nicely() are the current names of
# the deprecated graph.adjacency() / layout.auto().
g1 <- graph_from_adjacency_matrix(m1$adjacency, mode = "undirected")
plot(g1, layout = layout_nicely(g1))

# Annotate the plot with the filter used and the mapper parameters.
title(
  "Filter = projection to x-axis",
  sub = paste("num_intervals = ", num_intervals,
              "\n percent_overlap = ", percent_overlap,
              "\n num_bins_when_clustering =", num_bins_when_clustering)
)


# Hierarchical clustering of the middle bin.
# The bin is represented here by the two points (2, 1) and (2, 9),
# stored one point per row.
data11 <- c(2, 2)
data22 <- c(1, 9)
dd <- cbind(data11, data22)

# Single-linkage clustering on the Euclidean distances between the rows.
hcbin <- hclust(dist(dd), method = "single")

# Merge heights of the resulting dendrogram.
hcbin[["height"]]

# Hierarchical clustering of DataSet (single linkage) and a histogram of
# the merge heights.

hc <- hclust(dist(DataSet), method = "single")
heights <- hc$height

# Diameter of the data = largest pairwise distance.  Computed from DataSet
# instead of being hard-coded; for the original four-point DataSet this
# equals sqrt(4 + 64).
diam <- max(dist(DataSet))

# Break points for num_bins_when_clustering equal-width bins running from
# the smallest merge height (1 for the original DataSet) up to the diameter.
bin_breaks <- seq(from = min(heights), to = diam,
                  by = (diam - min(heights)) / num_bins_when_clustering)
bin_breaks

# A single break point cannot delimit a bin; fall back to breaks = 1,
# which hist() interprets as a number of cells rather than as break points.
if (length(bin_breaks) == 1) {
  bin_breaks <- 1
}

# Values being binned: every merge height plus the diameter.
c(heights, diam)
myhist <- hist(c(heights, diam), breaks = bin_breaks, plot = TRUE)

# z flags the empty bins; sum(z) counts them.
z <- (myhist$counts == 0)
z
sum(z)

# Midpoints of the histogram bins.
myhist$mids