# KS statistic (2-sided)
# Author: Isabel K Darcy
# Date: Feb 13, 2018
#
# Demonstrates the two-sample Kolmogorov-Smirnov test on small synthetic
# data sets, plots the raw data and their empirical CDFs, and then compares
# the test on small versus large samples drawn the same way.

# Build the data sets ----

# 30 random points between 0 and 3, sorted in increasing order.
a <- sort(runif(30, 0, 3))
sa <- sin(a)  # sine of these 30 points

# 25 random points between 0 and 3, sorted in increasing order.
b <- sort(runif(25, 0, 3))
sb <- sin(b)  # sine of these 25 points

# 30 random points between 0 and 3, sorted in increasing order.
# NOTE: `c` shadows the name of base::c() as a variable; calls such as
# c(1, 2) still work because R looks up a function when a name is called.
c <- sort(runif(30, 0, 3))
sc <- c^2  # square of these 30 points

# Plot the raw data ----

# Plot the data set sa; `main` sets the title.
# pch = 17 chooses a filled triangle as the shape of the data points, see
# http://www.sthda.com/english/wiki/r-plot-pch-symbols-the-different-point-shapes-available-in-r
# cex.main scales the title text (1.5 = 50% larger than the base size).
# cex.lab scales the axis labels (x and y titles).
# cex.axis scales the axis tick annotations.
plot(
  sa,
  main = "data", col = "blue", pch = 17,
  cex.main = 1.5, cex.lab = 1.7, cex.axis = 2
)
points(sb, col = "red", pch = 19)  # add the sb data set to the previous plot
points(sc, pch = 10, cex = 2)      # add the sc data set to the previous plot
                                   # cex = 2 doubles the size of a data point

# Same three data sets, but drawn with sc first so that the axis limits are
# taken from sc instead of sa.
plot(
  sc,
  main = "data", pch = 10, cex = 2,
  cex.main = 1.5, cex.lab = 1.7, cex.axis = 2
)
points(sb, col = "red", pch = 19)
points(sa, col = "blue", pch = 17)

# Plot the empirical cumulative distribution functions ----

# ECDFs of the three data sets, axes taken from sa.
plot(ecdf(sa), col = "blue")
plot(ecdf(sb), add = TRUE, col = "red")
plot(ecdf(sc), add = TRUE)

# Same ECDFs, axes taken from sc.
plot(ecdf(sc))
plot(ecdf(sb), add = TRUE, col = "red")
plot(ecdf(sa), add = TRUE, col = "blue")

# KS statistics for each pair of data sets ----

# print() is explicit so the results also appear when this file is run with
# source(), which does not auto-print top-level values by default.
print(ks.test(sa, sb))
print(ks.test(sc, sb))
print(ks.test(sa, sc))

# Small versus large samples ----

# Generate larger data sets.
a1 <- sort(runif(30000, 0, 3))
saLarge <- sin(a1)
b1 <- sort(runif(2500, 0, 3))
sbLarge <- sin(b1)

# Generate smaller data sets (this overwrites the earlier `a` and `b`).
a <- sort(runif(30, 0, 3))  # 30 random points between 0 and 3, sorted
saSmall <- sin(a)           # sine of these 30 points
b <- sort(runif(25, 0, 3))  # 25 random points between 0 and 3, sorted
sbSmall <- sin(b)           # sine of these 25 points

# Compare the KS test on the small versus the large data sets.
print(ks.test(saSmall, sbSmall))
print(ks.test(saLarge, sbLarge))