AddOne OMP Timings
if (! dir.exists("AddOne.Rcheck/")) {
year <- 2022
tarball <- "AddOne_4.0-2.tar.gz"
if (! file.exists(tarball)) {
path <- sprintf("classes/STAT7400-%s/examples/%s", year, tarball)
url <- sprintf("http://www.stat.uiowa.edu/~luke/%s", path)
download.file(url, tarball)
}
system(sprintf("R CMD check %s", tarball))
}
library(AddOne, lib = "AddOne.Rcheck")
R <- 100000
K <- 3
threads <- c(1, 2, 4)
if (isTRUE(parallel::detectCores() > 4))
threads <- c(1, 2, 4, 8)
lens <- c(8, 32, 64, 128, 256, 512, 1024, 2048)
n <- rep(rep(lens, length(threads)), K)
P <- rep(rep(threads, each = length(lens)), K)
tm <- function(n, P) {
x <- rnorm(n)
system.time(for (i in 1 : R) p.AddOne(x, P = P))[3]
}
system.time(time <- sapply(seq_along(n), function(i) tm(n[i], P[i])))
d <- data.frame(time, n, P = as.factor(P))
library(ggplot2)
ggplot(d, aes(n, time, color = P)) + geom_point() + geom_smooth()
Signaling NaN Warnings
The convention is to signal the "NaNs produced"
warning if new NaN
values are produced but not if NaN
arguments are being propagated.
Pure R Code
Keep in mind:
- R logical values can be
TRUE
, FALSE
, or NA
.
- Using an
NA
as the test condition with if
signals an error.
So you need to screen out incoming NA/NaN
values before doing a comparison:
ppareto <- function(x, alpha, beta) {
bad_alpha <- alpha[! is.na(alpha)] <= 0
bad_beta <- beta[! is.na(beta)] <= 0
if (any(bad_alpha, bad_beta)) {
warning("NaNs produced")
alpha[bad_alpha] <- NaN
beta[bad_beta] <- NaN
}
x <- ifelse(x <= alpha, alpha, x)
lp <- beta * (log(alpha) - log(x))
1 - exp(lp)
}
Using C Code
In C code comparisons will return 1 for False
and 0 for TRUE
.
The IEEE floating point standard specifies that comparisons with NaN
values behave like this (x
can be anything, including NaN
and \(\pm\infty\))
FALSE |
FALSE |
FALSE |
FALSE |
FALSE |
TRUE |
#include <Rinternals.h>
void ppareto(double *x, double *alpha, double *beta, double *p, double *dn)
{
R_xlen_t n = (R_xlen_t) *dn;
int na_gen = FALSE;
for (R_xlen_t i = 0; i < n; i++) {
if (alpha[i] <= 0 || beta[i] <= 0) {
p[i] = R_NaN;
na_gen = TRUE;
}
else if (x[i] <= alpha[i])
p[i] = 0;
else {
double lp = beta[i] * (log(alpha[i]) - log(x[i]));
p[i] = 1 - exp(lp);
}
}
if (na_gen)
warning("NaNs produced");
}
dyn.load("ppareto.so")
ppareto <- function(x, alpha, beta) {
n <- max(length(x), length(alpha), length(beta))
x <- rep_len(as.double(x), n)
alpha <- rep_len(as.double(alpha), n)
beta <- rep_len(as.double(beta), n)
.C("ppareto", x, alpha, beta, result = double(n),
as.double(n), NAOK = TRUE)$result
}
LS0tCnRpdGxlOiAiSFc3IE5vdGVzIgphdXRob3I6ICJMdWtlIFRpZXJuZXkiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgpgYGB7ciBnbG9iYWxfb3B0aW9ucywgaW5jbHVkZSA9IEZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoY29sbGFwc2UgPSBUUlVFLAogICAgICAgICAgICAgICAgICAgICAgY2xhc3Muc291cmNlID0gImZvbGQtc2hvdyIsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuYWxpZ24gPSAiY2VudGVyIikKc2V0LnNlZWQoMjAyMS0wMy0xOSkKYGBgCgojIyBBZGRPbmUgT01QIFRpbWluZ3MKCmBgYHtyLCBldmFsID0gRkFMU0V9CmlmICghIGRpci5leGlzdHMoIkFkZE9uZS5SY2hlY2svIikpIHsKICAgIHllYXIgPC0gMjAyMgogICAgdGFyYmFsbCA8LSAiQWRkT25lXzQuMC0yLnRhci5neiIKICAgIGlmICghIGZpbGUuZXhpc3RzKHRhcmJhbGwpKSB7CiAgICAgICAgcGF0aCA8LSBzcHJpbnRmKCJjbGFzc2VzL1NUQVQ3NDAwLSVzL2V4YW1wbGVzLyVzIiwgeWVhciwgdGFyYmFsbCkKICAgICAgICB1cmwgPC0gc3ByaW50ZigiaHR0cDovL3d3dy5zdGF0LnVpb3dhLmVkdS9+bHVrZS8lcyIsIHBhdGgpCiAgICAgICAgZG93bmxvYWQuZmlsZSh1cmwsIHRhcmJhbGwpCiAgICB9CiAgICBzeXN0ZW0oc3ByaW50ZigiUiBDTUQgY2hlY2sgJXMiLCB0YXJiYWxsKSkKfQpsaWJyYXJ5KEFkZE9uZSwgbGliID0gIkFkZE9uZS5SY2hlY2siKQoJClIgPC0gMTAwMDAwCksgPC0gMwp0aHJlYWRzIDwtIGMoMSwgMiwgNCkKaWYgKGlzVFJVRShwYXJhbGxlbDo6ZGV0ZWN0Q29yZXMoKSA+IDQpKQoJdGhyZWFkcyA8LSBjKDEsIDIsIDQsIDgpCmxlbnMgPC0gYyg4LCAzMiwgNjQsIDEyOCwgMjU2LCA1MTIsIDEwMjQsIDIwNDgpCgpuIDwtIHJlcChyZXAobGVucywgbGVuZ3RoKHRocmVhZHMpKSwgSykKUCA8LSByZXAocmVwKHRocmVhZHMsIGVhY2ggPSBsZW5ndGgobGVucykpLCBLKQoKdG0gPC0gZnVuY3Rpb24obiwgUCkgewogICAgeCA8LSBybm9ybShuKQogICAgc3lzdGVtLnRpbWUoZm9yIChpIGluIDEgOiBSKSBwLkFkZE9uZSh4LCBQID0gUCkpWzNdCn0KCnN5c3RlbS50aW1lKHRpbWUgPC0gc2FwcGx5KHNlcV9hbG9uZyhuKSwgZnVuY3Rpb24oaSkgdG0obltpXSwgUFtpXSkpKQoKZCA8LSBkYXRhLmZyYW1lKHRpbWUsIG4sIFAgPSBhcy5mYWN0b3IoUCkpCgpsaWJyYXJ5KGdncGxvdDIpCmdncGxvdChkLCBhZXMobiwgdGltZSwgY29sb3IgPSBQKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmBgYAoKIyMgU2lnbmFsaW5nIE5hTiBXYXJuaW5ncwoKVGhlIGNvbnZlbnRpb24gaXMgdG8gc2lnbmFsIHRoZSBgIk5hTnMgcHJvZHVjZWQiYCB3YXJuaW5nIGlmIG5ldyBgTmFOYAp2YWx1ZXMgYXJlIHByb2R1Y2VkIGJ1dCBub3QgaWYgYE5hTmAgYXJndW1lbnRzIGFyZSBiZWluZyBwcm9wYWdhdGVkLgoKIyMjIFB1cmUgUiBDb2RlCgpLZWVwIGluIG1pbmQ6CgoqIFIgbG9naWNhbCB2YWx1ZXMgY2FuIGJlIGBUUlVFYCwgYEZBTFNFYCwgb3IgYE5BYC4KKiBVc2luZyBhbiBgTkFgIGFzIHRoZSB0ZXN0ICBjb25kaXRpb24gd2l0aCBgaWZgIHNpZ25hbHMgYW4gZXJyb3IuCgpTbyB5b3UgbmVlZCB0byBzY3JlZW4gb3V0IGluY29taW5nIGBOQS9OYU5gIHZhbHVlcyBiZWZvcmUgZG9pbmcgYQpjb21wYXJpc29uOgoKYGBge3J9CnBwYXJldG8gPC0gZnVuY3Rpb24oeCwgYWxwaGEsIGJldGEpIHsKICAgIGJhZF9hbHBoYSA8LSBhbHBoYVshIGlzLm5hKGFscGhhKV0gPD0gMAogICAgYmFkX2JldGEgPC0gYmV0YVshIGlzLm5hKGJldGEpXSA8PSAwCiAgICBpZiAoYW55KGJhZF9hbHBoYSwgYmFkX2JldGEpKSB7CiAgICAgICAgd2FybmluZygiTmFOcyBwcm9kdWNlZCIpCiAgICAgICAgYWxwaGFbYmFkX2FscGhhXSA8LSBOYU4KICAgICAgICBiZXRhW2JhZF9iZXRhXSA8LSBOYU4KICAgIH0KICAgIHggPC0gaWZlbHNlKHggPD0gYWxwaGEsIGFscGhhLCB4KQogICAgbHAgPC0gYmV0YSAqIChsb2coYWxwaGEpIC0gbG9nKHgpKQogICAgMSAtIGV4cChscCkKfQpgYGAKCgojIyMgVXNpbmcgQyBDb2RlCgpJbiBDIGNvZGUgY29tcGFyaXNvbnMgd2lsbCByZXR1cm4gMSBmb3IgYEZhbHNlYCBhbmQgMCBmb3IgYFRSVUVgLgoKVGhlIElFRUUgZmxvYXRpbmcgcG9pbnQgc3RhbmRhcmQgc3BlY2lmaWVzIHRoYXQgY29tcGFyaXNvbnMgd2l0aCBgTmFOYAp2YWx1ZXMgYmVoYXZlIGxpa2UgdGhpcyAoYHhgIGNhbiBiZSBhbnl0aGluZywgaW5jbHVkaW5nIGBOYU5gIGFuZAokXHBtXGluZnR5JCkKPCEtLSBodHRwczovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9OYU4gLS0+Cgp8YE5hTiA+PSB4YHxgTmFOIDw9IHhgfGBOYU4gPiB4YHxgTmFOIDwgeGB8YE5hTiA9PSB4YHxgTmFOICE9IHhgfAp8Oi0tLS0tLS0tLTp8Oi0tLS0tLS0tLTp8Oi0tLS0tLS0tOnw6LS0tLS0tLTp8Oi0tLS0tLS0tLTp8Oi0tLS0tLS0tLTp8CnxgRkFMU0VgICAgIHxgRkFMU0VgICAgfGBGQUxTRWAgIHxgRkFMU0VgICB8YEZBTFNFYCAgIHxgVFJVRWAgICAgfAoKCmBgYGMKI2luY2x1ZGUgPFJpbnRlcm5hbHMuaD4KCnZvaWQgcHBhcmV0byhkb3VibGUgKngsIGRvdWJsZSAqYWxwaGEsIGRvdWJsZSAqYmV0YSwgZG91YmxlICpwLCBkb3VibGUgKmRuKQp7CiAgICBSX3hsZW5fdCBuID0gKFJfeGxlbl90KSAqZG47CiAgICBpbnQgbmFfZ2VuID0gRkFMU0U7CgogICAgZm9yIChSX3hsZW5fdCBpID0gMDsgaSA8IG47IGkrKykgewoJaWYgKGFscGhhW2ldIDw9IDAgfHwgYmV0YVtpXSA8PSAwKSB7CgkgICAgcFtpXSA9IFJfTmFOOwoJICAgIG5hX2dlbiA9IFRSVUU7Cgl9CgllbHNlIGlmICh4W2ldIDw9IGFscGhhW2ldKQoJICAgIHBbaV0gPSAwOwoJZWxzZSB7CgkgICAgZG91YmxlIGxwID0gYmV0YVtpXSAqIChsb2coYWxwaGFbaV0pIC0gbG9nKHhbaV0pKTsKCSAgICBwW2ldID0gMSAtIGV4cChscCk7Cgl9CiAgICB9CiAgICBpZiAobmFfZ2VuKQoJd2FybmluZygiTmFOcyBwcm9kdWNlZCIpOwp9CmBgYApgYGByCmR5bi5sb2FkKCJwcGFyZXRvLnNvIikKCnBwYXJldG8gPC0gZnVuY3Rpb24oeCwgYWxwaGEsIGJldGEpIHsKICAgIG4gPC0gbWF4KGxlbmd0aCh4KSwgbGVuZ3RoKGFscGhhKSwgbGVuZ3RoKGJldGEpKQogICAgeCA8LSByZXBfbGVuKGFzLmRvdWJsZSh4KSwgbikKICAgIGFscGhhIDwtIHJlcF9sZW4oYXMuZG91YmxlKGFscGhhKSwgbikKICAgIGJldGEgPC0gcmVwX2xlbihhcy5kb3VibGUoYmV0YSksIG4pCiAgICAuQygicHBhcmV0byIsIHgsIGFscGhhLCBiZXRhLCByZXN1bHQgPSBkb3VibGUobiksCiAgICAgICBhcy5kb3VibGUobiksIE5BT0sgPSBUUlVFKSRyZXN1bHQKfQpgYGAK