20 juillet 2018

Astuces de codage: bonnes pratiques

Pass argument by reference

Pass argument by reference

// Fills x with newValue, where x is received BY VALUE.
// The fill therefore acts on the function's own copy of the vector; the R
// session that follows shows the caller's vector unchanged after the call.
void byValue 
  (vec x, double newValue) {
  x.fill(newValue);  // writes into the local copy only
}

// Fills x with newValue, where x is received by NON-CONST REFERENCE.
// The R session that follows shows a double vector being overwritten in the
// caller after the call, while an integer vector is left untouched
// (presumably because it is first converted to a temporary double vector,
// so the reference binds to that temporary -- TODO confirm).
void byRef 
  (vec& x, double newValue) {
  x.fill(newValue);  // writes through the reference: side effect on the argument
}

// Same body as byRef, but x is received by CONST reference.
// NOTE(review): fill() modifies the vector, so calling it through a const
// reference is expected to be rejected by the compiler. Presumably this
// snippet is shown to illustrate that `const` protects the caller's data
// from accidental modification -- confirm intent before "fixing" it.
void byRefSafe 
  (const vec& x, double newValue) {
  x.fill(newValue);  // mutating call on a const object
}

# Start from a double vector (adding .1 to 1:5 yields doubles).
x <- 1:5 + .1
# byValue() fills its own copy, so x is unchanged afterwards.
x %>% byValue(3.14)
x
## [1] 1.1 2.1 3.1 4.1 5.1
# byRef() writes through the reference: x itself is overwritten.
x %>% byRef(3.14)
x
## [1] 3.14 3.14 3.14 3.14 3.14
# Now x is an integer vector (1:5). The output below shows it is NOT modified
# by byRef(); presumably the integer input is converted to a temporary double
# vector before the call, so the fill lands on that temporary -- TODO confirm.
x <- 1:5
x %>% byRef(3.14)
x
## [1] 1 2 3 4 5

Pass argument by reference

// Returns the matrix product X*Y. Both operands are received by value; this
// is the baseline that prodByRef is benchmarked against.
mat prodByValue (mat X, mat Y) {
  return X*Y;
}

// Returns the matrix product X*Y with both operands received by reference,
// which avoids the by-value copies made by prodByValue.
// The references are const because the function only reads its operands:
// the compiler then guarantees the caller's matrices cannot be modified by
// accident (the side-effect hazard shown by byRef).
mat prodByRef (const mat& X, const mat& Y) {
  return X * Y;
}
# Two 100 x 100 matrices of standard normal draws used as benchmark inputs.
X <- matrix(rnorm(10000), 100, 100)
Y <- matrix(rnorm(10000), 100, 100)

# Time the by-value and by-reference matrix products on the same inputs.
tm <- microbenchmark(prodByValue(X, Y),
                     prodByRef(X, Y))
# Plot the timings, as is done for the other benchmarks in this document.
autoplot(tm)

## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Inversion de matrices

// Computes the solution of A x = b written as "inverse of A times b":
// A.i() denotes the inverse of A, which is then multiplied by b.
// Benchmarked against solveCpp, which calls solve() instead.
vec invProdCpp (mat A, vec b) {
  return A.i() * b;
}

// Computes the solution of A x = b by calling solve(A, b) directly, without
// writing out an explicit inverse. Counterpart of invProdCpp in the benchmark.
vec solveCpp (mat A, vec b) {
  return solve(A, b);
}
# Random 100 x 100 system matrix and right-hand side used as benchmark inputs.
A <- matrix(rnorm(10000), 100, 100)
b <- rnorm(100)

# Compare explicit inversion against a direct solve, in both C++ and R.
tm <- microbenchmark(invProdCpp(A, b),
                     solveCpp(A, b),
                     solve(A) %*% b,
                     solve(A, b))
# Plot the timings, as is done for the other benchmarks in this document.
autoplot(tm)

## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Declaration and scope

// Returns (1 / n_rows) * sum over rows i of X.row(i)' * X.row(i), an
// n_cols x n_cols matrix. No mean-centering is performed on X.
// Variant where the row buffer z is declared ONCE, outside the loop, and
// reassigned at every iteration; benchmarked against covInScope.
mat covOutOfScope (mat X)
{
  // Accumulator, initialised to zeros.
  mat y = zeros(X.n_cols, X.n_cols);
  // Row buffer declared outside the loop and reused by every iteration.
  rowvec z = rowvec(X.n_cols);
  
  for (uword i = 0; i < X.n_rows; i++) {
    z = X.row(i);
    y += z.t() * z;  // outer product of row i with itself
  }
  
  return y / X.n_rows;
}

// Returns (1 / n_rows) * sum over rows i of X.row(i)' * X.row(i), an
// n_cols x n_cols matrix. No mean-centering is performed on X.
// Variant where the row buffer z is declared INSIDE the loop body, so a new
// z is created at every iteration; benchmarked against covOutOfScope.
mat covInScope (mat X) 
{
  // Accumulator, initialised to zeros.
  mat y = zeros(X.n_cols, X.n_cols);

  for (uword i = 0; i < X.n_rows; i++) {
    rowvec z = X.row(i);  // declared in the loop scope
    y += z.t() * z;       // outer product of row i with itself
  }
  
  return y / X.n_rows;
}

Declaration and scope

# Benchmark input: a 100 x 100 matrix of standard normal draws.
X <- matrix(rnorm(100 * 100), nrow = 100, ncol = 100)

# Time the two declaration-scope variants on the same input and plot the
# resulting timing distributions.
tm <- microbenchmark(
  covOutOfScope(X),
  covInScope(X)
)
autoplot(tm)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Matrix storage of elements: column ordering

// Element-wise product: C(i,j) = A(i,j) * B(i,j), with C sized like A.
// Traversal is ROW by ROW: the outer loop runs over rows i and the inner
// loop over columns j. Benchmarked against prodByColOrdering, which swaps
// the two loops (the slide topic is column-ordered matrix storage).
// NOTE(review): B is indexed over A's dimensions; assumes B is at least as
// large as A -- confirm against callers.
mat prodByRowOrdering (mat A, mat B) 
{
  // Result matrix with the same size as A; every entry is assigned below.
  mat C = mat(size(A));
  
  for (uword i = 0; i < A.n_rows; i++)
    for (uword j = 0; j < A.n_cols; j++)
      C(i,j) = A(i,j) * B(i,j);
  
  return C;
}

// Element-wise product: C(i,j) = A(i,j) * B(i,j), with C sized like A.
// Traversal is COLUMN by COLUMN: the outer loop runs over columns j and the
// inner loop over rows i, so consecutive inner iterations walk down a single
// column. Benchmarked against prodByRowOrdering (the slide topic is
// column-ordered matrix storage).
// NOTE(review): B is indexed over A's dimensions; assumes B is at least as
// large as A -- confirm against callers.
mat prodByColOrdering (mat A, mat B) 
{
  // Result matrix with the same size as A; every entry is assigned below.
  mat C = mat(size(A));
  
  for (uword j = 0; j < A.n_cols; j++)
    for (uword i = 0; i < A.n_rows; i++)
      C(i,j) = A(i,j) * B(i,j);
  
  return C;
}

Matrix storage of elements: column ordering

# Benchmark inputs: two 100 x 100 matrices of standard normal draws.
A <- matrix(rnorm(100 * 100), nrow = 100, ncol = 100)
B <- matrix(rnorm(100 * 100), nrow = 100, ncol = 100)

# Time the column-wise and row-wise traversals on the same inputs and plot
# the resulting timing distributions.
tm <- microbenchmark(
  prodByColOrdering(A, B),
  prodByRowOrdering(A, B)
)
autoplot(tm)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Code Profiling

In RStudio, it is easy to identify which parts of the code are worth rewriting in C++. This operation is called profiling, and documentation is available here:

MDN

To quickly begin profiling, select a part of the code and then click on Profile > Profile Selected Lines (shortcut Ctrl+Alt+Shift+P).

The package loaded and used is profvis; like microbenchmark, it can be run to compare functions. It reports the time spent on each line of the code.

Code Profiling - Example

library(profvis)

# Build the data set that is profiled below: an "id" column followed by
# numeric columns. Unless `data` is defined elsewhere, the name resolves to
# the utils::data() function and the subsetting inside profvis() fails.
# Reduce `times` / `cols` if memory is limited.
times <- 4e5
cols <- 150
data <- as.data.frame(x = matrix(rnorm(times * cols, mean = 5), ncol = cols))
data <- cbind(id = paste0("g", seq_len(times)), data)

# Profile four equivalent ways of computing the column means so that their
# line-by-line cost can be compared in the profvis viewer.
profvis({
  data1 <- data
  # Four different ways of getting column means
  means <- apply(data1[, names(data1) != "id"], 2, mean)
  means <- colMeans(data1[, names(data1) != "id"])
  means <- lapply(data1[, names(data1) != "id"], mean)
  means <- vapply(data1[, names(data1) != "id"], mean, numeric(1))
})