# simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0 > # define x > x <- 7 > # simple if statement > if ( x < 0 ) print("Negative") > # simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0"> # simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0 > # define x > x <- 7 > # simple if statement > if ( x < 0 ) print("Negative") > # simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0">

Presentation is loading. Please wait.

Presentation is loading. Please wait.

Programming in R coding, debugging and optimizing Katia Oleinik Scientific Computing and Visualization Boston University

Similar presentations


Presentation on theme: "Programming in R coding, debugging and optimizing Katia Oleinik Scientific Computing and Visualization Boston University"— Presentation transcript:

1 Programming in R coding, debugging and optimizing Katia Oleinik koleinik@bu.edu Scientific Computing and Visualization Boston University http://www.bu.edu/tech/research/training/tutorials/list/

2 if Comparison operators: ==equal !=not equal > (<)greater (less) >= (<=)greater (less) or equal 2 if (condition) { command(s) } else { command(s) } Logical operators: &and |or !not

3 if 3 > # define x > x <- 7 > # simple if statement > if ( x < 0 ) print("Negative") > # simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0 > # define x > x <- 7 > # simple if statement > if ( x < 0 ) print("Negative") > # simple if-else statement > if ( x < 0 ) print("Negative") else print("Non-negative") [1] "Non-negative" > # if statement may be used inside other constructions > y <- if ( x < 0 ) -1 else 0 > y [1] 0

4 if 4 > # multiline if - else statement > if ( x < 0 ) { + x <- x+1 + print("Add one") + } else if ( x == 0 ) { + print("Zero") + } else { + print("Positive value") + } [1] "Positive value" > # multiline if - else statement > if ( x < 0 ) { + x <- x+1 + print("Add one") + } else if ( x == 0 ) { + print("Zero") + } else { + print("Positive value") + } [1] "Positive value" Note: For multiline if-statements braces are necessary even for single statement bodies. The else keyword must be on the same line as the closing brace of the preceding block (in an interactive session); by convention the opening brace of the else block goes on that line too.

5 ifelse 5 > # ifelse statement > y <- ifelse ( x < 0, -1, 0 ) > # nested ifelse statement > y <- ifelse ( x < 0, -1, ifelse ( x > 0, 1, 0) ) > # ifelse statement > y <- ifelse ( x < 0, -1, 0 ) > # nested ifelse statement > y <- ifelse ( x < 0, -1, ifelse ( x > 0, 1, 0) ) ifelse (test_condition, true_value, false_value)

6 ifelse 6 > # ifelse statement on a vector > digits <- 0 : 9 > ifelse( digits > 4, 1, 0 ) [1] 0 0 0 0 0 1 1 1 1 1 > # ifelse statement on a vector > digits <- 0 : 9 > ifelse( digits > 4, 1, 0 ) [1] 0 0 0 0 0 1 1 1 1 1 Best of all – ifelse statement operates on vectors!

7 ifelse 7 Exercise: define a random vector ranging from -10 to 10: x<- as.integer( runif( 10, -10, 10 ) ) create vector y, such that its elements equal to absolute values of x Note : normally, you would use abs() function to achieve this result

8 switch 8 > # simple switch statement > x <- 3 > switch( x, 2, 4, 6, 8 ) [1] 6 > switch( x, 2, 4 ) # returns NULL since there are only 2 elements in the list > # simple switch statement > x <- 3 > switch( x, 2, 4, 6, 8 ) [1] 6 > switch( x, 2, 4 ) # returns NULL since there are only 2 elements in the list switch (statement, list)

9 switch 9 > # switch statement with named list > day <- "Tue" > switch( day, Sun = 0, Mon = 1, Tue = 2, Wed = 3, … ) [1] 2 > # switch statement with a “default” value > food <- "meat" > switch( food, banana="fruit", carrot="veggie", "neither") [1] "neither" > # switch statement with named list > day <- "Tue" > switch( day, Sun = 0, Mon = 1, Tue = 2, Wed = 3, … ) [1] 2 > # switch statement with a “default” value > food <- "meat" > switch( food, banana="fruit", carrot="veggie", "neither") [1] "neither" switch (statement, name1 = str1, name2 = str2, … )

10 loops 10 There are 3 statements that provide explicit looping: - repeat - for - while Built – in constructs to control the looping: - next - break Note: Use explicit loops only if it is absolutely necessary. R has other functions for implicit looping, which will run much faster: apply(), sapply(), tapply(), and lapply().

11 repeat 11 repeat { } statement causes repeated evaluation of the body until break is requested. Be careful – infinite loop may occur! > # find the greatest odd divisor of an integer > x <- 84 > repeat{ + print(x) + if( x %% 2 != 0) break + x <- x/2 + } [1] 84 [1] 42 [1] 21 > > # find the greatest odd divisor of an integer > x <- 84 > repeat{ + print(x) + if( x %% 2 != 0) break + x <- x/2 + } [1] 84 [1] 42 [1] 21 >

12 for 12 > # print all words in a vector > names <- c(“Sam”, “Paul”, “Michael”) > > for( j in names ){ + print(paste(“My name is”, j)) + } [1] “My name is Sam” [1] “My name is Paul” [1] “My name is Michael” > > # print all words in a vector > names <- c(“Sam”, “Paul”, “Michael”) > > for( j in names ){ + print(paste(“My name is”, j)) + } [1] “My name is Sam” [1] “My name is Paul” [1] “My name is Michael” > for (object in sequence) { command(s) }

13 for 13 for (object in sequence) { command(s) if (…) next # return to the start of the loop if (…) break # exit from (innermost) loop }

14 while 14 > # find the largest odd divisor of a given number > x <- 84 > while (x %% 2 == 0){ + x <- x/2 + } > x [1] 21 > > # find the largest odd divisor of a given number > x <- 84 > while (x %% 2 == 0){ + x <- x/2 + } > x [1] 21 > while (test_statement) { command(s) }

15 loops 15 Exercise: Using either loop statement print all the numbers from 0 to 30 divisible by 7. Use %% - the modulo (remainder) operator - to check divisibility.

16 function 16 myFun <- function (ARG, OPT_ARGs ){ statement(s) } ARG: vector, matrix, list or a data frame OPT_ARGs: optional arguments Functions are a powerful element of R. They allow you to expand on existing functions by writing your own custom functions.

17 function 17 myFun <- function (ARG, OPT_ARGs ){ statement(s) } Naming: Variable naming rules apply. Avoid usage of existing (built-in) functions Arguments: Argument list can be empty. Some (or all) of the arguments can have a default value ( arg1 = TRUE ) The argument ‘…’ can be used to allow one function to pass on argument settings to another function. Return value: The value returned by the function is the last value computed, but you can also use return() statement.

18 function 18 > # simple function: calculate (x+1)^2 > f1 <- function (x) { + x^2 + 2*x + 1 + } > f1(3) [1] 16 > > # simple function: calculate (x+1)^2 > f1 <- function (x) { + x^2 + 2*x + 1 + } > f1(3) [1] 16 >

19 function 19 > # function with default arguments: calculate (x+a)^2 > f2 <- function (x, a=1) { + x^2 + 2*x*a + a^2 + } > f2(3) [1] 16 > f2(3,2) [1] 25 > > # arguments can be called using their names ( and out of order!!!) > f2( a = 2, x = 1) [1] 9 > # function with default arguments: calculate (x+a)^2 > f2 <- function (x, a=1) { + x^2 + 2*x*a + a^2 + } > f2(3) [1] 16 > f2(3,2) [1] 25 > > # arguments can be called using their names ( and out of order!!!) > f2( a = 2, x = 1) [1] 9

20 function 20 > # Some optional arguments can be specified as ‘…’ to pass them to another function > f3 <- function (x, … ) { + plot (x, … ) + } > > # print all the words together in one sentence > f3 <- function ( … ) { + print(paste ( … ) ) + } > f3("Hello", " R! ") [1] "Hello R! " > # Some optional arguments can be specified as ‘…’ to pass them to another function > f3 <- function (x, … ) { + plot (x, … ) + }+ } > > # print all the words together in one sentence > f3 <- function ( … ) { + print(paste ( … ) ) + }+ } > f3("Hello", " R! ") [1] "Hello R! "

21 function 21 > # define a function > f <- function (x) { + cat ("u=", u, "\n") # this variable is local ! + u<-u+1 # this will not affect the value of variable outside f() + cat ("u=", u, "\n") + } > > u <- 2 # define a variable > f(u) #execute the function u= 2 u= 3 > > cat ("u=", u, "\n") # print the value of the variable u= 2 > # define a function > f <- function (x) { + cat ("u=", u, "\n") # this variable is local ! + u<-u+1 # this will not affect the value of variable outside f() + cat ("u=", u, "\n") + }+ } > > u <- 2 # define a variable > f(u) #execute the function u= 2 u= 3 > > cat ("u=", u, "\n") # print the value of the variable u= 2 Local and global variables: All variables appearing inside a function are treated as local, except their initial value will be of that of the global (if such variable exists).

22 function 22 > # define a function > f <- function (x) { + cat ("u=", u, "\n") # this variable is local ! + u <<- u+1 # this WILL affect the value of variable outside f() + cat ("u=", u, "\n") + } > > u <- 2 # define a variable > f(u) #execute the function u= 2 u= 3 > > cat ("u=", u, "\n") # print the value of the variable u= 3 > > # define a function > f <- function (x) { + cat ("u=", u, "\n") # this variable is local ! + u <<- u+1 # this WILL affect the value of variable outside f() + cat ("u=", u, "\n") + }+ } > > u <- 2 # define a variable > f(u) #execute the function u= 2 u= 3 > > cat ("u=", u, "\n") # print the value of the variable u= 3 > Local and global variables: If you want to access the global variable – you can use the super- assignment operator <<-. You should avoid doing this!!!

23 function 23 > # define a function > f <- function (x) { + x <- 2 + print (x) + } > > x <- 3 # assign value to x > y <- f(x) # call the function [1] 2 > > print(x) # print value of x [1] 3 > > # define a function > f <- function (x) { + x <- 2 + print (x) + }+ } > > x <- 3 # assign value to x > y <- f(x) # call the function [1] 2 > > print(x) # print value of x [1] 3 > Call vector variables: Functions do not change their arguments.

24 function 24 > # define a function > f <- function (x) { + x <- 2 + print (x) + } > > x <- 3 # assign value to x > x <- f(x) # call the function [1] 2 > > print(x) # print value of x [1] 2 > > # define a function > f <- function (x) { + x <- 2 + print (x) + }+ } > > x <- 3 # assign value to x > x <- f(x) # call the function [1] 2 > > print(x) # print value of x [1] 2 > Call vector variables: If you want to change the value of the function’s argument, reassign the return value to the argument.

25 function 25 > # get the source code of lm() function > lm function (formula, data, subset, weights, na.action, method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, contrasts = NULL, offset,...) { ret.x <- x ret.y <- y cl <- match.call()... z } > > # get the source code of lm() function > lm function (formula, data, subset, weights, na.action, method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, contrasts = NULL, offset,...) { ret.x <- x ret.y <- y cl <- match.call()... z } > Finding the source code: You can find the source code for any R function by printing its name without parentheses.

26 function 26 > # get the source code of mean() function > mean function (x,...) UseMethod("mean") > > # get the source code of mean() function > mean function (x,...) UseMethod("mean") > Finding the source code: For generic functions there are many methods depending on the type of the argument.

27 function 27 > # list the methods available for the mean() generic > methods("mean") [1] mean.Date mean.POSIXct mean.POSIXlt mean.data.frame [5] mean.default mean.difftime > > # get source code > mean.default function (x, trim = 0, na.rm = FALSE,...) { if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {... z } > # list the methods available for the mean() generic > methods("mean") [1] mean.Date mean.POSIXct mean.POSIXlt mean.data.frame [5] mean.default mean.difftime > > # get source code > mean.default function (x, trim = 0, na.rm = FALSE,...) { if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {... z } Finding the source code: You can first explore the different methods and then choose the one you need.

28 apply 28 apply (OBJECT, MARGIN, FUNCTION, ARGs ) object: vector, matrix or a data frame margin: 1 – rows, 2 – columns, c(1,2) – both function: function to apply args: possible arguments Description: Returns a vector or array or list of values obtained by applying a function to margins of an array or matrix

29 apply 29 > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > Example: Create matrix and apply different functions to its rows and columns.

30 apply 30 > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # find median of each row > apply (x, 1, median) [1] 5.5 6.5 7.5 > > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # find median of each row > apply (x, 1, median) [1] 5.5 6.5 7.5 > Example: Create matrix and apply different functions to its rows and columns.

31 apply 31 > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # find mean of each column > apply (x, 2, mean) [1] 2 5 8 11 > > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # find mean of each column > apply (x, 2, mean) [1] 2 5 8 11 > Example: Create matrix and apply different functions to its rows and columns.

32 apply 32 > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # create a new matrix with values 0 or 1 for even and odd elements of x > apply (x, c(1,2), function (x) x %% 2) [,1] [,2] [,3] [,4] [1,] 1 0 1 0 [2,] 0 1 0 1 [3,] 1 0 1 0 > > # create 3x4 matrix > x <- matrix( 1:12, nrow = 3, ncol = 4) > x [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 > # create a new matrix with values 0 or 1 for even and odd elements of x > apply (x, c(1,2), function (x) x %% 2) [,1] [,2] [,3] [,4] [1,] 1 0 1 0 [2,] 0 1 0 1 [3,] 1 0 1 0 > Example: Create matrix and apply different functions to its rows and columns.

33 lapply 33 > # create a list > x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE)) > # compute the list mean for each list element > lapply (x, mean) $a [1] 5.5 $beta [1] 4.535125 $logic [1] 0.3333333 > > # create a list > x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE)) > # compute the list mean for each list element > lapply (x, mean) $a [1] 5.5 $beta [1] 4.535125 $logic [1] 0.3333333 > l lapply() function returns a list: lapply(X, FUN,...)

34 sapply 34 > # create a list > x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE)) > # compute the list mean for each list element > sapply (x, mean) a beta logic 5.5000000 4.5351252 0.3333333 > > # create a list > x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE)) > # compute the list mean for each list element > sapply (x, mean) a beta logic 5.5000000 4.5351252 0.3333333 > l sapply() function returns a vector or a matrix: sapply(X, FUN,..., simplify = TRUE, USE.NAMES = TRUE)

35 code sourcing 35 source ("file", … ) file: file with a source code to load (usually with extension .r ) echo: if TRUE, each expression is printed after parsing, before evaluation.

36 code sourcing 36 katana:~ % emacs foo_source.r & # dummy function foo <- function(x){ x+1 } # dummy function foo <- function(x){ x+1 } > # load foo.r source file > source ("foo_source.r") > # create a vector > x <- c(3,5,7) > # call function > foo(x) [1] 4 6 8 > # load foo.r source file > source ("foo_source.r") > # create a vector > x <- c(3,5,7) > # call function > foo(x) [1] 4 6 8 Linux prompt Text editor R session

37 code sourcing 37 > # load foo.r source file > source ("foo_source.r", echo = TRUE) > # dummy function > foo <- function(x){ + x+1; + } > # create a vector > x <- c(3,5,7) > # call function > foo(x) [1] 4 6 8 > # load foo.r source file > source ("foo_source.r", echo = TRUE) > # dummy function > foo <- function(x){ + x+1; + } > # create a vector > x <- c(3,5,7) > # call function > foo(x) [1] 4 6 8

38 code sourcing 38 Exercise: - write a function that computes the logarithm of the inverse of a number, log(1/x) - save it in a file with the .r extension - load it into your workspace - execute it - try executing it with the input vector ( 2, 1, 0, -1 ).

39 debugging 39 R package includes debugging tools. cat () & print () – print out the values browser () – pause the code execution and “browse” the code debug (FUN) – execute function line by line undebug (FUN) – stop debugging the function

40 debugging 40 # dummy function inv_log <- function(x){ y <- 1/x browser() y <- log(y) } # dummy function inv_log <- function(x){ y <- 1/x browser() y <- log(y) } > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + browser(); + y<-log(y); + } > inv_log (x) # call function Called from: inv_log(x) Browse[1]> y # check the values of local variables [1] 0.3333333 0.5000000 1.0000000 Inf -1.0000000 > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + browser(); + y<-log(y); + } > inv_log (x) # call function Called from: inv_log(x) Browse[1]> y # check the values of local variables [1] 0.3333333 0.5000000 1.0000000 Inf -1.0000000 inv_log.r

41 debugging 41 <RET> (empty line) Go to the next statement if the function is being debugged. Continue execution if the browser was invoked. c or cont Continue execution without single stepping. n Execute the next statement in the function. This works from the browser as well. where Show the call stack. Q Halt execution and jump to the top-level immediately. To view the value of a variable whose name matches one of these commands, use the print() function, e.g. print(n).

42 debugging 42 # dummy function inv_log <- function(x){ y <- 1/x browser() y <- log(y) } # dummy function inv_log <- function(x){ y <- 1/x browser() y <- log(y) } > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + browser(); + y<-log(y); + } > inv_log (x) # call function Called from: inv_log(x) Browse[1]> y [1] 0.3333333 0.5000000 1.0000000 Inf -1.0000000 Browse[1]> n debug: y <- log(y) Browse[2]> Warning message: In log(y) : NaNs produced > > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + browser(); + y<-log(y); + } > inv_log (x) # call function Called from: inv_log(x) Browse[1]> y [1] 0.3333333 0.5000000 1.0000000 Inf -1.0000000 Browse[1]> n debug: y <- log(y) Browse[2]> Warning message: In log(y) : NaNs produced > inv_log.r

43 debugging 43 # dummy function inv_log <- function(x){ y <- 1/x y <- log(y) } # dummy function inv_log <- function(x){ y <- 1/x y <- log(y) } > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + y<-log(y); + } > debug(inv_log) # debug mode > inv_log (x) # call function Called from: inv_log(x) debugging in: inv_log(x) debug: { y <- 1/x y <- log(y) } Browse[2]>... > undebug(inv_log) # exit debugging mode > # load foo.r source file > source ("inv_log.r", echo = TRUE) > # dummy function > inv_log <- function(x){ + y<-1/x; + y<-log(y); + } > debug(inv_log) # debug mode > inv_log (x) # call function Called from: inv_log(x) debugging in: inv_log(x) debug: { y <- 1/x y <- log(y) } Browse[2]>... > undebug(inv_log) # exit debugging mode inv_log.r

44 timing 44 Use system.time() functions to measure the time of execution. > # make a function > g <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # make a function > g <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + }

45 timing 45 Use system.time() functions to measure the time of execution. > # make a function > g <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # execute the function, measuring the time of the execution > system.time( g(100000) ) user system elapsed 0.107 0.002 0.109 > # make a function > g <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # execute the function, measuring the time of the execution > system.time( g(100000) ) user system elapsed 0.107 0.002 0.109

46 optimization 46 How to speed up the code?

47 optimization 47 How to speed up the code? Use vectors !

48 optimization 48 How to speed up the code? Use vectors ! > # using vectors > x <- (1:100000) > g2 <- function(x) { + x/(x+1) + } > > # using vectors > x <- (1:100000) > g2 <- function(x) { + x/(x+1) + } > > # using loops > g1 <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # using loops > g1 <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + }

49 optimization 49 How to speed up the code? Use vectors ! > # using vectors > x <- (1:100000) > g2 <- function(x) { + x/(x+1) + } > # execute the function > system.time( g2(x) ) user system elapsed 0.002 0.000 0.003 > # using vectors > x <- (1:100000) > g2 <- function(x) { + x/(x+1) + } > # execute the function > system.time( g2(x) ) user system elapsed 0.002 0.000 0.003 > # using loops > g1 <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # execute the function > system.time( g1(100000) ) user system elapsed 0.107 0.002 0.109 > # using loops > g1 <- function(x) { + y = vector(length=x) + for (i in 1:x) y[i]=i/(i+1) + y + } > # execute the function > system.time( g1(100000) ) user system elapsed 0.107 0.002 0.109

50 optimization 50 How to speed up the code? Avoid dynamically expanding arrays

51 optimization 51 How to speed up the code? Avoid dynamically expanding arrays > vec2 <- vector( + mode=“numeric”,length=100000) > vec2 <- vector( + mode=“numeric”,length=100000) > vec1<-NULL

52 optimization 52 How to speed up the code? Avoid dynamically expanding arrays > vec2 <- vector( + mode=“numeric”,length=100000) > # execute the command > system.time( + for(i in 1:100000) + vec2[i] <- mean(1:100)) user system elapsed 2.324 0.063 2.388 > vec2 <- vector( + mode=“numeric”,length=100000) > # execute the command > system.time( + for(i in 1:100000) + vec2[i] <- mean(1:100)) user system elapsed 2.324 0.063 2.388 > vec1<-NULL > # execute the command > system.time( + for(i in 1:100000) + vec1 <- c(vec1,mean(1:100))) user system elapsed 58.181 0.193 58.417 > vec1<-NULL > # execute the command > system.time( + for(i in 1:100000) + vec1 <- c(vec1,mean(1:100))) user system elapsed 58.181 0.193 58.417

53 optimization 53 How to speed up the code? Avoid dynamically expanding arrays > f2<-function(x){ + vec2 <- vector( + mode="numeric",length=100000) + for(i in 1:100000) + vec2[i] <- mean(1:10) + } > # execute the command > system.time( f2(0) ) user system elapsed 2.096 0.067 2.163 > f2<-function(x){ + vec2 <- vector( + mode="numeric",length=100000) + for(i in 1:100000) + vec2[i] <- mean(1:10) + } > # execute the command > system.time( f2(0) ) user system elapsed 2.096 0.067 2.163 > f1<-function(x){ + vec1 <- NULL + for(i in 1:100000) + vec1 <- c(vec1,mean(1:10)) + } > # execute the command > system.time( f1(0) ) user system elapsed 57.035 0.209 57.280 > f1<-function(x){ + vec1 <- NULL + for(i in 1:100000) + vec1 <- c(vec1,mean(1:10)) + } > # execute the command > system.time( f1(0) ) user system elapsed 57.035 0.209 57.280

54 optimization 54 How to speed up the code? Use optimized R-functions, i.e. rowSums(), rowMeans(), table(), etc. In some simple cases – it is worth it to write your own!

55 optimization 55 How to speed up the code? Use optimized R-functions, i.e. rowSums(), rowMeans(), table(), etc. In some simple cases – it is worth it to write your own! > matx <- matrix + (rnorm(1000000),100000,10) > # execute the command > system.time(rowMeans(matx)) user system elapsed 0.013 0.000 0.014 > matx <- matrix + (rnorm(1000000),100000,10) > # execute the command > system.time(rowMeans(matx)) user system elapsed 0.013 0.000 0.014 > matx <- matrix + (rnorm(1000000),100000,10) > # execute the command > system.time(apply(matx,1,mean)) user system elapsed 2.686 0.057 2.748 > matx <- matrix + (rnorm(1000000),100000,10) > # execute the command > system.time(apply(matx,1,mean)) user system elapsed 2.686 0.057 2.748

56 optimization 56 How to speed up the code? Use optimized R-functions, i.e. rowSums(), rowMeans(), table(), etc. In some simple cases – it is worth it to write your own! > system.time( + for(i in 1:100000) + sum(1:100) / length(1:100) ) user system elapsed 0.485 0.013 0.498 > system.time( + for(i in 1:100000) + sum(1:100) / length(1:100) ) user system elapsed 0.485 0.013 0.498 > system.time( + for(i in 1:100000)mean(1:100)) user system elapsed 1.862 0.052 1.914 > system.time( + for(i in 1:100000)mean(1:100)) user system elapsed 1.862 0.052 1.914

57 optimization 57 How to speed up the code? Use vectors Avoid dynamically expanding arrays Use optimized R-functions, i.e. rowSums(), rowMeans(), table(), etc. In some simple cases – it is worth it to write your own implementation!

58 optimization 58 How to speed up the code? Use vectors Avoid dynamically expanding arrays Use optimized R-functions, i.e. rowSums(), rowMeans(), table(), etc. In some simple cases – it is worth it to write your own implementation! Use R - compiler or C/C++ code

59 compiling 59 Use library(compiler) : cmpfun() - compile existing function cmpfile() - compile source file loadcmp() - load compiled source file

60 compiling 60 # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s }

61 compiling 61 # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } > # load compiler library > library (compiler) > > # load compiler library > library (compiler) >

62 compiling 62 # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } > # load compiler library > library (compiler) > # load function from a source file (if necessary) > source ("fsum.r") > > # load compiler library > library (compiler) > # load function from a source file (if necessary) > source ("fsum.r") > fsum.r

63 compiling 63 # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } # dummy function fsum <- function(x){ s <- 0 for ( n in x) s <- s+n s } > # load compiler library > library (compiler) > # load function from a source file (if necessary) > source ("fsum.r") > # compile the function > fsumcomp <- cmpfun(fsum) > # load compiler library > library (compiler) > # load function from a source file (if necessary) > source ("fsum.r") > # compile the function > fsumcomp <- cmpfun(fsum) fsum.r

64 compiling 64 > # run non-compiled version > system.time(fsum(1:100000)) user system elapsed 0.071 0.000 0.071 > # run non-compiled version > system.time(fsum(1:100000)) user system elapsed 0.071 0.000 0.071 Using compiled functions decreases the time of computation. > # run compiled version > system.time(fsumcomp(1:100000)) user system elapsed 0.025 0.001 0.026 > # run compiled version > system.time(fsumcomp(1:100000)) user system elapsed 0.025 0.001 0.026

65 compiling 65 A source file can be compiled with cmpfile(). The resulting file has to then be loaded with loadcmp(). > # compile source file > cmpfile("fsum.r") saving to file "fsum.Rc"... Done > # load compiled source > loadcmp("fsum.Rc") > # compile source file > cmpfile("fsum.r") saving to file "fsum.Rc"... Done > # load compiled source > loadcmp("fsum.Rc")

66 profiling 66 Profiling is a tool, which can be used to find out how much time is spent in each function. Code profiling can give a way to locate those parts of a program which will benefit most from optimization. Rprof() – turn profiling on Rprof(NULL) – turn profiling off summaryRprof("Rprof.out") – Summarize the output of the Rprof() function to show the amount of time used by different R functions.

67 profiling 67 # slow version of BM function bmslow <- function (x, steps){ BM <- matrix(x, nrow=length(x)) for (i in 1:steps){ # sample from normal distribution z <- rnorm(2) # attach a new column to the output matrix BM <- cbind (BM,z) } return(BM) } # slow version of BM function bmslow <- function (x, steps){ BM <- matrix(x, nrow=length(x)) for (i in 1:steps){ # sample from normal distribution z <- rnorm(2) # attach a new column to the output matrix BM <- cbind (BM,z) } return(BM) } Brownian Motion simulation. Input: x - initial position, steps - number of steps bm.R

68 profiling 68 # a faster version of BM function bm <- function (x, steps){ # allocate enough space to hold the output matrix BM <- matrix(nrow = length(x), ncol=steps+1) # add initial point to the matrix BM[,1] = x # sample from normal distribution (delX, delY) z <- matrix(rnorm(steps*length(x)),nrow=length(x)) for (i in 1:steps) BM[,i+1] <- BM[,i] + z[,i] return(BM) } # a faster version of BM function bm <- function (x, steps){ # allocate enough space to hold the output matrix BM <- matrix(nrow = length(x), ncol=steps+1) # add initial point to the matrix BM[,1] = x # sample from normal distribution (delX, delY) z <- matrix(rnorm(steps*length(x)),nrow=length(x)) for (i in 1:steps) BM[,i+1] <- BM[,i] + z[,i] return(BM) } Brownian Motion simulation. Input: x - initial position, steps - number of steps bm.R

69 profiling 69 > # load compiler library (if you have not done it before) > require (compiler) > # compile the source file (writes bm.Rc) > cmpfile ("bm.R") > # load function from a compiled file > loadcmp ("bm.Rc") > # load compiler library (if you have not done it before) > require (compiler) > # compile the source file (writes bm.Rc) > cmpfile ("bm.R") > # load function from a compiled file > loadcmp ("bm.Rc")

70 profiling 70 > # simulate 100 steps > BMsmall <- bm(c(0,0),100) > # plot the result > plot(BMsmall[1,],BMsmall[2,],…) > # simulate 100 steps > BMsmall <- bm(c(0,0),100) > # plot the result > plot(BMsmall[1,],BMsmall[2,],…)

71 profiling 71 > # start profiling slow function > Rprof("bmslow.out") # optional – provide output file name > # run function > BMS <- bmslow(c(0,0), 100000) > # finish profiling > Rprof(NULL) > # start profiling slow function > Rprof("bmslow.out") # optional – provide output file name > # run function > BMS <- bmslow(c(0,0), 100000) > # finish profiling > Rprof(NULL)

72 profiling 72 > # start profiling faster function > Rprof("bm.out") # optional – provide output file name > # run function > BM <- bm(c(0,0), 100000) > # finish profiling > Rprof(NULL) > # start profiling faster function > Rprof("bm.out") # optional – provide output file name > # run function > BM <- bm(c(0,0), 100000) > # finish profiling > Rprof(NULL)

73 profiling 73 > summaryRprof("bmslow.out") $by.self self.time self.pct total.time total.pct "cbind" 400.52 99.39 400.52 99.39 "rnorm" 1.70 0.42 1.70 0.42 "bmslow" 0.74 0.18 402.96 100.00 … > summaryRprof("bmslow.out") $by.self self.time self.pct total.time total.pct "cbind" 400.52 99.39 400.52 99.39 "rnorm" 1.70 0.42 1.70 0.42 "bmslow" 0.74 0.18 402.96 100.00 … > summaryRprof("bm.out") $by.self self.time self.pct total.time total.pct "bm" 0.62 75.61 0.82 100.00 "rnorm" 0.08 9.76 0.08 9.76 "matrix" 0.04 4.88 0.12 14.63 "+" 0.04 4.88 0.04 4.88 ":" 0.04 4.88 0.04 4.88 … > summaryRprof("bm.out") $by.self self.time self.pct total.time total.pct "bm" 0.62 75.61 0.82 100.00 "rnorm" 0.08 9.76 0.08 9.76 "matrix" 0.04 4.88 0.12 14.63 "+" 0.04 4.88 0.04 4.88 ":" 0.04 4.88 0.04 4.88 …

74 74 This tutorial has been made possible by Scientific Computing and Visualization group at Boston University. Katia Oleinik koleinik@bu.edu


Download ppt "Programming in R coding, debugging and optimizing Katia Oleinik Scientific Computing and Visualization Boston University"

Similar presentations


Ads by Google