R + Hadoop = big data analytics Antonio Piccolboni Revolution Analytics
mtcars[1:15,] mpg cyl disp hp drat wt qsec vs am gear carb Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout Valiant Duster 360 Merc 240D Merc 230 Merc 280 Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood
str( apply(mtcars[,1:5], 2, function(x) aggregate(x,list(x),length))) List of 5 $ mpg :'data.frame': 25 obs. of 2 variables: ..$ Group.1: num [1:25] ..$ x : int [1:25] $ cyl :'data.frame': 3 obs. of 2 variables: ..$ Group.1: num [1:3] ..$ x : int [1:3] $ disp:'data.frame': 27 obs. of 2 variables: ..$ Group.1: num [1:27] ..$ x : int [1:27] $ hp :'data.frame': 22 obs. of 2 variables: ..$ Group.1: num [1:22] ..$ x : int [1:22] $ drat:'data.frame': 22 obs. of 2 variables: ..$ Group.1: num [1:22] ..$ x : int [1:22]
# Count how often each distinct value occurs in every column of `data`,
# expressed as a map-reduce job.
#
# NOTE(review): `mapreduce`, `keyval` and `from.dfs` are not defined in this
# text; presumably they come from the RHadoop rmr package -- confirm.
#
# Args:
#   data: input handed to `mapreduce` as its first argument.
#   ...:  further arguments forwarded unchanged to `mapreduce`.
#
# Returns:
#   Whatever `from.dfs` returns for the finished job.
count = function(data, ...) {
  # Map step: build one frequency table per column of the incoming chunk.
  # `aggregate(x, list(x), length)` yields a data frame with the columns
  # `Group.1` (distinct value) and `x` (number of occurrences).
  map.count = function(dummy, data) {
    counts = apply(data, 2, function(x) aggregate(x, list(x), length))
    keyval(names(counts), counts)
  }

  # Reduce step: stack the partial tables for one column and sum the counts
  # per distinct value. The result again has `Group.1` and `x` columns, so
  # the same function can also be applied to its own output as a combiner.
  reduce.count = function(colname, counts) {
    counts = do.call(rbind, counts)
    keyval(
      colname,
      list(aggregate(counts$x, list(as.character(counts$Group.1)), sum)))
  }

  from.dfs(
    mapreduce(
      data,
      map = map.count,
      reduce = reduce.count,
      combine = TRUE,  # spelled out: `T` is an ordinary, reassignable variable
      ...))
}