The package provides a pipe-style interface for the `data.table` package. It preserves all data.table features without significant impact on performance. The `let` and `take` functions are simplified interfaces for the most common data manipulation tasks. The `query_if` function translates its arguments one-to-one to the `[.data.table` method. Additionally, there are some conveniences such as automatic conversion of a `data.frame` to a `data.table`.
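To select rows from a data.frame: `take_if(mtcars, am==0)`
To select columns from a data.frame: `take(mtcars, am, vs, mpg)`
To aggregate data: `take(mtcars, mean_mpg = mean(mpg), by = am)`
To aggregate all non-grouping columns: `take(mtcars, fun = mean, by = am)`
To aggregate several columns with one summary: `take(mtcars, mpg, hp, fun = mean, by = am)`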
To get a total summary, skip the `by` argument: `take(mtcars, fun = mean)`
Use the magrittr pipe `%>%` to chain several operations:
mtcars %>%
let(mpg_hp = mpg/hp) %>%
take(mean(mpg_hp), by = am)
To modify existing variables or add new ones:
mtcars %>%
let(new_var = 42,
new_var2 = new_var*hp) %>%
head()
To drop a variable, assign `NULL` to it: `let(mtcars, am = NULL) %>% head()`
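For parametric assignment, where the name of the target variable is stored in another variable, use `:=`:
new_var = "my_var"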
old_var = "mpg"
mtcars %>%
let((new_var) := get(old_var)*2) %>%
head()
For demonstration we will use the well-known `mtcars` dataset and some examples from the `dplyr` package.
library(maditr)
##
## To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
data(mtcars)
# Newly created variables are available immediately
mtcars %>%
let(
cyl2 = cyl * 2,
cyl4 = cyl2 * 2
) %>% head()
## mpg cyl disp hp drat wt qsec vs am gear carb cyl2 cyl4
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 12 24
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 12 24
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8 16
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 12 24
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 16 32
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 12 24
# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
let(
mpg = NULL,
disp = disp * 0.0163871 # convert to litres
) %>% head()
## cyl disp hp drat wt qsec vs am gear carb
## 1: 6 2.621936 110 3.90 2.620 16.46 0 1 4 4
## 2: 6 2.621936 110 3.90 2.875 17.02 0 1 4 4
## 3: 4 1.769807 93 3.85 2.320 18.61 1 1 4 1
## 4: 6 4.227872 110 3.08 3.215 19.44 1 0 3 1
## 5: 8 5.899356 175 3.15 3.440 17.02 0 0 3 2
## 6: 6 3.687098 105 2.76 3.460 20.22 1 0 3 1
# window functions are useful for grouped computations
mtcars %>%
let(rank = rank(-mpg, ties.method = "min"),
by = cyl) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb rank
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 2
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6
# You can drop variables by setting them to NULL
mtcars %>%
let(cyl = NULL) %>%
head()
## mpg disp hp drat wt qsec vs am gear carb
## 1: 21.0 160 110 3.90 2.620 16.46 0 1 4 4
## 2: 21.0 160 110 3.90 2.875 17.02 0 1 4 4
## 3: 22.8 108 93 3.85 2.320 18.61 1 1 4 1
## 4: 21.4 258 110 3.08 3.215 19.44 1 0 3 1
## 5: 18.7 360 175 3.15 3.440 17.02 0 0 3 2
## 6: 18.1 225 105 2.76 3.460 20.22 1 0 3 1
# keeps all existing variables
mtcars %>%
let(displ_l = disp / 61.0237) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb displ_l
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2.621932
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2.621932
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 1.769804
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 4.227866
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 5.899347
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 3.687092
# keeps only the variables you create
mtcars %>%
take(displ_l = disp / 61.0237) %>%
head()
## displ_l
## 1: 2.621932
## 2: 2.621932
## 3: 1.769804
## 4: 4.227866
## 5: 5.899347
## 6: 3.687092
# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
let(cyl = cyl * var) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1: 21.0 600 160 110 3.90 2.620 16.46 0 1 4 4
## 2: 21.0 600 160 110 3.90 2.875 17.02 0 1 4 4
## 3: 22.8 400 108 93 3.85 2.320 18.61 1 1 4 1
## 4: 21.4 600 258 110 3.08 3.215 19.44 1 0 3 1
## 5: 18.7 800 360 175 3.15 3.440 17.02 0 0 3 2
## 6: 18.1 600 225 105 2.76 3.460 20.22 1 0 3 1
# filter by condition
mtcars %>%
take_if(am==0) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 2: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 3: 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 4: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 5: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 6: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# filter by compound condition
mtcars %>%
take_if(am==0 & mpg>mean(mpg))
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 2: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 3: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 4: 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
# A 'take' with summary functions and no 'by' argument returns aggregated data
mtcars %>%
take(mean = mean(disp), n = .N)
## mean n
## 1: 230.7219 32
# Usually, you'll want to group first
mtcars %>%
take(mean = mean(disp), n = .N, by = am)
## am mean n
## 1: 1 143.5308 13
## 2: 0 290.3789 19
# grouping by multiple variables
mtcars %>%
take(mean = mean(disp), n = .N, by = list(am, vs))
## am vs mean n
## 1: 1 0 206.2167 6
## 2: 1 1 89.8000 7
## 3: 0 1 175.1143 7
## 4: 0 0 357.6167 12
# parametric evaluation:
var = quote(mean(cyl))
take(mtcars, eval(var))
## eval(var)
## 1: 6.1875
# You can group by expressions:
mtcars %>%
take(
fun = mean,
by = list(vsam = vs + am)
)
## vsam mpg cyl disp hp drat wt qsec
## 1: 1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2: 2 28.37143 4.000000 89.8000 80.57143 4.148571 2.028286 18.70000
## 3: 0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
## gear carb
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333