Split an array along a dimension

split_on_dim(
  X,
  which_dim,
  f = dimnames(X)[[which_dim]],
  drop = FALSE,
  depth = Inf
)

split_on_rows(X, f = rownames(X), drop = FALSE, depth = Inf)

split_on_cols(X, f = rownames(X), drop = FALSE, depth = Inf)

split_along_dim(X, which_dim, depth = Inf)

split_along_rows(X, depth = Inf)

split_along_cols(X, depth = Inf)

Arguments

X

an array, or list of arrays. An atomic vector without a dimension attribute is treated as a 1-dimensional array (meaning, atomic vectors without a dim attribute are only accepted if which_dim is 1). Names of the passed list are preserved. If a list of arrays, all the arrays must have the same length along the dimension being split.

which_dim

a scalar string or integer, specifying which dimension to split along. Negative integers count from the back. If a string, it must refer to a named dimension (e.g., one of names(dimnames(X))).

f

Specify how to split the dimension.

character, integer, factor

passed on to base::split(). Must be the same length as the dimension being split.

a list of vectors

Passed on to base::interaction() then base::split(). Each vector in the list must be the same length as the dimension being split.

a scalar integer

used to split into that many groups of equal size

a numeric vector where all(f < 1)

specifies the relative size proportions of the groups being split. sum(f) must be 1. For example c(0.2, 0.2, 0.6) will return approximately a 20%-20%-60% split.

drop

passed on to [.

depth

Scalar number, how many levels to recurse down. Set this if you want to explicitly treat a list as a vector (that is, a one-dimensional array). (You can alternatively set dim attributes with dim<- on the list to prevent recursion.)

split_along_dim(X, which_dim) is equivalent to split_on_dim(X, which_dim, seq_along_dim(X, which_dim)).

Value

A list of arrays, or if a list of arrays was passed in, then a list of lists of arrays.

Examples

X <- array(1:8, c(2,3,4))
X
#> , , 1 #> #> [,1] [,2] [,3] #> [1,] 1 3 5 #> [2,] 2 4 6 #> #> , , 2 #> #> [,1] [,2] [,3] #> [1,] 7 1 3 #> [2,] 8 2 4 #> #> , , 3 #> #> [,1] [,2] [,3] #> [1,] 5 7 1 #> [2,] 6 8 2 #> #> , , 4 #> #> [,1] [,2] [,3] #> [1,] 3 5 7 #> [2,] 4 6 8 #>
split_along_dim(X, 2)
#> [[1]] #> [,1] [,2] [,3] [,4] #> [1,] 1 7 5 3 #> [2,] 2 8 6 4 #> #> [[2]] #> [,1] [,2] [,3] [,4] #> [1,] 3 1 7 5 #> [2,] 4 2 8 6 #> #> [[3]] #> [,1] [,2] [,3] [,4] #> [1,] 5 3 1 7 #> [2,] 6 4 2 8 #>
# specify f as a factor, akin to base::split()
split_on_dim(X, 2, c("a", "a", "b"), drop = FALSE)
#> $a #> , , 1 #> #> [,1] [,2] #> [1,] 1 3 #> [2,] 2 4 #> #> , , 2 #> #> [,1] [,2] #> [1,] 7 1 #> [2,] 8 2 #> #> , , 3 #> #> [,1] [,2] #> [1,] 5 7 #> [2,] 6 8 #> #> , , 4 #> #> [,1] [,2] #> [1,] 3 5 #> [2,] 4 6 #> #> #> $b #> , , 1 #> #> [,1] #> [1,] 5 #> [2,] 6 #> #> , , 2 #> #> [,1] #> [1,] 3 #> [2,] 4 #> #> , , 3 #> #> [,1] #> [1,] 1 #> [2,] 2 #> #> , , 4 #> #> [,1] #> [1,] 7 #> [2,] 8 #> #>
d <- c(10, 3, 3)
X <- array(1:prod(d), d)
y <- letters[1:10]
Y <- onehot(y)

# specify `f` as relative partition sizes
if(require(zeallot) && require(magrittr) && require(purrr)) {

  c(train, validate, test) %<-% {
    list(X = X, Y = Y, y = y) %>%
      shuffle_rows() %>%
      split_on_rows(c(0.6, 0.2, 0.2)) %>%
      transpose()
  }

  str(test)
  str(train)
  str(validate)

}
#> Loading required package: magrittr
#> Loading required package: purrr
#> #> Attaching package: ‘purrr’
#> The following object is masked from ‘package:magrittr’: #> #> set_names
#> List of 3 #> $ X: int [1:2, 1:3, 1:3] 1 10 11 20 21 30 31 40 41 50 ... #> $ Y: num [1:2, 1:10] 1 0 0 0 0 0 0 0 0 0 ... #> ..- attr(*, "dimnames")=List of 2 #> .. ..$ : NULL #> .. ..$ : chr [1:10] "a" "b" "c" "d" ... #> $ y: chr [1:2] "a" "j" #> List of 3 #> $ X: int [1:6, 1:3, 1:3] 7 5 9 4 8 2 17 15 19 14 ... #> $ Y: num [1:6, 1:10] 0 0 0 0 0 0 0 0 0 0 ... #> ..- attr(*, "dimnames")=List of 2 #> .. ..$ : NULL #> .. ..$ : chr [1:10] "a" "b" "c" "d" ... #> $ y: chr [1:6] "g" "e" "i" "d" ... #> List of 3 #> $ X: int [1:2, 1:3, 1:3] 6 3 16 13 26 23 36 33 46 43 ... #> $ Y: num [1:2, 1:10] 0 0 0 0 0 1 0 0 0 0 ... #> ..- attr(*, "dimnames")=List of 2 #> .. ..$ : NULL #> .. ..$ : chr [1:10] "a" "b" "c" "d" ... #> $ y: chr [1:2] "f" "c"
# work with array data in a data frame by splitting row-wise
if(require(tibble))
  tibble(y, X = split_along_rows(X))
#> Loading required package: tibble
#> # A tibble: 10 x 2 #> y X #> <chr> <list> #> 1 a <int[,3] [3 × 3]> #> 2 b <int[,3] [3 × 3]> #> 3 c <int[,3] [3 × 3]> #> 4 d <int[,3] [3 × 3]> #> 5 e <int[,3] [3 × 3]> #> 6 f <int[,3] [3 × 3]> #> 7 g <int[,3] [3 × 3]> #> 8 h <int[,3] [3 × 3]> #> 9 i <int[,3] [3 × 3]> #> 10 j <int[,3] [3 × 3]>