Split an array along a dimension

split_on_dim(
  X,
  which_dim,
  f = dimnames(X)[[which_dim]],
  drop = FALSE,
  depth = Inf
)

split_on_rows(X, f = rownames(X), drop = FALSE, depth = Inf)

split_on_cols(X, f = rownames(X), drop = FALSE, depth = Inf)

split_along_dim(X, which_dim, depth = Inf)

split_along_rows(X, depth = Inf)

split_along_cols(X, depth = Inf)

Arguments

X	an array, or list of arrays. An atomic vector without a dimension attribute is treated as a one-dimensional array (meaning atomic vectors without a dim attribute are only accepted if `which_dim` is `1`). Names of the passed list are preserved. If a list of arrays, all the arrays must have the same length along the dimension being split.
which_dim	a scalar string or integer, specifying which dimension to split along. Negative integers count from the back. If a string, it must refer to a named dimension (e.g., one of `names(dimnames(X))`).
f	Specify how to split the dimension. One of: (1) a character, integer, or factor vector, passed on to `base::split()`; it must be the same length as the dimension being split. (2) A list of vectors, passed on to `base::interaction()` and then `base::split()`; each vector in the list must be the same length as the dimension being split. (3) A scalar integer, used to split into that many groups of equal size. (4) A numeric vector where `all(f < 1)`, specifying the relative size proportions of the groups being split; `sum(f)` must be `1`. For example, `c(0.2, 0.2, 0.6)` will return approximately a 20%-20%-60% split.
drop	passed on to `[`.
depth	Scalar number, how many levels to recurse down. Set this if you want to explicitly treat a list as a vector (that is, a one-dimensional array). (You can alternatively set dim attributes with `dim<-` on the list to prevent recursion.) Note that `split_along_dim(X, which_dim)` is equivalent to `split_on_dim(X, which_dim, seq_along_dim(X, which_dim))`.

Value

A list of arrays, or if a list of arrays was passed in, then a list of lists of arrays.

Examples

X <- array(1:8, c(2,3,4))
X
#> , , 1
#> 
#>      [,1] [,2] [,3]
#> [1,]    1    3    5
#> [2,]    2    4    6
#> 
#> , , 2
#> 
#>      [,1] [,2] [,3]
#> [1,]    7    1    3
#> [2,]    8    2    4
#> 
#> , , 3
#> 
#>      [,1] [,2] [,3]
#> [1,]    5    7    1
#> [2,]    6    8    2
#> 
#> , , 4
#> 
#>      [,1] [,2] [,3]
#> [1,]    3    5    7
#> [2,]    4    6    8
#> 
split_along_dim(X, 2)
#> [[1]]
#>      [,1] [,2] [,3] [,4]
#> [1,]    1    7    5    3
#> [2,]    2    8    6    4
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4]
#> [1,]    3    1    7    5
#> [2,]    4    2    8    6
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4]
#> [1,]    5    3    1    7
#> [2,]    6    4    2    8
#> 

# specify f as a factor, akin to base::split()
split_on_dim(X, 2, c("a", "a", "b"), drop = FALSE)
#> $a
#> , , 1
#> 
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> 
#> , , 2
#> 
#>      [,1] [,2]
#> [1,]    7    1
#> [2,]    8    2
#> 
#> , , 3
#> 
#>      [,1] [,2]
#> [1,]    5    7
#> [2,]    6    8
#> 
#> , , 4
#> 
#>      [,1] [,2]
#> [1,]    3    5
#> [2,]    4    6
#> 
#> 
#> $b
#> , , 1
#> 
#>      [,1]
#> [1,]    5
#> [2,]    6
#> 
#> , , 2
#> 
#>      [,1]
#> [1,]    3
#> [2,]    4
#> 
#> , , 3
#> 
#>      [,1]
#> [1,]    1
#> [2,]    2
#> 
#> , , 4
#> 
#>      [,1]
#> [1,]    7
#> [2,]    8
#> 
#> 

d <- c(10, 3, 3)
X <- array(1:prod(d), d)
y <- letters[1:10]
Y <- onehot(y)

# specify `f` as relative partition sizes
if(require(zeallot) && require(magrittr) && require(purrr)) {

c(train, validate, test) %<-% {
  list(X = X, Y = Y, y = y) %>%
    shuffle_rows() %>%
    split_on_rows(c(0.6, 0.2, 0.2)) %>%
    transpose()
}

str(test)
str(train)
str(validate)

}
#> Loading required package: magrittr
#> Loading required package: purrr
#> 
#> Attaching package: ‘purrr’
#> The following object is masked from ‘package:magrittr’:
#> 
#>     set_names
#> List of 3
#>  $ X: int [1:2, 1:3, 1:3] 1 10 11 20 21 30 31 40 41 50 ...
#>  $ Y: num [1:2, 1:10] 1 0 0 0 0 0 0 0 0 0 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : NULL
#>   .. ..$ : chr [1:10] "a" "b" "c" "d" ...
#>  $ y: chr [1:2] "a" "j"
#> List of 3
#>  $ X: int [1:6, 1:3, 1:3] 7 5 9 4 8 2 17 15 19 14 ...
#>  $ Y: num [1:6, 1:10] 0 0 0 0 0 0 0 0 0 0 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : NULL
#>   .. ..$ : chr [1:10] "a" "b" "c" "d" ...
#>  $ y: chr [1:6] "g" "e" "i" "d" ...
#> List of 3
#>  $ X: int [1:2, 1:3, 1:3] 6 3 16 13 26 23 36 33 46 43 ...
#>  $ Y: num [1:2, 1:10] 0 0 0 0 0 1 0 0 0 0 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : NULL
#>   .. ..$ : chr [1:10] "a" "b" "c" "d" ...
#>  $ y: chr [1:2] "f" "c"


# work with array data in a data frame by splitting row-wise
if(require(tibble))
  tibble(y, X = split_along_rows(X))
#> Loading required package: tibble
#> # A tibble: 10 x 2
#>    y     X                
#>    <chr> <list>           
#>  1 a     <int[,3] [3 × 3]>
#>  2 b     <int[,3] [3 × 3]>
#>  3 c     <int[,3] [3 × 3]>
#>  4 d     <int[,3] [3 × 3]>
#>  5 e     <int[,3] [3 × 3]>
#>  6 f     <int[,3] [3 × 3]>
#>  7 g     <int[,3] [3 × 3]>
#>  8 h     <int[,3] [3 × 3]>
#>  9 i     <int[,3] [3 × 3]>
#> 10 j     <int[,3] [3 × 3]>