I'm using Julia to ingest a large two dimensional data array of size 1000 x 32768; I need to break up the array into smaller square arrays. Currently, I'm doing this through a function I built to decimate the raw dataset:
function decimate_square(data,fraction=4)
# Read size of input data / calculate length of square side
sy,sx = size(data)
square_side = Int(round(sy/fraction))
# Number of achievable full squares
itersx,itersy = [Int(floor(s/square_side)) for s in [sx,sy]]
# Find left/right X values
for ix in 1:itersx
if ix!=itersx
# Full sliding square can be calculated
left = square_side*(ix-1) 1
right = square_side*(ix)
else
# Capture last square of data
left = sx-square_side 1
right = sx
end
# Find top/bottom Y values for each X
for iy in 1:itersy
if iy!=itersy
# Full sliding square can be calculated
top = square_side*(iy-1) 1
bottom = square_side*(iy)
else
# Capture last square of data
top = sy-square_side 1
bottom = sy
end
# Record data in 3d stack
cursquare = data[top:bottom,left:right]
if (ix==1)&&(iy==1); global dstack=cursquare
else; dstack=cat(dstack,cursquare,dims=3)
end
end
end
return dstack
end
Which currently takes ~20 seconds to run:
rand_arr = rand(1000,32768)
t1 = Dates.now()
dec_arr = decimate_square(rand_arr)
t2 = Dates.now()
@info(t2-t1)
[ Info: 19666 milliseconds
This is the biggest bottleneck of my analysis. Is there a pre-built function that I can use, or is there a more efficient way to decimate my array?
CodePudding user response:
You could just go with views. Suppose you want to slice your data into 64 matrices, each having size 1000 x 512. In that case you could do:
dats = view.(Ref(rand_arr),Ref(1:1000), [range(1 (i-1)*512,i*512) for i in 1:64])
The time for this on my machine is 600 nanoseconds:
julia> @btime view.(Ref($rand_arr),Ref(1:1000), [range(1 (i-1)*512,i*512) for i in 1:64]);
595.604 ns (3 allocations: 4.70 KiB)
CodePudding user response:
There's a lot of stuff going on in your code with is not recommended and making things slow. Here's a somewhat idiomatic solution, with the additional bonus of generalizing to arbitrary ranks:
julia> function square_indices(data; fraction=4)
splits = cld.(size(data), fraction)
return Iterators.map(CartesianIndices, Iterators.product(Iterators.partition.(axes(data), splits)...))
end
square_indices (generic function with 1 method)
The result of this is an iterator over CartesianIndices
, which are objects that you can use to index your squares. Either the regular data[ix]
, or view(data, ix)
, which does not create a copy. (Different fractions per dimension are possible, try test_square_indices(println, 4, 4, 4; fraction=(2, 1, 1))
.)
And to see whether it works as expected:
julia> function test_square_indices(f, s...; fraction=4)
arr = reshape(1:prod(s), s...)
for ix in square_indices(arr; fraction)
f(view(arr, ix))
end
end
test_square_indices (generic function with 1 method)
julia> # just try this on some moderatly costly function
@btime test_square_indices(v -> inv.(v), 1000, 32768)
81.980 ms (139 allocations: 250.01 MiB)
julia> test_square_indices(println, 9)
[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
julia> test_square_indices(println, 9, 5)
[1 10; 2 11; 3 12]
[4 13; 5 14; 6 15]
[7 16; 8 17; 9 18]
[19 28; 20 29; 21 30]
[22 31; 23 32; 24 33]
[25 34; 26 35; 27 36]
[37; 38; 39;;]
[40; 41; 42;;]
[43; 44; 45;;]
julia> reshape(1:9*5, 9, 5)
9×5 reshape(::UnitRange{Int64}, 9, 5) with eltype Int64:
1 10 19 28 37
2 11 20 29 38
3 12 21 30 39
4 13 22 31 40
5 14 23 32 41
6 15 24 33 42
7 16 25 34 43
8 17 26 35 44
9 18 27 36 45
julia> test_square_indices(println, 4, 4, 4; fraction=2)
[1 5; 2 6;;; 17 21; 18 22]
[3 7; 4 8;;; 19 23; 20 24]
[9 13; 10 14;;; 25 29; 26 30]
[11 15; 12 16;;; 27 31; 28 32]
[33 37; 34 38;;; 49 53; 50 54]
[35 39; 36 40;;; 51 55; 52 56]
[41 45; 42 46;;; 57 61; 58 62]
[43 47; 44 48;;; 59 63; 60 64]
julia> reshape(1:4*4*4, 4, 4, 4)
4×4×4 reshape(::UnitRange{Int64}, 4, 4, 4) with eltype Int64:
[:, :, 1] =
1 5 9 13
2 6 10 14
3 7 11 15
4 8 12 16
[:, :, 2] =
17 21 25 29
18 22 26 30
19 23 27 31
20 24 28 32
[:, :, 3] =
33 37 41 45
34 38 42 46
35 39 43 47
36 40 44 48
[:, :, 4] =
49 53 57 61
50 54 58 62
51 55 59 63
52 56 60 64
Here's a bit of an illustration of how this works:
julia> data = reshape(1:9*5, 9, 5); fraction = 3;
julia> size(data)
(9, 5)
julia> # chunk sizes
splits = cld.(size(data), fraction)
(3, 2)
julia> # every dimension chunked
Iterators.partition.(axes(data), splits) .|> collect
(UnitRange{Int64}[1:3, 4:6, 7:9], UnitRange{Int64}[1:2, 3:4, 5:5])
julia> # cross product of all chunks
Iterators.product(Iterators.partition.(axes(data), splits)...) .|> collect
3×3 Matrix{Vector{UnitRange{Int64}}}:
[1:3, 1:2] [1:3, 3:4] [1:3, 5:5]
[4:6, 1:2] [4:6, 3:4] [4:6, 5:5]
[7:9, 1:2] [7:9, 3:4] [7:9, 5:5]