-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes #21. This PR implements the functionality to run FastIce on multiple GPUs and multiple nodes using MPI for communication. Features include overlapping MPI communication and computations on GPU, compatibility with Fields and BoundaryConditions.
- Loading branch information
Showing
74 changed files
with
2,495 additions
and
1,342 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
style = "yas" | ||
margin = 140 | ||
align_assignment = true | ||
whitespace_ops_in_indices = false | ||
import_to_using = false | ||
pipe_to_function_call = false | ||
always_use_return = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Library | ||
|
||
## Modules | ||
|
||
### Grids and Distributed | ||
|
||
```@autodocs | ||
Modules = [FastIce.Grids, FastIce.Distributed] | ||
Order = [:type, :function] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Library | ||
|
||
## Running tests | ||
|
||
### CPU tests | ||
|
||
To run the FastIce test suite on the CPU, simply run `test` from within the package mode or using `Pkg`: | ||
```julia-repl | ||
using Pkg | ||
Pkg.test("FastIce") | ||
``` | ||
|
||
### GPU tests | ||
|
||
To run the FastIce test suite on the CUDA or ROC backend (Nvidia or AMD GPUs, respectively), run the tests using `Pkg`, adding the following `test_args`: | ||
|
||
#### For CUDA backend (Nvidia GPU): | ||
|
||
```julia-repl | ||
using Pkg | ||
Pkg.test("FastIce"; test_args=["--backend=CUDA"]) | ||
``` | ||
|
||
#### For ROC backend (AMD GPU): | ||
|
||
```julia-repl | ||
using Pkg | ||
Pkg.test("FastIce"; test_args=["--backend=AMDGPU"]) | ||
``` |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Extension module adding AMDGPU (`HIPDevice` / `ROCBackend`) methods to the
# `FastIce.Architectures` device interface.
module FastIceAMDGPUExt

using FastIce, AMDGPU, AMDGPU.ROCKernels
import FastIce.Architectures: heuristic_groupsize, set_device!, get_device

# Select `dev` by forwarding to `AMDGPU.device!`.
function set_device!(dev::HIPDevice)
    return AMDGPU.device!(dev)
end

# Build the device handle for `id`; the id is passed to `HIPDevice` unchanged.
function get_device(::ROCBackend, id::Integer)
    return HIPDevice(id)
end

# Group sizes used for `Val{1}`, `Val{2}` and `Val{3}` launches
# (the tuple length matches the `Val` parameter).
heuristic_groupsize(::HIPDevice, ::Val{1}) = (256,)
heuristic_groupsize(::HIPDevice, ::Val{2}) = (128, 2)
heuristic_groupsize(::HIPDevice, ::Val{3}) = (128, 2, 1)

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Extension module adding CUDA (`CuDevice` / `CUDABackend`) methods to the
# `FastIce.Architectures` device interface.
module FastIceCUDAExt

using FastIce, CUDA, CUDA.CUDAKernels

import FastIce.Architectures: heuristic_groupsize, set_device!, get_device

# Select `dev` by forwarding to `CUDA.device!`.
function set_device!(dev::CuDevice)
    return CUDA.device!(dev)
end

# Build the device handle for `id`. The id is shifted down by one before being
# passed to `CuDevice` (presumably FastIce ids are 1-based while CUDA ordinals
# start at 0 — confirm against callers).
function get_device(::CUDABackend, id::Integer)
    ordinal = id - 1
    return CuDevice(ordinal)
end

# Group sizes used for `Val{1}`, `Val{2}` and `Val{3}` launches
# (the tuple length matches the `Val` parameter).
heuristic_groupsize(::CuDevice, ::Val{1}) = (256,)
heuristic_groupsize(::CuDevice, ::Val{2}) = (32, 8)
heuristic_groupsize(::CuDevice, ::Val{3}) = (32, 8, 1)

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
[deps] | ||
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" | ||
FastIce = "e0de9f13-a007-490e-b696-b07d031015ca" | ||
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" | ||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | ||
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" | ||
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
using FastIce.Architectures | ||
using FastIce.Distributed | ||
using FastIce.Fields | ||
using FastIce.Grids | ||
using FastIce.BoundaryConditions | ||
using FastIce.KernelLaunch | ||
|
||
using KernelAbstractions | ||
using MPI | ||
|
||
# Kernel: write `val` into `f` at the global Cartesian index of each work-item.
# When `offset` is given (not `nothing`) it is added to the index first.
@kernel function fill_field!(f, val, offset=nothing)
    idx = @index(Global, Cartesian)
    isnothing(offset) || (idx += offset)
    f[idx] = val
end
|
||
MPI.Init()

# Architecture on the CPU backend; `(2, 2, 2)` is presumably the process-grid
# shape (it is multiplied with the local field size below to size the gathered array).
arch  = Architecture(CPU(), (2, 2, 2))
topo  = details(arch)
grid  = CartesianGrid(; origin=(0.0, 0.0, 0.0), extent=(1.0, 1.0, 1.0), size=(5, 7, 5))
field = Field(backend(arch), grid, (Center(), Center(), Center()); halo=1)

me      = global_rank(topo)
is_root = me == 0

# Mark every entry (halo included) so untouched cells are easy to spot.
fill!(parent(field), Inf)

# Rank-dependent Dirichlet value applied on each side of each dimension.
bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(-me - 10),))

boundary_conditions = override_boundary_conditions(arch, ((bc, bc), (bc, bc), (bc, bc)); exchange=true)

# Fill the field with this process' rank.
launch!(arch, grid, fill_field! => (field, me);
        location=location(field),
        hide_boundaries=HideBoundaries{3}(arch),
        boundary_conditions=boundary_conditions,
        outer_width=(2, 2, 2))

# Only rank 0 allocates the destination of the gather; other ranks pass `nothing`.
field_g = is_root ? KernelAbstractions.allocate(Architectures.backend(arch), eltype(field), dimensions(topo) .* size(field)) : nothing

gather!(arch, field_g, field)

if is_root
    println("global matrix:")
    display(field_g)
end

MPI.Finalize()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
using FastIce.Grids | ||
using FastIce.GridOperators | ||
using FastIce.Fields | ||
using FastIce.Architectures | ||
using FastIce.BoundaryConditions | ||
using FastIce.Distributed | ||
using FastIce.KernelLaunch | ||
|
||
using KernelAbstractions | ||
using MPI | ||
|
||
using Plots | ||
|
||
# Kernel: decrement `C` at each in-bounds index by `dt` times the sum of the
# x- and y-derivatives of the flux components `qC.x` and `qC.y`, each divided by
# the matching spacing in `Δ`. A non-`nothing` `offset` shifts the index first.
@kernel function update_C!(C, qC, dt, Δ, offset=nothing)
    idx = @index(Global, Cartesian)
    if !isnothing(offset)
        idx += offset
    end
    # `@inbounds` covers the whole guarded block, derivative operators included.
    @inbounds if checkbounds(Bool, C, idx)
        div_q = ∂ᶜx(qC.x, idx) / Δ.x + ∂ᶜy(qC.y, idx) / Δ.y
        C[idx] -= dt * div_q
    end
end
|
||
# Kernel: set the flux components from `C`:
# `qC.x = -dc * ∂ᵛx(C) / Δ.x` and `qC.y = -dc * ∂ᵛy(C) / Δ.y`.
# Each component is bounds-checked separately, so an index may update one
# component and not the other. A non-`nothing` `offset` shifts the index first.
@kernel function update_qC!(qC, C, dc, Δ, offset=nothing)
    idx = @index(Global, Cartesian)
    if !isnothing(offset)
        idx += offset
    end
    qx, qy = qC.x, qC.y
    @inbounds if checkbounds(Bool, qx, idx)
        qx[idx] = -dc * ∂ᵛx(C, idx) / Δ.x
    end
    @inbounds if checkbounds(Bool, qy, idx)
        qy[idx] = -dc * ∂ᵛy(C, idx) / Δ.y
    end
end
|
||
"""
    diffusion_2D(ka_backend=CPU())

Run an explicit 2D diffusion example on a distributed Cartesian grid.

Each of the 1000 iterations launches `update_qC!` (flux from `C`) followed by
`update_C!` (update of `C` from the flux). Every 5th iteration `C` is gathered
into an array allocated on global rank 0, which draws it as a heatmap frame;
rank 0 finally writes the frames to `"C.gif"`.

`ka_backend` is the backend handed to `Architecture`. Returns `nothing`.
"""
function diffusion_2D(ka_backend=CPU())
    # architecture and process topology
    arch    = Architecture(ka_backend, (0, 0))
    topo    = details(arch)
    is_root = global_rank(topo) == 0

    # physics
    lx, ly = 10.0, 10.0
    dc     = 1

    # numerics
    nt     = 1000
    size_g = global_grid_size(topo, (32, 32))

    # grids, spacing and time step
    global_grid = CartesianGrid(; origin=(-0.5lx, -0.5ly), extent=(lx, ly), size=size_g)
    grid        = local_grid(global_grid, topo)
    Δ           = NamedTuple{(:x, :y)}(spacing(global_grid))
    dt          = minimum(Δ)^2 / dc / ndims(grid) / 2.1

    hide_boundaries = HideBoundaries{ndims(grid)}(arch)
    outer_width     = (4, 4)

    # fields
    C  = Field(arch, grid, Center(); halo=1)
    qC = (x = Field(arch, grid, (Vertex(), Center()); halo=1),
          y = Field(arch, grid, (Center(), Vertex()); halo=1))

    # destination of the gather, allocated on rank 0 only
    C_g = is_root ? KernelAbstractions.allocate(Architectures.backend(arch), eltype(C), size_g) : nothing

    # initial condition: zero flux, Gaussian `C`
    for comp in qC
        fill!(parent(comp), 0.0)
    end
    set!(C, grid, (x, y) -> exp(-x^2 - y^2))

    # boundary conditions
    zero_flux_bc = DirichletBC{FullCell}(0.0)
    flux_bc_x    = BoundaryConditionsBatch((qC.x, qC.y), (zero_flux_bc, nothing))
    flux_bc_y    = BoundaryConditionsBatch((qC.x, qC.y), (nothing, zero_flux_bc))
    # zero flux at physical boundaries and nothing at MPI boundaries
    bc_q = override_boundary_conditions(arch, ((flux_bc_x, flux_bc_x), (flux_bc_y, flux_bc_y)); exchange=true)
    # nothing at physical boundaries and communication at MPI boundaries
    empty_bc = BoundaryConditionsBatch((C,), nothing)
    bc_c     = override_boundary_conditions(arch, ((empty_bc, empty_bc), (empty_bc, empty_bc)); exchange=true)

    # apply once before time stepping: last dimension first, `C` before the flux
    for D in ndims(grid):-1:1, bcs in (bc_c, bc_q)
        apply_boundary_conditions!(Val(1), Val(D), arch, grid, bcs[D][1])
        apply_boundary_conditions!(Val(2), Val(D), arch, grid, bcs[D][2])
    end

    # time loop; only rank 0 owns the animation
    anim = is_root ? Animation() : nothing
    for it in 1:nt
        is_root && println("it = $it")
        launch!(arch, grid, update_qC! => (qC, C, dc, Δ);
                location=Vertex(), hide_boundaries, boundary_conditions=bc_q, outer_width)
        launch!(arch, grid, update_C! => (C, qC, dt, Δ); location=Center(), expand=1)
        synchronize(Architectures.backend(arch))

        # visualise every 5th iteration only
        it % 5 == 0 || continue
        gather!(arch, C_g, C)
        if is_root
            heatmap(C_g; aspect_ratio=1, size=(600, 600), clims=(0, 1))
            frame(anim)
        end
    end

    if is_root
        gif(anim, "C.gif")
    end

    return
end
|
||
# Script entry point: initialise MPI, run the example on the default backend,
# then shut MPI down.
MPI.Init()
diffusion_2D()
MPI.Finalize()
Oops, something went wrong.