diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000..85a6f88b --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,7 @@ +style = "yas" +margin = 140 +align_assignment = true +whitespace_ops_in_indices = false +import_to_using = false +pipe_to_function_call = false +always_use_return = false diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 79e07aba..92ee9d2a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -28,7 +28,7 @@ steps: matrix: setup: version: - - "1.9" + - "1.10" plugins: - JuliaCI/julia#v1: version: "{{matrix.version}}" @@ -45,10 +45,11 @@ steps: agents: queue: "juliagpu" rocm: "*" + rocmgpu: "gfx1101" timeout_in_minutes: 120 soft_fail: - exit_status: 3 env: JULIA_NUM_THREADS: 4 env: - SECRET_CODECOV_TOKEN: "0IoqMRJlTdzvkxpJfv/d4uQBzH0u5Odph6JiQLEASjdh7OPCxmy8ADN7tRPYECguthAFTVnsKeIWpgCyvaJcLY6+sFqlYraL0XOGGX/BCrBQfRvMNKfY8WRf6Hc3NFCyHqFkONFYbxYnFbpXYtdZKbfWDkRHB0bu2JqCbzhN2Yk29dmj2PZPAtUkM+0Uab7cDEzfM/FDwOEssm8bnR/HQRe02DASAyxQGVxcnSZJGZr9IWiPLq6a5qyvN7tkk6FnkMbobwkA48L2fffZQCQF/jlIxc4/yOk9r7P9RVTjWIoSxA59mfuUbKlVHokvXwlVvNS9gXbGOf9gqabfyjcqUA==;U2FsdGVkX19S+m5lHSaFCpYeyDqSxPrqJ9OGWCWUTNDao2X1lzTtCEYQG7YI4abf+9pMnp2msk8JAuw2W7ugQQ==" \ No newline at end of file + SECRET_CODECOV_TOKEN: "0IoqMRJlTdzvkxpJfv/d4uQBzH0u5Odph6JiQLEASjdh7OPCxmy8ADN7tRPYECguthAFTVnsKeIWpgCyvaJcLY6+sFqlYraL0XOGGX/BCrBQfRvMNKfY8WRf6Hc3NFCyHqFkONFYbxYnFbpXYtdZKbfWDkRHB0bu2JqCbzhN2Yk29dmj2PZPAtUkM+0Uab7cDEzfM/FDwOEssm8bnR/HQRe02DASAyxQGVxcnSZJGZr9IWiPLq6a5qyvN7tkk6FnkMbobwkA48L2fffZQCQF/jlIxc4/yOk9r7P9RVTjWIoSxA59mfuUbKlVHokvXwlVvNS9gXbGOf9gqabfyjcqUA==;U2FsdGVkX19S+m5lHSaFCpYeyDqSxPrqJ9OGWCWUTNDao2X1lzTtCEYQG7YI4abf+9pMnp2msk8JAuw2W7ugQQ==" diff --git a/Project.toml b/Project.toml index 618e7a90..1977a8c5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FastIce" uuid = "e0de9f13-a007-490e-b696-b07d031015ca" authors = ["Ludovic Raess , Ivan Utkin and contributors"] -version = "0.1.0" +version = "0.2.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -13,19 +13,27 @@ LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" -MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] Adapt = "3" +AMDGPU = "0.7" +CUDA = "5" ElasticArrays = "1" GeometryBasics = "0.4" HDF5 = "0.17" KernelAbstractions = "0.9" LightXML = "0.9" MPI = "0.20" -MPIPreferences = "0.1" OffsetArrays = "1" -Preferences = "1" \ No newline at end of file +Preferences = "1" + +[weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + +[extensions] +FastIceCUDAExt = "CUDA" +FastIceAMDGPUExt = "AMDGPU" diff --git a/README.md b/README.md index eec2c53a..b92d5422 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,4 @@ Parallel multi-xPU iterative **FastIce** flow solvers FastIce is currently under active development in order to run at scale on the LUMI AMD-powered GPU supercomputer within the EuroHPC [**STREAM** project](https://ptsolvers.github.io/GPU4GEO/stream/). -Checkout the non-existing [documentation](https://PTsolvers.github.io/FastIce.jl/dev). +Checkout the [documentation](https://PTsolvers.github.io/FastIce.jl/dev) for API reference and usage. 
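A note on the `[weakdeps]`/`[extensions]` entries added to `Project.toml` above: CUDA and AMDGPU are no longer unconditional dependencies. On Julia 1.9+ the extension modules `FastIceCUDAExt`/`FastIceAMDGPUExt` (see `ext/` below) are compiled and loaded automatically as soon as the corresponding GPU package is loaded next to FastIce. A minimal sketch of the resulting workflow, assuming only the method definitions visible in the extension files of this diff (`get_device` and `heuristic_groupsize` are called fully qualified because their export status is not shown here; `set_device!` is used unqualified in the scripts below):

```julia
using FastIce                  # core package; pulls in no GPU stack
using CUDA                     # loading CUDA next to FastIce activates FastIceCUDAExt
using FastIce.Architectures

# get_device maps a 1-based device id to the zero-based CuDevice,
# per the definition in ext/FastIceCUDAExt
dev = FastIce.Architectures.get_device(CUDABackend(), 1)  # CuDevice(0)
set_device!(dev)                                          # forwards to CUDA.device!
FastIce.Architectures.heuristic_groupsize(dev, Val(3))    # (32, 8, 1)
```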
diff --git a/docs/make.jl b/docs/make.jl
index ff0dfb53..a6eb60dd 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -5,11 +5,13 @@ push!(LOAD_PATH,"../src/")
 makedocs(
     sitename = "FastIce",
-    authors="Ludovic Räss, Ivan utkin and contributors",
+    authors="Ludovic Räss, Ivan Utkin and contributors",
     format = Documenter.HTML(; prettyurls=get(ENV, "CI", nothing) == "true"), # easier local build
     modules = [FastIce],
     pages=[
         "Home" => "index.md",
+        "Usage" => "usage.md",
+        "Library" => "library.md"
     ]
 )
diff --git a/docs/src/library.md b/docs/src/library.md
new file mode 100644
index 00000000..73037e89
--- /dev/null
+++ b/docs/src/library.md
@@ -0,0 +1,10 @@
+# Library
+
+## Modules
+
+### Grids
+
+```@autodocs
+Modules = [FastIce.Grids, FastIce.Distributed]
+Order = [:type, :function]
+```
diff --git a/docs/src/usage.md b/docs/src/usage.md
new file mode 100644
index 00000000..7890ed66
--- /dev/null
+++ b/docs/src/usage.md
@@ -0,0 +1,29 @@
+# Usage
+
+## Running tests
+
+### CPU tests
+
+To run the FastIce test suite on the CPU, simply run `test` from the package mode or use `Pkg`:
+```julia-repl
+using Pkg
+Pkg.test("FastIce")
+```
+
+### GPU tests
+
+To run the FastIce test suite on the CUDA or ROC backend (Nvidia or AMD GPUs, respectively), run the tests using `Pkg`, adding the following `test_args`:
+
+#### For the CUDA backend (Nvidia GPU):
+
+```julia-repl
+using Pkg
+Pkg.test("FastIce"; test_args=["--backend=CUDA"])
+```
+
+#### For the ROC backend (AMD GPU):
+
+```julia-repl
+using Pkg
+Pkg.test("FastIce"; test_args=["--backend=AMDGPU"])
+```
diff --git a/ext/AMDGPUExt/AMDGPUExt.jl b/ext/AMDGPUExt/AMDGPUExt.jl
deleted file mode 100644
index 5bb5f5a7..00000000
--- a/ext/AMDGPUExt/AMDGPUExt.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-module AMDGPUExt
-
-using AMDGPU
-using KernelAbstractions
-
-using FastIce.Architecture
-
-set_device!(dev::HIPDevice) = device!(dev)
-
-heuristic_groupsize(::ROCBackend, ::Val{1}) = (256, )
-heuristic_groupsize(::ROCBackend, ::Val{2}) = (128, 2, )
-heuristic_groupsize(::ROCBackend, ::Val{3}) = (128, 2, 1, )
-
-end
\ No newline at end of file
diff --git a/ext/CUDAExt/CUDAExt.jl b/ext/CUDAExt/CUDAExt.jl
deleted file mode 100644
index 27f8841f..00000000
--- a/ext/CUDAExt/CUDAExt.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-module CUDAExt
-
-using CUDA
-using KernelAbstractions
-
-using FastIce.Architecture
-
-set_device!(dev::CuDevice) = device!(dev)
-
-heuristic_groupsize(::CUDABackend, ::Val{1}) = (256, )
-heuristic_groupsize(::CUDABackend, ::Val{2}) = (32, 8, )
-heuristic_groupsize(::CUDABackend, ::Val{3}) = (32, 8, 1, )
-
-end
\ No newline at end of file
diff --git a/ext/FastIceAMDGPUExt/FastIceAMDGPUExt.jl b/ext/FastIceAMDGPUExt/FastIceAMDGPUExt.jl
new file mode 100644
index 00000000..30e5a159
--- /dev/null
+++ b/ext/FastIceAMDGPUExt/FastIceAMDGPUExt.jl
@@ -0,0 +1,14 @@
+module FastIceAMDGPUExt
+
+using FastIce, AMDGPU, AMDGPU.ROCKernels
+import FastIce.Architectures: heuristic_groupsize, set_device!, get_device
+
+set_device!(dev::HIPDevice) = AMDGPU.device!(dev)
+
+get_device(::ROCBackend, id::Integer) = HIPDevice(id)
+
+heuristic_groupsize(::HIPDevice, ::Val{1}) = (256, )
+heuristic_groupsize(::HIPDevice, ::Val{2}) = (128, 2, )
+heuristic_groupsize(::HIPDevice, ::Val{3}) = (128, 2, 1, )
+
+end
diff --git a/ext/FastIceCUDAExt/FastIceCUDAExt.jl b/ext/FastIceCUDAExt/FastIceCUDAExt.jl
new file mode 100644
index 00000000..9918672e
--- /dev/null
+++ b/ext/FastIceCUDAExt/FastIceCUDAExt.jl
@@ -0,0 +1,15 @@
+module FastIceCUDAExt
+
+using FastIce, CUDA, CUDA.CUDAKernels
+
+import 
FastIce.Architectures: heuristic_groupsize, set_device!, get_device + +set_device!(dev::CuDevice) = CUDA.device!(dev) + +get_device(::CUDABackend, id::Integer) = CuDevice(id - 1) + +heuristic_groupsize(::CuDevice, ::Val{1}) = (256,) +heuristic_groupsize(::CuDevice, ::Val{2}) = (32, 8) +heuristic_groupsize(::CuDevice, ::Val{3}) = (32, 8, 1) + +end diff --git a/scripts_future_API/Project.toml b/scripts_future_API/Project.toml index 54f6d3a4..6d8438c2 100644 --- a/scripts_future_API/Project.toml +++ b/scripts_future_API/Project.toml @@ -1,5 +1,6 @@ [deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" FastIce = "e0de9f13-a007-490e-b696-b07d031015ca" +GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" -MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" diff --git a/scripts_future_API/benchmark_dbc.jl b/scripts_future_API/benchmark_dbc.jl new file mode 100644 index 00000000..ba40cadd --- /dev/null +++ b/scripts_future_API/benchmark_dbc.jl @@ -0,0 +1,56 @@ +using FastIce.Architectures +using FastIce.Distributed +using FastIce.Fields +using FastIce.Grids +using FastIce.BoundaryConditions +using FastIce.KernelLaunch + +using KernelAbstractions +using MPI + +@kernel function fill_field!(f, val, offset=nothing) + I = @index(Global, Cartesian) + if !isnothing(offset) + I += offset + end + f[I] = val +end + +MPI.Init() + +arch = Architecture(CPU(), (2, 2, 2)) +grid = CartesianGrid(; origin=(0.0, 0.0, 0.0), extent=(1.0, 1.0, 1.0), size=(5, 7, 5)) +field = Field(backend(arch), grid, (Center(), Center(), Center()); halo=1) + +me = global_rank(details(arch)) + +fill!(parent(field), Inf) + +bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(-me-10),)) + +boundary_conditions = override_boundary_conditions(arch, ((bc, bc), (bc, bc), (bc, bc)); exchange=true) + +hide_boundaries = HideBoundaries{3}(arch) + +outer_width = (2, 2, 2) + +launch!(arch, grid, fill_field! 
=> (field, me); location=location(field), hide_boundaries, boundary_conditions, outer_width) + +# sleep(0.25me) +# @show coordinates(details(arch)) +# display(parent(field)) + +field_g = if global_rank(details(arch)) == 0 + KernelAbstractions.allocate(Architectures.backend(arch), eltype(field), dimensions(details(arch)) .* size(field)) +else + nothing +end + +gather!(arch, field_g, field) + +if global_rank(details(arch)) == 0 + println("global matrix:") + display(field_g) +end + +MPI.Finalize() diff --git a/scripts_future_API/benchmark_diffusion_2D.jl b/scripts_future_API/benchmark_diffusion_2D.jl new file mode 100644 index 00000000..8f920a29 --- /dev/null +++ b/scripts_future_API/benchmark_diffusion_2D.jl @@ -0,0 +1,109 @@ +using FastIce.Grids +using FastIce.GridOperators +using FastIce.Fields +using FastIce.Architectures +using FastIce.BoundaryConditions +using FastIce.Distributed +using FastIce.KernelLaunch + +using KernelAbstractions +using MPI + +using Plots + +@kernel function update_C!(C, qC, dt, Δ, offset=nothing) + I = @index(Global, Cartesian) + isnothing(offset) || (I += offset) + @inbounds if checkbounds(Bool, C, I) + C[I] -= dt * (∂ᶜx(qC.x, I) / Δ.x + + ∂ᶜy(qC.y, I) / Δ.y) + end +end + +@kernel function update_qC!(qC, C, dc, Δ, offset=nothing) + I = @index(Global, Cartesian) + isnothing(offset) || (I += offset) + @inbounds if checkbounds(Bool, qC.x, I) + qC.x[I] = -dc * ∂ᵛx(C, I) / Δ.x + end + @inbounds if checkbounds(Bool, qC.y, I) + qC.y[I] = -dc * ∂ᵛy(C, I) / Δ.y + end +end + +function diffusion_2D(ka_backend=CPU()) + # setup arch + arch = Architecture(ka_backend, (0, 0)) + topo = details(arch) + # physics + lx, ly = 10.0, 10.0 + dc = 1 + # numerics + size_g = (32, 32) + nt = 1000 + # preprocessing + size_g = global_grid_size(topo, size_g) + global_grid = CartesianGrid(; origin=(-0.5lx, -0.5ly), + extent=(lx, ly), + size=size_g) + grid = local_grid(global_grid, topo) + Δ = NamedTuple{(:x, :y)}(spacing(global_grid)) + dt = minimum(Δ)^2 / dc / ndims(grid) / 2.1 + hide_boundaries = HideBoundaries{ndims(grid)}(arch) + outer_width = (4, 4) + # fields + C = Field(arch, grid, Center(); halo=1) + qC = (x = Field(arch, grid, (Vertex(), Center()); halo=1), + y = Field(arch, grid, (Center(), Vertex()); halo=1)) + C_g = if global_rank(topo) == 0 + KernelAbstractions.allocate(Architectures.backend(arch), eltype(C), size_g) + else + nothing + end + # initial condition + foreach(comp -> fill!(parent(comp), 0.0), qC) + # fill!(parent(C), me) + set!(C, grid, (x, y) -> exp(-x^2 - y^2)) + # set!(C, me) + # boundary conditions + zero_flux_bc = DirichletBC{FullCell}(0.0) + bc_q = (x = BoundaryConditionsBatch((qC.x, qC.y), (zero_flux_bc, nothing)), + y = BoundaryConditionsBatch((qC.x, qC.y), (nothing, zero_flux_bc))) + # zero flux at physical boundaries and nothing at MPI boundaries + bc_q = override_boundary_conditions(arch, ((bc_q.x, bc_q.x), (bc_q.y, bc_q.y)); exchange=true) + # nothing at physical boundaries and communication at MPI boundaries + bc_c = BoundaryConditionsBatch((C,), nothing) + bc_c = override_boundary_conditions(arch, ((bc_c, bc_c), (bc_c, bc_c)); exchange=true) + for D in ndims(grid):-1:1 + apply_boundary_conditions!(Val(1), Val(D), arch, grid, bc_c[D][1]) + apply_boundary_conditions!(Val(2), Val(D), arch, grid, bc_c[D][2]) + apply_boundary_conditions!(Val(1), Val(D), arch, grid, bc_q[D][1]) + apply_boundary_conditions!(Val(2), Val(D), arch, grid, bc_q[D][2]) + end + # time loop + if global_rank(topo) == 0 + anim = Animation() + end + for it in 1:nt + (global_rank(topo) == 
0) && println("it = $it") + launch!(arch, grid, update_qC! => (qC, C, dc, Δ); location=Vertex(), hide_boundaries, boundary_conditions=bc_q, outer_width) + launch!(arch, grid, update_C! => (C, qC, dt, Δ); location=Center(), expand=1) + synchronize(Architectures.backend(arch)) + if it % 5 == 0 + gather!(arch, C_g, C) + if global_rank(topo) == 0 + heatmap(C_g; aspect_ratio=1, size=(600, 600), clims=(0, 1)) + frame(anim) + end + end + end + if global_rank(topo) == 0 + gif(anim, "C.gif") + end + + return +end + +MPI.Init() +diffusion_2D() +MPI.Finalize() diff --git a/scripts_future_API/benchmark_diffusion_3D.jl b/scripts_future_API/benchmark_diffusion_3D.jl new file mode 100644 index 00000000..ebcb661e --- /dev/null +++ b/scripts_future_API/benchmark_diffusion_3D.jl @@ -0,0 +1,147 @@ +using FastIce.Grids +using FastIce.GridOperators +using FastIce.Fields +using FastIce.Architectures +using FastIce.BoundaryConditions +using FastIce.KernelLaunch + +using KernelAbstractions +using AMDGPU + +using FastIce.Distributed +using MPI + +using Printf +using Plots + +# @inline isin(Field) = (I <= CartesianIndex(size(grid, location(Field)))) + +@kernel function update_C!(C, qC, dt, Δ, offset=nothing) + I = @index(Global, Cartesian) + isnothing(offset) || (I += offset) + @inbounds if checkbounds(Bool, C, I) + C[I] -= dt * (∂ᶜx(qC.x, I) / Δ.x + + ∂ᶜy(qC.y, I) / Δ.y + + ∂ᶜz(qC.z, I) / Δ.z) + end +end + +@kernel function update_qC!(qC, C, dc, Δ, offset=nothing) + I = @index(Global, Cartesian) + isnothing(offset) || (I += offset) + @inbounds if checkbounds(Bool, qC.x, I) + qC.x[I] = -dc * ∂ᵛx(C, I) / Δ.x + end + @inbounds if checkbounds(Bool, qC.y, I) + qC.y[I] = -dc * ∂ᵛy(C, I) / Δ.y + end + @inbounds if checkbounds(Bool, qC.z, I) + qC.z[I] = -dc * ∂ᵛz(C, I) / Δ.z + end +end + +function compute(arch, grid, hide_boundaries, bc_q, bc_c, outer_width, qC, C, dc, dt, Δ, iters) + tic = time_ns() + for _ in 1:iters + launch!(arch, grid, update_qC! => (qC, C, dc, Δ); location=Vertex(), hide_boundaries, boundary_conditions=bc_q, outer_width) + launch!(arch, grid, update_C! 
=> (C, qC, dt, Δ); location=Center(), hide_boundaries, boundary_conditions=bc_c, outer_width) + synchronize(Architectures.backend(arch)) + end + wtime = (time_ns() - tic) * 1e-9 + return wtime +end + +function diffusion_3D(ka_backend=CPU(), dTyp::DataType=Float64, dims=(0, 0, 0); do_visu=true) + # setup arch + arch = Architecture(ka_backend, dims) + topo = details(arch) + set_device!(arch) + me = global_rank(topo) + comm = cartesian_communicator(topo) + # physics + lx, ly, lz = 10.0, 10.0, 10.0 + dc = 1 + # numerics + size = (1023, 1023, 1023) + nt = 100 + iters, warmup = 20, 5 + # preprocessing + size_g = global_grid_size(topo, size) + global_grid = CartesianGrid(; origin=(-0.5lx, -0.5ly, -0.5lz), + extent=(lx, ly, lz), + size=size_g) + grid = local_grid(global_grid, topo) + Δ = NamedTuple{(:x, :y, :z)}(spacing(global_grid)) + dt = minimum(Δ)^2 / dc / ndims(grid) / 2.1 + hide_boundaries = HideBoundaries{ndims(grid)}(arch) + outer_width = (128, 32, 4) + # fields + C = Field(arch, grid, Center(), dTyp; halo=1) + qC = (x = Field(arch, grid, (Vertex(), Center(), Center()), dTyp), + y = Field(arch, grid, (Center(), Vertex(), Center()), dTyp), + z = Field(arch, grid, (Center(), Center(), Vertex()), dTyp)) + # initial condition + foreach(comp -> set!(comp, 0.0), qC) + set!(C, grid, (x, y, z) -> exp(-x^2 - y^2 - z^2)) + # boundary conditions + zero_flux_bc = DirichletBC{FullCell}(0.0) + # zero flux at physical boundaries and nothing at MPI boundaries + bc_q = ntuple(Val(3)) do D + ntuple(Val(2)) do S + has_neighbor(topo, D, S) ? nothing : BoundaryConditionsBatch((qC[D],), (zero_flux_bc,)) + end + end + # nothing at physical boundaries and communication at MPI boundaries + bc_c = ntuple(Val(3)) do D + ntuple(Val(2)) do S + has_neighbor(topo, D, S) ? BoundaryConditionsBatch((C,), (ExchangeInfo(Val(S), Val(D), C),)) : nothing + end + end + KernelLaunch.apply_all_boundary_conditions!(arch, grid, bc_c) + # # time loop + # for it in 1:nt + # (me == 0) && println("it = $it") + # launch!(arch, grid, update_qC! => (qC, C, dc, Δ); location=Vertex(), hide_boundaries, boundary_conditions=bc_q, outer_width) + # launch!(arch, grid, update_C! => (C, qC, dt, Δ); location=Center(), hide_boundaries, boundary_conditions=bc_c, outer_width) + # synchronize(Architectures.backend(arch)) + # end + + # measure perf + # warmup + compute(arch, grid, hide_boundaries, bc_q, bc_c, outer_width, qC, C, dc, dt, Δ, warmup) + # time + MPI.Barrier(comm) + for ex in 1:5 + (me == 0) && (sleep(2); println("Experiment = $ex")) + MPI.Barrier(comm) + wtime = compute(arch, grid, hide_boundaries, bc_q, bc_c, outer_width, qC, C, dc, dt, Δ, (iters - warmup)) + # report + A_eff = 8 / 1e9 * prod(size) * sizeof(dTyp) + wtime_it = wtime ./ (iters - warmup) + T_eff = A_eff ./ wtime_it + @printf(" Executed %d steps in = %1.3e sec (@ T_eff = %1.2f GB/s - device %s) \n", (iters - warmup), wtime, + round(T_eff, sigdigits=3), AMDGPU.device_id(AMDGPU.device())) + end + + if do_visu + ENV["GKSwstype"] = "nul" + C_g = (me == 0) ? 
KernelAbstractions.allocate(CPU(), eltype(C), size_g) : nothing + C_v = Array(C) + gather!(arch, C_g, C_v) + if me == 0 + p1 = heatmap(xcenters(global_grid), ycenters(global_grid), C_g[:, :, size_g[3]÷2]; + aspect_ratio=1, xlims=extrema(xcenters(global_grid)), ylims=extrema(ycenters(global_grid))) + png(p1, "C.png") + end + end + + return +end + +ka_backend = ROCBackend() +T::DataType = Float64 +dims = (0, 0, 0) + +MPI.Init() +diffusion_3D(ka_backend, T, dims; do_visu=false) +MPI.Finalize() diff --git a/scripts_future_API/exchanger.jl b/scripts_future_API/exchanger.jl deleted file mode 100644 index 2080d268..00000000 --- a/scripts_future_API/exchanger.jl +++ /dev/null @@ -1,160 +0,0 @@ -using KernelAbstractions -using MPI -using CUDA - -include("mpi_utils.jl") - -mutable struct Exchanger - @atomic done::Bool - top::Base.Event - bottom::Base.Event - @atomic err - task::Task - - function Exchanger(f::F, backend::Backend) where F - top = Base.Event(#=autoreset=# true) - bottom = Base.Event(#=autoreset=# true) - this = new(false, top, bottom, nothing) - - this.task = Threads.@spawn begin - KernelAbstractions.priority!(backend, :high) - try - while !(@atomic this.done) - wait(top) - f() - notify(bottom) - end - catch err - @atomic this.done = true - @atomic this.err = err - end - end - errormonitor(this.task) - return this - end -end - -setdone!(exc::Exchanger) = @atomic exc.done = true - -Base.isdone(exc::Exchanger) = @atomic exc.done -function Base.notify(exc::Exchanger) - if !(@atomic exc.done) - notify(exc.top) - else - error("notify: Exchanger is not running") - end -end -function Base.wait(exc::Exchanger) - if !(@atomic exc.done) - wait(exc.bottom) - else - error("wait: Exchanger is not running") - end -end - -# TODO: Implement in MPI.jl -function cooperative_test!(req) - done = false - while !done - done, _ = MPI.Test(req, MPI.Status) - yield() - end -end - -@kernel function do_work!(A, me, offset) - ix, iy, iz = @index(Global, NTuple) - ix += offset[1] - 1 - iy += offset[2] - 1 - iz += offset[3] - 1 - for _ in 1:10 - # if (ix > 1 && ix < size(A, 1)) && - # (iy > 1 && iy < size(A, 2)) && - # (iz > 1 && iz < size(A, 3)) - A[ix, iy, iz] = me - # end - end -end - -backend = CUDABackend() -T::DataType = Int -dims = (0, 0, 0) - -# function main(backend = CPU(), T::DataType = Float64, dims = (0, 0, 0)) -MPI.Init() - -nprocs = MPI.Comm_size(MPI.COMM_WORLD) - -dims = Tuple(MPI.Dims_create(nprocs, dims)) - -# create MPI communicator -comm = MPI.Cart_create(MPI.COMM_WORLD, dims) -me = MPI.Comm_rank(comm) -neighbors = ntuple(Val(length(dims))) do idim - MPI.Cart_shift(comm, idim-1, 1) -end -coords = Tuple(MPI.Cart_coords(comm)) -# create communicator for the node and select device -comm_node = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, me) -pid = MPI.Comm_rank(comm_node) -CUDA.device!(pid) - -nx, ny, nz = 6, 6, 6 -bx, by, bz = 2, 2, 2 -A = KernelAbstractions.allocate(backend, T, nx, ny, nz) -fill!(A, -1) - -ranges = split_ndrange(A, (bx, by, bz)) - -get_recv_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 1 : Colon(), Val(ndims(A)))...) -get_recv_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) : Colon(), Val(ndims(A)))...) - -get_send_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 2 : Colon(), Val(ndims(A)))...) -get_send_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) - 1 : Colon(), Val(ndims(A)))...) 
- -exchangers = ntuple(Val(length(neighbors))) do dim - ntuple(2) do side - Exchanger(backend) do - rank = neighbors[dim][side] - if rank != -1 - recv_buf = get_recv_view(Val(side), Val(dim), A) - recv = MPI.Irecv!(recv_buf,comm;source=rank) - end - - I = 2*(dim-1) + side - - do_work!(backend, 256)(A, me, first(ranges[I]); ndrange=size(ranges[I])) - KernelAbstractions.synchronize(backend) - - if rank != -1 - send_buf = get_send_view(Val(side), Val(dim), A) - send = MPI.Isend(send_buf,comm;dest=rank) - cooperative_test!(recv) - cooperative_test!(send) - end - end - end -end - -do_work!(backend, 256)(A, me, first(ranges[end]); ndrange=size(ranges[end])) - -for dim in reverse(eachindex(neighbors)) - notify.(exchangers[dim]) - wait.(exchangers[dim]) -end - -KernelAbstractions.synchronize(backend) - -# for dim in eachindex(neighbors) -# setdone!.(exchangers[dim]) -# end - -sleep(me) -@info "me == $me" -display(A) - -MPI.Finalize() - -# return -# end - -# main() \ No newline at end of file diff --git a/scripts_future_API/exchanger2.jl b/scripts_future_API/exchanger2.jl deleted file mode 100644 index 775dec21..00000000 --- a/scripts_future_API/exchanger2.jl +++ /dev/null @@ -1,81 +0,0 @@ -using KernelAbstractions -using MPI - -using AMDGPU - -# using CUDA -# using NVTX - -include("mpi_utils.jl") -include("mpi_utils2.jl") - -@kernel function do_work!(A, me, offset) - ix, iy, iz = @index(Global, NTuple) - ix += offset[1] - 1 - iy += offset[2] - 1 - iz += offset[3] - 1 - for _ in 1:10 - # if (ix > 1 && ix < size(A, 1)) && - # (iy > 1 && iy < size(A, 2)) && - # (iz > 1 && iz < size(A, 3)) - A[ix, iy, iz] = me - # end - end -end - -function main(backend = CPU(), T::DataType = Float64, dims = (0, 0, 0)) - - # numerics - dims, comm, me, neighbors, coords, device = init_distributed(dims; init_MPI=true) - - nx, ny, nz = 6, 6, 6 - b_width = (2, 2, 2) - - # init arrays - A = KernelAbstractions.allocate(backend, T, nx, ny, nz) - fill!(A, -1) - - ranges = split_ndrange(A, b_width) - - exchangers = ntuple(Val(length(neighbors))) do _ - ntuple(_ -> Exchanger(backend, device), Val(2)) - end - - do_work!(backend, 256)(A, me, first(ranges[end]); ndrange=size(ranges[end])) - - for dim in reverse(eachindex(neighbors)) - ntuple(Val(2)) do side - rank = neighbors[dim][side] - halo = get_recv_view(Val(side), Val(dim), A) - border = get_send_view(Val(side), Val(dim), A) - range = ranges[2*(dim-1) + side] - offset, ndrange = first(range), size(range) - start_exchange(exchangers[dim][side], comm, rank, halo, border) do compute_bc - do_work!(backend, 256)(A, me, offset; ndrange) - if compute_bc - # apply_bcs!(Val(dim), fields, bcs.velocity) - end - KernelAbstractions.synchronize(backend) - end - end - wait.(exchangers[dim]) - end - KernelAbstractions.synchronize(backend) - - # for dim in eachindex(neighbors) - # setdone!.(exchangers[dim]) - # end - - sleep(2me) - @info "me == $me" - display(A) - - finalize_distributed(; finalize_MPI=true) - return -end - -backend = ROCBackend() -T::DataType = Int -dims = (0, 0, 1) - -main(backend, T, dims) \ No newline at end of file diff --git a/scripts_future_API/laplacian_3d.jl b/scripts_future_API/laplacian_3d.jl deleted file mode 100644 index e2491ead..00000000 --- a/scripts_future_API/laplacian_3d.jl +++ /dev/null @@ -1,103 +0,0 @@ -using AMDGPU -using KernelAbstractions -using MPI -using Printf -# using BenchmarkTools - -# node_id = parse(Int, ENV["SLURM_NODEID"]) -# task_id = parse(Int, ENV["SLURM_LOCALID"]) - -# println("node $node_id, gpu $task_id") - -# AMDGPU.device_id!(task_id 
+ 1) -# dev_id = AMDGPU.device_id(AMDGPU.device()) -# @show AMDGPU.device() - -@kernel function update_H!(H2, @Const(H), dc, dt, _dx2, _dy2, _dz2) - ix, iy, iz = @index(Global, NTuple) - @inbounds H2[ix+1, iy+1, iz+1] = H[ix+1, iy+1, iz+1] + dt #= * dc * ((H[ix, iy+1, iz+1] - 2H[ix+1, iy+1, iz+1] + H[ix+2, iy+1, iz+1]) * _dx2 + - (H[ix+1, iy, iz+1] - 2H[ix+1, iy+1, iz+1] + H[ix+1, iy+2, iz+1]) * _dy2 + - (H[ix+1, iy+1, iz] - 2H[ix+1, iy+1, iz+1] + H[ix+1, iy+1, iz+2]) * _dz2) =# -end - -function update_H_roc!(H2, H, dc, dt, _dx2, _dy2, _dz2) - ix = (workgroupIdx().x - UInt32(1)) * workgroupDim().x + workitemIdx().x - iy = (workgroupIdx().y - UInt32(1)) * workgroupDim().y + workitemIdx().y - iz = (workgroupIdx().z - UInt32(1)) * workgroupDim().z + workitemIdx().z - if (ix < size(H, 1) - 2 && iy < size(H, 2) - 2 && iz < size(H, 3) - 2) - @inbounds H2[ix+1, iy+1, iz+1] = H[ix+1, iy+1, iz+1] + dt #= * dc * ((H[ix, iy+1, iz+1] - 2H[ix+1, iy+1, iz+1] + H[ix+2, iy+1, iz+1]) * _dx2 + - (H[ix+1, iy, iz+1] - 2H[ix+1, iy+1, iz+1] + H[ix+1, iy+2, iz+1]) * _dy2 + - (H[ix+1, iy+1, iz] - 2H[ix+1, iy+1, iz+1] + H[ix+1, iy+1, iz+2]) * _dz2) =# - end - return -end - -function compute_ka(backend, nx, ny, nz, H2, H, dc, dt, _dx2, _dy2, _dz2, iters) - tic = time_ns() - for _ = 1:iters - update_H!(backend, 256, (nx - 2, ny - 2, nz - 2))(H2, H, dc, dt, _dx2, _dy2, _dz2) - KernelAbstractions.synchronize(backend) - end - wtime = (time_ns() - tic) * 1e-9 - return wtime -end - -function compute_roc(nblocks, nthreads, H2, H, dc, dt, _dx2, _dy2, _dz2, iters) - tic = time_ns() - for _ = 1:iters - AMDGPU.@sync @roc gridsize=nblocks groupsize=nthreads update_H_roc!(H2, H, dc, dt, _dx2, _dy2, _dz2) - end - wtime = (time_ns() - tic) * 1e-9 - return wtime -end - -function run(backend=CPU(), dims=(0, 0, 0); nx=128, ny=128, nz=128, dtype=Float64) - MPI.Init() - nprocs = MPI.Comm_size(MPI.COMM_WORLD) - dims = Tuple(MPI.Dims_create(nprocs, dims)) - # create MPI communicator - comm = MPI.Cart_create(MPI.COMM_WORLD, dims) - me = MPI.Comm_rank(comm) - # create communicator for the node and select device - comm_node = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, me) - dev_id = MPI.Comm_rank(comm_node) - @show device = AMDGPU.device_id!(dev_id + 1) - dev_id = AMDGPU.device_id(AMDGPU.device()) - # physics - dc = 1.0 - _dx2, _dy2, _dz2 = 1.0, 1.0, 1.0 - dt = 1.0 / (_dx2 * dc * 6.1) - # allocate - H = KernelAbstractions.zeros(backend, dtype, nx, ny, nz) - # benchmark - iters, warmup = 35, 5 - nthreads = (256, 1, 1) - nblocks = cld.((nx, ny, nz), nthreads) - H2 = copy(H) - KernelAbstractions.synchronize(backend) - println("Let's fucking gooooooooo!") - - # GC.gc() - # GC.enable(false) - - # compute_ka(backend, nx, ny, nz, H2, H, dc, dt, _dx2, _dy2, _dz2, warmup) - # wtime = compute_ka(backend, nx, ny, nz, H2, H, dc, dt, _dx2, _dy2, _dz2, (iters - warmup)) - - compute_roc(nblocks, nthreads, H2, H, dc, dt, _dx2, _dy2, _dz2, warmup) - wtime = compute_roc(nblocks, nthreads, H2, H, dc, dt, _dx2, _dy2, _dz2, (iters - warmup)) - - # GC.enable(true) - # GC.gc() - - # perf - A_eff = 2 / 2^30 * (nx - 2) * (ny - 2) * (nz - 2) * sizeof(Float64) - # A_eff = 2 / 2^30 * nx * ny * nz * sizeof(Float64) - wtime_it = wtime / (iters - warmup) - T_eff = A_eff / wtime_it - @printf("Executed %d steps in = %1.3e sec (@ T_eff = %1.2f GB/s - device %s) \n", (iters - warmup), wtime, round(T_eff, sigdigits=3), dev_id) - println("Done") - MPI.Finalize() - return -end - -run(ROCBackend(), (0, 0, 0); nx=1024, ny=1024, nz=1024) \ No newline at end of file diff 
--git a/scripts_future_API/mpi_utils.jl b/scripts_future_API/mpi_utils.jl deleted file mode 100644 index 37c20128..00000000 --- a/scripts_future_API/mpi_utils.jl +++ /dev/null @@ -1,27 +0,0 @@ -@inline subrange(nr,bw,I,::Val{1}) = 1:bw[I] -@inline subrange(nr,bw,I,::Val{2}) = (size(nr,I)-bw[I]+1):size(nr,I) -@inline subrange(nr,bw,I,::Val{3}) = (bw[I]+1):(size(nr,I)-bw[I]) - -@inline split_ndrange(ndrange,ndwidth) = split_ndrange(CartesianIndices(ndrange),ndwidth) - -function split_ndrange(ndrange::CartesianIndices{N},ndwidth::NTuple{N,<:Integer}) where N - @assert all(size(ndrange) .> ndwidth.*2) - @inline ndsubrange(I,::Val{J}) where J = ntuple(Val(N)) do idim - if idim < I - 1:size(ndrange,idim) - elseif idim == I - subrange(ndrange,ndwidth,idim,Val(J)) - else - subrange(ndrange,ndwidth,idim,Val(3)) - end - end - ndinner = ntuple(idim -> subrange(ndrange,ndwidth,idim,Val(3)), Val(N)) - return ntuple(Val(2N+1)) do i - if i == 2N+1 - ndrange[ndinner...] - else - idim,idir = divrem(i-1,2) .+ 1 - ndrange[ndsubrange(idim,Val(idir))...] - end - end -end \ No newline at end of file diff --git a/scripts_future_API/mpi_utils2.jl b/scripts_future_API/mpi_utils2.jl deleted file mode 100644 index 10121c9a..00000000 --- a/scripts_future_API/mpi_utils2.jl +++ /dev/null @@ -1,117 +0,0 @@ -# MPI -function init_distributed(dims::Tuple=(0, 0, 0); init_MPI=true) - init_MPI && MPI.Init() - nprocs = MPI.Comm_size(MPI.COMM_WORLD) - dims = Tuple(MPI.Dims_create(nprocs, dims)) - # create MPI communicator - comm = MPI.Cart_create(MPI.COMM_WORLD, dims) - me = MPI.Comm_rank(comm) - neighbors = ntuple(Val(length(dims))) do idim - MPI.Cart_shift(comm, idim-1, 1) - end - coords = Tuple(MPI.Cart_coords(comm)) - # create communicator for the node and select device - comm_node = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, me) - dev_id = MPI.Comm_rank(comm_node) - # @show device = CUDA.device!(dev_id) - # @show device = AMDGPU.device_id!(dev_id + 1) - @show device = AMDGPU.device_id!(dev_id*2 + 1) - return (dims, comm, me, neighbors, coords, device) -end - -function finalize_distributed(; finalize_MPI=true) - finalize_MPI && MPI.Finalize() - return -end - -# exchanger -mutable struct Exchanger - @atomic done::Bool - channel::Channel - bottom::Base.Event - @atomic err - task::Task - - function Exchanger(backend::Backend, device) - channel = Channel() - bottom = Base.Event(true) - - this = new(false, channel, bottom, nothing) - - recv_buf = nothing - send_buf = nothing - - this.task = Threads.@spawn begin - # CUDA.device!(device) - AMDGPU.device!(device) - KernelAbstractions.priority!(backend, :high) - try - while !(@atomic this.done) - f, comm, rank, halo, border = take!(channel) - - has_neighbor = rank != MPI.PROC_NULL - compute_bc = !has_neighbor - - if isnothing(recv_buf) - recv_buf = similar(halo) - send_buf = similar(border) - end - if has_neighbor - recv = MPI.Irecv!(recv_buf, comm; source=rank) - end - f(compute_bc) - if has_neighbor - copyto!(send_buf, border) - KernelAbstractions.synchronize(backend) - send = MPI.Isend(send_buf, comm; dest=rank) - flag = false - while true - test_recv = MPI.Test(recv) - test_send = MPI.Test(send) - if test_recv && !flag - copyto!(halo, recv_buf) - flag = true - end - if test_recv && test_send break end - yield() - end - end - KernelAbstractions.synchronize(backend) - notify(bottom) - end - catch err - @show err - @atomic this.done = true - @atomic this.err = err - end - end - errormonitor(this.task) - return this - end -end - -setdone!(exc::Exchanger) = @atomic exc.done = 
true - -Base.isdone(exc::Exchanger) = @atomic exc.done - -function start_exchange(f, exc::Exchanger, comm, rank, halo, border) - if !(@atomic exc.done) - put!(exc.channel, (f, comm, rank, halo, border)) - else - error("notify: Exchanger is not running") - end -end - -function Base.wait(exc::Exchanger) - if !(@atomic exc.done) - wait(exc.bottom) - else - error("wait: Exchanger is not running") - end -end - -get_recv_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 1 : Colon(), Val(ndims(A)))...) -get_recv_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) : Colon(), Val(ndims(A)))...) - -get_send_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 2 : Colon(), Val(ndims(A)))...) -get_send_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) - 1 : Colon(), Val(ndims(A)))...) diff --git a/scripts_future_API/profileme.sh b/scripts_future_API/profileme.sh deleted file mode 100755 index dced62b6..00000000 --- a/scripts_future_API/profileme.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -source /users/lurass/scratch/setenv_lumi.sh - -# srun -N1 -n2 --ntasks-per-node=2 --gpus-per-node=2 --gpu-bind=closest profileme.sh - -# srun --cpu-bind=map_cpu:49,57,17,25,1,9,33,41 -N1 -n8 --gpus-per-node=8 profileme.sh - -# julia --project laplacian_3d.jl -julia --project bench3d.jl -# julia --project exchanger2.jl -# julia --project rocmaware.jl - -# ENABLE_JITPROFILING=1 ../../myrocprof --hsa-trace --hip-trace julia --project ./exchanger2.jl - -# export HIP_VISIBLE_DEVICES=$SLURM_LOCALID -# export HIP_VISIBLE_DEVICES="0,2,4,6" - -# ENABLE_JITPROFILING=1 rocprof --hsa-trace --hip-trace -d ./prof_out${SLURM_PROCID} -o ./prof_out${SLURM_PROCID}/results${SLURM_PROCID}.csv julia --project bench3d.jl \ No newline at end of file diff --git a/scripts_future_API/rocmaware.jl b/scripts_future_API/rocmaware.jl deleted file mode 100644 index b2387ab1..00000000 --- a/scripts_future_API/rocmaware.jl +++ /dev/null @@ -1,25 +0,0 @@ -using MPI -using AMDGPU -MPI.Init() -comm = MPI.COMM_WORLD -rank = MPI.Comm_rank(comm) -# select device -comm_l = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, rank) -rank_l = MPI.Comm_rank(comm_l) -device = AMDGPU.device_id!(rank_l+1) -gpu_id = AMDGPU.device_id(AMDGPU.device()) -# select device -size = MPI.Comm_size(comm) -dst = mod(rank+1, size) -src = mod(rank-1, size) -println("rank=$rank rank_loc=$rank_l (gpu_id=$gpu_id - $device), size=$size, dst=$dst, src=$src") -N = 4 -send_mesg = ROCArray{Float64}(undef, N) -recv_mesg = ROCArray{Float64}(undef, N) -fill!(send_mesg, Float64(rank)) -fill!(recv_mesg, Float64(0.0)) -AMDGPU.synchronize() -rank==0 && println("start sending...") -MPI.Sendrecv!(send_mesg, dst, 0, recv_mesg, src, 0, comm) -println("recv_mesg on proc $rank: $recv_mesg") -rank==0 && println("done.") \ No newline at end of file diff --git a/scripts_future_API/run_stokes3D.sh b/scripts_future_API/run_stokes3D.sh new file mode 100755 index 00000000..d7ae7850 --- /dev/null +++ b/scripts_future_API/run_stokes3D.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +module load LUMI/22.08 +module load partition/G +module load rocm/5.3.3 + +export MPICH_GPU_SUPPORT_ENABLED=1 + +julia --project -O3 tm_stokes_mpi_wip.jl diff --git a/scripts_future_API/runme.sh b/scripts_future_API/runme.sh index 45ed56ab..f7c803f1 100755 --- a/scripts_future_API/runme.sh +++ b/scripts_future_API/runme.sh @@ -1,14 +1,23 @@ #!/bin/bash -module load LUMI/22.08 -module load partition/G -module load rocm/5.3.3 +source 
/users/lurass/scratch/setenv_lumi.sh
+# module load LUMI/22.08
+# module load partition/G
+# module load rocm/5.3.3
 
 # ROCm-aware MPI set to 1, else 0
 export MPICH_GPU_SUPPORT_ENABLED=1
-export IGG_ROCMAWARE_MPI=1
-# Needs to know about location of GTL lib
-export LD_PRELOAD=${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so
+## basic
+# srun --cpu-bind=map_cpu:49,57,17,25,1,9,33,41 -N1 -n8 --gpus-per-node=8 profileme.sh
-julia --project rocmaware.jl
\ No newline at end of file
+## optimal using only single GCD per MI250x Module
+# srun --cpu-bind=map_cpu:49,17,1,33 -N1 -n1 --gpus-per-node=8 profileme.sh
+# srun --cpu-bind=map_cpu:49,17,1,33 -N4 -n16 --gpus-per-node=8 profileme.sh
+export ROCR_VISIBLE_DEVICES=0,2,4,6
+
+# julia --project benchmark_diffusion_3D.jl
+julia --project --color=yes tm_stokes_mpi_wip.jl
+
+# Profiling
+# ENABLE_JITPROFILING=1 rocprof --hip-trace --hsa-trace -d ./prof_out${SLURM_PROCID} -o ./prof_out${SLURM_PROCID}/results${SLURM_PROCID}.csv julia --project bench3d.jl
diff --git a/scripts_future_API/stokes_ms.jl b/scripts_future_API/stokes_ms.jl
new file mode 100644
index 00000000..1cdf48ed
--- /dev/null
+++ b/scripts_future_API/stokes_ms.jl
@@ -0,0 +1,152 @@
+using Printf
+
+using FastIce
+using FastIce.Architectures
+using FastIce.Grids
+using FastIce.Fields
+using FastIce.Utils
+using FastIce.BoundaryConditions
+using FastIce.Models.FullStokes.Isothermal
+using FastIce.Physics
+using FastIce.KernelLaunch
+
+const VBC = BoundaryCondition{Velocity}
+const TBC = BoundaryCondition{Traction}
+const SBC = BoundaryCondition{Slip}
+
+using LinearAlgebra
+using KernelAbstractions
+using CUDA # required by CUDABackend() in main() below
+
+# manufactured solution for the confined Stokes flow with free-slip boundaries
+# helper functions
+f(ξ, η) = cos(π * ξ) * (η^2 - 1)^2 -
+          cos(π * η) * (ξ^2 - 1)^2
+g(ξ, η) = sin(π * η) * (ξ^2 - 1) * ξ -
+          sin(π * ξ) * (η^2 - 1) * η
+p(ξ, η) = cos(π * η) * (1 - 3 * ξ^2) * 2 -
+          cos(π * ξ) * (1 - 3 * η^2) * 2
+# velocity
+vx(x, y, z) = sin(π * x) * f(y, z)
+vy(x, y, z) = sin(π * y) * f(z, x)
+vz(x, y, z) = sin(π * z) * f(x, y)
+# diagonal deviatoric stress
+τxx(x, y, z, η) = 2 * η * π * cos(π * x) * f(y, z)
+τyy(x, y, z, η) = 2 * η * π * cos(π * y) * f(z, x)
+τzz(x, y, z, η) = 2 * η * π * cos(π * z) * f(x, y)
+# off-diagonal deviatoric stress
+τxy(x, y, z, η) = 4 * η * cos(π * z) * g(x, y)
+τxz(x, y, z, η) = 4 * η * cos(π * y) * g(z, x)
+τyz(x, y, z, η) = 4 * η * cos(π * x) * g(y, z)
+# forcing terms
+ρgx(x, y, z, η) = -2 * η * sin(π * x) * (f(y, z) * π^2 - p(y, z))
+ρgy(x, y, z, η) = -2 * η * sin(π * y) * (f(z, x) * π^2 - p(z, x))
+ρgz(x, y, z, η) = -2 * η * sin(π * z) * (f(x, y) * π^2 - p(x, y))
+
+@views function main()
+    backend = CUDABackend()
+    arch = Architecture(backend, 2)
+    set_device!(arch)
+
+    outer_width = (4, 4, 4)
+
+    # physics
+    η0 = 1.0
+    A0 = 0.5
+
+    # geometry
+    grid = CartesianGrid(; origin=(-1.0, -1.0, -1.0),
+                         extent=(2.0, 2.0, 2.0),
+                         size=(256, 256, 256))
+
+    free_slip = SBC(0.0, 0.0, 0.0)
+    xface = (Vertex(), Center(), Center())
+    yface = (Center(), Vertex(), Center())
+    zface = (Center(), Center(), Vertex())
+
+    boundary_conditions = (x = (free_slip, free_slip),
+                           y = (free_slip, free_slip),
+                           z = (free_slip, free_slip))
+
+    gravity = (x=FunctionField(ρgx, grid, xface; parameters=η0),
+               y=FunctionField(ρgy, grid, yface; parameters=η0),
+               z=FunctionField(ρgz, grid, zface; parameters=η0))
+
+    # numerics
+    niter = 10maximum(size(grid))
+    ncheck = maximum(size(grid))
+
+    # PT params
+    r = 0.7
+    re_mech = 8π
+    lτ_re_m = minimum(extent(grid)) / re_mech
+    vdτ = 
minimum(spacing(grid)) / sqrt(ndims(grid) * 1.1) + θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ + dτ_r = 1.0 / (θ_dτ + 1.0) + nudτ = vdτ * lτ_re_m + + iter_params = (η_rel=1e-1, + Δτ=(Pr=r / θ_dτ, τ=(xx=dτ_r, yy=dτ_r, zz=dτ_r, xy=dτ_r, xz=dτ_r, yz=dτ_r), V=(x=nudτ, y=nudτ, z=nudτ))) + + physics = (rheology=GlensLawRheology(1),) + other_fields = (A=Field(backend, grid, Center()),) + + model = IsothermalFullStokesModel(; + arch, + grid, + physics, + gravity, + boundary_conditions, + iter_params, + outer_width, + other_fields) + + set!(model.fields.Pr, 0.0) + foreach(x -> set!(x, 0.0), model.fields.τ) + foreach(x -> set!(x, 0.0), model.fields.V) + + set!(other_fields.A, A0) + set!(model.fields.η, grid, (grid, loc, I, fields) -> physics.rheology(grid, I, fields); discrete=true, parameters=(model.fields,)) + + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.stress) + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.velocity) + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.rheology) + + for iter in 1:niter + advance_iteration!(model, 0.0, 1.0) + if iter % ncheck == 0 + compute_residuals!(model) + err = (Pr = norm(model.fields.r_Pr, Inf), + Vx = norm(model.fields.r_V.x, Inf), + Vy = norm(model.fields.r_V.y, Inf), + Vz = norm(model.fields.r_V.z, Inf)) + if any(.!isfinite.(values(err))) + error("simulation failed, err = $err") + end + iter_nx = iter / maximum(size(grid)) + @printf(" iter/nx = %.1f, err = [Pr = %1.3e, Vx = %1.3e, Vy = %1.3e, Vz = %1.3e]\n", iter_nx, err...) + end + end + + Vx_m = Field(backend, grid, location(model.fields.V.x)) + Vy_m = Field(backend, grid, location(model.fields.V.y)) + Vz_m = Field(backend, grid, location(model.fields.V.z)) + + set!(Vx_m, grid, vx) + set!(Vy_m, grid, vy) + set!(Vz_m, grid, vz) + + dVx = interior(Vx_m) .- interior(model.fields.V.x) + dVy = interior(Vy_m) .- interior(model.fields.V.y) + dVz = interior(Vz_m) .- interior(model.fields.V.z) + + err = (norm(dVx, Inf) / norm(Vx_m, Inf), + norm(dVy, Inf) / norm(Vy_m, Inf), + norm(dVz, Inf) / norm(Vz_m, Inf)) + + @printf("err = [Vx: %1.3e, Vy: %1.3e, Vz: %1.3e]\n", err...) + + return +end + +main() diff --git a/scripts_future_API/submit.sh b/scripts_future_API/submit.sh new file mode 100644 index 00000000..77ff13fe --- /dev/null +++ b/scripts_future_API/submit.sh @@ -0,0 +1,18 @@ +#!/bin/bash -l +#SBATCH --job-name="FastIce3D" +#SBATCH --output=FastIce3D.%j.o +#SBATCH --error=FastIce3D.%j.e +#SBATCH --time=00:10:00 +#SBATCH --nodes=4 +#SBATCH --ntasks=16 +# #SBATCH --ntasks-per-node=8 # this somehow fails... 
+#SBATCH --gpus-per-node=8 +#SBATCH --partition=standard-g +#SBATCH --account project_465000557 + +# CPU_BIND="map_cpu:49,57,17,25,1,9,33,41" + +# export ROCR_VISIBLE_DEVICES=0,2,4,6 # -> done in runme.sh +CPU_BIND="map_cpu:49,17,1,33" + +srun --cpu-bind=${CPU_BIND} ./runme.sh diff --git a/scripts_future_API/submit_stokes3D.sh b/scripts_future_API/submit_stokes3D.sh new file mode 100755 index 00000000..04615068 --- /dev/null +++ b/scripts_future_API/submit_stokes3D.sh @@ -0,0 +1,15 @@ +#!/bin/bash -l +#SBATCH --job-name="FastIce3D" +#SBATCH --output=FastIce3D.%j.o +#SBATCH --error=FastIce3D.%j.e +#SBATCH --time=01:00:00 +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=8 +#SBATCH --gpus-per-node=8 +#SBATCH --partition=standard-g +#SBATCH --account project_465000557 + +# export ROCR_VISIBLE_DEVICES=0,2,4,6 +# srun --cpu-bind=map_cpu:49,17,1,33 ./run_stokes3D.sh + +srun --cpu-bind=map_cpu:49,57,17,25,1,9,33,41 ./run_stokes3D.sh diff --git a/scripts_future_API/test_stokes_ms.jl b/scripts_future_API/test_stokes_ms.jl new file mode 100644 index 00000000..2dea8982 --- /dev/null +++ b/scripts_future_API/test_stokes_ms.jl @@ -0,0 +1,125 @@ +using FastIce +using FastIce.Architectures +using FastIce.Grids +using FastIce.Fields +using FastIce.Utils +using FastIce.BoundaryConditions +using FastIce.Models.FullStokes.Isothermal +using FastIce.Physics +using FastIce.KernelLaunch + +const VBC = BoundaryCondition{Velocity} +const TBC = BoundaryCondition{Traction} +const SBC = BoundaryCondition{Slip} + +using KernelAbstractions +# using AMDGPU + +using FastIce.Distributed +using MPI + +# manufactured solution for the confined Stokes flow with free-slip boundaries +# helper functions +f(ξ, η) = cos(π * ξ) * (η^2 - 1)^2 - + cos(π * η) * (ξ^2 - 1)^2 +g(ξ, η) = sin(π * η) * (ξ^2 - 1) * ξ - + sin(π * ξ) * (η^2 - 1) * η +p(ξ, η) = cos(π * η) * (1 - 3 * ξ^2) * 2 - + cos(π * ξ) * (1 - 3 * η^2) * 2 +# velocity +vx(x, y, z) = sin(π * x) * f(y, z) +vy(x, y, z) = sin(π * y) * f(z, x) +vz(x, y, z) = sin(π * z) * f(x, y) +# diagonal deviatoric stress +τxx(x, y, z, η) = 2 * η * π * cos(π * x) * f(y, z) +τyy(x, y, z, η) = 2 * η * π * cos(π * y) * f(z, x) +τzz(x, y, z, η) = 2 * η * π * cos(π * z) * f(x, y) +# off-diagonal deviatoric stress +τxy(x, y, z, η) = 4 * η * cos(π * z) * g(x, y) +τxz(x, y, z, η) = 4 * η * cos(π * y) * g(z, x) +τyz(x, y, z, η) = 4 * η * cos(π * x) * g(y, z) +# forcing terms +ρgx(x, y, z, η) = -2 * η * sin(π * x) * (f(y, z) * π^2 - p(y, z)) +ρgy(x, y, z, η) = -2 * η * sin(π * y) * (f(z, x) * π^2 - p(z, x)) +ρgz(x, y, z, η) = -2 * η * sin(π * z) * (f(x, y) * π^2 - p(x, y)) + +function main() + MPI.Init() + # architecture + backend = CPU() + dims = (1, 1, 1) + arch = Architecture(backend, dims, MPI.COMM_WORLD) + set_device!(arch) + topo = details(arch) + # geometry + size_l = (64, 64, 64) # local grid size + size_g = global_grid_size(topo, size_l) + if global_rank(topo) == 0 + @show dimensions(topo) + @show size_g + end + grid_g = CartesianGrid(; origin=(-1.0, -1.0, -1.0), + extent=(2.0, 2.0, 2.0), + size=size_g) + + grid_l = local_grid(grid_g, topo) + # physics + η0 = 1.0 + A0 = 0.5 + # boundary conditions + free_slip = SBC(0.0, 0.0, 0.0) + boundary_conditions = (x = (free_slip, free_slip), + y = (free_slip, free_slip), + z = (free_slip, free_slip)) + gravity = (x=0.0, y=0.0, z=0.0) + physics = (rheology=GlensLawRheology(1),) + other_fields = (A=Field(backend, grid_l, Center()),) + # numerics + niter = 20maximum(size(grid_g)) + ncheck = 1maximum(size(grid_g)) + # PT params + r = 0.7 + re_mech = 5π + 
lτ_re_m = minimum(extent(grid_g)) / re_mech + vdτ = minimum(spacing(grid_g)) / sqrt(ndims(grid_g) * 10.1) + θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ + dτ_r = 1.0 / (θ_dτ + 1.0) + nudτ = vdτ * lτ_re_m + # pack PT params + iter_params = (η_rel=1e-1, Δτ=(Pr=r / θ_dτ, τ=(xx=dτ_r, yy=dτ_r, zz=dτ_r, xy=dτ_r, xz=dτ_r, yz=dτ_r), V=(x=nudτ, y=nudτ, z=nudτ))) + # model + model = IsothermalFullStokesModel(; arch, + grid=grid_l, + physics, + gravity, + boundary_conditions, + iter_params, + other_fields) + # init + fill!(parent(model.fields.Pr), 0.0) + foreach(x -> fill!(parent(x), 0.0), model.fields.τ) + foreach(x -> fill!(parent(x), 0.0), model.fields.V) + fill!(parent(other_fields.A), A0) + set!(model.fields.η, grid_l, (grid, loc, I, fields) -> physics.rheology(grid, I, fields); discrete=true, parameters=(model.fields,)) + + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.stress) + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.velocity) + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.rheology) + + if global_rank(topo) == 0 + println("action") + end + + for iter in 1:niter + advance_iteration!(model, 0.0, 1.0) + if (iter % ncheck == 0) && (global_rank(topo) == 0) + println("iter/nx = $(iter/maximum(size(grid_g)))") + end + end + + KernelAbstractions.synchronize(backend) + MPI.Finalize() + return +end + +main() diff --git a/scripts_future_API/tm_stokes_mpi_wip.jl b/scripts_future_API/tm_stokes_mpi_wip.jl index 5baf8087..b2cda295 100644 --- a/scripts_future_API/tm_stokes_mpi_wip.jl +++ b/scripts_future_API/tm_stokes_mpi_wip.jl @@ -1,129 +1,273 @@ using FastIce +using FastIce.Architectures using FastIce.Grids using FastIce.Fields using FastIce.Utils using FastIce.BoundaryConditions using FastIce.Models.FullStokes.Isothermal using FastIce.Physics -using FastIce.Distributed +using FastIce.KernelLaunch -using MPI +const VBC = BoundaryCondition{Velocity} +const TBC = BoundaryCondition{Traction} +const SBC = BoundaryCondition{Slip} +using LinearAlgebra, Printf using KernelAbstractions +# using AMDGPU + +using FastIce.Distributed +using MPI + +using CairoMakie + +norm_g(A) = (sum2_l = sum(interior(A) .^ 2); sqrt(MPI.Allreduce(sum2_l, MPI.SUM, MPI.COMM_WORLD))) +max_abs_g(A) = (max_l = maximum(abs.(interior(A))); MPI.Allreduce(max_l, MPI.MAX, MPI.COMM_WORLD)) + +@views avx(A) = 0.5 .* (A[1:end-1, :, :] .+ A[2:end, :, :]) +@views avy(A) = 0.5 .* (A[:, 1:end-1, :] .+ A[:, 2:end, :]) +@views avz(A) = 0.5 .* (A[:, :, 1:end-1] .+ A[:, :, 2:end]) + +@views av_xy(A) = 0.25 .* (A[1:end-1, 1:end-1, :] .+ A[2:end, 1:end-1, :] .+ A[2:end, 2:end, :] .+ A[1:end-1, 2:end, :]) +@views av_xz(A) = 0.25 .* (A[1:end-1, :, 1:end-1] .+ A[2:end, :, 1:end-1, :] .+ A[2:end, :, 2:end, :] .+ A[1:end-1, :, 2:end]) +@views av_yz(A) = 0.25 .* (A[:, 1:end-1, 1:end-1] .+ A[:, 2:end, 1:end-1] .+ A[:, 2:end, 2:end] .+ A[:, 1:end-1, 2:end]) + +function fastice_intro(; kwargs...) + intro = raw""" + ┌──────────────────────────────────────────────────────────┐ + │ ______ __ ____ _ __ │ + │ / ____/____ _ _____ / /_ / _/_____ ___ (_)/ / │ + │ / /_ / __ `// ___// __/ / / / ___// _ \ / // / │ + │ / __/ / /_/ /(__ )/ /_ _/ / / /__ / __/_ / // / │ + │ /_/ \__,_//____/ \__//___/ \___/ \___/(_)__/ //_/ │ + │ /___/ │ + └──────────────────────────────────────────────────────────┘ + """ + printstyled(intro; kwargs...) 
+end + +function main(; do_visu=false, do_save=false) + MPI.Init() + + backend = CPU() + # dims = (4, 2, 2) + # dims = (4, 2, 2) + dims = (1, 1, 1) + topo = CartesianTopology(dims) + arch = Architecture(backend, topo) + set_device!(arch) + + comm = cartesian_communicator(topo) + me = global_rank(topo) # rank + + size_l = (30, 30, 30) + size_g = global_grid_size(topo, size_l) + + outer_width = (3, 3, 3) #(128, 32, 4)# + + grid_g = CartesianGrid(; origin=(-2.0, -1.0, 0.0), + extent=(4.0, 2.0, 2.0), + size=size_g) + + grid_l = local_grid(grid_g, topo) + + if me == 0 + fastice_intro(bold=true, color=:blue) + printstyled("Running FastIce.jl 🧊 \n"; bold=true, color=:blue) + printstyled(grid_g; bold=true) + end + + no_slip = VBC(0.0, 0.0, 0.0) + free_slip = SBC(0.0, 0.0, 0.0) + free_surface = TBC(0.0, 0.0, 0.0) + + boundary_conditions = (x=(free_slip, free_slip), + y=(free_slip, free_slip), + z=(no_slip, free_surface)) + + ρgx(x, y, z) = 0.25 + ρgy(x, y, z) = 0.0 + ρgz(x, y, z) = 1.0 + gravity = (x=FunctionField(ρgx, grid_l, (Vertex(), Center(), Center())), + y=FunctionField(ρgy, grid_l, (Center(), Vertex(), Center())), + z=FunctionField(ρgz, grid_l, (Center(), Center(), Vertex()))) + + # numerics + niter = 10maximum(size(grid_g)) + ncheck = 2maximum(size(grid_g)) + + r = 0.7 + re_mech = 4π + lτ_re_m = minimum(extent(grid_g)) / re_mech + vdτ = minimum(spacing(grid_g)) / sqrt(ndims(grid_g) * 1.5) + θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ + dτ_r = 1.0 / (θ_dτ + 1.0) + nudτ = vdτ * lτ_re_m + + iter_params = (η_rel=1e-1, + Δτ=(Pr=r / θ_dτ, τ=(xx=dτ_r, yy=dτ_r, zz=dτ_r, xy=dτ_r, xz=dτ_r, yz=dτ_r), V=(x=nudτ, y=nudτ, z=nudτ))) + + physics = (rheology=GlensLawRheology(1),) + other_fields = (A=Field(backend, grid_l, Center()),) + + model = IsothermalFullStokesModel(; + arch, + grid=grid_l, + physics, + gravity, + boundary_conditions, + outer_width, + iter_params, + other_fields) + + (me == 0) && printstyled("Model created \n"; bold=true, color=:light_blue) + + if do_save || do_visu + if me == 0 + Pr_g = zeros(size(grid_g)) + τxx_g = zeros(size(grid_g)) + τyy_g = zeros(size(grid_g)) + τzz_g = zeros(size(grid_g)) + τxy_g = zeros(size(grid_g)) + τxz_g = zeros(size(grid_g)) + τyz_g = zeros(size(grid_g)) + Vx_g = zeros(size(grid_g)) + Vy_g = zeros(size(grid_g)) + Vz_g = zeros(size(grid_g)) + else + Pr_g = nothing + τxx_g = nothing + τyy_g = nothing + τzz_g = nothing + τxy_g = nothing + τxz_g = nothing + τyz_g = nothing + Vx_g = nothing + Vy_g = nothing + Vz_g = nothing + end + Pr_v = zeros(size(grid_l)) + τxx_v = zeros(size(grid_l)) + τyy_v = zeros(size(grid_l)) + τzz_v = zeros(size(grid_l)) + τxy_v = zeros(size(grid_l)) + τxz_v = zeros(size(grid_l)) + τyz_v = zeros(size(grid_l)) + Vx_v = zeros(size(grid_l)) + Vy_v = zeros(size(grid_l)) + Vz_v = zeros(size(grid_l)) + end + + fill!(parent(model.fields.Pr), 0.0) + foreach(x -> fill!(parent(x), 0.0), model.fields.τ) + foreach(x -> fill!(parent(x), 0.0), model.fields.V) + fill!(parent(other_fields.A), 1.0) + + set!(model.fields.η, grid_l, (grid, loc, I, fields) -> physics.rheology(grid, I, fields); discrete=true, parameters=(model.fields,)) + + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.stress) + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.velocity) + KernelLaunch.apply_all_boundary_conditions!(arch, grid_l, model.boundary_conditions.rheology) + + MPI.Barrier(comm) + + (me == 0) && printstyled("Action \n"; bold=true, color=:light_blue) + + ttot_ns = UInt64(0) + for iter in 1:niter + if 
iter == 10 + MPI.Barrier(comm) + ttot_ns = time_ns() + end + advance_iteration!(model, 0.0, 1.0) + if (iter % ncheck == 0) + compute_residuals!(model) + err = (Pr = max_abs_g(model.fields.r_Pr), + Vx = max_abs_g(model.fields.r_V.x), + Vy = max_abs_g(model.fields.r_V.y), + Vz = max_abs_g(model.fields.r_V.z)) + if (me == 0) + any(.!isfinite.(values(err))) && error("simulation failed, err = $err") + iter_nx = iter / maximum(size(grid_g)) + @printf(" iter/nx = %.1f, err = [Pr = %1.3e, Vx = %1.3e, Vy = %1.3e, Vz = %1.3e]\n", iter_nx, err...) + end + end + end + ttot = float(time_ns() - ttot_ns) + ttot /= (niter - 10) + + MPI.Barrier(comm) + + ttot_min = MPI.Allreduce(ttot, MPI.MIN, comm) + ttot_max = MPI.Allreduce(ttot, MPI.MAX, comm) + + if me == 0 + Teff_min = 23 * 8 * prod(size(grid_l)) / ttot_max + Teff_max = 23 * 8 * prod(size(grid_l)) / ttot_min + printstyled("Performance: T_eff [min max] = $(round(Teff_min, sigdigits=4)) $(round(Teff_max, sigdigits=4)) \n"; bold=true, color=:green) + end + + if do_save || do_visu + copyto!(Pr_v, interior(model.fields.Pr)) + copyto!(τxx_v, interior(model.fields.τ.xx)) + copyto!(τyy_v, interior(model.fields.τ.yy)) + copyto!(τzz_v, interior(model.fields.τ.zz)) + copyto!(τxy_v, av_xy(interior(model.fields.τ.xy))) + copyto!(τxz_v, av_xz(interior(model.fields.τ.xz))) + copyto!(τyz_v, av_yz(interior(model.fields.τ.yz))) + copyto!(Vx_v, avx(interior(model.fields.V.x))) + copyto!(Vy_v, avy(interior(model.fields.V.y))) + copyto!(Vz_v, avz(interior(model.fields.V.z))) + + KernelAbstractions.synchronize(backend) + + gather!(Pr_g, Pr_v, comm) + gather!(τxx_g, τxx_v, comm) + gather!(τyy_g, τyy_v, comm) + gather!(τzz_g, τzz_v, comm) + gather!(τxy_g, τxy_v, comm) + gather!(τxz_g, τxz_v, comm) + gather!(τyz_g, τyz_v, comm) + gather!(Vx_g, Vx_v, comm) + gather!(Vy_g, Vy_v, comm) + gather!(Vz_g, Vz_v, comm) + + if (me == 0) && do_visu + fig = Figure() + axs = (Pr=Axis(fig[1, 1][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Pr"), + Vx=Axis(fig[1, 2][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vx"), + Vy=Axis(fig[2, 1][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vy"), + Vz=Axis(fig[2, 2][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vz")) + plt = (Pr = heatmap!(axs.Pr, xcenters(grid_g), zcenters(grid_g), Pr_g[:, size(grid_g, 2)÷2+1, :]; colormap=:turbo), + Vx = heatmap!(axs.Vx, xvertices(grid_g), zcenters(grid_g), Vx_g[:, size(grid_g, 2)÷2+1, :]; colormap=:turbo), + Vy = heatmap!(axs.Vy, xcenters(grid_g), zcenters(grid_g), Vy_g[:, size(grid_g, 2)÷2+1, :]; colormap=:turbo), + Vz = heatmap!(axs.Vz, xcenters(grid_g), zvertices(grid_g), Vz_g[:, size(grid_g, 2)÷2+1, :]; colormap=:turbo)) + Colorbar(fig[1, 1][1, 2], plt.Pr) + Colorbar(fig[1, 2][1, 2], plt.Vx) + Colorbar(fig[2, 1][1, 2], plt.Vy) + Colorbar(fig[2, 2][1, 2], plt.Vz) + save("fig.png", fig) + end + + if me == 0 && do_save + open("data.bin", "w") do io + write(io, Pr_g) + write(io, τxx_g) + write(io, τyy_g) + write(io, τzz_g) + write(io, τxy_g) + write(io, τxz_g) + write(io, τyz_g) + write(io, Vx_g) + write(io, Vy_g) + write(io, Vz_g) + end + end + end -using Printf + MPI.Finalize() -MPI.Init() - -dims = (0, 0, 0) + return +end -topology = CartesianTopology(dims) - -me = global_rank(topology) -device_id = shared_rank(topology) -comm = cartesian_communicator(topology) -dims = dimensions(topology) - -size_l = (64, 64, 64) - -size_g = global_grid_size(topology, size_l) - -# physics -ebg = 1.0 - -global_grid = CartesianGrid( - origin = (-0.5, -0.5, 0.0), - extent = ( 
1.0, 1.0, 1.0), - size = size_g, -) - -grid = local_grid(global_grid, topology) - -@printf("process %d/%d on node %s, global rank %02d\n", device_id + 1, node_size(topology), node_name(topology), me + 1) - -# psh_x(x, _, _) = -x*ebg -# psh_y(_, y, _) = y*ebg - -# x_bc = BoundaryFunction(psh_x; reduce_dims=false) -# y_bc = BoundaryFunction(psh_y; reduce_dims=false) - -# boundary_conditions = ( -# west = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), -# east = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), -# south = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), -# north = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), -# bottom = BoundaryCondition{Velocity}(0.0 , 0.0 , 0.0), -# top = BoundaryCondition{Velocity}(0.0 , 0.0 , 0.0), -# ) - -# r = 0.7 -# re_mech = 10π -# lτ_re_m = minimum(extent(grid)) / re_mech -# vdτ = minimum(spacing(grid)) / sqrt(10.1) -# θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ -# dτ_r = 1.0 / (θ_dτ + 1.0) -# nudτ = vdτ * lτ_re_m - -# iter_params = ( -# η_rel = 1e-1, -# Δτ = ( Pr = r / θ_dτ, τ = (xx = dτ_r, yy = dτ_r, zz = dτ_r, xy = dτ_r, xz = dτ_r, yz = dτ_r), V = (x = nudτ, y = nudτ, z = nudτ)), -# ) - -# backend = CPU() - -# physics = (rheology = GlensLawRheology(1), ) -# other_fields = ( -# A = Field(backend, grid, Center()), -# ) - -# init_incl(x, y, z, x0, y0, z0, r, Ai, Am) = ifelse((x-x0)^2 + (y-y0)^2 + (z-z0)^2 < r^2, Ai, Am) -# set!(other_fields.A, grid, init_incl; parameters = (x0 = 0.0, y0 = 0.0, z0 = 0.5, r = 0.2, Ai = 1e1, Am = 1.0)) - -# model = IsothermalFullStokesModel(; -# backend, -# grid, -# physics, -# boundary_conditions, -# iter_params, -# other_fields -# ) - -# fig = Figure(resolution=(1000,1000), fontsize=32) -# axs = ( -# Pr = Axis(fig[1,1][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Pr"), -# Vx = Axis(fig[2,1][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Vx"), -# Vy = Axis(fig[2,2][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Vy"), -# ) - -# plt = ( -# Pr = heatmap!(axs.Pr, xcenters(grid), ycenters(grid), interior(model.fields.Pr)[:, :, size(grid,3)÷2]; colormap=:turbo), -# Vx = heatmap!(axs.Vx, xvertices(grid), ycenters(grid), interior(model.fields.V.x)[:, :, size(grid,3)÷2]; colormap=:turbo), -# Vy = heatmap!(axs.Vy, xcenters(grid), yvertices(grid), interior(model.fields.V.y)[:, :, size(grid,3)÷2]; colormap=:turbo), -# ) -# Colorbar(fig[1,1][1,2], plt.Pr) -# Colorbar(fig[2,1][1,2], plt.Vx) -# Colorbar(fig[2,2][1,2], plt.Vy) - -# set!(model.fields.Pr, 0.0) -# foreach(x -> set!(x, 0.0), model.fields.τ) -# Isothermal._apply_bcs!(model.backend, model.grid, model.fields, model.boundary_conditions.stress) - -# set!(model.fields.V.x, grid, psh_x) -# set!(model.fields.V.y, grid, psh_y) -# set!(model.fields.V.z, 0.0) -# Isothermal._apply_bcs!(model.backend, model.grid, model.fields, model.boundary_conditions.velocity) - -# set!(model.fields.η, other_fields.A) -# extrapolate!(model.fields.η) - -# for it in 1:10 -# advance_iteration!(model, 0.0, 1.0; async = false) -# if it % 10 == 0 -# plt.Pr[3][] = interior(model.fields.Pr)[:, :, size(grid,3)÷2] -# plt.Vx[3][] = interior(model.fields.V.x)[:, :, size(grid,3)÷2] -# plt.Vy[3][] = interior(model.fields.V.y)[:, :, size(grid,3)÷2] -# yield() -# end -# end - -MPI.Finalize() \ No newline at end of file +main(; do_visu=false, do_save=false) diff --git a/scripts_future_API/tm_stokes_wip.jl b/scripts_future_API/tm_stokes_wip.jl index d532e2ec..ca12b24f 100644 --- a/scripts_future_API/tm_stokes_wip.jl +++ b/scripts_future_API/tm_stokes_wip.jl @@ -1,105 +1,140 @@ using FastIce +using 
FastIce.Architectures using FastIce.Grids using FastIce.Fields using FastIce.Utils using FastIce.BoundaryConditions using FastIce.Models.FullStokes.Isothermal using FastIce.Physics +using FastIce.KernelLaunch -using KernelAbstractions +const VBC = BoundaryCondition{Velocity} +const TBC = BoundaryCondition{Traction} +const SBC = BoundaryCondition{Slip} -using GLMakie - -# physics -ebg = 1.0 - -grid = CartesianGrid( - origin = (-0.5, -0.5, 0.0), - extent = ( 1.0, 1.0, 1.0), - size = ( 64, 64, 64 ), -) - -psh_x(x, _, _) = -x*ebg -psh_y(_, y, _) = y*ebg - -x_bc = BoundaryFunction(psh_x; reduce_dims=false) -y_bc = BoundaryFunction(psh_y; reduce_dims=false) - -boundary_conditions = ( - west = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), - east = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), - south = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), - north = BoundaryCondition{Velocity}(x_bc, y_bc, 0.0), - bottom = BoundaryCondition{Velocity}(0.0 , 0.0 , 0.0), - top = BoundaryCondition{Velocity}(0.0 , 0.0 , 0.0), -) - -r = 0.7 -re_mech = 10π -lτ_re_m = minimum(extent(grid)) / re_mech -vdτ = minimum(spacing(grid)) / sqrt(10.1) -θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ -dτ_r = 1.0 / (θ_dτ + 1.0) -nudτ = vdτ * lτ_re_m - -iter_params = ( - η_rel = 1e-1, - Δτ = ( Pr = r / θ_dτ, τ = (xx = dτ_r, yy = dτ_r, zz = dτ_r, xy = dτ_r, xz = dτ_r, yz = dτ_r), V = (x = nudτ, y = nudτ, z = nudτ)), -) - -backend = CPU() - -physics = (rheology = GlensLawRheology(1), ) -other_fields = ( - A = Field(backend, grid, Center()), -) - -init_incl(x, y, z, x0, y0, z0, r, Ai, Am) = ifelse((x-x0)^2 + (y-y0)^2 + (z-z0)^2 < r^2, Ai, Am) -set!(other_fields.A, grid, init_incl; parameters = (x0 = 0.0, y0 = 0.0, z0 = 0.5, r = 0.2, Ai = 1e1, Am = 1.0)) - -model = IsothermalFullStokesModel(; - backend, - grid, - physics, - boundary_conditions, - iter_params, - other_fields -) - -fig = Figure(resolution=(1000,1000), fontsize=32) -axs = ( - Pr = Axis(fig[1,1][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Pr"), - Vx = Axis(fig[2,1][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Vx"), - Vy = Axis(fig[2,2][1,1]; aspect=DataAspect(), xlabel="x", ylabel="y", title="Vy"), -) - -plt = ( - Pr = heatmap!(axs.Pr, xcenters(grid), ycenters(grid), interior(model.fields.Pr)[:, :, size(grid,3)÷2]; colormap=:turbo), - Vx = heatmap!(axs.Vx, xvertices(grid), ycenters(grid), interior(model.fields.V.x)[:, :, size(grid,3)÷2]; colormap=:turbo), - Vy = heatmap!(axs.Vy, xcenters(grid), yvertices(grid), interior(model.fields.V.y)[:, :, size(grid,3)÷2]; colormap=:turbo), -) -Colorbar(fig[1,1][1,2], plt.Pr) -Colorbar(fig[2,1][1,2], plt.Vx) -Colorbar(fig[2,2][1,2], plt.Vy) - -set!(model.fields.Pr, 0.0) -foreach(x -> set!(x, 0.0), model.fields.τ) -Isothermal._apply_bcs!(model.backend, model.grid, model.fields, model.boundary_conditions.stress) - -set!(model.fields.V.x, grid, psh_x) -set!(model.fields.V.y, grid, psh_y) -set!(model.fields.V.z, 0.0) -Isothermal._apply_bcs!(model.backend, model.grid, model.fields, model.boundary_conditions.velocity) - -set!(model.fields.η, other_fields.A) -extrapolate!(model.fields.η) - -for it in 1:100 - advance_iteration!(model, 0.0, 1.0; async = false) - if it % 10 == 0 - plt.Pr[3][] = interior(model.fields.Pr)[:, :, size(grid,3)÷2] - plt.Vx[3][] = interior(model.fields.V.x)[:, :, size(grid,3)÷2] - plt.Vy[3][] = interior(model.fields.V.y)[:, :, size(grid,3)÷2] - yield() +using LinearAlgebra, Printf +using KernelAbstractions +# using CUDA +# using AMDGPU + +using CairoMakie +# using GLMakie +# Makie.inline!(true) + 
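+# Hypothetical GPU setup (not part of this script; assumes CUDA.jl and the
+# FastIceCUDAExt extension are available): replace the CPU backend below with
+#   backend = CUDABackend()
+#   arch = Architecture(backend, 1) # device id within the node
+#   set_device!(arch)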
+@views function main() + backend = CPU() + arch = Architecture(backend, 2) + set_device!(arch) + + # physics + ebg = 2.0 + + b_width = (16, 4, 4) #(128, 32, 4)# + + grid = CartesianGrid(; origin=(-0.5, -0.5, 0.0), + extent=(1.0, 1.0, 1.0), + size=(62, 62, 62)) + + psh_x(x, _, _) = -x * ebg + psh_y(_, y, _) = y * ebg + + x_bc = BoundaryFunction(psh_x; reduce_dims=false) + y_bc = BoundaryFunction(psh_y; reduce_dims=false) + + free_slip = SBC(0.0, 0.0, 0.0) + free_surface = TBC(0.0, 0.0, 0.0) + + boundary_conditions = (x=(VBC(x_bc, y_bc, 0.0), VBC(x_bc, y_bc, 0.0)), + y=(VBC(x_bc, y_bc, 0.0), VBC(x_bc, y_bc, 0.0)), + z=(free_slip, free_surface)) + # TODO: Add ConstantField + ρg(x, y, z) = 0.0 + gravity = (x=FunctionField(ρg, grid, (Vertex(), Center(), Center())), + y=FunctionField(ρg, grid, (Center(), Vertex(), Center())), + z=FunctionField(ρg, grid, (Center(), Center(), Vertex()))) + + # numerics + niter = 20maximum(size(grid)) + ncheck = maximum(size(grid)) + + r = 0.7 + re_mech = 10π + lτ_re_m = minimum(extent(grid)) / re_mech + vdτ = minimum(spacing(grid)) / sqrt(ndims(grid) * 1.5) + θ_dτ = lτ_re_m * (r + 4 / 3) / vdτ + dτ_r = 1.0 / (θ_dτ + 1.0) + nudτ = vdτ * lτ_re_m + + iter_params = (η_rel=1e-1, + Δτ=(Pr=r / θ_dτ, τ=(xx=dτ_r, yy=dτ_r, zz=dτ_r, xy=dτ_r, xz=dτ_r, yz=dτ_r), V=(x=nudτ, y=nudτ, z=nudτ))) + + physics = (rheology=GlensLawRheology(1),) + other_fields = (A=Field(backend, grid, Center()),) + + init_incl(x, y, z, x0, y0, z0, r, Ai, Am) = ifelse((x - x0)^2 + (y - y0)^2 + (z - z0)^2 < r^2, Ai, Am) + set!(other_fields.A, grid, init_incl; parameters=(x0=0.0, y0=0.0, z0=0.5, r=0.1, Ai=1e-1, Am=1.0)) + + model = IsothermalFullStokesModel(; + arch, + grid, + physics, + gravity, + boundary_conditions, + outer_width=b_width, + iter_params, + other_fields) + + fig = Figure() + axs = (Pr = Axis(fig[1, 1][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Pr"), + Vx = Axis(fig[1, 2][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vx"), + Vy = Axis(fig[2, 1][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vy"), + Vz = Axis(fig[2, 2][1, 1]; aspect=DataAspect(), xlabel="x", ylabel="z", title="Vz")) + plt = (Pr = heatmap!(axs.Pr, xcenters(grid), zcenters(grid), Array(interior(model.fields.Pr)[:, size(grid, 2)÷2+1, :]); colormap=:turbo), + Vx = heatmap!(axs.Vx, xvertices(grid), zcenters(grid), Array(interior(model.fields.V.x)[:, size(grid, 2)÷2+1, :]); colormap=:turbo), + Vy = heatmap!(axs.Vy, xcenters(grid), zcenters(grid), Array(interior(model.fields.V.y)[:, size(grid, 2)÷2+1, :]); colormap=:turbo), + Vz = heatmap!(axs.Vz, xcenters(grid), zvertices(grid), Array(interior(model.fields.V.z)[:, size(grid, 2)÷2+1, :]); colormap=:turbo)) + Colorbar(fig[1, 1][1, 2], plt.Pr) + Colorbar(fig[1, 2][1, 2], plt.Vx) + Colorbar(fig[2, 1][1, 2], plt.Vy) + Colorbar(fig[2, 2][1, 2], plt.Vz) + + fill!(parent(model.fields.Pr), 0.0) + foreach(x -> fill!(parent(x), 0.0), model.fields.τ) + + set!(model.fields.V.x, grid, psh_x) + set!(model.fields.V.y, grid, psh_y) + set!(model.fields.V.z, 0.0) + set!(model.fields.η, grid, (grid, loc, I, fields) -> physics.rheology(grid, I, fields); discrete=true, parameters=(model.fields,)) + + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.stress) + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.velocity) + KernelLaunch.apply_all_boundary_conditions!(arch, grid, model.boundary_conditions.rheology) + + display(fig) + + for iter in 1:niter + advance_iteration!(model, 0.0, 1.0) + if 
(iter % ncheck == 0) + compute_residuals!(model) + err = (Pr = norm(model.fields.r_Pr, Inf), + Vx = norm(model.fields.r_V.x, Inf), + Vy = norm(model.fields.r_V.y, Inf), + Vz = norm(model.fields.r_V.z, Inf)) + if any(.!isfinite.(values(err))) + error("simulation failed, err = $err") + end + iter_nx = iter / maximum(size(grid)) + @printf(" iter/nx = %.1f, err = [Pr = %1.3e, Vx = %1.3e, Vy = %1.3e, Vz = %1.3e]\n", iter_nx, err...) + plt.Pr[3][] = interior(model.fields.Pr)[:, size(grid, 2)÷2+1, :] + plt.Vx[3][] = interior(model.fields.V.x)[:, size(grid, 2)÷2+1, :] + plt.Vy[3][] = interior(model.fields.V.y)[:, size(grid, 2)÷2+1, :] + plt.Vz[3][] = interior(model.fields.V.z)[:, size(grid, 2)÷2+1, :] + # yield() + display(fig) + end end + + return end + +main() diff --git a/src/Architectures.jl b/src/Architectures.jl new file mode 100644 index 00000000..9e85210b --- /dev/null +++ b/src/Architectures.jl @@ -0,0 +1,43 @@ +module Architectures + +export Architecture + +export launch!, set_device!, get_device, set_device_and_priority!, heuristic_groupsize +export backend, device, details + +using FastIce.Grids + +using KernelAbstractions + +struct Architecture{Kind,B,D,Details} + backend::B + device::D + details::Details +end + +struct SingleDevice end + +function Architecture(backend::Backend, device_id::Integer=1) + device = get_device(backend, device_id) + return Architecture{SingleDevice,typeof(backend),typeof(device),Nothing}(backend, device, nothing) +end + +device(arch::Architecture) = arch.device +backend(arch::Architecture) = arch.backend +details(arch::Architecture) = arch.details + +set_device!(arch::Architecture) = set_device!(arch.device) + +function set_device_and_priority!(arch::Architecture, prio::Symbol) + set_device!(arch) + KernelAbstractions.priority!(arch.backend, prio) + return +end + +set_device!(::Architecture{Kind,CPU}) where {Kind} = nothing +get_device(::CPU, id::Integer) = nothing + +heuristic_groupsize(arch::Architecture, ::Val{N}) where {N} = heuristic_groupsize(arch.device, Val(N)) +heuristic_groupsize(::Architecture{Kind,CPU}, ::Val{N}) where {Kind,N} = 256 + +end diff --git a/src/BoundaryConditions/BoundaryConditions.jl b/src/BoundaryConditions/BoundaryConditions.jl new file mode 100644 index 00000000..e03a5748 --- /dev/null +++ b/src/BoundaryConditions/BoundaryConditions.jl @@ -0,0 +1,44 @@ +module BoundaryConditions + +export FieldBoundaryCondition, BoundaryConditionsBatch +export apply_boundary_conditions!, apply_all_boundary_conditions! + +export DirichletBC, HalfCell, FullCell +export ExtrapolateBC +export ContinuousBC, DiscreteBC +export BoundaryFunction, DiscreteBoundaryFunction, ContinuousBoundaryFunction + +export HideBoundaries, hide + +using FastIce.Grids +using FastIce.Fields +using FastIce.Utils +using FastIce.Architectures + +using KernelAbstractions +using Adapt + +abstract type FieldBoundaryCondition end + +include("field_boundary_conditions.jl") +include("utils.jl") +include("boundary_function.jl") +include("dirichlet_bc.jl") +include("extrapolate_bc.jl") +include("hide_boundaries.jl") + +struct BoundaryConditionsBatch{F,BC} + fields::F + conditions::BC +end + +@inline function apply_boundary_conditions!(::Val{S}, ::Val{D}, + arch::Architecture, + grid::CartesianGrid, + batch::BoundaryConditionsBatch; kwargs...) where {S,D} + apply_boundary_conditions!(Val(S), Val(D), arch, grid, batch.fields, batch.conditions; kwargs...) +end + +apply_boundary_conditions!(side, val, arch, grid, ::Nothing; kwargs...) 
= nothing + +end diff --git a/src/BoundaryConditions/boundary_conditions.jl deleted file mode 100644 index 4b8ed13c..00000000 --- a/src/BoundaryConditions/boundary_conditions.jl +++ /dev/null @@ -1,47 +0,0 @@ -module BoundaryConditions - -export apply_bcs! - -export DirichletBC, HalfCell, FullCell -export ContinuousBC, DiscreteBC -export BoundaryFunction, DiscreteBoundaryFunction, ContinuousBoundaryFunction - -using FastIce.Grids -using FastIce.Fields -using FastIce.Utils - -using KernelAbstractions -using Adapt - -include("utils.jl") -include("boundary_function.jl") -include("dirichlet_bc.jl") - -function apply_bcs!(::Val{D}, backend, grid, fields, bcs; async=true) where {D} - sizes = ntuple(I -> remove_dim(Val(D), size(fields[I])), Val(length(fields))) - ndrange = max.(sizes...) - _apply_bcs!(backend, 256)(Val(D), grid, fields, sizes, bcs; ndrange) - async || KernelAbstractions.synchronize(backend) - return -end - -@kernel function _apply_bcs!(dim, grid, fields, sizes, bcs) - I = @index(Global, Cartesian) - ntuple(Val(length(fields))) do ifield - Base.@_inline_meta - if _checkindices(sizes[ifield], I) - _apply_bcs_lr!(dim, grid, fields[ifield], I, bcs[ifield]...) - end - end -end - -@inline function _apply_bcs_lr!(dim, grid, f, I, lbc, rbc) - loc = location(f) - _apply_bc!(Val(1), dim, grid, f, loc, I, lbc) - _apply_bc!(Val(2), dim, grid, f, loc, I, rbc) - return -end - -@inline _apply_bc!(side, dim, grid, f, loc, Ibc, ::Nothing) = nothing - -end \ No newline at end of file diff --git a/src/BoundaryConditions/boundary_function.jl index 2ece4c03..0f38ec32 100644 --- a/src/BoundaryConditions/boundary_function.jl +++ b/src/BoundaryConditions/boundary_function.jl @@ -1,7 +1,7 @@ abstract type BoundaryFunction{F} end struct ReducedDimensions end -struct FullDimensions end +struct FullDimensions end @inline _reduce(::Type{ReducedDimensions}, I, dim) = remove_dim(dim, I) @inline _reduce(::Type{FullDimensions}, I, dim) = I @@ -21,14 +21,14 @@ end const CBF{F,P,RF} = ContinuousBoundaryFunction{F,P,RF} where {F,P,RF} const DBF{F,P,RF} = DiscreteBoundaryFunction{F,P,RF} where {F,P,RF} -const CDBF{F,P} = Union{CBF{F,P}, DBF{F,P}} where {F,P} +const CDBF{F,P} = Union{CBF{F,P},DBF{F,P}} where {F,P} @inline _params(::CDBF{F,Nothing}) where {F} = () @inline _params(cbf::CDBF{F}) where {F} = cbf.parameters @inline (bc::CBF{F,P,RF})(grid, loc, dim, I) where {F,P,RF} = bc.fun(_reduce(RF, coord(grid, loc, I), dim)..., _params(bc)...) -@inline (bc::DBF{F,P,RF})(grid, loc, dim, I) where {F,P,RF} = bc.fun(grid, loc, dim, Tuple(_reduce(RF, I, dim))..., _params(bc)...) +@inline (bc::DBF{F,P,RF})(grid, loc, dim, I) where {F,P,RF} = bc.fun(grid, loc, dim, Tuple(_reduce(RF, I, dim))..., _params(bc)...) # Create a continuous or discrete boundary function # if discrete = true, the function has signature f(grid, loc, dim, inds...) @@ -36,4 +36,4 @@ const CDBF{F,P} = Union{CBF{F,P}, DBF{F,P}} where {F,P} function BoundaryFunction(fun::Function; discrete=false, parameters=nothing, reduce_dims=true) RF = reduce_dims ? ReducedDimensions : FullDimensions discrete ? 
DiscreteBoundaryFunction{RF}(fun, parameters) : ContinuousBoundaryFunction{RF}(fun, parameters) -end \ No newline at end of file +end diff --git a/src/BoundaryConditions/dirichlet_bc.jl index 804f953c..51b10c7f 100644 --- a/src/BoundaryConditions/dirichlet_bc.jl +++ b/src/BoundaryConditions/dirichlet_bc.jl @@ -5,13 +5,13 @@ struct HalfCell end struct FullCell end # First-kind Dirichlet boundary condition parametrised by the gradient reconstruction kind (can be HalfCell or FullCell) -struct DirichletBC{Gradient,T} +struct DirichletBC{Gradient,T} <: FieldBoundaryCondition condition::T DirichletBC{Gradient}(condition::T) where {Gradient,T} = new{Gradient,T}(condition) end # Create a DirichletBC with a continuous or discrete boundary function -function DirichletBC{G}(fun::Function; kwargs...) where G +function DirichletBC{G}(fun::Function; kwargs...) where {G} condition = BoundaryFunction(fun; kwargs...) return DirichletBC{G}(condition) end @@ -24,9 +24,9 @@ Base.@propagate_inbounds (bc::DirichletBC{G,<:BoundaryFunction})(grid, loc, dim, Adapt.adapt_structure(to, f::DirichletBC{G,<:AbstractArray}) where {G} = DirichletBC{G}(Adapt.adapt(to, parent(f.condition))) Base.@propagate_inbounds _get_gradient(f2, bc::DirichletBC{HalfCell}, grid, loc, dim, I) = 2 * (bc(grid, loc, dim, I) - f2) -Base.@propagate_inbounds _get_gradient(f2, bc::DirichletBC{FullCell}, grid, loc, dim, I) = (bc(grid, loc, dim, I) - f2) +Base.@propagate_inbounds _get_gradient(f2, bc::DirichletBC{FullCell}, grid, loc, dim, I) = (bc(grid, loc, dim, I) - f2) -@inline function _apply_bc!(side, dim, grid, f, loc, Ibc, bc::DirichletBC) +@inline function _apply_field_boundary_condition!(side, dim, grid, f, loc, Ibc, bc::DirichletBC) I = _bc_index(dim, side, loc, size(f), Ibc) DI = _bc_offset(Val(ndims(grid)), dim, side) @inbounds f[I] = f[I+DI] + _get_gradient(f[I+DI], bc, grid, loc, dim, I) diff --git a/src/BoundaryConditions/extrapolate_bc.jl new file mode 100644 index 00000000..191022c8 --- /dev/null +++ b/src/BoundaryConditions/extrapolate_bc.jl @@ -0,0 +1,9 @@ +# Boundary condition that extrapolates the values of the field outside of the domain +struct ExtrapolateBC <: FieldBoundaryCondition end + +@inline function _apply_field_boundary_condition!(side, dim, grid, f, loc, Ibc, ::ExtrapolateBC) + I = _bc_index(dim, side, loc, size(f), Ibc) + DI = _bc_offset(Val(ndims(grid)), dim, side) + @inbounds f[I] = 2 * f[I+DI] - f[I+2DI] + return +end diff --git a/src/BoundaryConditions/field_boundary_conditions.jl new file mode 100644 index 00000000..ff287060 --- /dev/null +++ b/src/BoundaryConditions/field_boundary_conditions.jl @@ -0,0 +1,44 @@ +function apply_boundary_conditions!(::Val{S}, ::Val{D}, + arch::Architecture, + grid::CartesianGrid, + fields::NTuple{N,Field}, + conditions::NTuple{N,FieldBoundaryCondition}; async=true) where {S,D,N} + _validate_fields(fields, D, S) + sizes = ntuple(ifield -> remove_dim(Val(D), size(parent(fields[ifield]))), Val(N)) + halos = ntuple(ifield -> remove_dim(Val(D), halo(fields[ifield])), Val(N)) + offsets = ntuple(Val(N)) do ifield + NF = ndims(fields[ifield]) - 1 + ntuple(Val(NF)) do RD + -halos[ifield][RD][1] + end |> CartesianIndex + end + worksize = max.(sizes...) 
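+ # a single kernel launch covers the largest boundary plane among all fields; indices that fall outside a smaller field's plane are filtered out inside the kernel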
+ _apply_boundary_conditions!(backend(arch), 256)(Val(S), Val(D), grid, sizes, offsets, fields, conditions; ndrange=worksize) + async || synchronize(backend(arch)) + return +end + +@kernel function _apply_boundary_conditions!(side, dim, grid, sizes, offsets, fields, conditions) + I = @index(Global, Cartesian) + # ntuple here unrolls the loop over fields + ntuple(Val(length(fields))) do ifield + Base.@_inline_meta + @inbounds if _checkindices(sizes[ifield], I) + Ibc = I + offsets[ifield] + field = fields[ifield] + condition = conditions[ifield] + _apply_field_boundary_condition!(side, dim, grid, field, location(field), Ibc, condition) + end + end +end + +_apply_field_boundary_condition!(side, dim, grid, field, loc, I, ::Nothing) = nothing + +function _validate_fields(fields::NTuple{N,Field}, dim, side) where {N} + for f in fields + if (location(f, Val(dim)) == Center()) && (halo(f, dim, side) < 1) + error("to apply boundary conditions, halo width must be at least 1") + end + end + return +end diff --git a/src/BoundaryConditions/hide_boundaries.jl b/src/BoundaryConditions/hide_boundaries.jl new file mode 100644 index 00000000..f4982bd6 --- /dev/null +++ b/src/BoundaryConditions/hide_boundaries.jl @@ -0,0 +1,32 @@ +struct HideBoundaries{N} + pipelines::NTuple{N,Tuple{Pipeline,Pipeline}} + function HideBoundaries{N}(arch::Architecture) where {N} + pre() = set_device_and_priority!(arch, :high) + pipelines = ntuple(Val(N)) do _ + return ntuple(_ -> Pipeline(; pre), Val(2)) + end + return new{N}(pipelines) + end +end + +function hide(fun::F, hb::HideBoundaries{N}, arch::Architecture, grid::CartesianGrid{N}, boundary_conditions, worksize; + outer_width=nothing) where {F,N} + inner_range, outer_ranges = split_ndrange(worksize, outer_width) + # execute inner range in a main Task with a normal priority + fun(inner_range) + for dim in N:-1:1 + ntuple(Val(2)) do side + pipe = hb.pipelines[dim][side] + range = outer_ranges[dim][side] + batch = boundary_conditions[dim][side] + # execute outer range and boundary conditions asynchronously + put!(pipe) do + fun(range) + apply_boundary_conditions!(Val(side), Val(dim), arch, grid, batch) + synchronize(backend(arch)) + end + end + wait.(hb.pipelines[dim]) # synchronize spatial dimension + end + return +end diff --git a/src/BoundaryConditions/utils.jl b/src/BoundaryConditions/utils.jl index 41262b04..e498162b 100644 --- a/src/BoundaryConditions/utils.jl +++ b/src/BoundaryConditions/utils.jl @@ -1,4 +1,3 @@ - # Similar to `checkindex`, but for the multidimensional case. Custom axes aren't supported @inline _checkindices(AI::Tuple, I::Tuple) = (I[1] <= AI[1]) && _checkindices(Base.tail(AI), Base.tail(I)) @inline _checkindices(AI::Tuple{}, I::Tuple{}) = true @@ -11,14 +10,14 @@ @inline _get_i(::Vertex) = 1 # Return 1D array index for the "left" and "right" sides of the array -@inline _index1(::Val{1}, L, sz) = _get_i(L) +@inline _index1(::Val{1}, L, sz) = _get_i(L) @inline _index1(::Val{2}, L, sz) = -_get_i(L) + sz + 1 # Cartesian array index to store the boundary condition @inline _bc_index(dim::Val{D}, side, loc, sz, Ibc) where {D} = insert_dim(dim, Ibc, _index1(side, loc[D], sz[D])) # Return 1D offset for the "left" and "right" sides of the array to compute the flux projection -@inline _offset1(::Val{1}) = 1 +@inline _offset1(::Val{1}) = 1 @inline _offset1(::Val{2}) = -1 -@inline _bc_offset(N, ::Val{D}, S) where {D} = ntuple(I -> I == D ? 
_offset1(S) : 0, N) |> CartesianIndex \ No newline at end of file +@inline _bc_offset(N, ::Val{D}, S) where {D} = ntuple(I -> I == D ? _offset1(S) : 0, N) |> CartesianIndex diff --git a/src/Distributed/Distributed.jl new file mode 100644 index 00000000..2a442a34 --- /dev/null +++ b/src/Distributed/Distributed.jl @@ -0,0 +1,36 @@ +""" + Distributed + +Tools for performing parallel computations in a distributed environment. +Contains utilities for non-blocking halo exchange using MPI. +Implements the `BoundaryCondition` API to conveniently define communication as an operation that fills halo buffers. +Together with `HideBoundaries` from the `BoundaryConditions` module, it enables hiding MPI communication behind computations. +""" +module Distributed +using FastIce.Grids +using FastIce.Fields +using FastIce.Architectures +using FastIce.BoundaryConditions +import FastIce.BoundaryConditions: apply_boundary_conditions! +using MPI +using KernelAbstractions +export CartesianTopology +export global_rank, shared_rank, node_name, cartesian_communicator, shared_communicator, coordinates +export dimensions, global_size, node_size +export global_grid_size, local_grid +export neighbors, neighbor, has_neighbor +export gather! +export ExchangeInfo, DistributedBoundaryConditions +"Trait structure used as a type parameter to indicate that the Architecture is a distributed MPI Architecture." +struct DistributedMPI end +include("topology.jl") +include("boundary_conditions.jl") +include("gather.jl") +end diff --git a/src/Distributed/boundary_conditions.jl new file mode 100644 index 00000000..d6ad1445 --- /dev/null +++ b/src/Distributed/boundary_conditions.jl @@ -0,0 +1,112 @@ +""" + ExchangeInfo + +Structure containing the information to exchange halos of one side of an N-dimensional array. +""" +mutable struct ExchangeInfo{SB,RB} + send_buffer::SB + recv_buffer::RB + send_request::MPI.Request + recv_request::MPI.Request + ExchangeInfo(send_buf, recv_buf) = new{typeof(send_buf),typeof(recv_buf)}(send_buf, recv_buf, MPI.REQUEST_NULL, MPI.REQUEST_NULL) +end + +""" + ExchangeInfo(::Val{S}, ::Val{D}, field::Field) where {S,D} + +Create `ExchangeInfo` to exchange halos on side `S` in the spatial direction `D`. +""" +function ExchangeInfo(::Val{S}, ::Val{D}, field::Field) where {S,D} + send_view = get_send_view(Val(S), Val(D), field) + recv_view = get_recv_view(Val(S), Val(D), field) + send_buffer = similar(parent(send_view), eltype(send_view), size(send_view)) + recv_buffer = similar(parent(recv_view), eltype(recv_view), size(recv_view)) + return ExchangeInfo(send_buffer, recv_buffer) +end + +""" + apply_boundary_conditions!(::Val{S}, ::Val{D}, + arch::Architecture, + grid::CartesianGrid, + fields::NTuple{N,Field}, + exchange_infos::NTuple{N,ExchangeInfo}; async=true) where {S,D,N} + +Perform a non-blocking MPI exchange for a set of fields. 
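+All MPI requests are progressed to completion before this function returns; `async=false` additionally synchronizes the device backend.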
+""" +function apply_boundary_conditions!(::Val{S}, ::Val{D}, + arch::Architecture, + grid::CartesianGrid, + fields::NTuple{N,Field}, + exchange_infos::NTuple{N,ExchangeInfo}; async=true) where {S,D,N} + comm = cartesian_communicator(details(arch)) + nbrank = neighbor(details(arch), D, S) + + # initiate non-blocking MPI recieve and device-to-device copy to the send buffer + for idx in eachindex(fields) + info = exchange_infos[idx] + info.recv_request = MPI.Irecv!(info.recv_buffer, comm; source=nbrank) + send_view = get_send_view(Val(S), Val(D), fields[idx]) + copyto!(info.send_buffer, send_view) + end + KernelAbstractions.synchronize(backend(arch)) + + # initiate non-blocking MPI send + for idx in eachindex(fields) + info = exchange_infos[idx] + info.send_request = MPI.Isend(info.send_buffer, comm; dest=nbrank) + end + + recv_ready = falses(N) + send_ready = falses(N) + + # test send and receive requests, initiating device-to-device copy + # to the receive buffer if the receive is complete + while !(all(recv_ready) && all(send_ready)) + for idx in eachindex(fields) + info = exchange_infos[idx] + if MPI.Test(info.recv_request) && !recv_ready[idx] + recv_view = get_recv_view(Val(S), Val(D), fields[idx]) + copyto!(recv_view, info.recv_buffer) + recv_ready[idx] = true + end + send_ready[idx] = MPI.Test(info.send_request) + end + yield() + end + async || KernelAbstractions.synchronize(backend(arch)) + + return +end + +_overlap(::Vertex) = 1 +_overlap(::Center) = 0 + +get_recv_view(side::Val{S}, dim::Val{D}, f::Field) where {S,D} = get_recv_view(side, dim, parent(f), halo(f, D, S)) + +function get_send_view(side::Val{S}, dim::Val{D}, f::Field) where {S,D} + get_send_view(side, dim, parent(f), halo(f, D, S), _overlap(location(f, dim))) +end + +function get_recv_view(::Val{1}, ::Val{D}, array::AbstractArray, halo_width::Integer) where {D} + recv_range = Base.OneTo(halo_width) + indices = ntuple(I -> I == D ? recv_range : Colon(), Val(ndims(array))) + return view(array, indices...) +end + +function get_recv_view(::Val{2}, ::Val{D}, array::AbstractArray, halo_width::Integer) where {D} + recv_range = (size(array, D)-halo_width+1):size(array, D) + indices = ntuple(I -> I == D ? recv_range : Colon(), Val(ndims(array))) + return view(array, indices...) +end + +function get_send_view(::Val{1}, ::Val{D}, array::AbstractArray, halo_width::Integer, overlap::Integer) where {D} + send_range = (overlap+halo_width+1):(overlap+2halo_width) + indices = ntuple(I -> I == D ? send_range : Colon(), Val(ndims(array))) + return view(array, indices...) +end + +function get_send_view(::Val{2}, ::Val{D}, array::AbstractArray, halo_width::Integer, overlap::Integer) where {D} + send_range = (size(array, D)-overlap-2halo_width+1):(size(array, D)-overlap-halo_width) + indices = ntuple(I -> I == D ? send_range : Colon(), Val(ndims(array))) + return view(array, indices...) 
+end diff --git a/src/Distributed/distributed.jl b/src/Distributed/distributed.jl deleted file mode 100644 index ac045a45..00000000 --- a/src/Distributed/distributed.jl +++ /dev/null @@ -1,47 +0,0 @@ -module Distributed - -using FastIce.Architecture -using FastIce.Grids - -export CartesianTopology - -export global_rank, shared_rank, node_name, cartesian_communicator, shared_communicator - -export dimensions, global_size, node_size - -export global_grid_size, local_grid - -export split_ndrange - -using FastIce.Grids - -using MPI - -include("topology.jl") - -include("split_ndrange.jl") - -struct DistributedArchitecture{C,T,R} <: AbstractArchitecture - child_arch::C - topology::T - ranges::R -end - -device(arch::DistributedArchitecture) = device(arch.child_arch) - -function launch!(arch::DistributedArchitecture, grid::CartesianGrid, kernel::Pair{Kernel,Args}; boundary_conditions=nothing, async=true) where {Args} - fun, args = kernel - - worksize = size(grid, Vertex()) - groupsize = heuristic_groupsize(arch.child_arch) - - fun(arch.backend, groupsize)(args...; ndrange=size(arch.ranges[end]), offset=first(arch.ranges[end])) - - - isnothing(boundary_conditions) || apply_boundary_conditions!(boundary_conditions) - - async || synchronize(arch.backend) - return -end - -end \ No newline at end of file diff --git a/src/Distributed/exchanger.jl b/src/Distributed/exchanger.jl deleted file mode 100644 index 2659ed04..00000000 --- a/src/Distributed/exchanger.jl +++ /dev/null @@ -1,72 +0,0 @@ -mutable struct Exchanger - @atomic done::Bool - ch::Channel - bottom::Base.Event - task::Task - @atomic err - - function Exchanger(f::F, arch::AbstractArchitecture, comm, rank, halo, border) where F - top = Base.Event(true) - bottom = Base.Event(true) - - send_buf = similar(border) - recv_buf = similar(halo) - this = new(false, top, bottom, nothing) - - has_neighbor = rank != MPI.PROC_NULL - compute_bc = !has_neighbor - - this.task = Threads.@spawn begin - set_device!(device(arch)) - KernelAbstractions.priority!(backend(arch), :high) - try - while !(@atomic this.done) - wait(top) - if has_neighbor - recv = MPI.Irecv!(recv_buf, comm; source=rank) - end - f(compute_bc) - if has_neighbor - copyto!(send_buf, border) - send = MPI.Isend(send_buf, comm; dest=rank) - cooperative_test!(recv) - copyto!(halo, recv_buf) - cooperative_test!(send) - end - notify(bottom) - end - catch err - @show err - @atomic this.done = true - @atomic this.err = err - end - end - errormonitor(this.task) - return this - end -end - -setdone!(exc::Exchanger) = @atomic exc.done = true - -Base.isdone(exc::Exchanger) = @atomic exc.done - -function Base.notify(exc::Exchanger) - if !(@atomic exc.done) - notify(exc.top) - else - error("notify: Exchanger is not running") - end -end -function Base.wait(exc::Exchanger) - if !(@atomic exc.done) - wait(exc.bottom) - else - error("wait: Exchanger is not running") - end -end - -get_recv_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 1 : Colon(), Val(ndims(A)))...) -get_recv_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) : Colon(), Val(ndims(A)))...) - -get_send_view(::Val{1}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? 2 : Colon(), Val(ndims(A)))...) -get_send_view(::Val{2}, ::Val{D}, A) where D = view(A, ntuple(I -> I == D ? size(A, D) - 1 : Colon(), Val(ndims(A)))...) 
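For orientation, a minimal sketch (not part of the patch) of the index ranges that the new `get_send_view`/`get_recv_view` helpers in `src/Distributed/boundary_conditions.jl` select, assuming a 1-D parent array of length 8 with `halo_width = 1` and `overlap = 0`:

```julia
A = collect(1:8)  # parent array, one halo cell on each side
halo_width, overlap = 1, 0
recv_left  = 1:halo_width                        # 1:1, outermost halo cell on the left
recv_right = (length(A)-halo_width+1):length(A)  # 8:8, outermost halo cell on the right
send_left  = (overlap+halo_width+1):(overlap+2halo_width)                      # 2:2
send_right = (length(A)-overlap-2halo_width+1):(length(A)-overlap-halo_width)  # 7:7
view(A, send_left)  # buffer sent to the left neighbor; it lands in that neighbor's recv_right
```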
diff --git a/src/Distributed/gather.jl new file mode 100644 index 00000000..b52ed47c --- /dev/null +++ b/src/Distributed/gather.jl @@ -0,0 +1,42 @@ +""" + gather!(dst::Union{AbstractArray{T,N},Nothing}, src::AbstractArray{T,N}, comm::MPI.Comm; root=0) where {T,N} + +Gather local array `src` into a global array `dst`. +The size of the global array `size(dst)` must be equal to the product of the size of the local array `size(src)` and the dimensions of the Cartesian communicator `comm`. +The array will be gathered on the process with id `root` (`root=0` by default). +Note that the memory for the global array should be allocated only on the process with id `root`; on all other processes `dst` can be set to `nothing`. +""" +function gather!(dst::Union{AbstractArray{T,N},Nothing}, src::AbstractArray{T,N}, comm::MPI.Comm; root=0) where {T,N} + dims, _, _ = MPI.Cart_get(comm) + dims = Tuple(dims) + if MPI.Comm_rank(comm) == root + # make subtype for gather + offset = Tuple(0 for _ in 1:N) + subtype = MPI.Types.create_subarray(size(dst), size(src), offset, MPI.Datatype(eltype(dst))) + subtype = MPI.Types.create_resized(subtype, 0, size(src, 1) * Base.elsize(dst)) + MPI.Types.commit!(subtype) + # make VBuffer for collective communication + counts = fill(Cint(1), reverse(dims)) # gather one subarray from each MPI rank + displs = zeros(Cint, reverse(dims)) # reverse dims since MPI Cart comm is row-major + csizes = cumprod(size(src)[2:end] .* dims[1:end-1]) + strides = (1, csizes...) + for I in CartesianIndices(displs) + offset = reverse(Tuple(I - oneunit(I))) + displs[I] = sum(offset .* strides) + end + recvbuf = MPI.VBuffer(dst, vec(counts), vec(displs), subtype) + MPI.Gatherv!(src, recvbuf, comm; root) + else + MPI.Gatherv!(src, nothing, comm; root) + end + return +end + +""" + gather!(arch::Architecture{DistributedMPI}, dst, src::Field; kwargs...) + +Gather the interior of a field `src` into a global array `dst`. +""" +function gather!(arch::Architecture{DistributedMPI}, dst, src::Field; kwargs...) + gather!(dst, interior(src), cartesian_communicator(details(arch)); kwargs...) +end diff --git a/src/Distributed/split_ndrange.jl deleted file mode 100644 index 37c20128..00000000 --- a/src/Distributed/split_ndrange.jl +++ /dev/null @@ -1,27 +0,0 @@ -@inline subrange(nr,bw,I,::Val{1}) = 1:bw[I] -@inline subrange(nr,bw,I,::Val{2}) = (size(nr,I)-bw[I]+1):size(nr,I) -@inline subrange(nr,bw,I,::Val{3}) = (bw[I]+1):(size(nr,I)-bw[I]) - -@inline split_ndrange(ndrange,ndwidth) = split_ndrange(CartesianIndices(ndrange),ndwidth) - -function split_ndrange(ndrange::CartesianIndices{N},ndwidth::NTuple{N,<:Integer}) where N - @assert all(size(ndrange) .> ndwidth.*2) - @inline ndsubrange(I,::Val{J}) where J = ntuple(Val(N)) do idim - if idim < I - 1:size(ndrange,idim) - elseif idim == I - subrange(ndrange,ndwidth,idim,Val(J)) - else - subrange(ndrange,ndwidth,idim,Val(3)) - end - end - ndinner = ntuple(idim -> subrange(ndrange,ndwidth,idim,Val(3)), Val(N)) - return ntuple(Val(2N+1)) do i - if i == 2N+1 - ndrange[ndinner...] - else - idim,idir = divrem(i-1,2) .+ 1 - ndrange[ndsubrange(idim,Val(idir))...] - end - end -end \ No newline at end of file diff --git a/src/Distributed/topology.jl index e5a0c81c..f4a34a88 100644 --- a/src/Distributed/topology.jl +++ b/src/Distributed/topology.jl @@ -1,3 +1,8 @@ +""" + CartesianTopology + +Represents an N-dimensional Cartesian topology of distributed processes. 
+""" struct CartesianTopology{N} nprocs::Int dims::NTuple{N,Int} @@ -11,48 +16,147 @@ struct CartesianTopology{N} node_name::String end -function CartesianTopology(dims::NTuple{N,Int}; comm = MPI.COMM_WORLD) where {N} - nprocs = MPI.Comm_size(comm) - dims = Tuple(MPI.Dims_create(nprocs, dims)) - cart_comm = MPI.Cart_create(MPI.COMM_WORLD, dims) +""" + CartesianTopology(dims::NTuple{N,Int}; comm=MPI.COMM_WORLD) where {N} + +Create an N-dimensional Cartesian topology with dimensions `dims` and using base MPI communicator `comm` (by default, `comm=MPI.COMM_WORLD`). +If all entries in `dims` are not equal to `0`, the product of `dims` should be equal to the total number of MPI processes `MPI.Comm_size(comm)`. +If any (or all) entries of `dims` are `0`, the dimensions in the corresponding spatial directions will be picked automatically. +""" +function CartesianTopology(dims::NTuple{N,Int}; comm=MPI.COMM_WORLD) where {N} + nprocs = MPI.Comm_size(comm) + dims = Tuple(MPI.Dims_create(nprocs, dims)) + cart_comm = MPI.Cart_create(MPI.COMM_WORLD, dims) global_rank = MPI.Comm_rank(cart_comm) shared_comm = MPI.Comm_split_type(cart_comm, MPI.COMM_TYPE_SHARED, global_rank) shared_rank = MPI.Comm_rank(shared_comm) - node_name = MPI.Get_processor_name() + node_name = MPI.Get_processor_name() cart_coords = Tuple(MPI.Cart_coords(cart_comm)) neighbors = ntuple(Val(N)) do dim - MPI.Cart_shift(cart_comm, dim-1, 1) + MPI.Cart_shift(cart_comm, dim - 1, 1) end return CartesianTopology{N}(nprocs, dims, global_rank, shared_rank, cart_coords, neighbors, comm, cart_comm, shared_comm, node_name) end +""" + global_rank(t::CartesianTopology) + +Global id of a process in a Cartesian topology. +""" global_rank(t::CartesianTopology) = t.global_rank +""" + shared_rank(t::CartesianTopology) + +Local id of a process within a single node. Can be used to set the GPU device. +""" shared_rank(t::CartesianTopology) = t.shared_rank + +""" + node_name(t::CartesianTopology) + +Name of a node according to `MPI.Get_processor_name()`. +""" node_name(t::CartesianTopology) = t.node_name + +""" + cartesian_communicator(t::CartesianTopology) + +MPI Cartesian communicator for the topology. +""" cartesian_communicator(t::CartesianTopology) = t.cart_comm +""" + shared_communicator(t::CartesianTopology) + +MPI communicator for the processes sharing the same node. +""" shared_communicator(t::CartesianTopology) = t.shared_comm + +""" + dimensions(t::CartesianTopology) + +Dimensions of the topology as NTuple. +""" dimensions(t::CartesianTopology) = t.dims +""" + coordinates(t::CartesianTopology) + +Coordinates of a current process within a Cartesian topology. +""" coordinates(t::CartesianTopology) = t.cart_coords +""" + neighbors(t::CartesianTopology) + +Neighbors of a current process. + +Returns NTuple containing process ids of the two immediate neighbors in each spatial direction, or MPI.PROC_NULL if no neighbor on a corresponding side. +""" neighbors(t::CartesianTopology) = t.neighbors +""" + neighbor(t::CartesianTopology, dim, side) + +Returns id of a neighbor process in spatial direction `dim` on the side `side`, if this neighbor exists, or MPI.PROC_NULL otherwise. +""" +neighbor(t::CartesianTopology, dim, side) = t.neighbors[dim][side] + + +""" + has_neighbor(t::CartesianTopology, dim, side) + +Returns true if there a neighbor process in spatial direction `dim` on the side `side`, or false otherwise. 
+""" +has_neighbor(t::CartesianTopology, dim, side) = t.neighbors[dim][side] != MPI.PROC_NULL + +""" + global_size(t::CartesianTopology) + +Total number of processes withing the topology. +""" global_size(t::CartesianTopology) = MPI.Comm_size(t.cart_comm) + +""" + node_size(t::CartesianTopology) + +Number of processes sharing the same node. +""" node_size(t::CartesianTopology) = MPI.Comm_size(t.shared_comm) -global_grid_size(t::CartesianTopology, local_size) = t.dims .* local_size +""" + global_grid_size(t::CartesianTopology, local_size) +Return the global size for a structured grid. +""" +global_grid_size(t::CartesianTopology, local_size) = t.dims .* local_size + + +""" + local_grid(g::CartesianGrid, t::CartesianTopology) + +Return a `CartesianGrid` covering the subdomain which corresponds to the current process. +""" function local_grid(g::CartesianGrid, t::CartesianTopology) local_extent = extent(g) ./ t.dims - local_size = size(g) .÷ t.dims + local_size = size(g) .÷ t.dims local_origin = origin(g) .+ local_extent .* t.cart_coords return CartesianGrid(local_origin, local_extent, local_size) -end \ No newline at end of file +end + +""" + Architecture(backend::Backend, topology::CartesianTopology) where {N} + +Create a distributed Architecture using `backend` and `topology`. For GPU backends, device will be selected automatically based on a process id within a node. +""" +function Architectures.Architecture(backend::Backend, topology::CartesianTopology) + device = get_device(backend, shared_rank(topology)+1) + return Architecture{DistributedMPI,typeof(backend),typeof(device),typeof(topology)}(backend, device, topology) +end diff --git a/src/FastIce.jl b/src/FastIce.jl index 72fb62ca..e1491a03 100644 --- a/src/FastIce.jl +++ b/src/FastIce.jl @@ -1,19 +1,18 @@ module FastIce -using KernelAbstractions - +# core modules include("Grids/Grids.jl") +include("GridOperators.jl") +include("Logging.jl") +include("Architectures.jl") +include("Fields/Fields.jl") +include("Utils/Utils.jl") +include("BoundaryConditions/BoundaryConditions.jl") +include("KernelLaunch.jl") +include("Distributed/Distributed.jl") +include("Physics.jl") -include("grid_operators.jl") -include("logging.jl") -include("fields.jl") -include("utils.jl") - -include("physics.jl") - -include("BoundaryConditions/boundary_conditions.jl") +# ice flow models include("Models/models.jl") -# include("Distributed/distributed.jl") - end # module diff --git a/src/fields.jl b/src/Fields/Fields.jl similarity index 74% rename from src/fields.jl rename to src/Fields/Fields.jl index c2deab90..47f88dd1 100644 --- a/src/fields.jl +++ b/src/Fields/Fields.jl @@ -2,13 +2,18 @@ module Fields export AbstractField export Field, interior +export FunctionField export location, data, halo, set! using Adapt using OffsetArrays using KernelAbstractions -import FastIce.Grids: Location, CartesianGrid, coord +using FastIce.Grids +using FastIce.GridOperators +using FastIce.Architectures + +import LinearAlgebra abstract type AbstractField{T,N,L} <: AbstractArray{T,N} end @@ -44,6 +49,8 @@ function Field(backend::Backend, grid::CartesianGrid, T::DataType, loc::L, halo: return Field{L}(data, halo, indices) end +Field(arch::Architecture, args...; kwargs...) = Field(backend(arch), args...; kwargs...) 
+ expand_axis_halo(::Nothing) = (0, 0) expand_axis_halo(halo::Integer) = (halo, halo) expand_axis_halo(halo::Tuple) = (something(halo[1], 0), something(halo[2], 0)) @@ -53,8 +60,8 @@ expand_halo(::Val{N}, halo::Tuple) where {N} = ntuple(I -> expand_axis_halo(halo expand_halo(::Val{N}, halo::Integer) where {N} = ntuple(I -> (halo, halo), Val(N)) expand_halo(::Val{N}, halo::Nothing) where {N} = ntuple(I -> (0, 0), Val(N)) -expand_loc(::Val{N}, loc::NTuple{N, Location}) where N = loc -expand_loc(::Val{N}, loc::Location) where N = ntuple(_ -> loc, Val(N)) +expand_loc(::Val{N}, loc::NTuple{N,Location}) where {N} = loc +expand_loc(::Val{N}, loc::Location) where {N} = ntuple(_ -> loc, Val(N)) function Field(backend::Backend, grid::CartesianGrid, loc::L, T::DataType=eltype(grid); halo::H=nothing) where {L,H} N = ndims(grid) @@ -62,10 +69,10 @@ function Field(backend::Backend, grid::CartesianGrid, loc::L, T::DataType=eltype end Base.checkbounds(f::Field, I...) = checkbounds(f.data, I...) -Base.checkbounds(f::Field, I::Union{CartesianIndex, AbstractArray{<:CartesianIndex}}) = checkbounds(f.data, I) +Base.checkbounds(f::Field, I::Union{CartesianIndex,AbstractArray{<:CartesianIndex}}) = checkbounds(f.data, I) Base.checkbounds(::Type{Bool}, f::Field, I...) = checkbounds(Bool, f.data, I...) -Base.checkbounds(::Type{Bool}, f::Field, I::Union{CartesianIndex, AbstractArray{<:CartesianIndex}}) = checkbounds(Bool, f.data, I) +Base.checkbounds(::Type{Bool}, f::Field, I::Union{CartesianIndex,AbstractArray{<:CartesianIndex}}) = checkbounds(Bool, f.data, I) Base.size(f::Field) = length.(f.indices) Base.parent(f::Field) = parent(f.data) @@ -76,8 +83,8 @@ Base.view(f::Field, I...) = view(f.data, I...) data(f::Field) = f.data halo(f::Field) = f.halo -halo(f::Field, dim) = f.halo[dim] -halo(f::Field, dim, side) = f.halo[dim][side] +halo(f::Field, dim::Integer) = f.halo[dim] +halo(f::Field, dim::Integer, side::Integer) = f.halo[dim][side] Adapt.adapt_structure(to, f::Field{T,N,L}) where {T,N,L} = Field{L}(Adapt.adapt(to, f.data), f.halo, f.indices) @@ -91,7 +98,7 @@ Base.@propagate_inbounds Base.lastindex(f::Field, dim) = lastindex(f.data, dim) function interior_indices(f::Field) return ntuple(ndims(f)) do I - (firstindex(parent(f), I) + f.halo[I][1]):(lastindex(parent(f), I) - f.halo[I][2]) + (firstindex(parent(f), I)+f.halo[I][1]):(lastindex(parent(f), I)-f.halo[I][2]) end end @@ -103,14 +110,14 @@ set!(f::Field, other::Field) = (copy!(interior(f), interior(other)); nothing) set!(f::Field{T}, val::T) where {T<:Number} = (fill!(interior(f), val); nothing) set!(f::Field, A::AbstractArray) = (copy!(interior(f), A); nothing) -@kernel function _set_continuous!(dst, grid, loc, fun, args...) +@kernel function _set_continuous!(dst, grid, loc, fun::F, args...) where {F} I = @index(Global, Cartesian) dst[I] = fun(coord(grid, loc, I)..., args...) end -@kernel function _set_discrete!(dst, grid, loc, fun, args...) +@kernel function _set_discrete!(dst, grid, loc, fun::F, args...) where {F} I = @index(Global, Cartesian) - dst[I] = fun(grid, loc, Tuple(I)..., args...) + dst[I] = fun(grid, loc, I, args...) 
end function set!(f::Field{T,N}, grid::CartesianGrid{N}, fun::F; discrete=false, parameters=()) where {T,F,N} @@ -124,4 +131,15 @@ function set!(f::Field{T,N}, grid::CartesianGrid{N}, fun::F; discrete=false, par return end -end \ No newline at end of file +# grid operators +Base.@propagate_inbounds ∂(f::AbstractField, I, dim) = ∂(f, I, dim, location(f, dim)) +Base.@propagate_inbounds ∂(f::AbstractField, I, dim, ::Vertex) = ∂ᶜ(f, I, dim) +Base.@propagate_inbounds ∂(f::AbstractField, I, dim, ::Center) = ∂ᵛ(f, I, dim) + +# field norm +LinearAlgebra.norm(f::Field) = LinearAlgebra.norm(interior(f)) +LinearAlgebra.norm(f::Field, p::Real) = LinearAlgebra.norm(interior(f), p) + +include("function_field.jl") + +end diff --git a/src/Fields/function_field.jl b/src/Fields/function_field.jl new file mode 100644 index 00000000..e522583e --- /dev/null +++ b/src/Fields/function_field.jl @@ -0,0 +1,46 @@ +struct Continuous end +struct Discrete end + +struct FunctionField{T,N,L,CD,F,G,P} <: AbstractField{T,N,L} + func::F + grid::G + parameters::P + function FunctionField{CD,L}(func::F, grid::G, parameters::P) where {CD,L,F,G,P} + N = ndims(grid) + T = eltype(grid) + return new{T,N,L,CD,F,G,P}(func, grid, parameters) + end +end + +function FunctionField(func::F, grid::G, loc; discrete=false, parameters=nothing) where {F,G} + loc = expand_loc(Val(ndims(grid)), loc) + L = typeof(loc) + CD = discrete ? Discrete : Continuous + return FunctionField{CD,L}(func, grid, parameters) +end + +Base.size(f::FunctionField) = size(f.grid, location(f)) + +@inline func_type(::FunctionField{T,N,L,CD}) where {T,N,L,CD} = CD + +@inline _params(::Nothing) = () +@inline _params(p) = p + +Base.@propagate_inbounds function call_func(func::F, grid, loc, I, params, ::Type{Continuous}) where {F} + func(coord(grid, loc, I)..., params...) +end + +Base.@propagate_inbounds function call_func(func::F, grid, loc, I, params, ::Type{Discrete}) where {F} + func(grid, loc, I, params...) +end + +Base.@propagate_inbounds Base.getindex(f::FunctionField{T,N}, inds::Vararg{Integer,N}) where {T,N} = getindex(f, CartesianIndex(inds)) +Base.@propagate_inbounds function Base.getindex(f::FunctionField{T,N}, I::CartesianIndex{N}) where {T,N} + call_func(f.func, f.grid, location(f), I, _params(f.parameters), func_type(f)) +end + +function Adapt.adapt_structure(to, f::FunctionField{T,N,L,CD}) where {T,N,L,CD} + FunctionField{CD,L}(Adapt.adapt(to, f.func), + Adapt.adapt(to, f.grid), + Adapt.adapt(to, f.parameters)) +end diff --git a/src/grid_operators.jl b/src/GridOperators.jl similarity index 91% rename from src/grid_operators.jl rename to src/GridOperators.jl index 67b1d7c7..183fb1c1 100644 --- a/src/grid_operators.jl +++ b/src/GridOperators.jl @@ -7,7 +7,9 @@ export ∂ᶜx, ∂ᶜy, ∂ᶜz, avᶜx, avᶜy, avᶜz, avᶜxy, avᶜxz, av import Base.@propagate_inbounds -Base.@assume_effects :foldable δ(op, I::CartesianIndex{N}, ::Val{D}) where {N,D} = ntuple(i -> i == D ? op(I[i], 1) : I[i], Val(N)) |> CartesianIndex +Base.@assume_effects :foldable function δ(op, I::CartesianIndex{N}, ::Val{D}) where {N,D} + ntuple(i -> i == D ? 
op(I[i], 1) : I[i], Val(N)) |> CartesianIndex +end Base.@assume_effects :foldable function δ(op, I::CartesianIndex{N}, ::Val{D1}, ::Val{D2}) where {N,D1,D2} δI = ntuple(Val(N)) do i @@ -54,7 +56,7 @@ for (dim, val1, val2) in ((:xy, 1, 2), (:xz, 1, 3), (:yz, 2, 3)) @eval begin @propagate_inbounds $av(f, I) = $avl(f, I, Val($val1), Val($val2)) end - end + end end -end \ No newline at end of file +end diff --git a/src/Grids/Grids.jl b/src/Grids/Grids.jl index 0540f959..e37785fc 100644 --- a/src/Grids/Grids.jl +++ b/src/Grids/Grids.jl @@ -15,4 +15,4 @@ struct Vertex <: Location end include("discrete_axis.jl") include("cartesian_grid.jl") -end \ No newline at end of file +end diff --git a/src/Grids/cartesian_grid.jl b/src/Grids/cartesian_grid.jl index 8adc6670..3f13220a 100644 --- a/src/Grids/cartesian_grid.jl +++ b/src/Grids/cartesian_grid.jl @@ -1,11 +1,31 @@ +""" +Rectilinear grid with uniform spacing. + +# Examples + +```jldoctest +julia> grid = CartesianGrid(origin=(0.0,0.0), extent=(1.0,1.0), size=(4,4)) +2D 4×4 CartesianGrid{Float64}: + x ∈ [0.0–1.0]; Δx = 0.25 + y ∈ [0.0–1.0]; Δy = 0.25 +``` +""" struct CartesianGrid{N,T<:AbstractFloat,I<:Integer} axes::NTuple{N,DiscreteAxis{T,I}} end -CartesianGrid(origin::NTuple{N,T}, extent::NTuple{N,T}, size::NTuple{N,I}) where {N,T,I} = CartesianGrid(DiscreteAxis.(origin, extent, size)) +function CartesianGrid(origin::NTuple{N,T}, extent::NTuple{N,T}, size::NTuple{N,I}) where {N,T,I} + CartesianGrid(DiscreteAxis.(origin, extent, size)) +end + +""" + CartesianGrid(origin::NTuple{N,T}, extent::NTuple{N,T}, size::NTuple{N,I}) +Create a Cartesian grid with a specified origin (bottom-south-west corner in 3D), spatial extent, and number of grid cells. +""" CartesianGrid(; origin::NTuple{N,T}, extent::NTuple{N,T}, size::NTuple{N,I}) where {N,T,I} = CartesianGrid(origin, extent, size) +# overload methods from Base @pure Base.eltype(::CartesianGrid{N,T}) where {N,T} = T @pure Base.ndims(::CartesianGrid{N}) where {N} = N @@ -17,11 +37,49 @@ Base.size(grid::CartesianGrid) = length.(grid.axes) Base.size(grid::CartesianGrid{N}, locs::NTuple{N,Location}) where {N} = length.(grid.axes, locs) Base.size(grid::CartesianGrid, loc::Location) = ntuple(D -> length(grid.axes[D], loc), Val(ndims(grid))) +# pretty-printing +function Base.show(io::IO, grid::CartesianGrid{N,T}) where {N,T} + print(io, "$(N)D $(join(size(grid), "×")) CartesianGrid{$T}:\n") + symbols = (:x, :y, :z) + for idim in 1:N + l, r = origin(grid, idim), origin(grid, idim) + extent(grid, idim) + print(io, " $(symbols[idim]) ∈ [$(l)–$(r)]; Δ$(symbols[idim]) = $(spacing(grid, idim))\n") + end +end + +""" + axis(grid::CartesianGrid, dim::Integer) + +Return a `DiscreteAxis` corresponding to the spatial dimension `dim`. +""" axis(grid::CartesianGrid, dim::Integer) = grid.axes[dim] +""" + origin(grid::CartesianGrid) + +Return the origin of a `CartesianGrid`. The origin corresponds to bottom-south-west corner of the grid in 3D. +""" origin(grid::CartesianGrid) = origin.(grid.axes) + +""" + extent(grid::CartesianGrid) + +Return the spatial extent of a `CartesianGrid` as a tuple. +""" extent(grid::CartesianGrid) = extent.(grid.axes) + +""" + spacing(grid::CartesianGrid) + +Return the spacing of a `CartesianGrid` as a tuple. +""" spacing(grid::CartesianGrid) = spacing.(grid.axes) + +""" + Δ(grid::CartesianGrid) + +Return the spacing of a `CartesianGrid` as a tuple. Same as `spacing`. 
+""" Δ(grid::CartesianGrid) = spacing(grid) @propagate_inbounds origin(grid::CartesianGrid, dim::Integer) = origin(grid.axes[dim]) @@ -29,8 +87,15 @@ spacing(grid::CartesianGrid) = spacing.(grid.axes) @propagate_inbounds spacing(grid::CartesianGrid, dim::Integer) = spacing(grid.axes[dim]) @propagate_inbounds Δ(grid::CartesianGrid, dim::Integer) = spacing(grid.axes[dim]) -@propagate_inbounds coord(grid::CartesianGrid{N}, loc::Location, inds::NTuple{N}) where {N} = coord.(grid.axes, Ref(loc), inds) +""" + coord(grid::CartesianGrid{N}, loc::NTuple{N,Location}, inds::NTuple{N}) + +Return a tuple of spatial coordinates of a grid point at location `loc` and indices `inds`. + +For vertex-centered locations, first grid point is at the origin. +For cell-centered locations, first grid point at half-spacing distance from the origin. +""" @propagate_inbounds function coord(grid::CartesianGrid{N}, loc::NTuple{N,Location}, inds::NTuple{N}) where {N} ntuple(Val(N)) do I Base.@_inline_meta @@ -38,12 +103,20 @@ spacing(grid::CartesianGrid) = spacing.(grid.axes) end end +@propagate_inbounds coord(grid::CartesianGrid{N}, loc::Location, inds::NTuple{N}) where {N} = coord.(grid.axes, Ref(loc), inds) + coord(grid::CartesianGrid{N}, loc, I::CartesianIndex{N}) where {N} = coord(grid, loc, Tuple(I)) @propagate_inbounds coord(grid::CartesianGrid, loc::Location, dim::Integer, i::Integer) = coord(grid.axes[dim], loc, i) -@propagate_inbounds coord(grid::CartesianGrid{N}, loc::Location, dim::Integer, inds::NTuple{N}) where {N} = coord(grid.axes[dim], loc, inds[dim]) -@propagate_inbounds coord(grid::CartesianGrid{N}, locs::NTuple{N,Location}, dim::Integer, inds::NTuple{N}) where {N} = coord(grid.axes[dim], locs[dim], inds[dim]) -@propagate_inbounds coord(grid::CartesianGrid{N}, locs::NTuple{N,Location}, dim::Integer, i::Integer) where {N} = coord(grid.axes[dim], locs[dim], i) +@propagate_inbounds function coord(grid::CartesianGrid{N}, loc::Location, dim::Integer, inds::NTuple{N}) where {N} + coord(grid.axes[dim], loc, inds[dim]) +end +@propagate_inbounds function coord(grid::CartesianGrid{N}, locs::NTuple{N,Location}, dim::Integer, inds::NTuple{N}) where {N} + coord(grid.axes[dim], locs[dim], inds[dim]) +end +@propagate_inbounds function coord(grid::CartesianGrid{N}, locs::NTuple{N,Location}, dim::Integer, i::Integer) where {N} + coord(grid.axes[dim], locs[dim], i) +end @propagate_inbounds coord(grid::CartesianGrid, loc, ::Val{D}, i) where {D} = coord(grid, loc, D, i) @@ -111,4 +184,4 @@ end xvertices(grid::CartesianGrid; kwargs...) = vertices(grid, Val(1); kwargs...) yvertices(grid::CartesianGrid; kwargs...) = vertices(grid, Val(2); kwargs...) -zvertices(grid::CartesianGrid; kwargs...) = vertices(grid, Val(3); kwargs...) \ No newline at end of file +zvertices(grid::CartesianGrid; kwargs...) = vertices(grid, Val(3); kwargs...) diff --git a/src/Grids/discrete_axis.jl b/src/Grids/discrete_axis.jl index c1d763d9..87e1e728 100644 --- a/src/Grids/discrete_axis.jl +++ b/src/Grids/discrete_axis.jl @@ -1,3 +1,6 @@ +""" +Discretised one-dimensional interval. Grid type `CartesianGrid` is defined as a tuple of `DiscreteAxis`'s. 
+""" struct DiscreteAxis{T<:AbstractFloat,I<:Integer} <: AbstractRange{T} origin::T extent::T @@ -6,6 +9,8 @@ struct DiscreteAxis{T<:AbstractFloat,I<:Integer} <: AbstractRange{T} DiscreteAxis(origin::T, extent::T, len::I) where {T,I} = new{T,I}(origin, extent, extent / len, len) end +# overload methods from Base + import Base.@pure import Base.@propagate_inbounds @@ -53,4 +58,4 @@ end end centers(ax::DiscreteAxis; kwargs...) = coords(ax, Center(); kwargs...) -vertices(ax::DiscreteAxis; kwargs...) = coords(ax, Vertex(); kwargs...) \ No newline at end of file +vertices(ax::DiscreteAxis; kwargs...) = coords(ax, Vertex(); kwargs...) diff --git a/src/KernelLaunch.jl b/src/KernelLaunch.jl new file mode 100644 index 00000000..e25fa6e2 --- /dev/null +++ b/src/KernelLaunch.jl @@ -0,0 +1,87 @@ +module KernelLaunch + +using FastIce.Grids +using FastIce.Architectures +using FastIce.BoundaryConditions + +using KernelAbstractions + +export launch! + +""" + launch!(arch::Architecture, grid::CartesianGrid, kernel::Pair{K,Args}; ) where {K,Args} + +Launch a KernelAbstraction `kernel` on a `grid` using the backend from `arch`. +Either `worksize` or `location` must be provided as keyword arguments. + +# Keyword Arguments +- `worksize`: worksize of a kernel, i.e. how many grid points are included in each spatial direction. +- `location[=nothing]`: compute worksize as a size of the grid at a specified location. + If only one location is provided, e.g. `location=Vertex()`, then this location will be used for all spacial directions. +- `offset[=nothing]`: index offset for all grid indices as a `CartesianIndex`. +- `expand[=nothing]`: if provided, the worksize is increased by `2*expand`, and offset is set to `-expand`, or combined with user-provided offset. +- `hide_boundaries[=nothing]`: instance of `HideBoundaries`, that will be used to overlap boundary processing with computations at inner points of the domain. +- `outer_width[=nothing]`: if `hide_boundaries` is specified, used to determine the decomposition of the domain into inner and outer regions. +- `boundary_conditions[=nothing]`: a tuple of boundary condition batches for each side of every spatial direction. +- `async[=true]`: if set to `false`, will block the host until the kernel is finished executing. 
+""" +function launch!(arch::Architecture, grid::CartesianGrid, kernel::Pair{K,Args}; + worksize=nothing, + location=nothing, + offset=nothing, + expand=nothing, + boundary_conditions=nothing, + hide_boundaries=nothing, + outer_width=nothing, + async=true) where {K,Args} + fun, args = kernel + + if isnothing(location) && isnothing(worksize) + throw(ArgumentError("either grid location or worksize must me specified")) + end + + if !isnothing(location) && !isnothing(worksize) + throw(ArgumentError("either grid location or worksize must me specified, but not both")) + end + + if isnothing(worksize) + worksize = size(grid, location) + end + + if !isnothing(expand) + if !isnothing(offset) + offset -= oneunit(CartesianIndex{ndims(grid)}) + else + offset = -oneunit(CartesianIndex{ndims(grid)}) + end + worksize = worksize .+ 2 .* expand + end + + groupsize = heuristic_groupsize(arch, Val(length(worksize))) + + if isnothing(hide_boundaries) + fun(backend(arch), groupsize, worksize)(args..., offset) + isnothing(boundary_conditions) || apply_all_boundary_conditions!(arch, grid, boundary_conditions) + else + hide(hide_boundaries, arch, grid, boundary_conditions, worksize; outer_width) do indices + sub_offset, ndrange = first(indices) - oneunit(first(indices)), size(indices) + if !isnothing(offset) + sub_offset += offset + end + fun(backend(arch), groupsize)(args..., sub_offset; ndrange) + end + end + + async || KernelAbstractions.synchronize(backend(arch)) + return +end + +function apply_all_boundary_conditions!(arch::Architecture, grid::CartesianGrid{N}, boundary_conditions) where {N} + ntuple(Val(N)) do D + apply_boundary_conditions!(Val(1), Val(D), arch, grid, boundary_conditions[D][1]) + apply_boundary_conditions!(Val(2), Val(D), arch, grid, boundary_conditions[D][2]) + end + return +end + +end diff --git a/src/logging.jl b/src/Logging.jl similarity index 61% rename from src/logging.jl rename to src/Logging.jl index f3b7eebb..b2d81aa4 100644 --- a/src/logging.jl +++ b/src/Logging.jl @@ -11,14 +11,14 @@ struct MPILogger{B<:AbstractLogger} <: AbstractLogger base_logger::B end -function handle_message(l::MPILogger,args...;kwargs...) +function handle_message(l::MPILogger, args...; kwargs...) if MPI.Comm_rank(l.comm) == l.rank - handle_message(l.base_logger,args...;kwargs...) + handle_message(l.base_logger, args...; kwargs...) end end -shouldlog(l::MPILogger,args...) = (MPI.Comm_rank(l.comm) == l.rank) && shouldlog(l.base_logger,args...) +shouldlog(l::MPILogger, args...) = (MPI.Comm_rank(l.comm) == l.rank) && shouldlog(l.base_logger, args...) min_enabled_level(l::MPILogger) = min_enabled_level(l.base_logger) -end \ No newline at end of file +end diff --git a/src/Models/full_stokes/isothermal/boundary_conditions.jl b/src/Models/full_stokes/isothermal/boundary_conditions.jl index c3531cc5..4aeb6a3f 100644 --- a/src/Models/full_stokes/isothermal/boundary_conditions.jl +++ b/src/Models/full_stokes/isothermal/boundary_conditions.jl @@ -1,15 +1,18 @@ struct Traction end struct Velocity end -struct Slip end +struct Slip end -struct BoundaryCondition{Kind, N, C} +""" +Boundary condition for Stokes problem. +""" +struct BoundaryCondition{Kind,N,C} components::C - BoundaryCondition{Kind}(components::Tuple) where {Kind} = new{Kind, length(components), typeof(components)}(components) + BoundaryCondition{Kind}(components::Tuple) where {Kind} = new{Kind,length(components),typeof(components)}(components) end BoundaryCondition{Kind}(components...) 
where {Kind} = BoundaryCondition{Kind}(components) -const _COORDINATES = ((:x, 1), (:y, 2), (:z, 3)) +const _COORDINATES = ((:x, 1), (:y, 2), (:z, 3)) for (dim, val) in _COORDINATES td = remove_dim(Val(val), _COORDINATES) @@ -20,7 +23,7 @@ for (dim, val) in _COORDINATES N = Symbol(:τ, dim, dim) ex1 = Expr(:(=), :Pr, :(DirichletBC{HalfCell}(bc.components[$val]))) - ex2 = Expr(:(=), N, :(DirichletBC{HalfCell}(convert(eltype(bc.components[$val]), 0)))) + ex2 = Expr(:(=), N, :(DirichletBC{HalfCell}(convert(eltype(bc.components[$val]), 0)))) ex3 = ntuple(Val(length(TN))) do I Expr(:(=), TN[I], :(DirichletBC{FullCell}(bc.components[$(td[I][2])]))) end @@ -39,75 +42,110 @@ for (dim, val) in _COORDINATES ex_slip_vel = Expr(:(=), Symbol(:V, dim), :(DirichletBC{FullCell}(bc.components[$val]))) ex_slip_vel = Expr(:tuple, ex_slip_vel) - @eval begin - function extract_boundary_conditions(::Val{$val}, bc::BoundaryCondition{Traction}) - return $ex_tr, NamedTuple() - end - - function extract_boundary_conditions(::Val{$val}, bc::BoundaryCondition{Velocity}) - return NamedTuple(), $ex_vel - end - - function extract_boundary_conditions(::Val{$val}, bc::BoundaryCondition{Slip}) - return $ex_slip_tr, $ex_slip_vel - end + ex_res_vel = ntuple(length(_COORDINATES)) do I + kind = I == val ? :(FullCell) : :(HalfCell) + Expr(:(=), Symbol(:r_V, _COORDINATES[I][1]), :(DirichletBC{$kind}(0.0))) end -end - -no_bcs(names) = NamedTuple(f => nothing for f in names) -unique_names(a, b) = Tuple(unique(tuple(a..., b...))) + ex_res_vel = Expr(:tuple, ex_res_vel...) -function expand_boundary_conditions(left, right) - names = unique_names(keys(left), keys(right)) + ex_res_slip_vel = Expr(:(=), Symbol(:r_V, dim), :(DirichletBC{FullCell}(0.0))) + ex_res_slip_vel = Expr(:tuple, ex_res_slip_vel) - if isempty(names) - return nothing - end + @eval begin + extract_stress_bc(::Val{$val}, bc::BoundaryCondition{Traction}) = $ex_tr + extract_stress_bc(::Val{$val}, bc::BoundaryCondition{Velocity}) = () + extract_stress_bc(::Val{$val}, bc::BoundaryCondition{Slip}) = $ex_slip_tr - default = no_bcs(names) - left = merge(default, left) - right = merge(default, right) + extract_velocity_bc(::Val{$val}, bc::BoundaryCondition{Traction}) = () + extract_velocity_bc(::Val{$val}, bc::BoundaryCondition{Velocity}) = $ex_vel + extract_velocity_bc(::Val{$val}, bc::BoundaryCondition{Slip}) = $ex_slip_vel - return NamedTuple{names}(zip(left, right)) + extract_residuals_vel_bc(::Val{$val}, bc::BoundaryCondition{Traction}) = () + extract_residuals_vel_bc(::Val{$val}, bc::BoundaryCondition{Velocity}) = $ex_res_vel + extract_residuals_vel_bc(::Val{$val}, bc::BoundaryCondition{Slip}) = $ex_res_slip_vel + end end -function make_field_boundary_conditions(bcs) - ordering = ( - (:west , :east), - (:south , :north), - (:bottom, :top), - ) - - field_bcs = ntuple(Val(length(ordering))) do dim - left, right = bcs[ordering[dim]] - - left = extract_boundary_conditions(Val(dim), left) - right = extract_boundary_conditions(Val(dim), right) - - stress, velocity = Tuple(zip(left, right)) +function make_batch(bcs::NamedTuple, fields::NamedTuple) + batch_fields = Tuple(fields[name] for name in eachindex(bcs)) + return BoundaryConditionsBatch(batch_fields, values(bcs)) +end - stress = expand_boundary_conditions(stress...) - velocity = expand_boundary_conditions(velocity...) 
+function make_stress_bc(arch::Architecture{Kind}, ::CartesianGrid{N}, fields, bc) where {Kind,N} + ordering = (:x, :y, :z) + ntuple(Val(N)) do D + ntuple(Val(2)) do S + if (Kind == Distributed.DistributedMPI) && has_neighbor(details(arch), D, S) + nothing + else + new_bc = extract_stress_bc(Val(D), bc[ordering[D]][S]) + if isempty(new_bc) + nothing + else + make_batch(new_bc, fields) + end + end + end + end +end - stress, velocity +function make_velocity_bc(arch::Architecture{Kind}, ::CartesianGrid{N}, fields::NamedTuple{names}, bc) where {Kind,N,names} + ordering = (:x, :y, :z) + ntuple(Val(N)) do D + ntuple(Val(2)) do S + if (Kind == Distributed.DistributedMPI) && has_neighbor(details(arch), D, S) + new_bc = NamedTuple{names}(ExchangeInfo(Val(S), Val(D), V) for V in fields) + make_batch(new_bc, fields) + else + new_bc = extract_velocity_bc(Val(D), bc[ordering[D]][S]) + if isempty(new_bc) + nothing + else + make_batch(new_bc, fields) + end + end + end end +end - return NamedTuple{(:stress, :velocity)}(zip(field_bcs...)) +function make_residuals_bc(arch::Architecture{Kind}, ::CartesianGrid{N}, fields::NamedTuple{names}, bc) where {Kind,N,names} + ordering = (:x, :y, :z) + ntuple(Val(N)) do D + ntuple(Val(2)) do S + if (Kind == Distributed.DistributedMPI) && has_neighbor(details(arch), D, S) + nothing + else + if !(bc[ordering[D]][S] isa BoundaryCondition{Traction}) + Vn = Symbol(:r_V, ordering[D]) + BoundaryConditionsBatch((fields[Vn], ), (DirichletBC{FullCell}(0.0), )) + else + nothing + end + end + end + end end -function _apply_bcs!(backend, grid, fields, bcs) - field_map = (Pr = fields.Pr, - τxx = fields.τ.xx, τyy = fields.τ.yy, τzz = fields.τ.zz, - τxy = fields.τ.xy, τxz = fields.τ.xz, τyz = fields.τ.yz, - Vx = fields.V.x , Vy = fields.V.y , Vz = fields.V.z) - - ntuple(Val(length(bcs))) do D - if !isnothing(bcs[D]) - fs = Tuple( field_map[f] for f in eachindex(bcs[D]) ) - apply_bcs!(Val(D), backend, grid, fs, values(bcs[D])) +function make_rheology_bc(arch::Architecture{Kind}, ::CartesianGrid{N}, η) where {Kind,N} + ntuple(Val(N)) do D + ntuple(Val(2)) do S + if (Kind == Distributed.DistributedMPI) && has_neighbor(details(arch), D, S) + BoundaryConditionsBatch((η,), (ExchangeInfo(Val(S), Val(D), η),)) + else + BoundaryConditionsBatch((η,), (ExtrapolateBC(),)) + end end end +end + +function make_field_boundary_conditions(arch::Architecture, grid::CartesianGrid, fields, bc) + stress_fields = (; Pr=fields.Pr, NamedTuple{Symbol.(:τ, keys(fields.τ))}(values(fields.τ))...) + velocity_fields = NamedTuple{Symbol.(:V, keys(fields.V))}(values(fields.V)) + residual_fields = NamedTuple{Symbol.(:r_V, keys(fields.r_V))}(values(fields.r_V)) - return + return (stress = make_stress_bc(arch, grid, stress_fields, bc), + velocity = make_velocity_bc(arch, grid, velocity_fields, bc), + rheology = make_rheology_bc(arch, grid, fields.η), + residual = make_residuals_bc(arch, grid, residual_fields, bc)) end diff --git a/src/Models/full_stokes/isothermal/isothermal.jl b/src/Models/full_stokes/isothermal/isothermal.jl index 20ead3e2..f76993de 100644 --- a/src/Models/full_stokes/isothermal/isothermal.jl +++ b/src/Models/full_stokes/isothermal/isothermal.jl @@ -1,60 +1,87 @@ module Isothermal export BoundaryCondition, Traction, Velocity, Slip -export IsothermalFullStokesModel, advance_iteration!, advance_timestep! +export IsothermalFullStokesModel, advance_iteration!, advance_timestep!, compute_residuals! 
+using FastIce.Architectures using FastIce.Physics using FastIce.Grids using FastIce.Fields using FastIce.BoundaryConditions using FastIce.Utils +using FastIce.KernelLaunch +using FastIce.Distributed include("kernels.jl") include("boundary_conditions.jl") -function default_physics(::Type{T}) where T - return ( - equation_of_state=default(IncompressibleIceEOS{T}), - thermal_properties=default(IceThermalProperties{T}), - rheology=default(GlensLawRheology{Int64}) - ) +function default_physics(::Type{T}) where {T} + return (equation_of_state=default(IncompressibleIceEOS{T}), + thermal_properties=default(IceThermalProperties{T}), + rheology=default(GlensLawRheology{Int64})) end -struct IsothermalFullStokesModel{Backend,Grid,BC,Physics,IterParams,Fields} - backend::Backend +struct IsothermalFullStokesModel{Arch,Grid,BC,HB,OW,Physics,Gravity,IterParams,Fields} + arch::Arch grid::Grid boundary_conditions::BC + hide_boundaries::HB + outer_width::OW physics::Physics + gravity::Gravity iter_params::IterParams fields::Fields end -function make_fields_mechanics(backend, grid) - return ( - Pr = Field(backend, grid, Center(); halo=1), - τ = ( - xx = Field(backend, grid, Center(); halo=1), - yy = Field(backend, grid, Center(); halo=1), - zz = Field(backend, grid, Center(); halo=1), - xy = Field(backend, grid, (Vertex(), Vertex(), Center()); halo=0), - xz = Field(backend, grid, (Vertex(), Center(), Vertex()); halo=0), - yz = Field(backend, grid, (Center(), Vertex(), Vertex()); halo=0), - ), - V = ( - x = Field(backend, grid, (Vertex(), Center(), Center()); halo=1), - y = Field(backend, grid, (Center(), Vertex(), Center()); halo=1), - z = Field(backend, grid, (Center(), Center(), Vertex()); halo=1), - ) - ) +function make_fields_mechanics(backend, grid::CartesianGrid{2}) + return (Pr=Field(backend, grid, Center(); halo=1), + # deviatoric stress + τ=(xx=Field(backend, grid, Center(); halo=1), + yy=Field(backend, grid, Center(); halo=1), + xy=Field(backend, grid, (Vertex(), Vertex()))), + # velocity + V=(x=Field(backend, grid, (Vertex(), Center()); halo=1), + y=Field(backend, grid, (Center(), Vertex()); halo=1)), + # residual + r_Pr=Field(backend, grid, Center()), + r_V=(x=Field(backend, grid, (Vertex(), Center())), + y=Field(backend, grid, (Center(), Vertex())))) end +function make_fields_mechanics(backend, grid::CartesianGrid{3}) + return (Pr=Field(backend, grid, Center(); halo=1), + # deviatoric stress + τ=(xx=Field(backend, grid, Center(); halo=1), + yy=Field(backend, grid, Center(); halo=1), + zz=Field(backend, grid, Center(); halo=1), + xy=Field(backend, grid, (Vertex(), Vertex(), Center())), + xz=Field(backend, grid, (Vertex(), Center(), Vertex())), + yz=Field(backend, grid, (Center(), Vertex(), Vertex()))), + # velocity + V=(x=Field(backend, grid, (Vertex(), Center(), Center()); halo=1), + y=Field(backend, grid, (Center(), Vertex(), Center()); halo=1), + z=Field(backend, grid, (Center(), Center(), Vertex()); halo=1)), + # residual + r_Pr=Field(backend, grid, Center()), + r_V=(x=Field(backend, grid, (Vertex(), Center(), Center())), + y=Field(backend, grid, (Center(), Vertex(), Center())), + z=Field(backend, grid, (Center(), Center(), Vertex())))) +end - -function IsothermalFullStokesModel(; backend, grid, boundary_conditions, physics=nothing, iter_params, fields=nothing, other_fields=nothing) +function IsothermalFullStokesModel(; + arch, + grid, + boundary_conditions, + iter_params, + gravity, + outer_width=(2, 2, 2), + physics=nothing, + fields=nothing, + other_fields=nothing) if isnothing(fields) - 
mechanic_fields = make_fields_mechanics(backend, grid) - rheology_fields = (η = Field(backend, grid, Center(); halo=1),) + mechanic_fields = make_fields_mechanics(backend(arch), grid) + rheology_fields = (η=Field(backend(arch), grid, Center(); halo=1),) fields = merge(mechanic_fields, rheology_fields) end @@ -66,47 +93,53 @@ function IsothermalFullStokesModel(; backend, grid, boundary_conditions, physics physics = default_physics(eltype(grid)) end - boundary_conditions = make_field_boundary_conditions(boundary_conditions) + boundary_conditions = make_field_boundary_conditions(arch, grid, fields, boundary_conditions) + hide_boundaries = HideBoundaries{ndims(grid)}(arch) - return IsothermalFullStokesModel(backend, grid, boundary_conditions, physics, iter_params, fields) + return IsothermalFullStokesModel(arch, grid, boundary_conditions, hide_boundaries, outer_width, physics, gravity, iter_params, fields) end fields(model::IsothermalFullStokesModel) = model.fields grid(model::IsothermalFullStokesModel) = model.grid -function advance_iteration!(model::IsothermalFullStokesModel, t, Δt; async = true) +function advance_iteration!(model::IsothermalFullStokesModel, t, Δt) (; Pr, τ, V, η) = model.fields - (; η_rel, Δτ) = model.iter_params - η_rh = model.physics.rheology + (; η_rel, Δτ) = model.iter_params + η_rh = model.physics.rheology + ρg = model.gravity + hide_boundaries = model.hide_boundaries + outer_width = model.outer_width + Δ = NamedTuple{(:x, :y, :z)}(spacing(model.grid)) - nx, ny, nz = size(model.grid) - backend = model.backend - set_bcs!(bcs) = _apply_bcs!(model.backend, model.grid, model.fields, bcs) + launch!(model.arch, model.grid, update_σ! => (Pr, τ, V, η, Δτ, Δ); + location=Center(), expand=1, boundary_conditions=model.boundary_conditions.stress, hide_boundaries, outer_width, async=false) - # stress + launch!(model.arch, model.grid, update_V! => (V, Pr, τ, η, ρg, Δτ, model.grid, Δ); + location=Vertex(), boundary_conditions=model.boundary_conditions.velocity, hide_boundaries, outer_width, async=false) - # launch!(arch, grid, update_σ!, Pr, τ, V, η, Δτ, Δ) + # rheology + launch!(model.arch, model.grid, update_η! => (η, η_rh, η_rel, model.grid, model.fields); + location=Center(), boundary_conditions=model.boundary_conditions.rheology, hide_boundaries, outer_width, async=false) + return +end - update_σ!(backend, 256, (nx+1, ny+1, nz+1))(Pr, τ, V, η, Δτ, Δ) - set_bcs!(model.boundary_conditions.stress) - # velocity +function compute_residuals!(model::IsothermalFullStokesModel) + (; Pr, τ, V, r_Pr, r_V) = model.fields + ρg = model.gravity + boundary_conditions = model.boundary_conditions.residual - # launch!(arch, grid, (update_res_V! => (rV, V, Pr, τ, η, Δτ, Δ), update_V! => (V, rV, dt)); exchangers = exchangers.velocity, boundary_conditions = boundary_conditions.velocity) - update_V!(backend, 256, (nx+1, ny+1, nz+1))(V, Pr, τ, η, Δτ, Δ) - set_bcs!(model.boundary_conditions.velocity) - # rheology - update_η!(backend, 256, (nx, ny, nz))(η, η_rh, η_rel, model.grid, model.fields) - extrapolate!(η) + Δ = NamedTuple{(:x, :y, :z)}(spacing(model.grid)) - async || synchronize(backend) + launch!(model.arch, model.grid, compute_residuals! 
=> (r_V, r_Pr, Pr, τ, V, ρg, model.grid, Δ);
+            location=Vertex(), boundary_conditions, async=false)
     return
 end
 
 function advance_timestep!(model::IsothermalFullStokesModel, t, Δt)
     # TODO
-
+    return
 end
 
-end
\ No newline at end of file
+end
diff --git a/src/Models/full_stokes/isothermal/kernels.jl b/src/Models/full_stokes/isothermal/kernels.jl
index ea435442..c9bb4fa8 100644
--- a/src/Models/full_stokes/isothermal/kernels.jl
+++ b/src/Models/full_stokes/isothermal/kernels.jl
@@ -2,14 +2,27 @@ using KernelAbstractions
 
 using FastIce.GridOperators
 
-@kernel function update_η!(η, η_rh, χ, grid, fields, args...)
+# Check whether index `I` lies within the size of `grid` at the staggered location of field `f`
+Base.@propagate_inbounds @generated function within(grid::CartesianGrid{N}, f::Field{T,N}, I::CartesianIndex{N}) where {T,N}
+    quote
+        Base.Cartesian.@nall $N i->I[i] <= size(grid, location(f, Val(i)), i)
+    end
+end
+
+"Update the viscosity using relaxation in log-space, which improves the stability of iterative methods."
+@kernel function update_η!(η, η_rh, χ, grid, fields, offset=nothing)
     I = @index(Global, Cartesian)
-    ηt = η_rh(grid, I, fields, args...)
-    @inbounds η[I] = exp(log(η[I]) * (1 - χ) + log(ηt) * χ)
+    isnothing(offset) || (I += offset)
+    @inbounds begin
+        ηt = η_rh(grid, I, fields)
+        η[I] = exp(log(η[I]) * (1 - χ) + log(ηt) * χ)
+    end
 end
 
-@kernel function update_σ!(Pr, τ, V, η, Δτ, Δ)
+# pseudo-transient update rules
+
+@kernel function update_σ!(Pr, τ, V, η, Δτ, Δ, offset=nothing)
     I = @index(Global, Cartesian)
+    isnothing(offset) || (I += offset)
     @inbounds if checkbounds(Bool, Pr, I)
         ε̇xx = ∂ᶜx(V.x, I) / Δ.x
         ε̇yy = ∂ᶜy(V.y, I) / Δ.y
@@ -36,24 +49,83 @@ end
     end
 end
 
-@kernel function update_V!(V, Pr, τ, η, Δτ, Δ)
+@kernel function update_V!(V, Pr, τ, η, ρg, Δτ, grid, Δ, offset=nothing)
+    I = @index(Global, Cartesian)
+    isnothing(offset) || (I += offset)
+    @inbounds if within(grid, V.x, I)
+        ∂σxx_∂x = (-∂ᵛx(Pr, I) + ∂ᵛx(τ.xx, I)) / Δ.x
+        ∂τxy_∂y = ∂ᶜy(τ.xy, I) / Δ.y
+        ∂τxz_∂z = ∂ᶜz(τ.xz, I) / Δ.z
+        V.x[I] += (∂σxx_∂x + ∂τxy_∂y + ∂τxz_∂z - ρg.x[I]) / maxlᵛx(η, I) * Δτ.V.x
+    end
+    @inbounds if within(grid, V.y, I)
+        ∂σyy_∂y = (-∂ᵛy(Pr, I) + ∂ᵛy(τ.yy, I)) / Δ.y
+        ∂τxy_∂x = ∂ᶜx(τ.xy, I) / Δ.x
+        ∂τyz_∂z = ∂ᶜz(τ.yz, I) / Δ.z
+        V.y[I] += (∂σyy_∂y + ∂τxy_∂x + ∂τyz_∂z - ρg.y[I]) / maxlᵛy(η, I) * Δτ.V.y
+    end
+    @inbounds if within(grid, V.z, I)
+        ∂σzz_∂z = (-∂ᵛz(Pr, I) + ∂ᵛz(τ.zz, I)) / Δ.z
+        ∂τxz_∂x = ∂ᶜx(τ.xz, I) / Δ.x
+        ∂τyz_∂y = ∂ᶜy(τ.yz, I) / Δ.y
+        V.z[I] += (∂σzz_∂z + ∂τxz_∂x + ∂τyz_∂y - ρg.z[I]) / maxlᵛz(η, I) * Δτ.V.z
+    end
+end
+
+# helper kernels
+
+@kernel function compute_τ!(τ, V, η, Δ, offset=nothing)
+    I = @index(Global, Cartesian)
+    isnothing(offset) || (I += offset)
+    @inbounds if checkbounds(Bool, τ.xx, I) # τ.xx is cell-centered; Pr is not an argument of this kernel
+        ε̇xx = ∂ᶜx(V.x, I) / Δ.x
+        ε̇yy = ∂ᶜy(V.y, I) / Δ.y
+        ε̇zz = ∂ᶜz(V.z, I) / Δ.z
+        ∇V = ε̇xx + ε̇yy + ε̇zz
+        # deviatoric diagonal
+        τ.xx[I] = 2.0 * η[I] * (ε̇xx - ∇V / 3.0)
+        τ.yy[I] = 2.0 * η[I] * (ε̇yy - ∇V / 3.0)
+        τ.zz[I] = 2.0 * η[I] * (ε̇zz - ∇V / 3.0)
+    end
+    @inbounds if checkbounds(Bool, τ.xy, I)
+        ε̇xy = 0.5 * (∂ᵛx(V.y, I) / Δ.x + ∂ᵛy(V.x, I) / Δ.y)
+        τ.xy[I] = 2.0 * avᵛxy(η, I) * ε̇xy
+    end
+    @inbounds if checkbounds(Bool, τ.xz, I)
+        ε̇xz = 0.5 * (∂ᵛx(V.z, I) / Δ.x + ∂ᵛz(V.x, I) / Δ.z)
+        τ.xz[I] = 2.0 * avᵛxz(η, I) * ε̇xz
+    end
+    @inbounds if checkbounds(Bool, τ.yz, I)
+        ε̇yz = 0.5 * (∂ᵛy(V.z, I) / Δ.y + ∂ᵛz(V.y, I) / Δ.z)
+        τ.yz[I] = 2.0 * avᵛyz(η, I) * ε̇yz
+    end
+end
+
+@kernel function compute_residuals!(r_V, r_Pr, Pr, τ, V, ρg, grid, Δ, offset=nothing)
     I = @index(Global, Cartesian)
-    @inbounds if checkbounds(Bool, V.x, I)
+    isnothing(offset) || (I += offset)
+
@inbounds if within(grid, r_Pr, I) + ε̇xx = ∂ᶜx(V.x, I) / Δ.x + ε̇yy = ∂ᶜy(V.y, I) / Δ.y + ε̇zz = ∂ᶜz(V.z, I) / Δ.z + r_Pr[I] = ε̇xx + ε̇yy + ε̇zz + end + @inbounds if within(grid, r_V.x, I) ∂σxx_∂x = (-∂ᵛx(Pr, I) + ∂ᵛx(τ.xx, I)) / Δ.x ∂τxy_∂y = ∂ᶜy(τ.xy, I) / Δ.y ∂τxz_∂z = ∂ᶜz(τ.xz, I) / Δ.z - V.x[I] += (∂σxx_∂x + ∂τxy_∂y + ∂τxz_∂z) / maxlᵛx(η, I) * Δτ.V.x + r_V.x[I] = ∂σxx_∂x + ∂τxy_∂y + ∂τxz_∂z - ρg.x[I] end - @inbounds if checkbounds(Bool, V.y, I) + @inbounds if within(grid, r_V.y, I) ∂σyy_∂y = (-∂ᵛy(Pr, I) + ∂ᵛy(τ.yy, I)) / Δ.y ∂τxy_∂x = ∂ᶜx(τ.xy, I) / Δ.x ∂τyz_∂z = ∂ᶜz(τ.yz, I) / Δ.z - V.y[I] += (∂σyy_∂y + ∂τxy_∂x + ∂τyz_∂z) / maxlᵛy(η, I) * Δτ.V.y + r_V.y[I] = ∂σyy_∂y + ∂τxy_∂x + ∂τyz_∂z - ρg.y[I] end - @inbounds if checkbounds(Bool, V.z, I) + @inbounds if within(grid, r_V.z, I) ∂σzz_∂z = (-∂ᵛz(Pr, I) + ∂ᵛz(τ.zz, I)) / Δ.z ∂τxz_∂x = ∂ᶜx(τ.xz, I) / Δ.x ∂τyz_∂y = ∂ᶜy(τ.yz, I) / Δ.y - V.z[I] += (∂σzz_∂z + ∂τxz_∂x + ∂τyz_∂y) / maxlᵛz(η, I) * Δτ.V.z + r_V.z[I] = ∂σzz_∂z + ∂τxz_∂x + ∂τyz_∂y - ρg.z[I] end end diff --git a/src/physics.jl b/src/Physics.jl similarity index 65% rename from src/physics.jl rename to src/Physics.jl index 5797a303..af7831bd 100644 --- a/src/physics.jl +++ b/src/Physics.jl @@ -1,14 +1,11 @@ module Physics -export AbstractPhysics export IncompressibleIceEOS, IceThermalProperties -export IceRheology, GlensLawRheology, τ +export IceRheology, GlensLawRheology export default using FastIce.GridOperators -abstract type AbstractPhysics end - struct IncompressibleIceEOS{T} density::T heat_capacity::T @@ -31,10 +28,10 @@ end default(::Type{GlensLawRheology{I}}) where {I} = GlensLawRheology(convert(I, 3)) -@inline function (rh::GlensLawRheology{T})(grid, I, fields) where {T} +Base.@propagate_inbounds function (rh::GlensLawRheology{T})(grid, I, fields) where {T} (; τ, A) = fields - @inbounds τII = sqrt(0.5 * (τ.xx[I]^2 + τ.yy[I]^2 + τ.zz[I]^2) + avᶜxy(τ.xy, I)^2 + avᶜxz(τ.xz, I)^2 + avᶜyz(τ.yz, I)^2) - return 0.5 / (A[I] * τII^(rh.exponent - 1)) + τII = sqrt(0.5 * (τ.xx[I]^2 + τ.yy[I]^2 + τ.zz[I]^2) + avᶜxy(τ.xy, I)^2 + avᶜxz(τ.xz, I)^2 + avᶜyz(τ.yz, I)^2) + 0.5 / (A[I] * τII^(rh.exponent - 1)) end -end \ No newline at end of file +end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl new file mode 100644 index 00000000..78296230 --- /dev/null +++ b/src/Utils/Utils.jl @@ -0,0 +1,36 @@ +module Utils + +export remove_dim, insert_dim +export split_ndrange +export Pipeline + +using FastIce.Fields +using FastIce.Architectures +using KernelAbstractions + +include("pipelines.jl") +include("split_ndrange.jl") + +# Returns a copy of the tuple `A` with element in position `D` removed +@inline remove_dim(::Val{D}, A::NTuple{N}) where {D,N} = ntuple(I -> I < D ? 
A[I] : A[I+1], Val(N - 1))
+@inline remove_dim(::Val{1}, I::NTuple{1}) = 1
+
+# Same as `remove_dim`, but for `CartesianIndex`
+@inline remove_dim(dim, I::CartesianIndex) = remove_dim(dim, Tuple(I)) |> CartesianIndex
+
+# Returns a copy of tuple `A`, but inserts `i` into position `D`
+@inline insert_dim(::Val{D}, A::NTuple{N}, i) where {D,N} =
+    ntuple(Val(N + 1)) do I
+        if I < D
+            A[I]
+        elseif I == D
+            i
+        else
+            A[I-1]
+        end
+    end
+
+# Same as `insert_dim`, but for `CartesianIndex`
+@inline insert_dim(dim, A::CartesianIndex, i) = insert_dim(dim, Tuple(A), i) |> CartesianIndex
+
+end
diff --git a/src/Utils/extrapolate.jl b/src/Utils/extrapolate.jl
new file mode 100644
index 00000000..1d079204
--- /dev/null
+++ b/src/Utils/extrapolate.jl
@@ -0,0 +1,14 @@
+function extrapolate!(A::Field; async=true)
+    backend = get_backend(A)
+    I = ntuple(Val(ndims(A))) do dim
+        halo(A, dim, 1) > 0 ? 0 : nothing
+    end
+    N = ntuple(Val(ndims(A))) do dim
+        halo(A, dim, 2) > 0 ? size(A, dim) + 1 : nothing
+    end
+    kernel_extrapolate_x!(backend, 256, (size(A, 2) + 2, size(A, 3) + 2))(A, I[1], N[1])
+    kernel_extrapolate_y!(backend, 256, (size(A, 1) + 2, size(A, 3) + 2))(A, I[2], N[2])
+    kernel_extrapolate_z!(backend, 256, (size(A, 1) + 2, size(A, 2) + 2))(A, I[3], N[3])
+    async || KernelAbstractions.synchronize(backend)
+    return
+end
diff --git a/src/Utils/pipelines.jl b/src/Utils/pipelines.jl
new file mode 100644
index 00000000..63c9333e
--- /dev/null
+++ b/src/Utils/pipelines.jl
@@ -0,0 +1,45 @@
+# Runs submitted closures sequentially on a dedicated task, signalling an event after each one;
+# the optional `pre`/`post` hooks run at task startup and shutdown.
+mutable struct Pipeline
+    src::Channel
+    out::Base.Event
+    task::Task
+
+    function Pipeline(; pre=nothing, post=nothing)
+        src = Channel()
+        out = Base.Event(true)
+        task = Threads.@spawn begin
+            isnothing(pre) || Base.invokelatest(pre)
+            try
+                for work in src
+                    Base.invokelatest(work)
+                    notify(out)
+                end
+            finally
+                isnothing(post) || Base.invokelatest(post)
+            end
+        end
+        errormonitor(task)
+        return new(src, out, task)
+    end
+end
+
+function Base.close(p::Pipeline)
+    close(p.src)
+    wait(p.task)
+    return
+end
+
+Base.isopen(p::Pipeline) = isopen(p.src)
+
+function Base.put!(work::F, p::Pipeline) where {F}
+    put!(p.src, work)
+    return
+end
+
+function Base.wait(p::Pipeline)
+    if isopen(p.src)
+        wait(p.out)
+    else
+        error("Pipeline is not running")
+    end
+    return
+end
diff --git a/src/Utils/split_ndrange.jl b/src/Utils/split_ndrange.jl
new file mode 100644
index 00000000..a50d017b
--- /dev/null
+++ b/src/Utils/split_ndrange.jl
@@ -0,0 +1,26 @@
+@inline outer_subrange(nr, bw, D, ::Val{1}) = 1:bw[D]
+@inline outer_subrange(nr, bw, D, ::Val{2}) = (size(nr, D)-bw[D]+1):size(nr, D)
+@inline inner_subrange(nr, bw, D) = (bw[D]+1):(size(nr, D)-bw[D])
+
+function ndsubrange(ndrange::CartesianIndices{N}, ndwidth, I, ::Val{S}) where {N,S}
+    return ntuple(Val(N)) do D
+        if D < I
+            1:size(ndrange, D)
+        elseif D == I
+            outer_subrange(ndrange, ndwidth, D, Val(S))
+        else
+            inner_subrange(ndrange, ndwidth, D)
+        end
+    end
+end
+
+# Split `ndrange` into one inner range and, for each dimension, two outer slabs of width `ndwidth`
+# (see the usage sketch at the end of test/test_utils.jl)
+@inline split_ndrange(ndrange, ndwidth) = split_ndrange(CartesianIndices(ndrange), ndwidth)
+
+function split_ndrange(ndrange::CartesianIndices{N}, ndwidth::NTuple{N,<:Integer}) where {N}
+    @assert all(size(ndrange) .> ndwidth .* 2)
+    ndinner = ntuple(D -> inner_subrange(ndrange, ndwidth, D), Val(N))
+    inner_range = ndrange[ndinner...]
+ outer_ranges = ntuple(D -> (ndrange[ndsubrange(ndrange, ndwidth, D, Val(1))...], + ndrange[ndsubrange(ndrange, ndwidth, D, Val(2))...]), Val(N)) + return inner_range, outer_ranges +end diff --git a/src/architecture.jl b/src/architecture.jl deleted file mode 100644 index 963ad350..00000000 --- a/src/architecture.jl +++ /dev/null @@ -1,48 +0,0 @@ -module Architecture - -export AbstractArchitecture - -export SingleDeviceArchitecture - -export launch!, set_device!, heuristic_groupsize - -using FastIce.Grids - -using KernelAbstractions -import KernelAbstractions.Kernel - -abstract type AbstractArchitecture end - -set_device!(arch::AbstractArchitecture) = set_device!(device(arch)) - -heuristic_groupsize(arch::AbstractArchitecture) = heuristic_groupsize(device(arch)) - -struct SingleDeviceArchitecture{B,D} <: AbstractArchitecture - backend::B - device::D -end - -set_device!(::SingleDeviceArchitecture{CPU}) = nothing - -heuristic_groupsize(::SingleDeviceArchitecture{CPU}) = 256 - -device(arch::SingleDeviceArchitecture) = arch.device - -function launch!(arch::SingleDeviceArchitecture, grid::CartesianGrid, kernel::Pair{Kernel,Args}; kwargs...) where {Args} - worksize = size(grid, Vertex()) - launch!(arch, worksize, kernel; kwargs...) -end - -function launch!(arch::SingleDeviceArchitecture, worksize::NTuple{N,Int}, kernel::Pair{Kernel,Args}; boundary_conditions=nothing, async=true) where {N,Args} - fun, args = kernel - - groupsize = heuristic_groupsize(device(arch)) - - fun(arch.backend, groupsize, worksize)(args...) - isnothing(boundary_conditions) || apply_boundary_conditions!(boundary_conditions) - - async || synchronize(arch.backend) - return -end - -end \ No newline at end of file diff --git a/src/geometry.jl b/src/geometry.jl deleted file mode 100644 index dc4fe417..00000000 --- a/src/geometry.jl +++ /dev/null @@ -1,10 +0,0 @@ -function rotation_matrix(α,β,γ) - sα,cα = sincos(α) - sβ,cβ = sincos(β) - sγ,cγ = sincos(γ) - return Mat3( - cα * cβ , sα * cβ , -sβ , - cα * sβ * sγ - sα * cγ, sα * sβ * sγ + cα * cγ, cβ * sγ, - cα * sβ * cγ + sα * sγ, sα * sβ * cγ - cα * sγ, cβ * cγ, - ) -end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index eeae9d21..00000000 --- a/src/utils.jl +++ /dev/null @@ -1,68 +0,0 @@ -module Utils - -export remove_dim, insert_dim - -# Returns a copy of the tuple `A` with element in position `D` removed -@inline remove_dim(::Val{D}, A::NTuple{N}) where {D,N} = ntuple(I -> I < D ? A[I] : A[I+1], Val(N - 1)) -@inline remove_dim(::Val{1}, I::NTuple{1}) = 1 - -# Same as `remove_dim`, but for `CartesianIndex` -@inline remove_dim(dim, I::CartesianIndex) = remove_dim(dim, Tuple(I)) |> CartesianIndex - -# Returns a copy of tuple `A`, but inserts `i` into position `D` -@inline insert_dim(::Val{D}, A::NTuple{N}, i) where {D, N} = ntuple(Val(N + 1)) do I - if I < D - A[I] - elseif I == D - i - else - A[I-1] - end -end - -# Same as `insert_dim`, but for `CartesianIndex` -@inline insert_dim(dim, A::CartesianIndex, i) = insert_dim(dim, Tuple(A), i) |> CartesianIndex - -export extrapolate! 
- -using FastIce.Fields - -using KernelAbstractions - -@kernel function kernel_extrapolate_x!(A, I, N) - iy, iz = @index(Global, NTuple) - iy -= 1; iz -= 1 - @inbounds if !isnothing(I) A[I, iy, iz] = 2.0 * A[I+1, iy, iz] - A[I+2, iy, iz] end - @inbounds if !isnothing(N) A[N, iy, iz] = 2.0 * A[N-1, iy, iz] - A[N-2, iy, iz] end -end - -@kernel function kernel_extrapolate_y!(A, I, N) - ix, iz = @index(Global, NTuple) - ix -= 1; iz -= 1 - @inbounds if !isnothing(I) A[ix, I, iz] = 2.0 * A[ix, I+1, iz] - A[ix, I+2, iz] end - @inbounds if !isnothing(N) A[ix, N, iz] = 2.0 * A[ix, N-1, iz] - A[ix, N-2, iz] end -end - -@kernel function kernel_extrapolate_z!(A, I, N) - ix, iy = @index(Global, NTuple) - ix -= 1; iy -= 1 - @inbounds if !isnothing(I) A[ix, iy, I] = 2.0 * A[ix, iy, I+1] - A[ix, iy, I+2] end - @inbounds if !isnothing(N) A[ix, iy, N] = 2.0 * A[ix, iy, N-1] - A[ix, iy, N-2] end -end - -function extrapolate!(A::Field; async=true) - backend = get_backend(A) - I = ntuple(Val(ndims(A))) do dim - halo(A, dim, 1) > 0 ? 0 : nothing - end - N = ntuple(Val(ndims(A))) do dim - halo(A, dim, 2) > 0 ? size(A, dim) + 1 : nothing - end - kernel_extrapolate_x!(backend, 256, (size(A, 2)+2, size(A, 3)+2))(A, I[1], N[1]) - kernel_extrapolate_y!(backend, 256, (size(A, 1)+2, size(A, 3)+2))(A, I[2], N[2]) - kernel_extrapolate_z!(backend, 256, (size(A, 1)+2, size(A, 2)+2))(A, I[3], N[3]) - async || synchronize(backend) - return -end - -end \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index 7a5a3438..dbd55b91 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -8,5 +9,5 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [compat] -AMDGPU = "0.6" -CUDA = "4, 5" +AMDGPU = "0.7" +CUDA = "5" diff --git a/test/common.jl b/test/common.jl index 911f91c2..f7853771 100644 --- a/test/common.jl +++ b/test/common.jl @@ -4,12 +4,12 @@ using FastIce using KernelAbstractions # add KA backends -backends = KernelAbstractions.Backend[CPU(), ] +backends = KernelAbstractions.Backend[CPU()] -if get(ENV,"JULIA_FASTICE_BACKEND","") == "AMDGPU" +if get(ENV, "JULIA_FASTICE_BACKEND", "") == "AMDGPU" using AMDGPU AMDGPU.functional() && push!(backends, ROCBackend()) -elseif get(ENV,"JULIA_FASTICE_BACKEND","") == "CUDA" +elseif get(ENV, "JULIA_FASTICE_BACKEND", "") == "CUDA" using CUDA CUDA.functional() && push!(backends, CUDABackend()) -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index 1023db56..525fa347 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,16 @@ using FastIce using Pkg -excludedfiles = [ "test_excluded.jl"]; +excludedfiles = ["test_excluded.jl"] + +# distributed +test_distributed = ["test_distributed_2D.jl", "test_distributed_3D.jl"] +using MPI +nprocs_2D = 4 +nprocs_3D = 8 +ENV["DIMX"] = 2 +ENV["DIMY"] = 2 +ENV["DIMZ"] = 2 function parse_flags!(args, flag; default=nothing, typ=typeof(default)) for f in args @@ -36,14 +45,23 @@ function runtests() for f in testfiles println("") - if f ∈ excludedfiles + if basename(f) ∈ excludedfiles println("Test Skip:") println("$f") continue end try - run(`$exename --startup-file=no $(joinpath(testdir, f))`) + if basename(f) ∈ test_distributed + nprocs = contains(f, "2D") ? 
nprocs_2D : nprocs_3D + cmd(n=nprocs) = `$(mpiexec()) -n $n $exename --startup-file=no --color=yes $(joinpath(testdir, f))` + withenv("JULIA_NUM_THREADS" => "4") do + run(cmd()) + end + else + run(`$exename --startup-file=no $(joinpath(testdir, f))`) + end catch ex + @error ex nfail += 1 end end @@ -60,4 +78,4 @@ elseif backend_name == "CUDA" ENV["JULIA_FASTICE_BACKEND"] = "CUDA" end -exit(runtests()) \ No newline at end of file +exit(runtests()) diff --git a/test/test_bc.jl b/test/test_bc.jl index 51264b8d..21f33991 100644 --- a/test/test_bc.jl +++ b/test/test_bc.jl @@ -2,37 +2,42 @@ include("common.jl") using FastIce.Grids using FastIce.Fields +using FastIce.Architectures using FastIce.BoundaryConditions @testset "backend $backend" for backend in backends @testset "boundary conditions" begin nx, ny, nz = 2, 2, 2 - grid = CartesianGrid(origin = (0.0, 0.0, 0.0), extent = (1.0, 1.0, 1.0), size = (nx, ny, nz)) + grid = CartesianGrid(; origin=(0.0, 0.0, 0.0), extent=(1.0, 1.0, 1.0), size=(nx, ny, nz)) field = Field(backend, grid, Center(); halo=1) + arch = Architecture(backend) @testset "value" begin @testset "x-dim" begin data(field) .= 0.0 - west_bc = DirichletBC{HalfCell}(1.0) - east_bc = DirichletBC{FullCell}(0.5) - apply_bcs!(Val(1), backend, grid, (field, ), ((west_bc, east_bc), ); async=false) - @test all((parent(field)[1 , 2:ny+1, 2:nz+1] .+ parent(field)[2, 2:ny+1, 2:nz+1]) ./ 2 .≈ west_bc.condition) - @test all( parent(field)[nx+2, 2:ny+1, 2:nz+1] .≈ east_bc.condition) + west_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(1.0),)) + east_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(0.5),)) + apply_boundary_conditions!(Val(1), Val(1), arch, grid, west_bc; async=false) + apply_boundary_conditions!(Val(2), Val(1), arch, grid, east_bc; async=false) + @test all((parent(field)[1, 2:ny+1, 2:nz+1] .+ parent(field)[2, 2:ny+1, 2:nz+1]) ./ 2 .≈ west_bc.conditions[1].condition) + @test all(parent(field)[nx+2, 2:ny+1, 2:nz+1] .≈ east_bc.conditions[1].condition) end @testset "y-dim" begin data(field) .= 0.0 - south_bc = DirichletBC{HalfCell}(1.0) - north_bc = DirichletBC{FullCell}(0.5) - apply_bcs!(Val(2), backend, grid, (field, ), ((south_bc, north_bc), ); async=false) - @test all((parent(field)[2:nx+1, 1 , 2:nz+1] .+ parent(field)[2:nx+1, 2, 2:nz+1]) ./ 2 .≈ south_bc.condition) - @test all( parent(field)[2:nx+1, ny+2, 2:nz+1] .≈ north_bc.condition) + south_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(1.0),)) + north_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(0.5),)) + apply_boundary_conditions!(Val(1), Val(2), arch, grid, south_bc; async=false) + apply_boundary_conditions!(Val(2), Val(2), arch, grid, north_bc; async=false) + @test all((parent(field)[2:nx+1, 1, 2:nz+1] .+ parent(field)[2:nx+1, 2, 2:nz+1]) ./ 2 .≈ south_bc.conditions[1].condition) + @test all(parent(field)[2:nx+1, ny+2, 2:nz+1] .≈ north_bc.conditions[1].condition) end @testset "z-dim" begin data(field) .= 0.0 - bot_bc = DirichletBC{HalfCell}(1.0) - top_bc = DirichletBC{FullCell}(0.5) - apply_bcs!(Val(3), backend, grid, (field, ), ((bot_bc, top_bc), ); async=false) - @test all((parent(field)[2:nx+1, 2:ny+1, 1 ] .+ parent(field)[2:nx+1, 2:ny+1, 2]) ./ 2 .≈ bot_bc.condition) - @test all( parent(field)[2:nx+1, 2:ny+1, nz+2] .≈ top_bc.condition) + bottom_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(1.0),)) + top_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(0.5),)) + apply_boundary_conditions!(Val(1), Val(3), arch, grid, bottom_bc; 
async=false) + apply_boundary_conditions!(Val(2), Val(3), arch, grid, top_bc; async=false) + @test all((parent(field)[2:nx+1, 2:ny+1, 1] .+ parent(field)[2:nx+1, 2:ny+1, 2]) ./ 2 .≈ bottom_bc.conditions[1].condition) + @test all(parent(field)[2:nx+1, 2:ny+1, nz+2] .≈ top_bc.conditions[1].condition) end end @testset "array" begin @@ -42,11 +47,12 @@ using FastIce.BoundaryConditions bc_array_east = KernelAbstractions.allocate(backend, Float64, (size(grid, 2), size(grid, 3))) bc_array_west .= 1.0 bc_array_east .= 0.5 - west_bc = DirichletBC{HalfCell}(bc_array_west) - east_bc = DirichletBC{FullCell}(bc_array_east) - apply_bcs!(Val(1), backend, grid, (field, ), ((west_bc, east_bc), ); async=false) - @test all((parent(field)[1 , 2:ny+1, 2:nz+1] .+ parent(field)[2, 2:ny+1, 2:nz+1]) ./ 2 .≈ west_bc.condition) - @test all( parent(field)[nx+2, 2:ny+1, 2:nz+1] .≈ east_bc.condition) + west_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(bc_array_west),)) + east_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(bc_array_east),)) + apply_boundary_conditions!(Val(1), Val(1), arch, grid, west_bc; async=false) + apply_boundary_conditions!(Val(2), Val(1), arch, grid, east_bc; async=false) + @test all((parent(field)[1, 2:ny+1, 2:nz+1] .+ parent(field)[2, 2:ny+1, 2:nz+1]) ./ 2 .≈ west_bc.conditions[1].condition) + @test all(parent(field)[nx+2, 2:ny+1, 2:nz+1] .≈ east_bc.conditions[1].condition) end @testset "y-dim" begin data(field) .= 0.0 @@ -54,11 +60,12 @@ using FastIce.BoundaryConditions bc_array_north = KernelAbstractions.allocate(backend, Float64, (size(grid, 2), size(grid, 3))) bc_array_south .= 1.0 bc_array_north .= 0.5 - south_bc = DirichletBC{HalfCell}(bc_array_south) - north_bc = DirichletBC{FullCell}(bc_array_north) - apply_bcs!(Val(2), backend, grid, (field, ), ((south_bc, north_bc), ); async=false) - @test all((parent(field)[2:nx+1, 1 , 2:nz+1] .+ parent(field)[2:nx+1, 2, 2:nz+1]) ./ 2 .≈ south_bc.condition) - @test all( parent(field)[2:nx+1, ny+2, 2:nz+1] .≈ north_bc.condition) + south_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(bc_array_south),)) + north_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(bc_array_north),)) + apply_boundary_conditions!(Val(1), Val(2), arch, grid, south_bc; async=false) + apply_boundary_conditions!(Val(2), Val(2), arch, grid, north_bc; async=false) + @test all((parent(field)[2:nx+1, 1, 2:nz+1] .+ parent(field)[2:nx+1, 2, 2:nz+1]) ./ 2 .≈ south_bc.conditions[1].condition) + @test all(parent(field)[2:nx+1, ny+2, 2:nz+1] .≈ north_bc.conditions[1].condition) end @testset "z-dim" begin data(field) .= 0.0 @@ -66,11 +73,12 @@ using FastIce.BoundaryConditions bc_array_top = KernelAbstractions.allocate(backend, Float64, (size(grid, 2), size(grid, 3))) bc_array_bot .= 1.0 bc_array_top .= 0.5 - bot_bc = DirichletBC{HalfCell}(bc_array_bot) - top_bc = DirichletBC{FullCell}(bc_array_top) - apply_bcs!(Val(3), backend, grid, (field, ), ((bot_bc, top_bc), ); async=false) - @test all((parent(field)[2:nx+1, 2:ny+1, 1 ] .+ parent(field)[2:nx+1, 2:ny+1, 2]) ./ 2 .≈ bot_bc.condition) - @test all( parent(field)[2:nx+1, 2:ny+1, nz+2] .≈ top_bc.condition) + bottom_bc = BoundaryConditionsBatch((field,), (DirichletBC{HalfCell}(bc_array_bot),)) + top_bc = BoundaryConditionsBatch((field,), (DirichletBC{FullCell}(bc_array_top),)) + apply_boundary_conditions!(Val(1), Val(3), arch, grid, bottom_bc; async=false) + apply_boundary_conditions!(Val(2), Val(3), arch, grid, top_bc; async=false) + @test all((parent(field)[2:nx+1, 2:ny+1, 1] .+ 
parent(field)[2:nx+1, 2:ny+1, 2]) ./ 2 .≈ bottom_bc.conditions[1].condition)
+            @test all(parent(field)[2:nx+1, 2:ny+1, nz+2] .≈ top_bc.conditions[1].condition)
             end
         end
     end
 end
diff --git a/test/test_distributed_2D.jl b/test/test_distributed_2D.jl
new file mode 100644
index 00000000..5e44c8c9
--- /dev/null
+++ b/test/test_distributed_2D.jl
@@ -0,0 +1,47 @@
+include("common.jl")
+
+using MPI
+using FastIce.Distributed
+
+MPI.Init()
+
+nprocs = MPI.Comm_size(MPI.COMM_WORLD)
+
+backend = CPU() # until we have a testing environment set up for GPU-aware MPI, run only on CPU
+
+@testset "Distributed 2D" begin
+    mpi_dims = parse.(Int, (get(ENV, "DIMX", "1"),
+                            get(ENV, "DIMY", "1")))
+    dims = (0, 0)
+    topo = CartesianTopology(dims)
+    local_size = (4, 5)
+    @testset "Topology" begin
+        @test dimensions(topo) == mpi_dims
+        @test length(neighbors(topo)) == 2
+        @test node_size(topo) == nprocs
+        if global_rank(topo) == 0
+            @test neighbor(topo, 1, 1) == MPI.PROC_NULL
+            @test neighbor(topo, 2, 1) == MPI.PROC_NULL
+            @test has_neighbor(topo, 1, 1) == false
+            @test has_neighbor(topo, 2, 1) == false
+            if mpi_dims[2] > 1
+                @test neighbor(topo, 1, 2) == mpi_dims[2]
+                @test has_neighbor(topo, 1, 2) == true
+            else
+                @test neighbor(topo, 1, 2) == MPI.PROC_NULL
+                @test has_neighbor(topo, 1, 2) == false
+            end
+        end
+        @test global_grid_size(topo, local_size) == mpi_dims .* local_size
+    end
+    @testset "gather!" begin
+        src = fill(global_rank(topo) + 1, local_size)
+        dst = (global_rank(topo) == 0) ? zeros(Int, mpi_dims .* local_size) : nothing
+        gather!(dst, src, cartesian_communicator(topo))
+        if global_rank(topo) == 0
+            @test dst == repeat(reshape(1:global_size(topo), dimensions(topo))'; inner=size(src))
+        end
+    end
+end
+
+MPI.Finalize()
diff --git a/test/test_distributed_3D.jl b/test/test_distributed_3D.jl
new file mode 100644
index 00000000..d10797b4
--- /dev/null
+++ b/test/test_distributed_3D.jl
@@ -0,0 +1,55 @@
+include("common.jl")
+
+using MPI
+using FastIce.Distributed
+
+MPI.Init()
+
+nprocs = MPI.Comm_size(MPI.COMM_WORLD)
+
+backend = CPU() # until we have a testing environment set up for GPU-aware MPI, run only on CPU
+
+@testset "Distributed 3D" begin
+    mpi_dims = parse.(Int, (get(ENV, "DIMX", "1"),
+                            get(ENV, "DIMY", "1"),
+                            get(ENV, "DIMZ", "1")))
+    dims = (0, 0, 0)
+    topo = CartesianTopology(dims)
+    local_size = (4, 5, 6)
+    @testset "Topology" begin
+        @test dimensions(topo) == mpi_dims
+        @test length(neighbors(topo)) == 3
+        @test node_size(topo) == nprocs
+        if global_rank(topo) == 0
+            @test neighbor(topo, 1, 1) == MPI.PROC_NULL
+            @test neighbor(topo, 2, 1) == MPI.PROC_NULL
+            @test neighbor(topo, 3, 1) == MPI.PROC_NULL
+            @test has_neighbor(topo, 1, 1) == false
+            @test has_neighbor(topo, 2, 1) == false
+            @test has_neighbor(topo, 3, 1) == false
+            if mpi_dims[2] > 1 && mpi_dims[3] > 1
+                @test neighbor(topo, 1, 2) == mpi_dims[2] * mpi_dims[3]
+                @test neighbor(topo, 2, 2) == mpi_dims[3]
+                @test has_neighbor(topo, 1, 2) == true
+                @test has_neighbor(topo, 2, 2) == true
+            else
+                @test neighbor(topo, 1, 2) == MPI.PROC_NULL
+                @test neighbor(topo, 2, 2) == MPI.PROC_NULL
+                @test has_neighbor(topo, 1, 2) == false
+                @test has_neighbor(topo, 2, 2) == false
+            end
+        end
+        @test global_grid_size(topo, local_size) == mpi_dims .* local_size
+    end
+    @testset "gather!" begin
+        src = fill(global_rank(topo) + 1, local_size)
+        dst = (global_rank(topo) == 0) ?
zeros(Int, mpi_dims .* local_size) : nothing + gather!(dst, src, cartesian_communicator(topo)) + ranks_mat = permutedims(reshape(1:global_size(topo), dimensions(topo)), reverse(1:3)) + if global_rank(topo) == 0 + @test dst == repeat(ranks_mat; inner=size(src)) + end + end +end + +MPI.Finalize() diff --git a/test/test_fields.jl b/test/test_fields.jl index f640001e..b1394f78 100644 --- a/test/test_fields.jl +++ b/test/test_fields.jl @@ -8,7 +8,7 @@ const HALO_SIZES = ((2, 2, 4), (0, 0, 0), (2, 0, 2), (0, 0, 0), (0, 1, 3)) @testset "backend $backend" for backend in backends @testset "fields" begin - grid = CartesianGrid(origin = (0.0, 0.0, 0.0), extent=(1.0, 1.0, 1.0), size = (2, 2, 2)) + grid = CartesianGrid(; origin=(0.0, 0.0, 0.0), extent=(1.0, 1.0, 1.0), size=(2, 2, 2)) loc = (Center(), Vertex(), Center()) @testset "location" begin @test location(Field(backend, grid, Center())) == (Center(), Center(), Center()) @@ -25,17 +25,17 @@ const HALO_SIZES = ((2, 2, 4), (0, 0, 0), (2, 0, 2), (0, 0, 0), (0, 1, 3)) @testset "discrete" begin # no parameters vertex fill!(data(f), NaN) - set!(f, grid, (grid, loc, ix, iy, iz) -> ycoord(grid, loc, iy); discrete=true) + set!(f, grid, (grid, loc, I) -> ycoord(grid, loc, I[2]); discrete=true) @test Array(interior(f)) == [0.0; 0.0;; 0.5; 0.5;; 1.0; 1.0;;; 0.0; 0.0;; 0.5; 0.5;; 1.0; 1.0] # no parameters center fill!(data(f), NaN) - set!(f, grid, (grid, loc, ix, iy, iz) -> xcoord(grid, loc, ix); discrete=true) + set!(f, grid, (grid, loc, I) -> xcoord(grid, loc, I[1]); discrete=true) @test Array(interior(f)) == [0.25; 0.75;; 0.25; 0.75;; 0.25; 0.75;;; 0.25; 0.75;; 0.25; 0.75;; 0.25; 0.75] # with parameters fill!(data(f), NaN) - set!(f, grid, (grid, loc, ix, iy, iz, sc) -> ycoord(grid, loc, iy) * sc; discrete=true, parameters=(2.0, )) + set!(f, grid, (grid, loc, I, sc) -> ycoord(grid, loc, I[2]) * sc; discrete=true, parameters=(2.0,)) @test Array(interior(f)) == [0.0; 0.0;; 1.0; 1.0;; 2.0; 2.0;;; 0.0; 0.0;; 1.0; 1.0;; 2.0; 2.0] end @@ -52,10 +52,10 @@ const HALO_SIZES = ((2, 2, 4), (0, 0, 0), (2, 0, 2), (0, 0, 0), (0, 1, 3)) 0.25; 0.75;; 0.25; 0.75;; 0.25; 0.75] # with parameters fill!(data(f), NaN) - set!(f, grid, (x, y, z, sc) -> y * sc, parameters=(2.0, )) + set!(f, grid, (x, y, z, sc) -> y * sc; parameters=(2.0,)) @test Array(interior(f)) == [0.0; 0.0;; 1.0; 1.0;; 2.0; 2.0;;; 0.0; 0.0;; 1.0; 1.0;; 2.0; 2.0] end end end -end \ No newline at end of file +end diff --git a/test/test_utils.jl b/test/test_utils.jl new file mode 100644 index 00000000..2cfedd29 --- /dev/null +++ b/test/test_utils.jl @@ -0,0 +1,42 @@ +include("common.jl") + +using FastIce.Utils + +@testset "pipelines" begin + @testset "pre" begin + a = 0 + pipe = Pipeline(; pre=() -> a += 1) + put!(() -> nothing, pipe) + wait(pipe) + @test a == 1 + close(pipe) + end + + @testset "post" begin + a = 0 + pipe = Pipeline(; post=() -> a += 2) + put!(pipe) do + a -= 1 + end + wait(pipe) + close(pipe) + @test a == 1 + end + + @testset "do work" begin + a = 0 + pipe = Pipeline() + put!(pipe) do + a += 1 + end + wait(pipe) + close(pipe) + @test a == 1 + end + + @testset "not running" begin + pipe = Pipeline() + close(pipe) + @test_throws ErrorException wait(pipe) + end +end
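+
+# A usage sketch for `split_ndrange` from src/Utils/split_ndrange.jl (an illustrative addition, not part of the
+# original test plan): an 8x8 range with a border width of 2 splits into a 4x4 inner block plus two outer
+# slabs per dimension.
+@testset "split_ndrange" begin
+    inner, outer = split_ndrange(CartesianIndices((8, 8)), (2, 2))
+    @test inner == CartesianIndices((3:6, 3:6))
+    @test outer[1] == (CartesianIndices((1:2, 3:6)), CartesianIndices((7:8, 3:6)))
+    @test outer[2] == (CartesianIndices((1:8, 1:2)), CartesianIndices((1:8, 7:8)))
+end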