Basic Usage

Ping demo code

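A minimal two-rank ping: rank 0 fills a view and sends it, rank 1 receives it and checks that every element arrived intact.
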
sampik::initialize(argc, argv);

int N = 1'000'000;
Kokkos::View<double*> v("v", N);
auto exec = Kokkos::DefaultExecutionSpace();
auto comm = sampik::DefaultCommunicationSpace();

auto handle = sampik::Handle(comm, exec);
assert(handle.size() == 2);

if (handle.rank() == 0) {
  // Fill the view on the sender's execution space
  Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, N), KOKKOS_LAMBDA(int const i) {
    v(i) = double(i);
  });
  auto req = sampik::send(exec, handle, v, 1);
  sampik::wait(exec, req);
} else if (handle.rank() == 1) {
  auto req = sampik::recv(exec, handle, v, 0);
  sampik::wait(exec, req);
  // Count mismatches; the reduction result must be zero
  double result;
  Kokkos::parallel_reduce(Kokkos::RangePolicy(exec, 0, N), KOKKOS_LAMBDA(int const i, double& errs) {
    errs += v(i) != double(i);
  }, result);
  assert(result == 0.0);
}

sampik::finalize();
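
Since the handle asserts exactly two ranks, this demo has to be launched with exactly two processes (for instance, via mpirun -np 2 when running on an MPI backend).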

Halo exchange

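This example decomposes a periodic 2D domain across an rs × rs square of ranks and exchanges radius-1 halos with the four neighbors, overlapping the communication with a compute kernel enqueued on the same execution space.
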
auto halo_exchange() -> void {
  using Scalar = double;
  using Grid   = Kokkos::View<Scalar**, Kokkos::LayoutRight>;

  // Problem size per rank
  int nx = 512;
  int ny = 512;

  auto comm = sampik::DefaultCommunicationSpace();
  auto exec = Kokkos::DefaultExecutionSpace();

  auto handle = sampik::Handle(comm, exec);
  auto rank   = handle.rank();
  auto size   = handle.size();

  const int rs = static_cast<int>(std::sqrt(size));
  const int rx = rank % rs;
  const int ry = rank / rs;

  if (rank < rs * rs) {
    // Grid of elements, plus a radius-1 halo on each side
    Grid grid("g", nx + 2, ny + 2);

    // 2D coordinates of the neighbor ranks in the minus and plus directions (periodic)
    const int xm1 = (rx + rs - 1) % rs;
    const int ym1 = (ry + rs - 1) % rs;
    const int xp1 = (rx + 1) % rs;
    const int yp1 = (ry + 1) % rs;

    // Convert a 2D rank coordinate into a 1D rank
    auto get_1d_rank = [=](int const x, int const y) -> int { return y * rs + x; };

    auto make_pair = [](sampik::RankId a, sampik::RankId b) -> Kokkos::pair<sampik::RankId, sampik::RankId> {
      return Kokkos::pair{a, b};
    };

    // Create send/recv subviews: interior boundary rows/columns and halo rows/columns
    auto xp1_s = Kokkos::subview(grid, grid.extent(0) - 2, make_pair(1, ny + 1));
    auto xp1_r = Kokkos::subview(grid, grid.extent(0) - 1, make_pair(1, ny + 1));
    auto xm1_s = Kokkos::subview(grid, 1, make_pair(1, ny + 1));
    auto xm1_r = Kokkos::subview(grid, 0, make_pair(1, ny + 1));
    auto yp1_s = Kokkos::subview(grid, make_pair(1, nx + 1), grid.extent(1) - 2);
    auto yp1_r = Kokkos::subview(grid, make_pair(1, nx + 1), grid.extent(1) - 1);
    auto ym1_s = Kokkos::subview(grid, make_pair(1, nx + 1), 1);
    auto ym1_r = Kokkos::subview(grid, make_pair(1, nx + 1), 0);

    // Start sending the data
    std::vector<sampik::Request> send_reqs;
    send_reqs.push_back(sampik::send(exec, handle, xp1_s, get_1d_rank(xp1, ry)));
    send_reqs.push_back(sampik::send(exec, handle, xm1_s, get_1d_rank(xm1, ry)));
    send_reqs.push_back(sampik::send(exec, handle, yp1_s, get_1d_rank(rx, yp1)));
    send_reqs.push_back(sampik::send(exec, handle, ym1_s, get_1d_rank(rx, ym1)));

    // The compute kernel is enqueued on the same execution space as the send operations
    Kokkos::parallel_for(Kokkos::RangePolicy(exec, ...), KOKKOS_LAMBDA(...) {
      // Do some useful work here
    });

    // Start receiving the data.
    // These start only after the previous `parallel_for`, as we're enqueuing on the same execution space.
    std::vector<sampik::Request> recv_reqs;
    recv_reqs.push_back(sampik::recv(exec, handle, xm1_r, get_1d_rank(xm1, ry)));
    recv_reqs.push_back(sampik::recv(exec, handle, xp1_r, get_1d_rank(xp1, ry)));
    recv_reqs.push_back(sampik::recv(exec, handle, ym1_r, get_1d_rank(rx, ym1)));
    recv_reqs.push_back(sampik::recv(exec, handle, yp1_r, get_1d_rank(rx, yp1)));

    // Wait for comms to finish
    for (auto [sr, rr] : std::views::zip(send_reqs, recv_reqs)) {
      sr.wait();
      rr.wait();
    }
  }
}
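
Note that rs is the side of the largest square that fits in the rank count: when the number of ranks is not a perfect square, ranks with rank >= rs * rs simply skip the exchange.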

MPI interop

Create sampik handles from MPI communicators, and retrieve the inner communicator from sampik handles:

MPI_Comm comm;
MPI_Comm_dup(MPI_COMM_WORLD, &comm);

// sampik handle from MPI communicator
auto handle = sampik::Handle(comm, Kokkos::DefaultHostExecutionSpace());

// MPI communicator from sampik handle
MPI_Comm inner = handle.get_inner();
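
The retrieved communicator can then be used with plain MPI calls. A minimal sketch (assuming the handle keeps using the communicator, so it should not be freed while the handle is alive):

// Query and synchronize through the inner communicator using standard MPI
int inner_rank;
MPI_Comm_rank(inner, &inner_rank);
MPI_Barrier(inner);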