Package d-nv on DUB

To use this package, run the following command in your project's root directory:

d-nv

This package is still a work in progress.

import dnv;

import std.stdio;
import std.random;
import std.range;

// Example: compile a CUDA kernel from a string and launch it through a
// TypedKernel whose argument list is checked at compile time.

// Number of elements per vector; also passed to the kernel as numElements.
int n = 10;
// Factory for an input array: wraps n values drawn with uniform(-1f, 1f)
// in an Array!float.
auto gen = () => new Array!float(generate!(() => uniform(-1f, 1f)).take(n).array());
auto a = gen();
auto b = gen();
// Output array constructed from n (presumably n elements -- confirm against
// the Array constructor).
auto c = new Array!float(n);
// Kernel description passed to Code: a name string, the parameter list, and
// the kernel body as token strings.
// NOTE(review): despite the name "saxpy", the body computes the element-wise
// sum C[i] = A[i] + B[i]; no scalar multiplier is involved.
enum code = Code(
  "saxpy", q{float *A, float *B, float *C, int numElements},
  q{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < numElements) C[i] = A[i] + B[i];
  });
auto saxpy = new TypedKernel!(code);
saxpy(a, b, c, n); // type-checked at compile-time.
// compile error: saxpy(a, b, c), saxpy(a, b, c, 3f)

// Compare every output element with the sum of the two inputs.
// to_cpu() presumably returns host-side copies of the arrays -- confirm.
foreach (ai, bi, ci; zip(a.to_cpu(), b.to_cpu(), c.to_cpu())) {
  assert(ai + bi == ci);
}

cuBLAS usage (WIP)

// Example/test: compute C = A x B.T and D = B x A.T with cublasSgemm_v2 and
// check that C equals the transpose of D.
unittest {
    import dnv.storage : Array;
    import dnv.cuda.cublas;

    // Create the cuBLAS handle. Cleanup is registered only after creation is
    // known to have succeeded, so a failed create never reaches
    // cublasDestroy_v2 with a handle that was not created.
    cublasHandle_t handle;
    auto status = cublasCreate_v2(&handle);
    assert(status == CUBLAS_STATUS_SUCCESS);
    scope(exit) cublasDestroy_v2(handle);

    // Problem sizes: A is M x K, B is N x K.
    auto M = 3;
    auto N = 4;
    auto K = 2;

    // Both matrices are passed with leading dimension equal to their row
    // count (lda = M, ldb = N), i.e. column-major: each source line below is
    // one column.
    float[] A = [1, 2, 3,
                 4, 5, 6];       // M=3 x K=2
    float[] B = [1, 2, 3, 4,
                 5, 6, 7, 8];    // N=4 x K=2
    float alpha = 1.0f;
    float beta = 0.0f;
    auto d_A = new Array!float(A);
    auto d_B = new Array!float(B);
    auto d_C = new Array!float(M * N);

    // C (M x N) = alpha * A x B.T + beta * C, via the cuBLAS v2 API
    status = cublasSgemm_v2(handle, CUBLAS_OP_N, CUBLAS_OP_T, M, N, K,
                            &alpha, d_A.data, M, d_B.data, N, &beta, d_C.data, M);
    assert(status == CUBLAS_STATUS_SUCCESS);

    // D (N x M) = alpha * B x A.T + beta * D
    auto d_D = new Array!float(N * M);
    status = cublasSgemm_v2(handle, CUBLAS_OP_N, CUBLAS_OP_T, N, M, K,
                            &alpha, d_B.data, N, d_A.data, M, &beta, d_D.data, N);
    assert(status == CUBLAS_STATUS_SUCCESS);

    // check C = D.T
    auto C = d_C.to_cpu();     // C = A x B.T
    auto D = d_D.to_cpu();     // D = B x A.T
    foreach (m; 0 .. M) {
        foreach (n; 0 .. N) {
            // Column-major indexing: C(m, n) = C[m + n * M], D(n, m) = D[n + m * N].
            assert(C[m + n * M] == D[n + m * N]);
        }
    }
}

how to use

add dependency to your DUB project

    "dependencies": {
        "d-nv": "~>0.0.1"
    }

to your project file dub.json and then $ dub run

or add this header

#!/usr/bin/env dub
/+ dub.json:
{
    "name": "your-app",
    "targetType":"executable",
    "dependencies": {
        "d-nv": "~>0.0.1"
    }
}
+/

to your single file app.d and then $ dub app.d

roadmap

(- v1.0)

(DONE) allocate memory on multiple devices with CUDA Driver API
(DONE) GPU device <-> CPU host memory transfer
(DONE) compile a kernel of raw string with NVRTC
(DONE) launch a kernel function
(DONE) type-check of kernel's arguments at compile-time
(DONE) build with dub
(DONE) Coveralls support using doveralls
(WIP) add benchmark and example using d-nvrtc as a library
(WIP) naive type-check of kernel's arguments at run-time
(WIP) user-friendly config of <<<grids, blocks, shared-memory, stream>>>
support template kernels
support static compilation of CUDA kernel (just linking objects without NVRTC?)

(v1.0 -)

thrust support
fully multi-device support

issues

fix the CUdeviceptr definition to use cuMemAlloc directly
think about how to clean up resources
add unit tests

references

/usr/local/cuda/samples/0Simple/ vectorAddnvrtc, simpleTemplates_nvrtc
/usr/local/cuda/doc/pdf/NVRTCUserGuide.pdf
/usr/local/cuda/doc/pdf/

development

how to build and unittest

$ make coverage

current coverage