How can I compile a MEX function that contains both OpenMP and CUDA code?

2 views (last 30 days)
I want to use both CPU parallelism (OpenMP) and GPU parallelism (CUDA) in the same MEX function. How can I compile it? I know that `mexcuda mexGPUExample.cu` compiles a CUDA project, but how can I enable OpenMP in a .cu project?
/*
* Example of how to use the mxGPUArray API in a MEX file. This example shows
* how to write a MEX function that takes a gpuArray input and returns a
* gpuArray output, e.g. B=mexFunction(A).
*
* Copyright 2012 The MathWorks, Inc.
*/
#include "mex.h"
#include "gpu/mxGPUArray.h"
#include"omp.h"
#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
/*
* Device code
*/
/*
 * TimesTwo: element-wise doubling, B[i] = 2.0 * A[i] for every 0 <= i < N.
 *
 * A  - input buffer, read only (dereferenced in device code).
 * B  - output buffer, written only.
 * N  - number of elements to process; N <= 0 does nothing.
 *
 * Only the .x components of the block and grid are used, so the kernel
 * expects a 1-d launch. Each thread starts at its global linear index and
 * advances by the total number of launched threads, so every element is
 * processed even when the grid has fewer threads than N.
 */
void __global__ TimesTwo(double const * const A,
                         double * const B,
                         int const N)
{
    /*
     * Index arithmetic is done in 64 bits: blockIdx.x * blockDim.x can reach
     * 2^31 for large grids, and narrowing that to a 32-bit int would yield a
     * negative index that still passes the "i < N" guard.
     */
    long long const stride = (long long)gridDim.x * blockDim.x;
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (; i < N; i += stride) {
        B[i] = 2.0 * A[i];
    }
}
/*
* Host code
*/
/*
 * MEX gateway: B = mexFunction(A).
 *
 * Takes exactly one input, which must be a gpuArray of class double, and
 * returns a gpuArray of the same dimensions, class and complexity whose
 * data has been processed by the TimesTwo kernel. Before the GPU work it
 * runs a small OpenMP loop and prints the sum of the thread numbers that
 * executed it.
 *
 * nlhs - number of requested outputs (unused).
 * plhs - output array; plhs[0] receives the result gpuArray.
 * nrhs - number of inputs; must be 1.
 * prhs - input array; prhs[0] must be a double gpuArray.
 *
 * Raises parallel:gpu:mexGPUExample:InvalidInput when the input count or
 * type is wrong, or when the element count does not fit the int-based
 * launch arithmetic, and parallel:gpu:mexGPUExample:KernelLaunchFailed when
 * the CUDA runtime reports a launch error.
 *
 * NOTE(review): for complex input the kernel is still launched over
 * mxGPUGetNumberOfElements(A) doubles - confirm whether that covers the
 * whole buffer.
 *
 * Build note (verify for your toolchain): the "#pragma omp" line is host
 * code, so the host compiler needs its OpenMP switch (nvcc forwards host
 * options through -Xcompiler) and the OpenMP runtime must be linked.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, mxArray const *prhs[])
{
    /* Declare all variables. */
    mxGPUArray const *A;
    mxGPUArray *B;
    double const *d_A;
    double *d_B;
    size_t numElements;
    int N;
    char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
    char const * const errMsg = "Invalid input to MEX file.";
    /* Choose a reasonably sized number of threads for the block. */
    int const threadsPerBlock = 256;
    int blocksPerGrid;

    (void)nlhs;

    /*
     * OpenMP smoke test. The accumulator is an int so that it matches the
     * "%d" format below, and it is combined with a reduction clause:
     * without it every thread would update the shared variable
     * concurrently, which is a data race.
     */
    int test = omp_get_thread_num();
    #pragma omp parallel for num_threads(6) reduction(+:test)
    for (int i = 0; i < 5; i++) {
        test += omp_get_thread_num();
    }
    mexPrintf("%d\n", test);

    /* Initialize the MathWorks GPU API. */
    mxInitGPU();

    /* Throw an error if the input is not a GPU array. */
    if ((nrhs != 1) || !(mxIsGPUArray(prhs[0]))) {
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    A = mxGPUCreateFromMxArray(prhs[0]);

    /*
     * Verify that A really is a double array before extracting the pointer.
     * mexErrMsgIdAndTxt does not return, so A has to be released first.
     */
    if (mxGPUGetClassID(A) != mxDOUBLE_CLASS) {
        mxGPUDestroyGPUArray(A);
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    /*
     * The launch arithmetic below is done in int. Reject inputs for which
     * either the element count or "N + threadsPerBlock - 1" would overflow.
     */
    numElements = (size_t)mxGPUGetNumberOfElements(A);
    if (numElements > (size_t)(INT_MAX - (threadsPerBlock - 1))) {
        mxGPUDestroyGPUArray(A);
        mexErrMsgIdAndTxt(errId, errMsg);
    }
    N = (int)numElements;

    /*
     * Now that we have verified the data type, extract a pointer to the input
     * data on the device.
     */
    d_A = (double const *)(mxGPUGetDataReadOnly(A));

    /* Create a GPUArray to hold the result and get its underlying pointer. */
    B = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
                            mxGPUGetDimensions(A),
                            mxGPUGetClassID(A),
                            mxGPUGetComplexity(A),
                            MX_GPU_DO_NOT_INITIALIZE);
    d_B = (double *)(mxGPUGetData(B));

    /*
     * Call the kernel using the CUDA runtime API on a 1-d grid. An empty
     * input is skipped entirely: it would otherwise request a grid of zero
     * blocks, which is not a valid launch configuration.
     */
    if (N > 0) {
        cudaError_t launchStatus;

        blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
        TimesTwo<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, N);

        /* Kernel launches report configuration errors only through here. */
        launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess) {
            mxGPUDestroyGPUArray(A);
            mxGPUDestroyGPUArray(B);
            mexErrMsgIdAndTxt("parallel:gpu:mexGPUExample:KernelLaunchFailed",
                              "CUDA kernel launch failed: %s",
                              cudaGetErrorString(launchStatus));
        }
    }

    /* Wrap the result up as a MATLAB gpuArray for return. */
    plhs[0] = mxGPUCreateMxArrayOnGPU(B);

    /*
     * The mxGPUArray pointers are host-side structures that refer to device
     * data. These must be destroyed before leaving the MEX function.
     */
    mxGPUDestroyGPUArray(A);
    mxGPUDestroyGPUArray(B);
}
This code is the official MathWorks CUDA MEX example, to which I have added an OpenMP test.
  3 Comments
Fernando
Fernando on 23 Aug 2023
-liomp5 doesn't work in R2023a. The code compiles correctly with nvcc. Can you provide the actual command line required when running mexcuda in MATLAB?

Sign in to comment.

Answers (0)

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!