Help with my mex function output from cudamemcpy2D
20 views (last 30 days)
Show older comments
Hi I am writing a very basic CUDA code where I am sending an input via matlab, copying it to gpu and then copying it back to the host and calling that output via mex file. But I am getting all 0s. I am still getting to know CUDA but would anyone know why this would happen? Here is the code:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <math.h>
#include "mex.h"
#include <vector>
#include <cuda.h>
using namespace std;
//#define CCE checkCudaErrors
#define HtoD cudaMemcpyHostToDevice
#define DtoH cudaMemcpyDeviceToHost
void matData(mxArray **A, short *h_raw, int nrows, int ncols, int nframes, short *tof, short *distance_array, int pix_x, int pix_y, int elements) {
int gpuID;
getFirstAvailableGPU(&gpuID);
short *new_data = h_raw;
cudaSetDevice(gpuID);
short* d_ptr;// = 0;
size_t pitchInBytes;// = sizeof(short)*nrows;
size_t h_pitchInBytes = sizeof(short) * nrows;
cudaMallocPitch((void **)&d_ptr, &pitchInBytes, nrows * sizeof(short),ncols*nframes);
cudaMemcpy2D(d_ptr, pitchInBytes, new_data, h_pitchInBytes,sizeof(short) * nrows, ncols*nframes, HtoD);
int dim_x = nrows;
int dim_y = ncols;
int dim_z = nframes;
int total_val = dim_x * dim_y*dim_z;
int nFrames = nframes;
double *img_PA{ new double[pix_x*pix_y]{} };// initializing it to 0
dim3 B(256, 1, 1);
dim3 G((nrows - 1) / B.x + 1, (ncols - 1) / B.y + 1,
(nframes - 1) / B.z + 1);
int ndim2 = 2;
const mwSize dims_n4[] = {nrows,ncols };
*A = mxCreateNumericArray(ndim2, dims_n4, mxSINGLE_CLASS, mxREAL);
short* res = (short *)mxGetData(*A);
size_t pitchInBytes_PAM = sizeof(short)*nrows;
size_t pitchInByes_PAM_h = sizeof(short) * nrows;
cudaMemcpy2D(res, pitchInBytes_PAM, d_ptr, pitchInByes_PAM_h, sizeof(short) * nrows, ncols * nframes, DtoH);
return;
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs == 0 || !mxIsInt16(prhs[0])) {
mexErrMsgTxt("Input must be int16");
}
const mwSize *dims2;
size_t numberOfElements;
short *h_raw = (short *)mxGetData(prhs[0]);
short *tof = (short *)mxGetData(prhs[1]);
short *shifted_data = (short *)mxGetData(prhs[2]);
short *distance_array = (short *)mxGetData(prhs[3]);
const mwSize *dimensions_rawdat = mxGetDimensions(prhs[0]);
int nrows = dimensions_rawdat[0];
int ncols = dimensions_rawdat[1];
int nframes = dimensions_rawdat[2];
if (mxGetNumberOfElements(prhs[0]) < nrows * ncols) {
mexErrMsgTxt("Not enough elements");
}
const mwSize *dimensions = mxGetDimensions(prhs[1]);
int pix_x = dimensions[0];
int pix_y = dimensions[1];
int elements = dimensions[2];
int ndim1 = 1;
int ndim = 3;
int ndim2 = 2;
int ndim3 = 3;
const mwSize dims[] = { nrows, ncols, nframes };
const mwSize dims_n[] = { nrows, ncols };
const mwSize dims_n3[] = { ncols, nrows };
const mwSize dims_n2[] = { pix_x,pix_y,elements };
const mwSize dims_n4[] = { pix_x,pix_y };
const mwSize dims_sum[] = { nrows };
const mwSize dims_sume[] = { elements,elements };
const mwSize dims_sum_sum[] = { 1 };
matData(&plhs[0], h_raw, nrows, ncols, nframes, tof, distance_array, pix_x, pix_y, elements);
return;
}
0 Comments
Accepted Answer
James Tursa
on 19 Apr 2020
Why is this created as a single class, and then a short pointer is used to access it?
*A = mxCreateNumericArray(ndim2, dims_n4, mxSINGLE_CLASS, mxREAL);
short* res = (short *)mxGetData(*A);
That's a mismatch of data type and pointer type.
3 Comments
Joss Knight
on 25 Apr 2020
You have specified pitchInBytes for both host and device pitches, but the pitch for the host arrays remains nrows*sizeof(short), whereas for the device arrays it has been modified by cudaMallocPitch.
You then proceed to ignore the pitch of the data in your kernel and so are copying data from and to the wrong addresses.
More Answers (0)
See Also
Categories
Find more on Standalone Applications in Help Center and File Exchange
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!