mex cuda code for matrix multiplication
Show older comments
Hello,
I made a simple mex cuda code to calculate multiplication of two matrices of size NxN but never get the same results as in matlab command
C = A*B except for B is a diagonal matrix. Just wondering if I was wrong at any point? The code is attached. Thanks.
#include "mex.h"
#include "cuda_runtime.h"
// CUDA kernel for matrix multiplication
__global__ void matrixMultiplication(const double* A, const double* B, double* C, int N) {
int row = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
if (row < N && col < N) {
double sum = 0;
for (int i = 0; i < N; i++) {
sum += A[row * N + i] * B[i * N + col];
}
C[row * N + col] = sum;
}
}
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
/* Declare all variables */
const double* A;
const double* B;
double* C;
double* d_A = nullptr;
double* d_B = nullptr;
double* d_C = nullptr;
int N;
A = mxGetPr(prhs[0]);
B = mxGetPr(prhs[1]); // Use prhs[1] for matrix B
N = mxGetN(prhs[0]); // Get the number of columns of matrix A
/* Initialize output array */
plhs[0] = mxCreateDoubleMatrix((mwSize)N, (mwSize)N, mxREAL);
C = mxGetPr(plhs[0]);
// Allocate device memory for matrices A, B, and C
cudaMalloc((void**)&d_A, N * N * sizeof(double));
cudaMalloc((void**)&d_B, N * N * sizeof(double));
cudaMalloc((void**)&d_C, N * N * sizeof(double));
// Copy matrices A and B from host to device
cudaMemcpy(d_A, A, N * N * sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(d_B, B, N * N * sizeof(double), cudaMemcpyHostToDevice);
// Define grid and block dimensions
dim3 dimGrid(128, 128);
dim3 dimBlock(1, 1);
// dim3 dimGrid((N + 15) / 16, (N + 15) / 16); // Adjust block size as needed
// dim3 dimBlock(16, 16);
// Launch the CUDA kernel
matrixMultiplication << <dimGrid, dimBlock >> > (d_A, d_B, d_C, N);
cudaDeviceSynchronize();
// Copy the result matrix C from device to host
cudaMemcpy(C, d_C, N * N * sizeof(double), cudaMemcpyDeviceToHost);
// Free device memory
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
}
Accepted Answer
More Answers (0)
Categories
Find more on Matrix Indexing in Help Center and File Exchange
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!