Path: news.mathworks.com!newsfeed-00.mathworks.com!solaris.cc.vt.edu!news.vt.edu!news.glorb.com!postnews.google.com!v23g2000pro.googlegroups.com!not-for-mail
From: Bob Alvarez <ralvarez@spambob.net>
Newsgroups: comp.soft-sys.matlab
Subject: Re: Help in vectorizing code
Date: Tue, 28 Apr 2009 21:12:58 -0700 (PDT)
Organization: http://groups.google.com
Lines: 116
Message-ID: <25e7f014-50ea-44ca-a4df-24e731ad7e06@v23g2000pro.googlegroups.com>
References: <1e6f5074-edce-417a-b0a5-b62db0fb033f@w31g2000prd.googlegroups.com> 
	<128ed907-11c6-4e7a-a67a-736ececef060@s1g2000prd.googlegroups.com> 
	<gt5sqm$7mc$1@fred.mathworks.com> <gt6dqg$i52$1@fred.mathworks.com> 
	<gt7946$81g$1@fred.mathworks.com>
NNTP-Posting-Host: 216.103.212.87
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit
X-Trace: posting.google.com 1240978378 31767 127.0.0.1 (29 Apr 2009 04:12:58 GMT)
X-Complaints-To: groups-abuse@google.com
NNTP-Posting-Date: Wed, 29 Apr 2009 04:12:58 +0000 (UTC)
Complaints-To: groups-abuse@google.com
Injection-Info: v23g2000pro.googlegroups.com; posting-host=216.103.212.87; 
	posting-account=pNrldAoAAAB5GlVBDJqvQa-_ca5yFHCO
User-Agent: G2/1.0
X-HTTP-UserAgent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) 
	Gecko/2009042316 Firefox/3.0.10 (.NET CLR 3.5.30729),gzip(gfe),gzip(gfe)
Xref: news.mathworks.com comp.soft-sys.matlab:536083


Some updates:

1.  There was a bug in the C code I posted. A new version is at the
end of this post.
2. I timed Roger's vectorized code, the for loop and the Mex code.
Results:
Elapsed time is 0.037361 seconds. % vectorized
Elapsed time is 0.019795 seconds. % for loop
Elapsed time is 0.027539 seconds. % Mex code
Doh! Apparently the JIT compiler is so good it beats the other 2
methods. The test matlab code is:

---
%% code from Roger Stafford of news group
lambda = 1000;
m=1000;
 R = poissrnd(lambda,[1,m]); % <- Or your simulated poisson source
 tic
 p = cumsum([1,R]);
 r = randn(1,p(m+1)-1); % <- Or some other ind. random variables
 s = [0,cumsum(r)];
 t = s(p(2:m+1))-s(p(1:m));
toc
clear p r s

%% do with for loop
tic
t2 = zeros(1,m);
for k = 1:m
     t2(k) = sum(randn(1,R(k)));
end
toc

%% do with mex code
tic
d = randn(m,max(R)+1);
t3 = sum2idx(d,R(:));
toc

----- sum2idx.c (updated) -----
/*
rowsums = sum2idx(M,idx)
sums rows of matrix M from 1:idx
where idx is a vector of length = # of rows of M
   REA 4/28/09
*/

#define V4_COMPAT
#include <matrix.h>  /* Matlab matrices */
#include <mex.h>

#include <stddef.h>  /* NULL */

#define notDblMtx(it) (!mxIsNumeric(it) || !mxIsDouble(it) ||
mxIsSparse(it) || mxIsComplex(it))

void mexFunction(int nlhs,	     /* Num return vals on lhs */
		 mxArray *plhs[],    /* Matrices on lhs      */
		 int nrhs,	     /* Num args on rhs    */
		 const mxArray *prhs[]     /* Matrices on rhs */
		 )
  {
  double temp, sumval;
  double *mtx,*idx,*outArray;
  int kcol, krow,idxval,nrows,ncols,nrows_idx,ncols_idx;
  mxArray *arg;

  if (nrhs != 2) mexErrMsgTxt("sum2idx: requires 2 arguments.");

  /* ARG 1: MATRIX  */
  arg = prhs[0];
  if notDblMtx(arg) mexErrMsgTxt("sum2idx: M arg must be a real non-
sparse matrix.");
  mtx = mxGetPr(arg);
  nrows = (int) mxGetM(arg);
  ncols = (int) mxGetN(arg);
  if ((nrows*ncols) == 0) mexErrMsgTxt("sum2idx: M must be a non-empty
matrix.");

  /* ARG 2: idx vector  */
  arg = prhs[1];
  if notDblMtx(arg) mexErrMsgTxt("sum2idx: idx arg must be a real non-
sparse 1D vector.");
  idx = mxGetPr(arg);
  nrows_idx = (int) mxGetM(arg);
  ncols_idx = (int) mxGetN(arg);
  /* assume the calling program forces idx to be a column vector*/
  if(ncols_idx!=1) mexErrMsgTxt("sum2idx: idx must be 1 D column
vector");
  if(nrows_idx!=nrows) mexErrMsgTxt("sum2idx: idx must be same length
as number of rows of M");

  /* do the sum */
  plhs[0] = (mxArray *) mxCreateDoubleMatrix((int)nrows,1,mxREAL);
  if (plhs[0] == NULL) mexErrMsgTxt("sum2idx: Error allocating result
matrix");
  //Get a pointer to the data space in our newly allocated memory
  outArray = mxGetPr(plhs[0]);
  for(krow=0;krow<nrows;krow++){
  	sumval=0.0;
        idxval = (int)idx[krow];
        if(idxval>ncols) mexErrMsgTxt("sum2idx: idx must be <= number
of columns of M");
        if(idxval>0){ //do the sum
            for(kcol=0;kcol<idxval;kcol++){
                sumval += mtx[(kcol*nrows)+krow];
            }
        }
        outArray[krow] = sumval;
  }


  return;
}