Path: news.mathworks.com!newsfeed-00.mathworks.com!newsfeed2.dallas1.level3.net!news.level3.com!postnews.google.com!y38g2000hsy.googlegroups.com!not-for-mail
From: Rune Allnor <allnor@tele.ntnu.no>
Newsgroups: comp.soft-sys.matlab
Subject: Re: combine data?
Date: Mon, 21 Jul 2008 16:09:43 -0700 (PDT)
Organization: http://groups.google.com
Lines: 162
Message-ID: <aa533ccd-c999-44c6-8392-d3c58c6f3476@y38g2000hsy.googlegroups.com>
References: <g5k9qk$kc5$1@fred.mathworks.com> <57f37a55-f46c-4456-b93a-23b4a1f981a6@d77g2000hsb.googlegroups.com>
NNTP-Posting-Host: 212.17.141.54
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit
X-Trace: posting.google.com 1216681783 14713 127.0.0.1 (21 Jul 2008 23:09:43 GMT)
X-Complaints-To: groups-abuse@google.com
NNTP-Posting-Date: Mon, 21 Jul 2008 23:09:43 +0000 (UTC)
Complaints-To: groups-abuse@google.com
Injection-Info: y38g2000hsy.googlegroups.com; posting-host=212.17.141.54; 
User-Agent: G2/1.0
X-HTTP-UserAgent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET 
Xref: news.mathworks.com comp.soft-sys.matlab:480796



On 16 Jul, 12:37, Rune Allnor <all...@tele.ntnu.no> wrote:
> On 16 Jul, 10:00, "jay vaughan" <jvaughan5.nos...@gmail.com> wrote:
>
> > Hi,
>
> > I am trying to find a way tocombinedatawithout using a
> > loop.
>
> Sigh... the vixen of vectorisation in all her splendour...

It took a little bit of time to get this test done, but
at last some hard facts.

I implemented the three algorithms suggested in this thread:
Peter and Steven's use of ACCUMARRAY, my matlab code and
my C++ MEX program.

Below is the report from the profiler (R2006a) as well as
the code for the various files. My 'naive' matlab code
runs in half the time of the 'terse' code by Peter &
Steven - note that it is the call to UNIQUE which
takes the most time. My C++ MEX code runs in 10% of that
time again.

The C++ code is likely to become relatively slower
when there are a lot of different 'width' classes
(there are only 11 below); if there are only one or
two weights per width, the C++ code might in fact become
slower than alt least the 'naive' matlab code.

Conclusions? Don't mistake the number of typed
characters for run-time efficiency...

Rune


%%%%%  Profiler report   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Function                   Calls Total time Self-time

test                         1   0.698 s    0.070 s
unique                       2   0.458 s    0.458 s
alt2  (Peter & Steven)       1   0.386 s    0.027 s
alt1  (Rune)                 1   0.213 s    0.113 s
dataweights (MEX-function)   1   0.029 s    0.029 s
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%   test.m   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function test

N = 10;
M = 1000000;
width=int32(rand(M,1)*N);
weight=rand(M,1);

[wid1,wei1]=dataweights(width,weight);
[wid2,wei2]=alt1(width,weight);
[wid3,wei3]=alt2(width,weight);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%   alt1.m   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [wiv,wev]=alt1(width,weight)

wiv = unique(width);
wev = zeros(size(wiv));


for n=1:length(wev)
   idx=find(width==wiv(n));
   wev(n)=sum(weight(idx));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%   alt2.m   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [wid,wei]=alt2(width,weight);

[wid, a, b] = unique(width);
wei=accumarray(b, weight).';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

///  dataweights.cpp  //////////////////////////////////////////////
#include<map>
#include "mex.h"

using namespace std;

extern void _main();

void mexFunction(
		 int          nlhs,
		 mxArray      *plhs[],
		 int          nrhs,
		 const mxArray *prhs[]
		 )
{

    if (nrhs != 2)
    {
        mexErrMsgTxt("Function requires two arguments");
    }

    if (nlhs != 2)
    {
        mexErrMsgTxt("Function returns two results");
    }

    if ((mxGetClassID(prhs[0]) != mxINT32_CLASS)||
        (mxIsComplex(prhs[0])==true))
    {
        mexErrMsgTxt("Argument 1 must be real int32");
    }

    if ((mxGetClassID(prhs[1]) != mxDOUBLE_CLASS)||
        (mxIsComplex(prhs[1])==true))
    {
        mexErrMsgTxt("Argument 2 must be real doubles");
    }

    int MWeights = mxGetM(prhs[0]);
    int NWeights = mxGetN(prhs[0]);
    int MWidths = mxGetM(prhs[1]);
    int NWidths = mxGetN(prhs[1]);

    if (!((NWeights == 1) &&(NWidths == 1)))
    {
        mexErrMsgTxt("Arguments must be column vectors");
    }

    if (MWeights != MWidths)
    {
        mexErrMsgTxt("Arguments must be same lengths");
    }

    std::map<int,double> data;
    int* weight = (int*)mxGetData(prhs[0]);
    double* widths = (double*)mxGetData(prhs[1]);

    for (int n=0;n<MWeights;n++)
    {
        data[weight[n]]+=widths[n];
    }

    int dims[2];
    dims[0]=data.size();
    dims[1]=1;
    plhs[0]=mxCreateNumericArray(2,dims,mxINT32_CLASS,mxREAL);
    plhs[1]=mxCreateNumericArray(2,dims,mxDOUBLE_CLASS,mxREAL);

    weight = (int*)mxGetData(plhs[0]);
    widths = (double*)mxGetData(plhs[1]);
    int i = 0;
    std::map<int,double>::const_iterator j;
    for (j=data.begin();j!=data.end();++j)
    {
        weight[i]=(*j).first;
        widths[i]=(*j).second;
        ++i;
    }

    return;
}
///////////////////////////////////////////////////////////////////