| Description of slpartitionpca |
slpartitionpca
PURPOSE 
SLPARTITIONPCA Performs Partition-based PCA and saves the models
SYNOPSIS 
function slpartitionpca(data, arrsiz, n, ps, filepath, varargin)
DESCRIPTION 
CROSS-REFERENCE INFORMATION 
This function calls:
- disp DISP displays the dataset fields
- sladdvec SLADDVEC adds a vector to columns or rows of a matrix
- slsymeig SLSYMEIG Compute the eigenvalues and eigenvectors for symmetric matrix
- slreadarray SLREADARRAY Reads an array from an array file
- slwritearray SLWRITEARRAY Writes an array to an array file
- slcov SLCOV Compute the covariance matrix
- slarrmean SLARRMEAN Computes the mean of a set of arrays
- raise_lackinput RAISE_LACKINPUT Raises an error indicating lack of input argument
- slallsubinds SLALLSUBINDS Generate all sub-indices for all elements of the array
- slignorevars SLIGNOREVARS Ignores the input variables
- slparblocks SLPARBLOCKS Gets the blocks from partition structure
- slparseprops SLPARSEPROPS Parses input parameters
- slrange2indcells SLRANGE2INDCELLS Converts a range array to indices cell array
This function is called by:
SUBFUNCTIONS 
- function C = compute_localcov(data, meanarr, rangecell, d, n, w)
- function V = generate_pvecs(data, rangecell, oridim, subdim, n, meanarr, projmat)
- function check_valuerange(var, name, minval, maxval)
- function str = generate_indexstring(nums, inds)
- function rk = decide_rank(evals, er)
- function showinfo(message, opts)
SOURCE CODE 
0001 function slpartitionpca(data, arrsiz, n, ps, filepath, varargin)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121 if nargin < 5
0122 raise_lackinput('slpartitionpca', 5);
0123 end
0124 arrsiz = arrsiz(:)';
0125 if ~ischar(filepath)
0126 error('sltoolbox:invalidarg', ...
0127 'The filepath should be a char string');
0128 end
0129 arrdim = length(arrsiz);
0130
0131 if length(ps) ~= arrdim
0132 error('sltoolbox:sizmismatch', ...
0133 'The dimension of partition structure is not consisitent with the array unit');
0134 end
0135
0136 opts.combmodel = true;
0137 opts.er0 = 0.99;
0138 opts.er1 = 0.95;
0139 opts.mixev = 0;
0140 opts.meanarr = [];
0141 opts.weights = [];
0142 opts.verbose = true;
0143 opts = slparseprops(opts, varargin{:});
0144
0145 check_valuerange(opts.er0, 'er0', 0, 1);
0146 check_valuerange(opts.er1, 'er1', 0, opts.er0);
0147 check_valuerange(opts.mixev, 'mixev', 0, 1);
0148
0149 meanarr = opts.meanarr;
0150 if ~isempty(meanarr)
0151 if ~isequal(size(meanarr), arrmatdim)
0152 error('sltoolbox:sizmismatch', ...
0153 'The mean array offered is not consistent with the array unit size');
0154 end
0155 end
0156
0157 if ~isempty(opts.weights)
0158 opts.weights = opts.weights(:);
0159 if length(opts.weights) ~= n
0160 error('sltoolbox:sizmismatch', ...
0161 'The length of given weights is inconsistent with the number of samples');
0162 end
0163 hasweights = true;
0164 else
0165 hasweights = false;
0166 end
0167
0168
0169
0170
0171
0172 showinfo('Initializing Blocks ...', opts);
0173
0174
0175 parstruct = ps;
0176
0177 blocks = slparblocks(parstruct);
0178 blkinds = slallsubinds(size(blocks));
0179 NBlks = numel(blocks);
0180
0181 projfiles = cell(size(blocks));
0182 for i = 1 : NBlks
0183 projfiles{i} = [filepath, '.proj.', generate_indexstring(size(blocks), blkinds(:, i)')];
0184 end
0185
0186 if opts.combmodel
0187 combprojfile = [filepath, '.proj.comb'];
0188 else
0189 combprojfile = [];
0190 end
0191
0192 clear curinds currange;
0193
0194
0195
0196
0197 showinfo('Preparing Info Structure ...', opts);
0198
0199 energy.total = 0;
0200 energy.intpreserved = 0;
0201 energy.combpreserved = 0;
0202 energy.par = zeros(size(blocks));
0203 energy.parpreserved = zeros(size(blocks));
0204
0205 diminfo.size = arrsiz;
0206 diminfo.oridim = prod(arrsiz);
0207 diminfo.dims = zeros(size(blocks));
0208 diminfo.subdims = zeros(size(blocks));
0209 diminfo.intdim = 0;
0210 diminfo.feadim = 0;
0211
0212
0213
0214
0215
0216 if isempty(meanarr)
0217
0218 showinfo('Computing Mean Array ...', opts);
0219
0220 if ~hasweights
0221 meanarr = slarrmean(data, arrsiz, n);
0222 else
0223 meanarr = slarrmean(data, arrsiz, n, 'weights', opts.weights);
0224 end
0225 end
0226
0227
0228
0229
0230 showinfo('Learning Individual PCA Models ...', opts);
0231
0232 evalset = cell(size(blocks));
0233 for ib = 1 : NBlks
0234
0235 showinfo(sprintf(' PCA Model %d', ib), opts);
0236
0237 curblk = blocks{ib};
0238 rgncell = slrange2indcells(curblk);
0239 vecd = prod(curblk(2,:) - curblk(1,:) + 1);
0240
0241
0242 blkcov = compute_localcov(data, meanarr, rgncell, vecd, n, opts.weights);
0243
0244
0245 [evals, evecs] = slsymeig(blkcov);
0246
0247
0248 currk = sum(evals >= eps(evals(1)) / 10);
0249 evals = evals(1:currk);
0250 evecs = evecs(:, 1:currk);
0251
0252
0253 evalset{ib} = evals;
0254 slwritearray(evecs, projfiles{ib});
0255
0256 energy.par(ib) = sum(evals);
0257 diminfo.dims(ib) = vecd;
0258
0259 end
0260
0261 energy.total = sum(energy.par(:));
0262
0263
0264
0265
0266 showinfo('Performing Principal Components Selection ...', opts);
0267
0268 if opts.mixev == 0
0269
0270 showinfo(' Use Separate Strategy', opts);
0271
0272 for ib = 1 : NBlks
0273
0274 evals = evalset{ib};
0275 currk = decide_rank(evals, opts.er0);
0276
0277 evals = evals(1:currk);
0278 evalset{ib} = evals;
0279
0280 evecs = slreadarray(projfiles{ib});
0281 evecs = evecs(:, 1:currk);
0282 slwritearray(evecs, projfiles{ib});
0283
0284 energy.parpreserved(ib) = sum(evals);
0285 diminfo.subdims(ib) = currk;
0286
0287 clear evals evecs
0288 end
0289
0290 else
0291
0292 showinfo(' Use Mix-Up Strategy', opts);
0293
0294 showinfo(' Individual Preservation', opts);
0295
0296
0297 if opts.mixev < 1
0298 er0 = opts.er0 * (1 - opts.mixev);
0299 for ib = 1 : NBlks
0300 evals = evalset{ib};
0301 diminfo.subdims(ib) = decide_rank(evals, er0);
0302 energy.parpreserved(ib) = sum(evals(1:diminfo.subdims(ib)));
0303 end
0304 else
0305 diminfo.subdims(:) = 1;
0306 for ib = 1 : NBlks
0307 evals = evalset{ib};
0308 energy.parpreserved(ib) = evals(1);
0309 end
0310 end
0311
0312 indv_preserved = sum(energy.parpreserved(:));
0313
0314
0315 if indv_preserved < energy.total * opts.er0
0316
0317 showinfo(' Collecting Information for Mix-up ...', opts);
0318
0319
0320 rest_target_energy = energy.total * opts.er0 - indv_preserved;
0321
0322
0323 restevnums = zeros(NBlks, 1);
0324 for ib = 1 : NBlks
0325 evals = evalset{ib};
0326 restevnums(ib) = length(evals) - diminfo.subdims(ib);
0327 end
0328 total_restevnum = sum(restevnums(:));
0329
0330 evspool = zeros(total_restevnum, 1);
0331 evsbid = zeros(total_restevnum, 1);
0332 cn = 0;
0333 for ib = 1 : NBlks
0334 if restevnums(ib) > 0
0335 evals = evalset{ib};
0336 evspool(cn+1:cn+restevnums(ib)) = ...
0337 evals(diminfo.subdims(ib)+1:end);
0338 evsbid(cn+1:cn+restevnums(ib)) = ib;
0339 cn = cn + restevnums(ib);
0340 end
0341 end
0342
0343 showinfo(' Overall Ranking ...', opts);
0344
0345
0346 [evspool, sortord] = sort(evspool, 'descend');
0347 evsbid = evsbid(sortord);
0348
0349
0350 er0 = rest_target_energy / sum(evspool);
0351 rk = decide_rank(evspool, er0);
0352
0353 showinfo(' Updating Selection ...', opts);
0354
0355
0356 evsbid = evsbid(1:rk);
0357 for ib = 1 : NBlks
0358 knew = sum(evsbid == ib);
0359 if knew > 0
0360 evals = evalset{ib};
0361 diminfo.subdims(ib) = diminfo.subdims(ib) + knew;
0362 energy.parpreserved(ib) = sum(evals(1:diminfo.subdims(ib)));
0363 evalset{ib} = evals(1:diminfo.subdims(ib));
0364 end
0365 end
0366
0367
0368
0369 showinfo(' Truncating Projections ...', opts);
0370 for ib = 1 : NBlks
0371 pfn = projfiles{ib};
0372 curprojmat = slreadarray(pfn);
0373 curprojmat = curprojmat(:, 1:diminfo.subdims(ib));
0374 slwritearray(curprojmat, pfn);
0375 end
0376
0377 clear restevnums total_restevnum rest_target_energy;
0378 clear evsbid evspool sortord;
0379
0380 end
0381
0382 end
0383
0384 energy.intpreserved = sum(energy.parpreserved(:));
0385 diminfo.intdim = sum(diminfo.subdims(:));
0386
0387
0388
0389
0390 if opts.combmodel
0391
0392 showinfo('Learning Combined PCA Model ...', opts);
0393
0394 showinfo(' Generating Intermediate Stacked Vectors ...', opts);
0395
0396
0397 intvecs = zeros(diminfo.intdim, n);
0398 dc = 0;
0399 for ib = 1 : NBlks
0400
0401 curblk = blocks{ib};
0402 rgncell = slrange2indcells(curblk);
0403 vecd = prod(curblk(2,:) - curblk(1,:) + 1);
0404 cursubdim = diminfo.subdims(ib);
0405
0406 projmat = slreadarray(projfiles{ib});
0407
0408 V = generate_pvecs(data, rgncell, vecd, cursubdim, n, meanarr, projmat);
0409
0410 intvecs(dc+1:dc+cursubdim, :) = V;
0411 dc = dc + cursubdim;
0412
0413 clear projmat V;
0414 end
0415
0416 showinfo(' Learning Combined PCA ...', opts);
0417
0418
0419 covcomb = slcov(intvecs, opts.weights, 0);
0420 [combevals, evecs] = slsymeig(covcomb);
0421
0422 showinfo(' Truncating Combined PCA ...', opts);
0423
0424
0425 rk = decide_rank(combevals, (opts.er1 * energy.total) / energy.intpreserved);
0426 combevals = combevals(1:rk);
0427 combproj = evecs(:,1:rk);
0428 slwritearray(combproj, combprojfile);
0429
0430 energy.combpreserved = sum(combevals);
0431 diminfo.feadim = rk;
0432
0433 else
0434 energy.combpreserved = energy.intpreserved;
0435 diminfo.feadim = diminfo.intdim;
0436
0437 combevals = [];
0438 slignorevars(combevals);
0439 end
0440
0441
0442
0443
0444 showinfo('Outputing Core file ...', opts);
0445
0446 corefilename = [filepath, '.mat'];
0447
0448
0449 dstdir = fileparts(filepath);
0450 if isempty(dstdir)
0451 pathprelen = 0;
0452 elseif dstdir(end) ~= '\'
0453 pathprelen = length(dstdir) + 1;
0454 else
0455 pathprelen = length(dstdir);
0456 end
0457
0458 if pathprelen > 0
0459 for ib = 1 : NBlks
0460 fn = projfiles{ib};
0461 fn = fn(pathprelen+1:end);
0462 projfiles{ib} = fn;
0463 end
0464 if ~isempty(combprojfile)
0465 combprojfile = combprojfile(pathprelen+1:end);
0466 end
0467 end
0468
0469 slignorevars(combprojfile);
0470
0471
0472 save(corefilename, ...
0473 'parstruct', ...
0474 'blocks', ...
0475 'projfiles', ...
0476 'combprojfile', ...
0477 'meanarr', ...
0478 'energy', ...
0479 'diminfo', ...
0480 'evalset', ...
0481 'combevals', ...
0482 '-v6');
0483
0484
0485
0486
0487
0488
0489 function C = compute_localcov(data, meanarr, rangecell, d, n, w)
0490
0491 localmean = meanarr(rangecell{:});
0492 localmean = reshape(localmean, [d, 1]);
0493
0494 if isnumeric(data)
0495
0496 localarr = data(rangecell{:}, :);
0497 localarr = reshape(localarr, [d, n]);
0498
0499 C = slcov(localarr, w, localmean);
0500
0501 else
0502
0503 C = zeros(d, d);
0504 nfiles = length(data);
0505
0506 cf = 0;
0507 for i = 1 : nfiles
0508 localarr = slreadarray(data{i});
0509 curn = size(localarr, length(rangecell)+1);
0510
0511 if isempty(w)
0512 curw = [];
0513 tw = curn;
0514 else
0515 curw = w(cf+1:cf+curn);
0516 tw = sum(curw);
0517 end
0518
0519 curcov = compute_localcov(localarr, meanarr, rangecell, d, curn, curw);
0520 C = C + curcov * tw;
0521
0522 cf = cf + curn;
0523 end
0524
0525 if cf ~= n
0526 error('sltoolbox:sizmismatch', ...
0527 'The total number of units in the set of array files is not n');
0528 end
0529
0530 if isempty(w)
0531 C = C / n;
0532 else
0533 C = C / sum(w);
0534 end
0535
0536 end
0537
0538
0539 function V = generate_pvecs(data, rangecell, oridim, subdim, n, meanarr, projmat)
0540
0541 if isnumeric(data)
0542
0543 localarr = data(rangecell{:}, :);
0544 localarr = reshape(localarr, [oridim, n]);
0545 localmean = meanarr(rangecell{:});
0546 localmean = reshape(localmean, [oridim, 1]);
0547 D = sladdvec(localarr, -localmean, 1);
0548 clear localmean localarr;
0549 V = projmat' * D;
0550
0551 if size(V, 1) ~= subdim
0552 error('sltoolbox:sizmismatch', ...
0553 'Inconsistent sub dimension');
0554 end
0555
0556 else
0557
0558 V = zeros(subdim, n);
0559 nfiles = length(data);
0560 cf = 0;
0561 for i = 1 : nfiles
0562 curdata = slreadarray(data{i});
0563 curn = size(curdata, length(rangecell) + 1);
0564 curV = generate_pvecs(curdata, rangecell, oridim, subdim, curn, meanarr, projmat);
0565 clear curdata;
0566 V(:, cf+1:cf+curn) = curV;
0567 cf = cf + curn;
0568 end
0569
0570 if cf ~= n
0571 error('sltoolbox:sizmismatch', ...
0572 'The total number of units in the set of array files is not n');
0573 end
0574 end
0575
0576
0577
0578
0579
0580 function check_valuerange(var, name, minval, maxval)
0581
0582 if var < minval || var > maxval
0583 error('sltoolbox:outofrange', ...
0584 'The variable %s should be between %f and %f', ...
0585 name, minval, maxval);
0586 end
0587
0588
0589 function str = generate_indexstring(nums, inds)
0590
0591 d = length(nums);
0592 if any(inds <= 0) || any(inds > nums)
0593 error('The indices are beyond boundary');
0594 end
0595
0596
0597 pats = cell(1, d);
0598 for i = 1 : d
0599 curlen = length(int2str(nums(i)));
0600 pats{i} = sprintf('%%0%dd.', curlen);
0601 end
0602
0603
0604 sstrs = cell(1, d);
0605 for i = 1 : d
0606 sstrs{i} = sprintf(pats{i}, inds(i));
0607 end
0608
0609
0610 str = strcat(sstrs{:});
0611 str(end) = [];
0612
0613
0614 function rk = decide_rank(evals, er)
0615
0616 cumevs = cumsum(evals);
0617 rk = min(sum(cumevs < sum(evals) * er) + 1, length(evals));
0618
0619
0620 function showinfo(message, opts)
0621
0622 if opts.verbose
0623 disp(message);
0624 end
0625
0626
0627
Generated on Wed 20-Sep-2006 12:43:11 by m2html © 2003
|
|