function [Z_scores p_vals] = gic_pca(peak_list, C, C_axes, SVD_info)
% [Z_scores p_vals] = gic_pca(peak_list, C, C_axes, svd_info)
%     perform PCA analysis for the peak_list (with columns in ppm for each dimension) associated with the GIC spectrum 
%     represented by C, C_axes and svd_info as returned by the function covar.m

num_peaks = size(peak_list,1);

size_row = C_axes(26,:);
num_dims = sum(size_row > 1);
acptr_dims = find(C_axes(27,:) == 1); 
donor_dims = find(C_axes(27,:) == -1);

if(isstr(C)) % output from covar rather than being covar spec is string pointing to file with output info
    [S_size U_size V_size C_size svd_power Z_flag sigma_A sigma_B temp_array rows2use cols2use S U] = read_tmp_file(C);

	clear V_size; % we won't actually use these
    clear temp_array;
    clear svd_info;
else
	clear C; % we don't actually need C -- it's more consistent just to recompute it    
    U = SVD_info.U;
    S = SVD_info.S;
    svd_power = SVD_info.svd_power;
    
    rows2use(1) = 1;
    rows2use(2) = prod(size_row(acptr_dims));
    cols2use(1) = rows2use(2) + 1;
    cols2use(2) = rows2use(2) + prod(size_row(donor_dims));
end

U_acc = U(rows2use(1):rows2use(2),:);
U_don = U(cols2use(1):cols2use(2),:);
clear U;

transformed_S = diag(S.^(svd_power))'; % raise S to appropriate power for SVD (also only need diagonal)
peak_list = ppm2inc(peak_list,C_axes);
peak_list = [peak_list(:,1:num_dims) ones(num_peaks,4-num_dims)]; % pad peak list with ones
acc_idcs = subs2idcs(size_row(acptr_dims),peak_list(:,acptr_dims));
don_idcs = subs2idcs(size_row(donor_dims),peak_list(:,donor_dims));
    
intensities = sum(U_acc(acc_idcs,:).*repmat(transformed_S,num_peaks,1).*U_don(don_idcs,:),2);  

transformed_S = diag((S.^(2*svd_power)).*log(S))';
clear S;

slopes = (2./intensities).*sum(U_acc(acc_idcs,:).*repmat(transformed_S,num_peaks,1).*U_don(don_idcs,:),2);  
% scatter(intensities,slopes)

clear transformed_S; % clean up to free memory

% [coeff_mat raw_scores] = princomp([intensities slopes]); % code below is more compatible, explicit version of PCA 
dIdl_v_I = [intensities slopes];
col_aves = repmat(mean(dIdl_v_I,1),size(dIdl_v_I,1),1);
[U S coeff_mat] = svd((dIdl_v_I - col_aves),'econ');
raw_scores = (dIdl_v_I - col_aves)*coeff_mat;
Z_scores = (raw_scores - repmat(mean(raw_scores),num_peaks,1))./repmat(std(raw_scores),num_peaks,1); 
p_vals = 1 - normcdf(Z_scores);  
% scatter(Z_scores(:,1),Z_scores(:,2))


function idcs = subs2idcs(the_size_row,subs_array)
% helper function to "matrix-ize" sub2ind so that you don't need to pass in
% separate arrays for each dimension
% note that size(subs_array,2) should equal length(the_size_row)

cur_num_dims = length(the_size_row);
switch cur_num_dims
    case {1} % no conversion necessary
        idcs = subs_array;
    case {2}
        idcs = sub2ind(the_size_row,subs_array(:,1),subs_array(:,2));
    case{3} % shouldn't have more than 3 donor or accptr dims, so this is all the cases we need 
        idcs = sub2ind(the_size_row,subs_array(:,1),subs_array(:,2),subs_array(:,3));
end