function [Z_scores p_vals] = gic_pca(peak_list, C, C_axes, SVD_info) % [Z_scores p_vals] = gic_pca(peak_list, C, C_axes, svd_info) % perform PCA analysis for the peak_list (with columns in ppm for each dimension) associated with the GIC spectrum % represented by C, C_axes and svd_info as returned by the function covar.m num_peaks = size(peak_list,1); size_row = C_axes(26,:); num_dims = sum(size_row > 1); acptr_dims = find(C_axes(27,:) == 1); donor_dims = find(C_axes(27,:) == -1); if(isstr(C)) % output from covar rather than being covar spec is string pointing to file with output info [S_size U_size V_size C_size svd_power Z_flag sigma_A sigma_B temp_array rows2use cols2use S U] = read_tmp_file(C); clear V_size; % we won't actually use these clear temp_array; clear svd_info; else clear C; % we don't actually need C -- it's more consistent just to recompute it U = SVD_info.U; S = SVD_info.S; svd_power = SVD_info.svd_power; rows2use(1) = 1; rows2use(2) = prod(size_row(acptr_dims)); cols2use(1) = rows2use(2) + 1; cols2use(2) = rows2use(2) + prod(size_row(donor_dims)); end U_acc = U(rows2use(1):rows2use(2),:); U_don = U(cols2use(1):cols2use(2),:); clear U; transformed_S = diag(S.^(svd_power))'; % raise S to appropriate power for SVD (also only need diagonal) peak_list = ppm2inc(peak_list,C_axes); peak_list = [peak_list(:,1:num_dims) ones(num_peaks,4-num_dims)]; % pad peak list with ones acc_idcs = subs2idcs(size_row(acptr_dims),peak_list(:,acptr_dims)); don_idcs = subs2idcs(size_row(donor_dims),peak_list(:,donor_dims)); intensities = sum(U_acc(acc_idcs,:).*repmat(transformed_S,num_peaks,1).*U_don(don_idcs,:),2); transformed_S = diag((S.^(2*svd_power)).*log(S))'; clear S; slopes = (2./intensities).*sum(U_acc(acc_idcs,:).*repmat(transformed_S,num_peaks,1).*U_don(don_idcs,:),2); % scatter(intensities,slopes) clear transformed_S; % clean up to free memory % [coeff_mat raw_scores] = princomp([intensities slopes]); % code below is more compatible, explicit version of PCA dIdl_v_I = [intensities slopes]; col_aves = repmat(mean(dIdl_v_I,1),size(dIdl_v_I,1),1); [U S coeff_mat] = svd((dIdl_v_I - col_aves),'econ'); raw_scores = (dIdl_v_I - col_aves)*coeff_mat; Z_scores = (raw_scores - repmat(mean(raw_scores),num_peaks,1))./repmat(std(raw_scores),num_peaks,1); p_vals = 1 - normcdf(Z_scores); % scatter(Z_scores(:,1),Z_scores(:,2)) function idcs = subs2idcs(the_size_row,subs_array) % helper function to "matrix-ize" sub2ind so that you don't need to pass in % separate arrays for each dimension % note that size(subs_array,2) should equal length(the_size_row) cur_num_dims = length(the_size_row); switch cur_num_dims case {1} % no conversion necessary idcs = subs_array; case {2} idcs = sub2ind(the_size_row,subs_array(:,1),subs_array(:,2)); case{3} % shouldn't have more than 3 donor or accptr dims, so this is all the cases we need idcs = sub2ind(the_size_row,subs_array(:,1),subs_array(:,2),subs_array(:,3)); end