Code covered by the BSD License  

Highlights from
ProcessNetwork Version 1.0 Software

image thumbnail

ProcessNetwork Version 1.0 Software

by

 

Functions for the delineation of Dynamical Process Networks using Information Theory

ProcessNetwork (configfile,inputfile,lagfile)
% This is the 11th version of main function (Dec 1 2011)
% This version is tweaked by Dr. Ruddell and adds storage of local and
% global bins, averages, and bounds

function [R] = ProcessNetwork (configfile,inputfile,lagfile)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   SETTINGS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% READ PARAMETER INPUTS FROM THE CONFIGURATION FILE
fid = fopen(configfile); %open the file as file ID "fid"
inputs = fscanf(fid, '%f %*s'); %put the contents of the file into the "inputs" vector, recording the first floating point on each line, then skipping the first string (a comment)
fclose(fid);

% SET PARAMETERS BASED ON WHAT'S IN THE CONFIGURATION FILE
% [nLags] = inputs(1); %the longest lag Lag we want to go to in our analysis- starting at 0, which would mean there is only one calculation made and T doesn't work.
[NoDataCode] = inputs(1); %tuples with this value anywhere in them will be skipped in entropy computations. Must be an integer that can't possibly be a bin #, i.e. negative integer! recommend -9999.
[nTests] = inputs(2); % number of shuffles to do; 100 is best. Must be integer valued one or higher. Otherwise shuffled and split tests will then give "NaN".
[oneTailZ] = inputs(3); % one-tail z-score for 95% given number of tests, 1.66 for 100, 1.68 for 50, 1.71 for 25
[nBins] = inputs(4); %set the number of bins used to classify each signal here
[trimTheData] = inputs(5); %set to 0 to not trim the whole row if one data value in the row is NoData; default is 1
[transformation] = inputs(6); %set to 0 to not use transformation, 1 to use anomaly transformation; other transforms not implemented yet
[anomalyMovingAveragePeriodNumber] = inputs(7); %the moving window used for anomaly generation, in #periods
[anomalyPeriodInData] = inputs(8); %set to the length in time steps of the period of the data
[workspaceOutput] = inputs(9); % workspace output file format selection
[textOutput] = inputs(10); % text output file format selection
[mfileOutput] = inputs(11); % mfile output file format selection
[debugFlag] = inputs(12); %whether or not to display timing, processing, and debug information; 0 means off, 1 means on
[parallelWorkers] = inputs(13); %parallel CPU matlab toolbox flag; if zero or one no parallelization is used, if a positive integer will attempt to open this number of workers using parallel toolbox
[binType] = inputs(14); %global flag; if 0 use locally bounded bins, if 1 use globally bounded bins
% [splitFraction] = inputs(14); %the fraction of the data to use in splitting signal strength tests; bounded as a real floating point number between zero and one (a positive fraction). Recommended value is close to 1, for example 0.90.
% [lagType] = inputs(14); %type of lag structure to use; 1=simple timesteps starting at zero, 2=doubling timesteps starting at zero, 3=factors of ten timesteps starting at zero

%QUALITY CHECK THESE INPUT PARAMETERS HERE...

% CONFIGURE DEBUG SCREEN OUTPUT AND LOG FILE SETTINGS
if (debugFlag) warning off; end

% SET UP THE STORAGE FOLDER FOR OUTPUTS
pname = sprintf('Results_workspace_%s',datestr(clock));
pname(pname==' ') = ['_'];
pname(pname==':') = ['-'];
system(sprintf('mkdir %s',pname));
time_log_w(configfile,[pname '\']);                               % Write time log file, copy config settings
% time_log_w(inputfile,[pname '\']);                               % Write time log file, copy inputfile settings %%%%make this work!!!!

% OBTAIN THE NUMBER OF FILES IN THE INPUT DATASET
try
    fid = fopen(inputfile);
    [~,nDataFiles] = fscanf(fid, '%s');
    fclose(fid);
catch
    fprintf(1, 'FATAL ERROR: input file loading problem \n');  %%%%%%%%%%LOG FILE THIS
    return
end

% OBTAIN THE LAGS IN THE LAG FILE
try
    fid = fopen(lagfile); %open the file as file ID "fid"
%     lags = fscanf(fid, '%u %*s'); %put the contents of the file into the "lags" vector, recording the first unsigned integer on each line, then skipping the first string (a comment)
    lags = fscanf(fid, '%u'); %put the contents of the file into the "lags" vector, recording the first unsigned integer on each line, then skipping the first string (a comment)
    fclose(fid);
catch
    fprintf(1, 'FATAL ERROR: lag file loading problem \n');  %%%%%%%%%%LOG FILE THIS
    return
end    
[nLags,~]=size(lags);
if nLags < 1
    fprintf(1, 'FATAL ERROR: lag file contains no lags \n');  %%%%%%%%%%LOG FILE THIS
    return
end
R.lagVect=zeros(nLags+1,1);
R.lagVect(2:nLags+1)=lags;
for i=1:nLags
    if ~isfloat(R.lagVect(i+1))
        fprintf(1, 'FATAL ERROR: non-float lag in lag file \n');  %%%%%%%%%%LOG FILE THIS  
        return
    end
    if R.lagVect(i+1) < 0 
        fprintf(1, 'FATAL ERROR: negative lag in lag file \n');  %%%%%%%%%%LOG FILE THIS  
        return
    end
    if R.lagVect(i+1) <= R.lagVect(i)
        fprintf(1, 'FATAL ERROR: non-monotonically-increasing lag in lag file \n');  %%%%%%%%%%LOG FILE THIS  
        return
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% INITIALIZE RESULTS STRUCTURE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% INITIALIZE INDEX QUANTITIES
R.nRawData              = zeros(nDataFiles,1);
R.nVars                 = zeros(nDataFiles,1);

% OBTAIN THE NUMBER OF DATA AND NUMBER OF VARIABLES IN ALL FILES
fid = fopen(inputfile);
for f=1:nDataFiles
    %LOAD DATA FILE
    dataFile = fgetl(fid); %TODO: NEEDS TO BE RESET
    fprintf(1, 'screening input files; loading file %s\n', dataFile); %%%%%%%%%%LOG FILE THIS
    try
        rawData = load(dataFile);
    catch
        fprintf(1, 'FATAL ERROR: file loading issue with file \n');  %%%%%%%%%%LOG FILE THIS
        return
    end
    if f == nDataFiles fclose(fid); end %close the input file when we reach the end
    [R.nRawData(f),R.nVars(f)]=size(rawData);
end
if max(R.nVars) ~= min(R.nVars) | R.nVars <= 0 %check to make sure nVars is consistent
    fprintf(1, 'FATAL ERROR: Inconsistent column dimensions in the input files \n'); %%%%%%%%%%LOG FILE THIS AS A FATAL ERROR
    return
else
    nVars=R.nVars(1);
end

% INITIALIZE PREPROCESSING QUANTITIES
R.nBinVect                = ones(nVars,1)*nBins;
R.nClassified             = NaN(nDataFiles,1);
R.binEdgesLocal           = NaN(nVars,nBins,nDataFiles);
R.minEdgeLocal            = NaN(nVars,nDataFiles);
R.maxEdgeLocal            = NaN(nVars,nDataFiles);
R.LocalVarAvg             = NaN(nVars,nDataFiles);
R.binEdgesGlobal          = NaN(nVars,nBins);
R.minEdgeGlobal           = NaN(nVars,1);
R.maxEdgeGlobal           = NaN(nVars,1);
R.GlobalVarAvg            = NaN(nVars,1);

% INITIALIZE OUTPUT QUANTITIES FROM THE ENTROPYFUNCTION
R.HXt                    = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HYw                    = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HYf                    = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HXtYw                  = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HXtYf                  = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HYwYf                  = NaN(nVars,nVars,nLags+1,nDataFiles);
R.HXtYwYf                = NaN(nVars,nVars,nLags+1,nDataFiles);
R.SigThreshT             = NaN(nVars,nVars,nDataFiles);
R.SigThreshI             = NaN(nVars,nVars,nDataFiles);
R.meanShuffT             = NaN(nVars,nVars,nDataFiles);
R.sigmaShuffT            = NaN(nVars,nVars,nDataFiles);
R.meanShuffI             = NaN(nVars,nVars,nDataFiles);
R.sigmaShuffI            = NaN(nVars,nVars,nDataFiles);
R.nCounts                = NaN(nVars,nVars,nLags+1,nDataFiles);
R.I                      = NaN(nVars,nVars,nLags+1,nDataFiles);
R.T                      = NaN(nVars,nVars,nLags+1,nDataFiles);
R.Tplus                  = NaN(nVars,nLags+1,nDataFiles);
R.Tminus                 = NaN(nVars,nLags+1,nDataFiles);
R.Tnet                   = NaN(nVars,nLags+1,nDataFiles);
R.TnetBinary             = NaN(nVars,nVars,nLags+1,nDataFiles);
R.InormByDist            = NaN(nVars,nVars,nLags+1,nDataFiles);
R.TnormByDist            = NaN(nVars,nVars,nLags+1,nDataFiles);
R.SigThreshInormByDist   = NaN(nVars,nVars,nDataFiles);
R.SigThreshTnormByDist   = NaN(nVars,nVars,nDataFiles);
R.Ic                     = NaN(nVars,nVars,nLags+1,nDataFiles);
R.Tc                     = NaN(nVars,nVars,nLags+1,nDataFiles);
R.TvsIzero               = NaN(nVars,nVars,nLags+1,nDataFiles);
R.SigThreshTvsIzero      = NaN(nVars,nVars,nDataFiles);
R.Abinary                = NaN(nVars,nVars,nLags+1,nDataFiles);
R.Awtd                   = NaN(nVars,nVars,nLags+1,nDataFiles);
R.AwtdCut                = NaN(nVars,nVars,nLags+1,nDataFiles);
R.charLagFirstPeak       = NaN(nVars,nVars,nDataFiles);
R.TcharLagFirstPeak      = NaN(nVars,nVars,nDataFiles);
R.charLagMaxPeak         = NaN(nVars,nVars,nDataFiles);
R.TcharLagMaxPeak        = NaN(nVars,nVars,nDataFiles);
R.TvsIzerocharLagMaxPeak = NaN(nVars,nVars,nDataFiles);
R.nSigLags               = NaN(nVars,nVars,nDataFiles);
R.FirstSigLag            = NaN(nVars,nVars,nDataFiles);
R.LastSigLag             = NaN(nVars,nVars,nDataFiles);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PREPROCESS BIN EDGES, BOTH LOCAL AND GLOBAL, AND DATA STATISTICS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fid = fopen(inputfile);
for f=1:nDataFiles
    
    %LOAD DATA FILE
    dataFile = fgetl(fid); %TODO: NEEDS TO BE RESET
    fprintf(1, 'preprocessing input files; loading file %s\n', dataFile); %%%%%%%%%%LOG FILE THIS
    try
        rawData = load(dataFile);
    catch
        fprintf(1, 'ERROR: file loading issue in preprocessing loop, skipping file \n');  %%%%%%%%%%LOG FILE THIS
        continue
    end
    if f == nDataFiles fclose(fid); end %close the input file when we reach the end
        
    % PROCESS THE DATA FILE TO TRIM ROWS WHERE THERE IS MISSING DATA
    if trimTheData == 1
        [trimmedData,~]=trimNoData(rawData,NoDataCode);
    elseif trimTheData == 0
        trimmedData=rawData;
    end
    
    % PROCESS THE DATA FILE TO GET TRANSFORMED DATA
    if transformation == 1
        [transformedData]=removePeriodicMean(trimmedData,anomalyPeriodInData,anomalyMovingAveragePeriodNumber,NoDataCode);
    elseif transformation == 0
        transformedData=trimmedData;
    end
    
    % COMPUTE THE LOCAL BINS AND AVERAGES
    [R.binEdgesLocal(:,:,f),R.minEdgeLocal(:,f),R.maxEdgeLocal(:,f)]=GetUniformBinEdges(transformedData,R.nBinVect,NoDataCode);
    
    % COMPUTE THE VARIABLE AVERAGES FOR STORAGE AS METADATA
    for v=1:nVars
        R.LocalVarAvg(v,f)=mean(transformedData(v,:));
    end
       
end

% ESTABLISH THE GLOBAL STATISTICS AND BINS
for v=1:nVars
    R.GlobalVarAvg(v)=mean(R.LocalVarAvg(v,:));
    R.minEdgeGlobal(v)=min(R.minEdgeLocal(v,:));
    R.maxEdgeGlobal(v)=max(R.minEdgeLocal(v,:));
    for e=1:R.nBinVect(v)
        R.binEdgesGlobal(v,e)=R.minEdgeGlobal(v)+((R.maxEdgeGlobal(v)-R.minEdgeGlobal(v))*e/R.nBinVect(v));
    end
end
clear v;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% RUN CALCULATION LOOP
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% OPEN PARALLEL TOOLBOX POOL
if parallelWorkers>1 && matlabpool('size')==0 % proceed if we are parallelizing and if the pool is currently closed (size zero)
    localScheduler=findResource;
    nWorkers = min(parallelWorkers,localScheduler.ClusterSize);
    matlabpool('open', 'local', nWorkers); %open worker pool sized to the lesser of parallelWorkers or the Cluster Size
    fprintf(1, ['Matlab Pool open with ' , sprintf('%i',nWorkers), ' CPU cores employed \n']); %%%%%%%%%%LOG FILE THIS
end

% RE-OPEN INPUT FILE
fid= fopen(inputfile);

% MAIN CALCULATION LOOP
for f = 1:nDataFiles
    
    %TRACK RUN TIME
    tStart=tic;
    
    %LOAD DATA FILE
    dataFile = fgetl(fid); %TODO: NEEDS TO BE RESET
    fprintf(1, 'processing input files; loading file %s\n', dataFile); %%%%%%%%%%LOG FILE THIS
    try
        rawData = load(dataFile);
    catch
        fprintf(1, 'ERROR: file loading issue in processing loop, skipping file \n');  %%%%%%%%%%LOG FILE THIS
        continue
    end
    if f == nDataFiles fclose(fid); end %close the input file when we reach the end

    % PROCESS THE DATA FILE TO TRIM ROWS WHERE THERE IS MISSING DATA
    if trimTheData == 1
        [trimmedData,~]=trimNoData(rawData,NoDataCode);
    elseif trimTheData == 0
        trimmedData=rawData;
    end
    
    % PROCESS THE DATA FILE TO GET TRANSFORMED DATA
    if transformation == 1
        [transformedData]=removePeriodicMean(trimmedData,anomalyPeriodInData,anomalyMovingAveragePeriodNumber,NoDataCode);
    elseif transformation == 0
        transformedData=trimmedData;
    end

    %SET BIN EDGES FOR THIS RUN
    if binType==1
        binEdges=R.binEdgesGlobal;
    elseif binType==0
        binEdges=R.binEdgesLocal(:,:,f);
    end
     
    %CLASSIFY THE VALUES IN THE DATASET
    [classifiedData,R.nClassified(f)]=classifySignal(transformedData,binEdges,R.nBinVect,NoDataCode);

    % RUN THE PARALLEL AND COMPUTATIONALLY INTENSIVE ENTROPY FUNCTION
    if R.nClassified(f) <= 0; continue; end %proceed if there is data in the vector for this file
    [E] = entropyFunction(classifiedData,R.lagVect,R.nBinVect,NoDataCode,nTests,oneTailZ);
    timewrite(dataFile,pname,'Log.txt',([num2str(toc(tStart)/60,2)]),1); % Write time elapsed in log %%%%%%%%%%LOG FILE THIS
    
    %ONE BY ONE, ASSIGN VALUES OF R TO THE NECESSARY INDICES
    R.HXt(:,:,:,f)=E.HXt;
    R.HYw(:,:,:,f)=E.HYw;
    R.HYf(:,:,:,f)=E.HYf;
    R.HXtYw(:,:,:,f)=E.HXtYw;
    R.HXtYf(:,:,:,f)=E.HXtYf;
    R.HYwYf(:,:,:,f)=E.HYwYf;
    R.HXtYwYf(:,:,:,f)=E.HXtYwYf;
    R.SigThreshT(:,:,f)=E.SigThreshT;
    R.SigThreshI(:,:,f)=E.SigThreshI;
    R.meanShuffT(:,:,f)=E.meanShuffT;
    R.sigmaShuffT(:,:,f)=E.sigmaShuffT;
    R.meanShuffI(:,:,f)=E.meanShuffI;
    R.sigmaShuffI(:,:,f)=E.sigmaShuffI;
    R.nCounts(:,:,:,f)=E.nCounts;
%     R.nCountsShuff(:,:,f)=E.nCountsShuff;
    R.I(:,:,:,f)=E.I;
    R.T(:,:,:,f)=E.T;
    R.Tplus(:,:,f)=E.Tplus;
    R.Tminus(:,:,f)=E.Tminus;
    R.Tnet(:,:,f)=E.Tnet;
    R.TnetBinary(:,:,:,f)=E.TnetBinary;
    R.InormByDist(:,:,:,f)=E.InormByDist;
    R.TnormByDist(:,:,:,f)=E.TnormByDist;
    R.SigThreshInormByDist(:,:,f)=E.SigThreshInormByDist;
    R.SigThreshTnormByDist(:,:,f)=E.SigThreshTnormByDist;
    R.Ic(:,:,:,f)=E.Ic;
    R.Tc(:,:,:,f)=E.Tc;
    R.TvsIzero(:,:,:,f)=E.TvsIzero;
    R.SigThreshTvsIzero(:,:,f)=E.SigThreshTvsIzero;
    R.Abinary(:,:,:,f)=E.Abinary;
    R.Awtd(:,:,:,f)=E.Awtd;
    R.AwtdCut(:,:,:,f)=E.AwtdCut;
    R.charLagFirstPeak(:,:,f)=E.charLagFirstPeak;
    R.TcharLagFirstPeak(:,:,f)=E.TcharLagFirstPeak;
    R.charLagMaxPeak(:,:,f)=E.charLagMaxPeak;
    R.TcharLagMaxPeak(:,:,f)=E.TcharLagMaxPeak;
    R.TvsIzerocharLagMaxPeak(:,:,f)=E.TvsIzerocharLagMaxPeak;
    R.nSigLags(:,:,f)=E.nSigLags;
    R.FirstSigLag(:,:,f)=E.FirstSigLag;
    R.LastSigLag(:,:,f)=E.LastSigLag;
    R.IR(:,:,:,f)=E.RelEnt;
    R.TR(:,:,:,f)=E.RelT;

end

% CLOSE THE PARALLEL WORKER POOL IF ONE WAS OPEN
if(parallelWorkers>1) matlabpool close; end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% POSTPROCESS DERIVED INFORMATION THEORY AND PHYSICAL QUANTITIES
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
R.Hm=sum(squeeze(R.HXt(:,1,1,:)./log2(nBins)))./nVars;
R.TSTm=squeeze(sum(sum(R.T./log2(nBins),1),2))./(nVars^2);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% WRITE OUTPUT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% outputFunction(R,pname,nDataFiles,workspaceOutput,textOutput); %THIS STILL ISN'T WORKING WITH MFILE AND TEXT OUTPUT, OR AT LEAST I BROKE IT....
if workspaceOutput==1
    file=[pname '\results_workspace'];
    save(file);
end

end

Contact us