Code covered by the BSD License  

Highlights from
TDMS Reader

TDMS Reader

by

 

13 Jan 2011 (Updated)

Read TDMS files (v1 & v2) without a DLL. Current version: 2.5

TDMS_preprocessFile(fid,tdmsFileName,params)
function metaStruct = TDMS_preprocessFile(fid,tdmsFileName,params)
%TDMS_preprocessFile  Populates props and gets segment information
%
%   NOTE: This file shouldn't be called directly by the user
%
%   metaStruct = TDMS_preprocessFile(fid,tdmsFileName,params)
%
%   The function walks every segment of the file (or of its index file),
%   parses the lead in and the meta data, and records where the raw data
%   of each object lives so that it can be read later.  No raw data is
%   read here, it is only skipped over.
%
%   INPUTS
%   =======================================================================
%   fid          : file id of opened tdms file, may be null if INDEX_DEBUG is true
%   tdmsFileName : full path to tdms file OR tdms index file for INDEX_DEBUG
%   params       : all optional inputs from TDMS_readTDMSFile
%                  If params.META_STRUCT is not empty it is validated
%                  (fields "version" and "fileName") and returned as is,
%                  nothing is read from the file in that case.
%
%   OUTPUTS
%   =======================================================================
%   metaStruct :
%       structure with fields:
%
%   eof_error            : logical, true if the last lead in reported that
%                          the file ended unexpectedly (false for a file
%                          with no segments)
%   fileName             : name of the tdms file, without path & extension
%   version              : params.CURRENT_VERSION at the time of creation
%
%   The following 3 variables are all the same length, and have one index
%   for each unique object.  Each object may be one of the following types,
%   file root, group object, or channel object.  Only channel objects may
%   have data.
%
%   rawDataInfo           : (structure array)
%           NOTE: This holds the most recent info for an object, and can be
%           changed on a per segment basis, segInfo is used to hold
%           information that is needed per segment
%
%           .isRawDAQmx       - true if the data type was 2^32-1, handling
%                               of this type is still in progress
%           .lengthOfIndex    - length of raw data index (not used)
%           .dataType         - enumerated dataType (Labview)
%           .dimensionData    - must be 1, anything else raises an error
%           .numberOfValues   - # of values to read each time
%           .totalSizeBytes   - # of bytes the object occupies in one chunk
%                               (numberOfValues * size of one value, or
%                               read from the file for strings)
%           .numberProperties - # of properties defined for the object
%           .chunkIndex       - # of rows used in dataMatrix, this includes
%                               one extra trailing row (see below) if the
%                               object has any chunks
%           .chunkLength      - # of rows allocated in dataMatrix during
%                               parsing
%           .dataMatrix       - one row per chunk, columns are:
%                                   1) file position of the chunk
%                                   2) index of the first sample in the chunk
%                                   3) segment number
%                               the extra trailing row only has column 2
%                               set, to (total # of samples + 1)
%           .infoSet          - a logical on whether or not this data has
%                               been set (for error checking)
%           .propNames        - a cell array for defining property names
%           .propValues       - cell array for holding property values
%
%   numberDataPoints     : an array which specifies the # of data points for
%                          each object (used for preallocation)
%   objectNameList       : (cell array of strings) names of all objects in the
%                          TDMS file
%
%   Info for each segment ...
%
%   segInfo              : (struct array)
%           .rawPos - position of raw data for that segment
%           .kTocNewObjList - flag on whether a new channel list was
%           created or not
%           .objOrder - array specifying indices of the objects to assign
%                       data to, order specifies read order, and values
%                       are indices into the objectNameList & rawDataInfo
%                       struct
%           .nRawObjects - length of objOrder
%           .nSamplesRead - array, # of samples to read for each object
%           .isInterleaved - logical, whether that segment uses interleaved
%                               data or not
%           .isBigEndian - logical
%           .nChunks     - # of times to read data using the parameters
%                           above before moving onto the next segment
%
%   See also: TDMS_processLeadIn, TDMS_getDataSize, TDMS_getPropValue


%PARAMETERS - passed in from TDMS_readTDMSFile
%==========================================================================
UTC_DIFF        = params.UTC_DIFF;
MAX_NUM_OBJECTS = params.MAX_NUM_OBJECTS;
MAX_NUM_PROPS   = params.MAX_NUM_PROPS;
N_SEGS_GUESS    = params.N_SEGS_GUESS;
N_SEGS_INC      = params.N_SEGS_INC;
DEBUG           = params.DEBUG;
DATE_STR_FORMAT = params.DATE_STR_FORMAT;
UNICODE_FORMAT  = params.STRING_ENCODING;
USE_INDEX       = params.USE_INDEX;
INIT_CHUNK_SIZE = params.INIT_CHUNK_SIZE;
TDMS_INDEX_EXT  = params.TDMS_INDEX_EXT;
MACHINE_FORMAT  = params.MACHINE_FORMAT;
STRING_ENCODING = params.STRING_ENCODING;
CURRENT_VERSION = params.CURRENT_VERSION;
INDEX_DEBUG     = params.INDEX_DEBUG;

%DONT CHANGE THESE
%==========================================================================
LEAD_IN_LENGTH = 28; %# of bytes of lead in

%FIGURING OUT WHICH FILE TO READ
%==========================================================================
[tdmsPathToFile,tdmsNameOnly] = fileparts(tdmsFileName);
if isempty(params.META_STRUCT)
    isIndexFID = false;
    if USE_INDEX || INDEX_DEBUG
        if INDEX_DEBUG
            indexFile = tdmsFileName;
        else
            indexFile = fullfile(tdmsPathToFile,[tdmsNameOnly TDMS_INDEX_EXT]);
        end
        if exist(indexFile,'file')
            indexFid = fopen(indexFile,'r',MACHINE_FORMAT,STRING_ENCODING);
            if indexFid == -1
                %Fail here with a clear message instead of at the first
                %fseek on an invalid file identifier
                error('Unable to open tdms_index file: %s',indexFile)
            end
            fid        = indexFid;
            isIndexFID = true;
            %The index file is opened by this function, so this function
            %is responsible for closing it, both on a normal return and
            %when an error is thrown while parsing
            indexCleanup = onCleanup(@() fclose(indexFid)); %#ok<NASGU>
        else
            %Just use the tdms file, which we have already tested to exist
            if INDEX_DEBUG
                %NOTE: With INDEX_DEBUG we explicitly passed in the index
                %file to read and parse (generally for debugging purposes
                %so if it doesn't exist, then we have a problem
                error('Specified tdms_index file doesn not exist')
            end
        end
    end
else
    %Quits early if meta struct input is valid
    %======================================================================
    metaStruct = params.META_STRUCT;
    if ~isstruct(metaStruct) || ~isfield(metaStruct,'version') || ~isfield(metaStruct,'fileName')
        error('The META_STRUCT parameter passed should be a structure with fields "version" and "fileName"')
    end
    if metaStruct.version ~= CURRENT_VERSION
        %NOTE: %g is used instead of %d as versions are not integers (2.5)
        error('Mismatch in version creation, version of struct %g, please run with latest version: %g',...
            metaStruct.version,CURRENT_VERSION)
    end
    if ~strcmpi(metaStruct.fileName,tdmsNameOnly)
        fprintf('Filename from metaStruct: %s\n',metaStruct.fileName);
        fprintf('Filename from input: %s\n',tdmsNameOnly);
        error('Mismatch in filenames, see printout above')
    end
    return
end

%This value is handed to TDMS_processLeadIn
if isIndexFID
    lastLetter = double('h');  %used for .tdms_index files
else
    lastLetter = double('m');  %used for .tdms files
end


%INITIALIZE OUTPUTS
%==========================================================================
curNumTotalObjects  = 0; %Current # of objects
numberDataPoints    = zeros(1,MAX_NUM_OBJECTS);
objectNameList      = cell(1,MAX_NUM_OBJECTS);  %names of all objects

rawDataInfo = initRawInfoStruct(MAX_NUM_PROPS,MAX_NUM_OBJECTS);
segInfo     = initSegStruct(N_SEGS_GUESS);
%==========================================================================

%TEMP VARIABLES
%==========================================================================
objectHasRawData    = false(1,MAX_NUM_OBJECTS); %This is needed for adding
%# of data points together
ranOnce    = false;
nSegs      = 0;
eof_error  = false; %Needs a value even if the loop below never runs 
%(zero length file), as it is stored in the output

%Get eof & return to start
fseek(fid,0,1);
eofPosition = ftell(fid);
fseek(fid,0,-1);

%START OF READING
%==========================================================================
curPosCounter = 0; %Used to keep track of where in the actual tdms file we 
%would be at various points, this is used for parsing verification as well
%as assisting with more complicated reads
while ftell(fid) ~= eofPosition
    
    nSegs = nSegs + 1;
    if nSegs > length(segInfo)
        segInfo = [segInfo initSegStruct(N_SEGS_INC)]; %#ok<AGROW>
    end
    
    if DEBUG
        disp('------  START OF DEBUG -------')
        fprintf(2,'CURRENT SEGMENT: %d\n',nSegs);
        fprintf(2,'Current file position: %d\n',curPosCounter);
    end
    
    %LEAD IN HANDLING
    %======================================================================
    [flags,info,eof_error]  = TDMS_processLeadIn(fid,lastLetter);
    
    if eof_error
        %This should only happen once at the end 
        fprintf(2,['WARNING: File was not closed properly.\n' ... 
            'Data will most likely be missing at the end of the file\n']);
        %The incomplete segment is dropped from the output
        nSegs = nSegs - 1;
        break
    end
    
    %curPosCounter now points at the end of this segment, rawPos is the
    %start of the raw data, i.e. just after the lead in and the meta data
    curPosCounter = curPosCounter + LEAD_IN_LENGTH + info.segLength;
    segInfo(nSegs).rawPos = curPosCounter - info.segLength + info.metaLength;
    
    
    if ~ranOnce
        %NOTE: This might be false if no channels are defined ...
        if flags.kTocNewObjList == false
            error('the kTocNewObjList was false when first run, this is not expected')
        end
        ranOnce = true;
    end
    %======================================================================
    
    
    %======================================================================
    %                       META INFORMATION PROCESSING
    %======================================================================
    if flags.hasMetaData
        
        %Get # of changed objects
        numNewObjInSeg = fread(fid,1,'uint32');
        
        %Reinitialize order list if new
        if flags.kTocNewObjList
            curObjListCount      = 0;
            objOrder             = zeros(1,2*numNewObjInSeg);
            nValuesRead          = zeros(1,2*numNewObjInSeg);
            %NOTE: I padded this by doubling the #, we might append extra
            %channels in subsequent reads, I currently don't resize this
            %...
        end
        
        for iNewObject = 1:numNewObjInSeg
            
            %1) GET OBJECT PATH
            %-------------------------------
            objPathLength = fread(fid,1,'uint32');
            temp          = fread(fid,objPathLength,'*uint8');
            objName       = native2unicode(temp,UNICODE_FORMAT)'; %#ok<*N2UNI>
            
            %POPULATE ALL OBJECT LIST
            %----------------------------------------
            objIndex = find(strcmp(objectNameList(1:curNumTotalObjects),objName),1);
            if isempty(objIndex)
                curNumTotalObjects  = curNumTotalObjects + 1;
                if curNumTotalObjects > length(rawDataInfo)
                    %All per object arrays are grown together so that
                    %they stay the same length
                    rawDataInfo      = [rawDataInfo      ...
                        initRawInfoStruct(MAX_NUM_PROPS,MAX_NUM_OBJECTS)]; %#ok<AGROW>
                    numberDataPoints = [numberDataPoints zeros(1,MAX_NUM_OBJECTS)]; %#ok<AGROW>
                    objectNameList   = [objectNameList   cell(1,MAX_NUM_OBJECTS)]; %#ok<AGROW>
                    objectHasRawData = [objectHasRawData false(1,MAX_NUM_OBJECTS)];  %#ok<AGROW>
                end
                objIndex            = curNumTotalObjects;
                objectNameList{curNumTotalObjects} = objName;
            end
            
            
            
            %RAW DATA INDEX PROCESSING
            %---------------------------------------------
            curPos = ftell(fid);
            
            rawDataIndexLength = fread(fid,1,'uint32');
            
            rawDataInfo(objIndex).lengthOfIndex = rawDataIndexLength;
            
            if DEBUG
                fprintf(2,'RawDataLength: %d\n',rawDataIndexLength);
                fprintf(2,'CurrentPos: %d\n',curPos);
            end
            
            
            switch rawDataIndexLength
                case 0 %Same as previous
                    if rawDataInfo(objIndex).infoSet == false
                        error('Channel %s set to use previous rawDataIndex but this channel is new',objName)
                    end
                    
                    %NOTE: "same as previous segment" apparently means
                    %"same as the previous one with data ..."
                    
                    if rawDataInfo(objIndex).numberOfValues > 0 && ~objectHasRawData(objIndex)
                        objectHasRawData(objIndex) = true;
                    end
                    
                case 2^32-1 %no raw data
                    rawDataInfo(objIndex).infoSet    = true;
                    objectHasRawData(objIndex)       = false;
                otherwise
                    objectHasRawData(objIndex)    = true;
                    rawDataInfo(objIndex).infoSet = true;
                    
                    %DATA TYPE HANDLING
                    %------------------------------------------------------
                    dataType = fread(fid,1,'uint32');
                    %NOTE: infoSet was just set to true above, so in
                    %practice the data type is only rejected when it
                    %changes after samples have already been counted for
                    %this object
                    if rawDataInfo(objIndex).infoSet && dataType ~= rawDataInfo(objIndex).dataType && numberDataPoints(objIndex) > 0
                        error('Raw data type for channel %s has changed from %d to %d',...
                            objName,rawDataInfo(objIndex).dataType,dataType)
                    else
                        rawDataInfo(objIndex).dataType = dataType;
                    end

                    %DATA SIZE HANDLING
                    %-----------------------------------------------------
                    rawDataInfo(objIndex).dimensionData  = fread(fid,1,'uint32');
                    if rawDataInfo(objIndex).dimensionData ~= 1
                        error('Code doesn''t yet handle non 1D data')
                    end
                    
                    rawDataInfo(objIndex).numberOfValues = fread(fid,1,'uint64');
                    
                    %RawDaqMX
                    %------------------------------------------------------
                    %NOTES:
                    if dataType == 2^32-1
                        %JIM CODE IN PROGRESS
                        %NOTE(review): for this type the rest of the index
                        %is not read and totalSizeBytes is not updated
                        rawDataInfo(objIndex).isRawDAQmx = true; %We can
% % %                         %post process this to convert from bits to an
% % %                         %actual value
% % %                         
% % %                         %FORMAT:
% % %                         %1) -
% % %                         daqMXVersion = rawDataIndexLength;
% % %                         if ~ismember(daqMXVersion,[4713 4714])
% % %                             error('Unexpected version: %d',daqMXVersion)
% % %                         end
% % % 
% % % 
% % %                         %2) Let's get the remaining # of bytes
% % %                         %This seems to be 32 ...
% % %                         
% % % % %                         wtf = fread(fid,8,'uint32');
% % % % %                         disp(wtf)
% % %                         %4713
% % %                         %13000012
% % %                         %15000018
% % %                         %? -> 8th indicates data type? OR -> bytes per sample
% % %                         
% % %                         %4714
% % %                         %1 0 0 0 256 256 512
% % % 
% % %                         %3) Update datetype - where the heck is it?
% % %                         %rawDataInfo(objIndex).dataType = dataType;
                    else %Non RawDAQmx type
                        if rawDataInfo(objIndex).dataType == 32
                            %If string, size is specified by an additional field
                            rawDataInfo(objIndex).totalSizeBytes = fread(fid,1,'uint64');
                        else
                            rawDataInfo(objIndex).totalSizeBytes = ...
                                rawDataInfo(objIndex).numberOfValues*TDMS_getDataSize(dataType);
                        end
                        
                        %Another chance to check correct reading
                        if curPos + rawDataIndexLength ~= ftell(fid)
                            error(['Raw Data Index length was incorrect: %d stated vs %d observed,' ...
                                'likely indicates bad code or a bad tdms file'],rawDataIndexLength,ftell(fid) - curPos)
                        end
                    end
                    
                    if DEBUG
                        fprintf(2,'nSegs: %d\n',nSegs);
                        fprintf(2,'objName: %s\n',objName);
                    end
            end
            
            
            %--------------------------------------------------------------
            %POPULATE ORDER TO RETRIEVE RAW DATA
            %--------------------------------------------------------------
            if objectHasRawData(objIndex)
                appendToList = false;
                if flags.kTocNewObjList
                    appendToList = true;
                else %Only append if not currently specified
                    I_objOrder = find(objOrder(1:curObjListCount) == objIndex,1);
                    if isempty(I_objOrder)
                        appendToList = true;
                    else
                        %Already in the list, only the # of values to
                        %read is refreshed
                        nValuesRead(I_objOrder) = rawDataInfo(objIndex).numberOfValues;
                    end
                end
                
                %NOTE: No overflow code in place yet, however we do
                %initialize with twice the # of objects specified to be 
                %in a segment when a new list is created, new segments
                %might add more objects to the list
                if appendToList
                    curObjListCount                 = curObjListCount + 1;
                    objOrder(curObjListCount)       = objIndex;
                    nValuesRead(curObjListCount)    = rawDataInfo(objIndex).numberOfValues;
                end
            end
            
            
            %PROPERTY HANDLING
            %--------------------------------------------------------------
            numberProperties = fread(fid,1,'uint32');
            %Below is the # of props already assigned to that channel
            nPropsChan       = rawDataInfo(objIndex).numberProperties;
            curProps         = rawDataInfo(objIndex).propNames;
            
            for iProp = 1:numberProperties
                propNameLength  = fread(fid,1,'uint32');
                temp            = fread(fid,propNameLength,'*uint8');
                propName        = native2unicode(temp,UNICODE_FORMAT)';
                propDataType    = fread(fid,1,'uint32');
                
                %A property that was seen before is overwritten in place,
                %a new one is appended
                propIndex = find(strcmp(curProps(1:nPropsChan),propName),1);
                if isempty(propIndex)
                    %Updates needed for new properties
                    nPropsChan              = nPropsChan + 1;
                    propIndex               = nPropsChan;
                    curProps{propIndex}     = propName;
                    rawDataInfo(objIndex).propNames{propIndex} = propName;
                end
               
                %Update value
                propValue = TDMS_getPropValue(fid,propDataType,UTC_DIFF,DATE_STR_FORMAT);
                rawDataInfo(objIndex).propValues{propIndex} = propValue;
            end
            rawDataInfo(objIndex).numberProperties = nPropsChan;
            
            if DEBUG
                fprintf(2,'end of index position: %d\n',ftell(fid));
            end
        end
    end
    %======================================================================
    %               END OF META DATA PROCESSING
    
    
    
    %RAW DATA SAMPLE COUNTING
    %======================================================================
    
    segInfo(nSegs).kTocNewObjList = flags.kTocNewObjList;
    byteSizeRaw = info.segLength - info.metaLength;
    if ~flags.hasRawData || byteSizeRaw == 0
        segInfo(nSegs).nChunks = 0;
    else
        segInfo(nSegs).objOrder      = objOrder(1:curObjListCount);
        segInfo(nSegs).nRawObjects   = curObjListCount;
        segInfo(nSegs).nSamplesRead  = nValuesRead(1:curObjListCount);
        segInfo(nSegs).isInterleaved = flags.isInterleaved;
        segInfo(nSegs).isBigEndian   = flags.isBigEndian;
        
        %# OF CHUNK PROCESSING
        %------------------------------------------------------------------
        %A chunk is one pass over all objects in the read order, a segment
        %may hold several chunks back to back
        %NOTE(review): if totalBytesPerChunk is 0 (no objects in the list,
        %or only objects without a known size) nChunks becomes Inf, which
        %passes the integer check below - this case is not handled yet
        totalBytesPerChunk = sum([rawDataInfo(objOrder(1:curObjListCount)).totalSizeBytes]);
        nChunks            = byteSizeRaw/totalBytesPerChunk;
        
        %Some error checking
        %------------------------------------------
        if DEBUG
            fprintf(2,'nChunks: %d\n',nChunks);
            fprintf(2,'nSamplesRead: %s\n',mat2str(nValuesRead(1:curObjListCount)));
            fprintf(2,'totalBytesPerChunk: %d\n',totalBytesPerChunk);
            fprintf(2,'byteSizeRaw: %d\n',byteSizeRaw);
        end
        
        if nChunks ~= floor(nChunks)
            error(['The remaining data doesn''t split evently into' ...
                ' chunks, estimated # of chunks: %d'],nChunks)
        end
        
        chunkByteOffset = 0;
        %Increment the number of data points
        for iObject = 1:curObjListCount
            
            curIndex = objOrder(iObject);
            
            nSamplesReadCurObject = rawDataInfo(curIndex).numberOfValues;
            
            %Apparently some writers don't stripe the objects when some
            %objects don't have any data and some do ...
            if nSamplesReadCurObject > 0            
                %This allows us to grow these values if we haven't
                %sufficiently preallocated.  We grow by at least the # of
                %rows that are missing, so that chunkLength always matches
                %the # of rows in dataMatrix, even when a single segment
                %holds more chunks than INIT_CHUNK_SIZE
                nRowsMissing = rawDataInfo(curIndex).chunkIndex + nChunks - rawDataInfo(curIndex).chunkLength;
                if nRowsMissing > 0
                    nRowsAdded = max(INIT_CHUNK_SIZE,nRowsMissing);
                    rawDataInfo(curIndex).dataMatrix  = [rawDataInfo(curIndex).dataMatrix; zeros(nRowsAdded,3)];
                    rawDataInfo(curIndex).chunkLength = rawDataInfo(curIndex).chunkLength + nRowsAdded;
                end

                %DataMatrix:
                %==============================================================
                %This information is used for reading parts of an object during
                %a single read instead of the entire object

                %Column 1, file position
                indices = (rawDataInfo(curIndex).chunkIndex+1):(rawDataInfo(curIndex).chunkIndex+nChunks);
                rawDataInfo(curIndex).dataMatrix(indices,1) = ...
                    segInfo(nSegs).rawPos + chunkByteOffset + (0:totalBytesPerChunk:(nChunks-1)*totalBytesPerChunk);

                %Column 2, first sample at that position
                rawDataInfo(curIndex).dataMatrix(indices,2) = ...
                    numberDataPoints(curIndex) + (0:nSamplesReadCurObject:(nChunks-1)*nSamplesReadCurObject) + 1;

                %Column 3, segment number
                rawDataInfo(curIndex).dataMatrix(indices,3) = nSegs;

                %The next object starts right after this one within a chunk
                chunkByteOffset                  = chunkByteOffset + rawDataInfo(curIndex).totalSizeBytes;
                numberDataPoints(curIndex)       = numberDataPoints(curIndex) + nSamplesReadCurObject*nChunks;
                rawDataInfo(curIndex).chunkIndex = rawDataInfo(curIndex).chunkIndex+nChunks;
            end
        end
        
        %nChunksAll = nChunksAll + nChunks;
        segInfo(nSegs).nChunks = nChunks;
        
        %This needs to be handled in some manner for RawDaqMx
        %Skip over the raw data, this is not done for the index file
        if ~isIndexFID
            fseek(fid,byteSizeRaw,'cof');
        end
    end
    
    %Addition for raw data (needs to be fixed)
    %In general, the tdms_index requires skipping some meta data
    %I think this is only needed for raw data (I need to flush this out
    %better as it will cause an error when reading index files for regular
    %tdms files)
%     if isIndexFID
%         fseek(fid,segInfo(nSegs).rawPos,'bof');
%     end
end





%Trim output:
%==========================================================================
rawDataInfo         = rawDataInfo(1:curNumTotalObjects);
for iObject = 1:length(rawDataInfo)
    nProps = rawDataInfo(iObject).numberProperties;
    rawDataInfo(iObject).propNames  = rawDataInfo(iObject).propNames(1:nProps);
    rawDataInfo(iObject).propValues = rawDataInfo(iObject).propValues(1:nProps);
    
    %Add extra value for later processing
    %The extra row holds (total # of samples + 1) in column 2
    if rawDataInfo(iObject).chunkIndex ~= 0
        rawDataInfo(iObject).chunkIndex = rawDataInfo(iObject).chunkIndex + 1;
        rawDataInfo(iObject).dataMatrix(rawDataInfo(iObject).chunkIndex,2) = numberDataPoints(iObject) + 1;
    end
end

numberDataPoints    = numberDataPoints(1:curNumTotalObjects);
objectNameList     = objectNameList(1:curNumTotalObjects);
segInfo             = segInfo(1:nSegs);

%NOTE: objectNameList is wrapped in a cell, otherwise struct() would
%create a structure array with one element per name
metaStruct = struct(...
    'eof_error',        eof_error,...
    'numberDataPoints', numberDataPoints,...
    'objectNameList',   {objectNameList},...
    'segInfo',          segInfo,...
    'rawDataInfo',      rawDataInfo,...
    'fileName',         tdmsNameOnly,...
    'version',          CURRENT_VERSION);

%NOTE: If the index file was used to parse the meta data, it is closed by
%the onCleanup object created when it was opened


end

function segStruct = initSegStruct(nSegs)
%initSegStruct  Preallocates the per segment info structure array
%
%   segStruct = initSegStruct(nSegs)
%
%   INPUTS
%   =======================================================================
%   nSegs : # of segments to allocate
%
%   OUTPUTS
%   =======================================================================
%   segStruct : (structure array, [1 x nSegs]), every element holds the
%               default values given below

%Defaults for a single segment
oneSegment = struct(...
    'rawPos',           0,...
    'kTocNewObjList',   0,...
    'objOrder',         [],...
    'nRawObjects',      0,...
    'nSamplesRead',     [],...
    'isInterleaved',    false,...
    'isBigEndian',      false,...
    'nChunks',          false);

%Replicate the defaults, one copy per segment
segStruct = repmat(oneSegment,[1 nSegs]);

end

function rawInfoStruct = initRawInfoStruct(MAX_NUM_PROPS,MAX_NUM_OBJECTS)
%initRawInfoStruct  Preallocates the per object info structure array
%
%   rawInfoStruct = initRawInfoStruct(MAX_NUM_PROPS,MAX_NUM_OBJECTS)
%
%   INPUTS
%   =======================================================================
%   MAX_NUM_PROPS   : # of property slots to allocate for each object
%   MAX_NUM_OBJECTS : # of objects to allocate
%
%   OUTPUTS
%   =======================================================================
%   rawInfoStruct : (structure array, [1 x MAX_NUM_OBJECTS]), every element
%                   holds the default values given below

%Empty property slots, used for both the names and the values
emptyPropSlots = cell(1,MAX_NUM_PROPS);

%Defaults for a single object
%NOTE: The cells are wrapped in {} so that struct() stores the cell itself
%instead of creating one element per cell entry
oneObject = struct( ...
    'isRawDAQmx',       false, ...
    'lengthOfIndex',    0,...
    'dataType',         0,...
    'dimensionData',    0,...
    'numberOfValues',   0,...
    'totalSizeBytes',   0,...
    'numberProperties', 0,...
    'chunkIndex',       0,...
    'chunkLength',      0,... %Starts at zero, this saves space with
    ...                       %non-raw data objects
    'dataMatrix',       zeros(0,3),... %Columns: position #, 1st sample #, seg #
    'infoSet',          false, ...
    'propNames',        {emptyPropSlots},...
    'propValues',       {emptyPropSlots});

%Replicate the defaults, one copy per object
rawInfoStruct = repmat(oneObject,1,MAX_NUM_OBJECTS);
end

Contact us