Code covered by the BSD License  

Highlights from
ISO 8601 Date String to Serial Date Number

image thumbnail
from ISO 8601 Date String to Serial Date Number by Stephen Cobeldick
Convert an ISO 8601 Date String to a Serial Date Number. Auto-detect or select timestamp style.

datenum8601(str,tok)
function [dtn,spl,tok] = datenum8601(str,tok)
% Convert an ISO 8601 formatted Date String (timestamp) to a Serial Date Number.
%
% (c) 2013 Stephen Cobeldick
%
% ### Function ###
%
% Syntax:
%  DateNum = datenum8601(String)
%  DateNum = datenum8601(String,Token)
%  [DateNum,Split,Token] = datenum8601(...)
%
% By default the function automatically detects any ISO 8601 timestamp
% within the string, or an optional token can be used to restrict the
% timestamp style recognition. The ISO 8601 timestamp style options are:
%
% - Date in ordinal, calendar or week-numbering notation.
% - Basic or Extended format (without/with unit separation characters).
% - Any date-time separator character (with a few exceptions).
% - Full or lower precision (fewer trailing date/time units).
% - Decimal fraction of the trailing unit (decimal places).
%
% These style options are explained in the tables below (see "Timestamps").
%
% The function returns a Serial Date Number, the input string parts
% that are split by the detected timestamp (i.e. before & after the
% timestamp) and the string token showing the detected timestamp style.
%
% Note 1: Undefined month/date/week/day input string values default to one.
% Note 2: Undefined hour/minute/second input string values default to zero.
% Note 3: Auto-detection mode also accepts a mix of basic/extended formats.
% Note 4: Calls undocumented MATLAB functions "datenummx" & "ismembc".
%
% See also DATESTR8601 CLOCK NOW DATENUM DATEVEC DATESTR DATEROUND
%
% ### Examples ###
%
% Examples use the date+time described by the vector [1999,1,3,15,6,48.0568].
%
% datenum8601(datestr8601([1999,1,3,15,6,48.0568],'ymdHMS4'))
%  ans = 730123.62972287962
%
% datenum8601('1999-01-03 15:06:48.0568')
%  ans = 730123.62972287962
%
% datenum8601('1999003T150648.0568')
%  ans = 730123.62972287962
%
% [dtn,spl,tok] = datenum8601('AA1998W537_150648.0568ZZ')
%  dtn = 730123.62972287962
%  spl = {'AA','ZZ'}
%  tok = 'YWD_HMS4'
%
% [dtn,spl,tok] = datenum8601('1999-003T15')
%  dtn = 730123.6250
%  spl = {'',''}
%  tok = '*ynH'
%
% [dtn,spl,tok] = datenum8601('1999-01-03T15','*ymd')
%  dtn = 730123.0000
%  spl = {'','T15'}
%  tok = '*ymd'
%
% ### ISO 8601 Timestamps ###
%
% Input   | Basic Format             | Extended Format (token prefix '*')
% Date    | In/Out | Input Timestamp | In/Out  | Input Timestamp
% Notation| Token: | Example:        | Token:  | Example:
% --------|--------|-----------------|---------|---------------------------
% Ordinal |'ynHMS' |'1999003T150648' |'*ynHMS' |'1999-003T15:06:48'
% --------|--------|-----------------|---------|---------------------------
% Calendar|'ymdHMS'|'19990103T150648'|'*ymdHMS'|'1999-01-03T15:06:48'
% --------|--------|-----------------|---------|---------------------------
% Week    |'YWDHMS'|'1998W537T150648'|'*YWDHMS'|'1998-W53-7T15:06:48'
% --------|--------|-----------------|---------|---------------------------
%
% Timestamp may omit trailing units (reduced precision), eg:     | Output->Vector:
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'Y'     |'1999W'          |'*Y'     |'1999-W'         |[1999,1,4,0,0,0]
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'ymdH'  |'19990103T15'    |'*ymdH'  |'1999-01-03T15'  |[1999,1,3,15,0,0]
% --------|--------|-----------------|---------|-----------------|---------------------
% Date-time separator character can be specified (default='T'), eg:
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'yn_HM' |'1999003_1506'   |'*yn_HM' |'1999-003_15:06' |[1999,1,3,15,6,0]
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'YWD@H' |'1998W537@15'    |'*YWD@H' |'1998-W53-7@15'  |[1999,1,3,15,0,0]
% --------|--------|-----------------|---------|-----------------|---------------------
% Trailing date/time value can have decimal digits (fraction), eg:
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'ynH3'  |'1999003T15.113' |'*ynH3'  |'1999-003T15.113'|[1999,1,3,15,6,46.80]
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'YWD4'  |'1998W537.6297'  |'*YWD4'  |'1998-W53-7.6297'|[1999,1,3,15,6,46.08]
% --------|--------|-----------------|---------|-----------------|---------------------
%         |'y10'   |'1999.0072047202'|'*y10'   |'1999.0072047202'|[1999,1,3,15,6,48.06]
% --------|--------|-----------------|---------|-----------------|---------------------
%
% Note 5: Function does not check for ISO 8601 compliance: user beware!
% Note 6: Date-time separator must not be any of [+-./0123456789:DFHMPRSWYZdmny].
%
% ### Inputs & Outputs ###
%
% Inputs:
%  String = String, possibly containing an ISO 8601 timestamp (date & time).
%  Token  = String token to select the date notation & format (default=any).
%
% Outputs:
%  DateNum = Numeric Scalar, the input ISO 8601 timestamp as a Serial Date Number.
%  Split   = CellOfStrings, the strings before and after the detected timestamp.
%  Token   = String Token (see tables), the detected date notation & format.
%
% If no timestamp is found then DateNum is [] and Token is ''.
%
% Inputs  = (String,Token*)
% Outputs = [DateNum,Split,Token]

% Define "regexp" match string (thirteen capture groups in either mode):
if nargin>1 % User requests a specific timestamp style.
    [mtc,typ] = d8601Usr(tok);
else % Auto-detect the timestamp, with any date-time separator.
    typ = 0; % (notation is determined below from the matched tokens)
    mtc = [...
        '(\d{4})',... % year
        '((-(?=(\d{2,3}|W)))?)',... % -
        '(W?)',...    % W
        '(?(3)(\d{2})?|(\d{2}$|\d{2}(?=(\D|\d{2})))?)',... % week/month
        '(?(4)(-(?=(?(3)\d|\d{2})))?)',... % -
        '(?(4)(?(3)\d|\d{2})?|(\d{3})?)',... % day of week/month/year
        '(?(6)([^\+\-\./0123456789:DFHMPRSWYZdmny](?=\d{2}))?)',... % separator
        '(?(7)(\d{2})?)',...  % hour
        '(?(8)(:(?=\d{2}))?)',...  % :
        '(?(8)(\d{2})?)',...  % minute
        '(?(10)(:(?=\d{2}))?)',... % :
        '(?(10)(\d{2})?)',... % second
        '((\.\d+)?)'];   % decimal fraction
    % (allows any combination of basic/extended formats)
end
%
assert(ischar(str)&&size(str,1)<2,'Input "str" must be a string')
%
% Extract timestamp tokens, return split strings:
[tkn,spl] = regexp(str,mtc,'tokens','split','once');
%
% Timestamp not found in str:
if isempty(tkn)
    tok = '';
    dtn = [];
    return
end
%
% Lengths of matched tokens (zero = that unit/separator is absent):
len = cellfun('length',tkn);
%
% Preallocate Date Vector (month & day default to one, time to zero):
dtv = [0,1,1,0,0,0];
% Convert date & time values to numeric. The token positions in "idx" are
% year, week/month, day, hour, minute, second.
idx = [1,4,6,8,10,12];
for m = find(len(idx))
    dtv(m) = sscanf(tkn{idx(m)},'%f');
end
% Note: "m" is deliberately used after the loop, where it holds the index
% of the last (trailing) unit present in the timestamp. The year group is
% mandatory in every match expression, so the loop runs at least once.
%
% Create token of 8601 timestamp (see also "datestr8601"):
if typ==0
    % 1=week, 2=calendar, 3=ordinal:
    typ = 2-len(3)+(len(6)==3); % (must define before fraction & week parsing)
    % Extended-format prefix. This must be an empty string (not char(0))
    % for basic format, otherwise the token gets an invisible NUL prefix.
    if any(len([2,5,9,11])==1)
        Ext = '*';
    else
        Ext = '';
    end
    Sep = tkn{7};
    % The '*' inside 'y*n' is only a placeholder for the (always absent)
    % month position, so that the logical index lines up with "len".
    if strcmp('T',Sep) % Default date-time separator
        tkc = {'YWDHMS','ymdHMS','y*nHMS'};
        tok = [Ext,tkc{typ}(0<len([1,4,6,8,10,12]))];
    else % Custom date-time separator
        tkc = {['YWD',Sep,'HMS'],['ymd',Sep,'HMS'],['y*n',Sep,'HMS']};
        tok = [Ext,tkc{typ}(0<len([1,4,6,7,8,10,12]))];
    end
    % Number of decimal digits (token 13 includes the leading '.'):
    Dgt = sprintf('%.0f',len(13)-1);
else
    % The user's token already states the number of decimal digits.
    Dgt = '';
end
%
% Convert decimal fraction value (applies to the trailing unit "m"):
if 1<len(13)
    if typ==2&&m==2 % Month (special case not converted by "datenummx"):
        dtv(3) = 1+sscanf(tkn{13},'%f')*(datenummx(dtv+[0,1,0,0,0,0])-datenummx(dtv));
    else % All other date or time values (are converted by "datenummx"):
        dtv(m) = dtv(m)+sscanf(tkn{13},'%f');
    end
    tok = [tok,Dgt];
end
%
if typ==1 % Week-numbering vector to ordinal vector:
    dtv(3) = dtv(3)+7*dtv(2)-4-mod(datenummx([dtv(1),1,1]),7);
    dtv(2) = 1;
end
%
% Convert out-of-range-date-vector to Serial Date Number: Do not remove this!
dtn = datenummx(dtv);
%
% Month zero (special case not converted by "datenummx"):
if 0==dtv(2)
    dtn = dtn-31; % Faster.
    %dtn = addtodate(dtn,-1,'month'); % Adds rounding error.
end
%
end
%--------------------------------------------------------------------------
function [mtc,typ] = d8601Usr(tok)
% Build the "regexp" match expression for a user-supplied timestamp token.
%
% Input:
%  tok = char row vector, the timestamp token, e.g. 'ymdHMS', '*ynH3' or
%        'YWD_HM'. An optional leading '*' selects the extended format,
%        trailing digits give the number of decimal fraction digits, and
%        one optional non-timestamp character immediately before 'H' is
%        the date-time separator (default 'T').
%
% Outputs:
%  mtc = char row vector, regular expression with thirteen capture groups:
%        year, '-', 'W', week/month, '-', day, separator, hour, ':',
%        minute, ':', second, decimal fraction. Groups for units that the
%        token does not request are empty '()'.
%  typ = numeric scalar, date notation: 1=week, 2=calendar, 3=ordinal.
%
% Throws an error if the token is not a char row vector, contains no
% date/time unit characters, has more than one separator character, has
% the separator anywhere other than immediately before 'H', or does not
% match the start of 'YWDHMS', 'ymdHMS' or 'ynHMS'.
%
assert(ischar(tok)&&isrow(tok),'Input "tok" must be a string')
%
% Check if extended or basic, check if decimal fraction:
Ext = strncmp('*',tok,1);
DcP = find(~isstrprop(tok,'digit'),1,'last'); % last non-digit character
Dgt = tok(DcP+1:end); % trailing digits = number of decimal places
tok = tok(1+Ext:DcP); % strip the '*' prefix and the trailing digits
% Identify date-time separator and start of timestamp:
IsT = ismembc(tok,'+-./0123456789:DFHMPRSWYZdmny'); % (presorted)
tkl = sum(IsT); % number of date/time unit characters
% A token without any unit characters would otherwise fail further down
% with an unhelpful indexing error.
if tkl<1
    error('Input token is not recognized.')
end
% The appended "true" makes an unmatched token give typ==4 (-> error below):
typ = find([strncmp(tok(IsT),{'YWDHMS','ymdHMS','ynHMS'},tkl),true],1,'first');
switch sum(~IsT)
    case 0 % Standard 'T' separator.
        Sep = '(T)';
    case 1 % User supplied separator.
        % The character following the separator must be 'H':
        nxt = strcmp('H',tok([false,~IsT(1:end-1)]));
        assert(nxt,'Input token date-time separator position incorrect.')
        % Escape the separator so that characters which are special in a
        % regular expression (e.g. '?', '|', '(', '$') match literally.
        Sep = ['(',regexptranslate('escape',tok(~IsT)),')'];
    otherwise
        error('Input token is not recognized: too many separator chars.')
end
%
% Year and time tokens:
mtc([1,7,8,10,12,13]) = {'(\d{4})',Sep,'(\d{2})',... % year, separator, hour
    '(\d{2})','(\d{2})',['(\.\d{',Dgt,'})']}; % minute, second, decimal fraction
% Format tokens (extended/basic):
if Ext
    mtc([2,5,9,11]) = {'(-)','(-)','(:)','(:)'};
else
    mtc([2,5,9,11]) = {'()','()','()','()'};
end
%
% Date tokens. "idz(k)" is the first group to blank out when the token
% contains only k unit characters:
switch typ
    case 1 % Week
        idz = [2,5,7,9,11,13,14];
        mtc([3,4,6]) = {'(W)','(\d{2})','(\d{1})'};
    case 2 % Calendar
        idz = [2,5,7,9,11,13,14];
        mtc([3,4,6]) = {'()', '(\d{2})','(\d{2})'};
    case 3 % Ordinal
        idz = [2,7,9,11,13,14];
        mtc([3,4,5,6]) = {'()','()','()','(\d{3})'};
    otherwise
        error('Input token is not recognized.')
end
%
% Blank the groups of the omitted trailing units (and the fraction group
% when no decimal digits were requested), then concatenate the groups
% into the "regexp" match expression:
mtc(idz(tkl):12+isempty(Dgt)) = {'()'};
mtc = [mtc{:}];
%
end
%----------------------------------------------------------------------End!

Contact us