function [result, ext] = matml(varargin)
% [result, ext] = matml('file', file, ...)
%
% Read a MatML file to a matlab variable, and optionally
% return file information.
% ________________________________
%
% [result, ext] = matml('text', text, ...)
%
% Read a MatML text to a matlab variable.
% ________________________________
%
% [result, ext] = matml('data', data, ...)
%
% Convert a variable into MatML text.
% ________________________________
%
% [result, ext] = matml('data', data, 'file', file, ...)
%
% Convert a variable into MatML and write it to a file.
% ________________________________
%
% Any form can take one additional named argument, 'ext',
% which should be a structure with any of the fields listed
% below. Fields that are not supplied keep their default
% values. The ext object is always returned, possibly
% modified, at the output.
%
% 'precision'
% output precision (significant figures)
%
% 'root'
% tag name for root element ("MatMLDocument" by
% default)
%
% 'binaryfile'
% if present, the document can be written in
% unencapsulated form (with links out to additional binary
% files). binary files, if generated, will be named as
% <binaryfile>.####. these are quicker to read and write.
%
% 'binaryindex'
% if present, is the first index used in constructing
% binary file filenames. if absent, numbering starts at
% one.
%
% 'attr',
% structure of additional attributes to add to the root
% node.
% ________________________________
%
% test = matml
%
% Return a suitably convoluted variable for testing.
version = 7;
% Author: Ben Mitch
% URL: http://tinyurl.com/6m6qjy
% Modified: 20/12/2010
%
% Version: 7
%
% * removed GPL licensed component, so that BSD license now
% applies to all source code in this release
%
% Version: 6
%
% * fixed bug in safe/unsafe implementation
%
% Version: 5
%
% * added root node attributes
% * added encapsulation
%
% Version: 4
%
% * updated usage options to allow read/write from a text
% string rather than a file (breaks old interface)
%
% Version: 3
%
% * corrected help formatting
% * fixed incorrect Version in write
% * changed URL to tinyurl
% * specified encoding in xml header
%
% Version: 2
%
% * reviewed m-lint warnings and fixed some
% * correctly handle trailing singletons in all cases
% * improved code for reconstructing structures
% * changed parser to handle comments correctly
% * deprecated missing sz [0 0] attribute (too weird)
% * added basic XML header
% * added "written by" comment
%
% Version: 1
% usage
if nargout == 0 && nargin == 0
    % invoke help
    help matml
elseif nargout == 1 && nargin == 0
    % return a test variable
    result = [];
    result.a = {1 2 3 'the quick brown fox'};
    result.b = {result result [] {} 'jumped over'};
    result.aLongerName = result;
    result.with_us = result;
    result.nonscalar = [];
    result.nonscalar(1,2,3).f1 = result.b;
    result.simpletext = 'the lazy dog';
    result.cpx = 1 + 9i;
    result.matrix = rand(1,2,3,4);
    result.nonscalarcell = {1 2 3; 4 'foo' {'bar'}};
else
    % default result is empty
    result = [];
    % otherwise, expect key/value pairs
    if mod(nargin, 2)
        error('expects an even number of arguments')
    end
    % defaults
    file = [];
    data = [];
    text = [];
    % remember whether 'data' was passed at all, so that an empty
    % variable ([] or {}) can still be serialised
    hasdata = false;
    % extended object
    ext = [];
    ext.precision = 60;
    ext.root = 'MatMLDocument';
    ext.binaryfile = [];
    ext.binaryindex = 1;
    ext.attr = struct();
    % parse key/value pairs
    for n = 1:2:nargin
        key = varargin{n};
        val = varargin{n+1};
        switch key
            case 'file'
                file = val;
            case 'data'
                data = val;
                hasdata = true;
            case 'text'
                text = val;
            case 'ext'
                % merge the supplied fields over the defaults rather than
                % replacing the whole structure, so that a partial ext
                % (as the help text allows) does not lose required fields
                if ~isempty(val)
                    if ~isstruct(val)
                        error('"ext" must be a structure');
                    end
                    extfields = fieldnames(val);
                    for k = 1:length(extfields)
                        ext.(extfields{k}) = val.(extfields{k});
                    end
                end
            otherwise
                error(['unrecognised key "' key '"'])
        end
    end
    % was data supplied?
    if hasdata
        % attributes
        attr = '';
        if ~isempty(ext.attr)
            f = fieldnames(ext.attr);
            for n = 1:length(f)
                key = f{n};
                if length(key) < 2
                    error('additional root attributes must have keys of at least two characters (one character keys are reserved for MatML)');
                end
                val = ext.attr.(key);
                if isnumeric(val)
                    val = num2str(val);
                end
                attr = [attr ' ' key '="' val '"'];
            end
        end
        attr = [attr ' Version="' int2str(version) '"'];
        attr = [attr ' Created="' datestr(now) '"'];
        if ext.precision ~= 60
            attr = [attr ' Precision="' num2str(ext.precision) '"'];
        end
        % root tag
        [xml, ext] = matml_write(ext.root, data, '', attr, ext);
        % comment
        comment = [ ...
            '<!-- ' ...
            'written by matml.m (http://tinyurl.com/6m6qjy)' ...
            ' -->' ...
            ];
        % write file
        if ~isempty(file)
            fid = fopen(file, 'w');
            if fid == -1
                error(['failed open "' file '"']);
            end
            % in fact, we write only US-ASCII characters (in the range
            % 32-127), but this is a subset of UTF-8, and i'm not 100%
            % how to indicate US-ASCII encoding, so...
            fwrite(fid, '<?xml version="1.0" encoding="UTF-8"?>');
            fwrite(fid, comment);
            fwrite(fid, xml);
            fclose(fid);
        else
            % return result
            result = xml;
        end
    else
        % read from file
        if ~isempty(file)
            text = matml_file2array(file);
            if isempty(text)
                error('specified XML file not valid (no content)');
            end
        end
        % read from text
        if ~isempty(text)
            xml = matml_xmlread(text);
            result = matml_matmlparse(xml);
        end
        % info
        % NOTE(review): "info" is assembled here but is not returned
        % through either output - confirm whether it was meant to be
        % attached to ext.
        if ~isempty(file)
            d = dir(file);
            w = whos('result');
            info.version = str2double(xml.attr.Version);
            info.created = datenum(xml.attr.Created);
            info.bytes.matlab = w.bytes;
            info.bytes.matml = d.bytes;
        end
    end
end
function [xml, writer] = matml_write(tagName, arr, path, attr, writer)
% Serialise one variable as a MatML element, recursing into cell and
% struct contents.
%
% tagName  name of the element to emit
% arr      the variable to serialise
% path     slash-separated path of the enclosing elements (used only
%          in error messages)
% attr     extra attribute text (already formatted, with a leading
%          space) to place in the opening tag
% writer   the ext structure; fields precision, binaryfile and
%          binaryindex are read, and binaryindex is advanced each time
%          a binary side file is written
%
% xml      the generated element text
% writer   the (possibly updated) ext structure
sz = size(arr);
% most MatML fields are standardised arrays, but we allow a
% special case for simple strings for readability and brevity,
% that is just a tag with no attributes and the string as content.
if ischar(arr) && length(sz) == 2 && sz(1) == 1 && all(arr >= 32) && all(arr <= 127)
    xml = ['<' tagName attr '>' matml_xmlsafe(arr) '</' tagName '>'];
    return
end
% calculate more stuff
numels = prod(sz);
while length(sz) > 1 && sz(end) == 1
    sz = sz(1:end-1);
end
% in MatML, scalar can have missing sz attribute
if numels == 1
    sz = '';
else
    sz = [' b="' matml_numtostr(sz, []) '"'];
end
% path handed to child nodes: accumulate the full ancestry so that
% the "cannot handle class" error below names the whole route to the
% offending node, not just its immediate parent
childpath = [path tagName '/'];
% given the note above and the note below, it's worth summarising
% the protocol for identifying the type of a matml node. the matml
% attribute can only be left out in two cases, a non-empty string
% or a structure element, therefore on encountering a node we can
% assess its type as follows:
%
% 1) if explicitly specified using c="type"
% 2) otherwise, if it has text, c="char" and b="1 length(text)"
% 3) otherwise, c="struct" (a struct element can never have text)
cls = class(arr);
switch cls
    case 'cell'
        xml = ['<' tagName attr ' c="y"' sz '>'];
        for n = 1:numels
            [subxml, writer] = matml_write('m', arr{n}, childpath, '', writer);
            xml = [xml subxml];
        end
        xml = [xml '</' tagName '>'];
    case 'struct'
        % field names go in the "a" attribute, each followed by ';'
        fn = fieldnames(arr);
        fns = ' a="';
        for f = 1:length(fn)
            fns = [fns fn{f} ';'];
        end
        fns = [fns '"'];
        xml = ['<' tagName attr ' c="z"' fns sz '>'];
        % children are emitted element by element, fields in order
        for n = 1:numels
            for f = 1:length(fn)
                [subxml, writer] = matml_write('m', arr(n).(fn{f}), childpath, '', writer);
                xml = [xml subxml];
            end
        end
        xml = [xml '</' tagName '>'];
    case {'char' 'single' 'double' 'int8' 'int16' 'int32' 'int64' 'uint8' 'uint16' 'uint32' 'uint64' 'logical'}
        % one-letter class codes
        switch cls
            case 'single', cls = 'f';
            case 'double', cls = 'd';
            case 'uint64', cls = 'v';
            case 'uint32', cls = 'u';
            case 'uint16', cls = 't';
            case 'uint8', cls = 's';
            case 'int64', cls = 'p';
            case 'int32', cls = 'o';
            case 'int16', cls = 'n';
            case 'int8', cls = 'm';
            case 'logical', cls = 'l';
            case 'char', cls = 'c';
            otherwise
                error(['uncoded MatML class "' cls '"']);
        end
        % in MatML, no cpx attribute means it's real
        if ~isreal(arr)
            cls = [cls 'x'];
        end
        if numels <= 1000 || isempty(writer.binaryfile)
            xml = ['<' tagName attr ' c="' cls '"' sz '>'];
            xml = [xml matml_numtostr(arr, writer.precision)];
            xml = [xml '</' tagName '>'];
        else
            % use binary files only if array is very large
            filename = [writer.binaryfile '.' sprintf('%04i', writer.binaryindex)];
            writer.binaryindex = writer.binaryindex + 1;
            d = size(arr);
            while length(d) && d(end) == 1
                d = d(1:end-1);
            end
            matml_numtofile(filename, arr, length(d));
            filename = strrep(filename, filesep, '/');
            xml = ['<' tagName attr ' c="' cls '"' sz ' s="b">'];
            xml = [xml matml_xmlsafe(filename)];
            xml = [xml '</' tagName '>'];
        end
    case 'function_handle'
        % the function text may contain <, > or & (e.g. @(x)x<3), and the
        % reader unescapes element text, so it must be escaped here
        xml = ['<' tagName attr ' c="e">'];
        xml = [xml matml_xmlsafe(func2str(arr))];
        xml = [xml '</' tagName '>'];
    otherwise
        error(['MatML cannot handle class "' class(arr) '" in node (' path tagName ')'])
end
function s = matml_numtostr(n, precision)
% Format a numeric array as space-separated text, in linear-index
% order, with no trailing space.
%
% n          array to format; empty input gives ''
% precision  significant figures for floating-point values; if empty,
%            20 is used. ignored for non-floating-point classes, which
%            are printed with %d.
%
% complex input is written as all real parts followed by all
% imaginary parts.
if isempty(n)
    s = '';
    return
end
if ~isreal(n)
    % real block, then imaginary block
    realtext = matml_numtostr(real(n), precision);
    imagtext = matml_numtostr(imag(n), precision);
    s = [realtext ' ' imagtext];
    return
end
% choose the per-element format
if ~isfloat(n)
    % integers are written at maximum precision
    fmt = '%d ';
elseif isempty(precision)
    fmt = '%.20g ';
else
    fmt = ['%.' int2str(precision) 'g '];
end
s = sprintf(fmt, n);
% drop the separator that follows the last element
s = s(1:end-1);
function matml_numtofile(filename, data, ndims)
%
% matml_numtofile(filename, data, ndims)
%
% save the passed data into a binary file named
% as specified. the data must be numeric, but
% may be multi-dimensional and of any numeric type.
% elements are written in matlab (column-major) order
% using the data's own class as the element type, with
% the exception that real and complex
% data are interleaved with granularity of the last
% dimension. for instance, a 3x2xN complex uint32
% matrix will be stored as
%
% real(1,1,1)
% real(2,1,1)
% ...
% real(3,2,1)
% imag(1,1,1)
% imag(2,1,1)
% ...
% imag(3,2,1)
% real(1,1,2)
% ...
%
% where all entries are a 4-byte uint32 element.
%
% by default, trailing singleton dimensions are
% removed. to change this behaviour, set ndims to
% the number of dimensions (the size is then padded
% with ones up to that many dimensions).
%
% raises an error if the file cannot be opened. the file
% is closed again even if a write fails.
%
fid = fopen(filename, 'w');
if fid == -1
    error(['could not open "' filename '"'])
end
try
    if isreal(data)
        % we can just output the whole lot as it comes
        fwrite(fid, data, class(data));
    else
        % work out the slab size (all dimensions but the last) and the
        % number of slabs (the last dimension)
        sz = size(data);
        while length(sz) > 1 && sz(end) == 1
            sz = sz(1:end-1);
        end
        if nargin >= 3
            while length(sz) < ndims
                sz = [sz 1];
            end
        end
        N = sz(end);
        C = prod(sz(1:end-1));
        re = real(data);
        im = imag(data);
        cls = class(re);
        for n = 1:N
            ed = C * n;
            st = ed - C + 1;
            fwrite(fid, re(st:ed), cls);
            fwrite(fid, im(st:ed), cls);
        end
    end
catch err
    % do not leak the file handle if a write fails
    fclose(fid);
    rethrow(err);
end
fclose(fid);
%% taken from systemml 'sml_xml'
%
% WARNING: THIS IS NOT A USER FUNCTION - ITS INTERFACE OR
% OPERATION MAY CHANGE IN FUTURE RELEASES.
%
% xml = private_sml_xml(filename, xml)
% read or write a generic XML structure into or from the
% specified file. the XML tree's format is that every node is:
%
% xml.name = '<tag-name>'
% xml.attr = struct('key',val,...) [optional]
% xml.value = <any-matlab-variable> [optional]
% xml.children = {<child-node-1>,...} [optional]
%
% where exactly one of "value" and "children" must be
% present in all nodes.
function xml = matml_xmlread(text)
% Parse XML text into a nested node structure.
%
% text  the document as a character row vector
% xml   the root node, as returned by matml_xmlparse
%
% a leading "<?xml ... ?>" declaration, if present, is stripped
% before the rest of the text is tokenised and parsed.

% peel off first tag as special case. strncmp is used (rather than
% indexing text(1:5)) so that text shorter than five characters is
% passed on to the tokeniser instead of raising an index error here
if strncmp(text, '<?xml', 5)
    f = strfind(text, '?>');
    if isempty(f)
        error('malformed XML declaration');
    end
    text = text(f(1)+2:end);
end
xml = matml_xmlparts(text);
xml = matml_xmlparse(xml);
function parts = matml_xmlparts(xml)
% Tokenise XML text into a column struct array of parts.
%
% xml    the document text (any "<?xml ?>" declaration already removed
%        by the caller)
% parts  struct array with fields:
%          type  0 = text, 1 = opening tag, 2 = closing tag, 4 = comment
%          data  the text, or the tag name, or the comment body
%          attr  struct of attributes (empty struct except for opening
%                tags that carry attributes)
%
% attribute values are stored exactly as they appear between the
% double quotes; no entity decoding is applied to them here.
%
% convert xml text into data,tag,data,tag,etc.
% positions of every '<' and '>' in the text
f = find(xml == '<');
g = find(xml == '>');
% ch is the index of the next unconsumed character, tag the index of
% the next tag to process
ch = 1;
tag = 1;
ntags = length(f);
if length(g)~=ntags
error('mismatch in tags');
end
% interleave the positions as < > < > ... and require them to be
% strictly increasing, i.e. every '<' is closed before the next opens
if min(diff(reshape([f; g],ntags*2,1))) < 1
error('mismatch in tags');
end
emptystruct = struct();
% template part, also used to seed the output array
part = [];
part.type = 0;
part.data = '';
part.attr = emptystruct;
parts = part;
% partscount is the number of parts in use; partsreserved is the
% allocated length, which is doubled whenever it fills up
partscount = 0;
partsreserved = 1;
while tag<=ntags
if ch ~= f(tag)
% there is text between the previous tag and this one
% add data
part.type = 0;
part.data = xml(ch:f(tag)-1);
part.attr = emptystruct;
% expand storage
if partscount == partsreserved
partsreserved = partsreserved * 2;
parts(partsreserved,1) = part;
end
partscount = partscount + 1;
parts(partscount,1) = part;
ch = f(tag);
end
% add tag
% everything between (but not including) the angle brackets
wholetag = xml(f(tag)+1:g(tag)-1);
% CLOSE TAGS
if wholetag(1) == '/'
part.type = 2; % close tag
part.data = wholetag(2:end);
part.attr = emptystruct;
% COMMENT TAGS
% (anything starting with '!' is treated as a comment; the first
% three and last three characters of the tag body are dropped)
elseif wholetag(1) == '!'
part.type = 4; % comment
part.data = wholetag(4:end-3);
part.attr = emptystruct;
% MUST BE OPEN TAGS
else
% check for attributes
attr = emptystruct;
% the tag name is the leading run of letters, digits and underscores
ucase = wholetag >= 65 & wholetag <= 90;
lcase = wholetag >= 97 & wholetag <= 122;
numb = wholetag >= 48 & wholetag <= 57;
endstagname = ~lcase & ~ucase & ~numb & wholetag ~= '_';
part.type = 1; % open tag
if any(endstagname)
e = find(endstagname);
e = e(1);
% the first character after the name must be a space
if wholetag(e) ~= 32
error(['malformed tag (no space after tag name) "' wholetag '"']);
end
attrs = wholetag(e+1:end);
wholetag = wholetag(1:e-1);
% interpret attrs
% each pass consumes one key="value" pair from the front of attrs
while true
e = find(attrs == '=');
if isempty(e)
error('invalid XML (no =)');
end
e = e(1);
if attrs(e+1) ~= '"'
error('invalid XML (not ")');
end
key = attrs(1:e-1);
% skip the '=' and the opening quote
attrs = attrs(e+2:end);
e = find(attrs == '"');
if isempty(e)
error('invalid XML (no ")');
end
e = e(1);
val = attrs(1:e-1);
attr.(key) = val;
% if strcmp(key,'class') part.type = 3; end % matlab data
% stop once the closing quote is the last character
if e == length(attrs)
break
end
% skip the closing quote and the single character that follows it
attrs = attrs(e+2:end);
end
end
part.data = wholetag;
part.attr = attr;
end
% expand storage
if partscount == partsreserved
partsreserved = partsreserved * 2;
parts(partsreserved,1) = part;
end
partscount = partscount + 1;
parts(partscount,1) = part;
ch = g(tag)+1;
tag = tag + 1;
end
% any text left over after the final tag becomes a last text part
if ch <= length(xml)
part = [];
part.type = 0;
part.data = xml(ch:end);
part.attr = struct();
partscount = partscount + 1;
parts(partscount,1) = part;
end
% trim the unused reserved entries
parts = parts(1:partscount);
function [xml, nextpart] = matml_xmlparse(parts, nextpart, parent)
% Build a nested node structure from the token list, recursively.
%
% parts     struct array of tokens with fields type, data and attr
%           (type 0 = text, 1 = opening tag, 2 = closing tag,
%           4 = comment)
% nextpart  index of the next token to consume (defaults to 1)
% parent    the opening-tag token whose content is being parsed, or
%           [] when parsing at document level (the default)
%
% xml       node with fields name, attr, value and children. at
%           document level the single root node is returned.
% nextpart  index of the first token not consumed by this call
%
% errors are raised for mixed content (text and children in the same
% element), mismatched or unmatched closing tags, a missing closing
% tag, text outside the root element, and a document that does not
% have exactly one root element.
if nargin < 2
    nextpart = 1;
end
if nargin < 3
    parent = [];
end
% only simple (text or children, but not both) XML
% tags are supported in this format, so we store them
% separately
xml = [];
xml.name = '';
xml.attr = struct();
xml.value = '';
xml.children = {};
while nextpart <= length(parts)
    part = parts(nextpart);
    nextpart = nextpart + 1;
    switch part.type
        case 0
            % 0 = text
            if ~isempty(xml.children)
                % ignore whitespace if children are present, but use it otherwise
                f = find(part.data ~= 9 & part.data ~= 32 & part.data ~= 10 & part.data ~= 13);
                if isempty(f)
                    continue;
                end
                % presence of non-whitespace data when we already have children is a
                % mixed XML tag
                error('mixed type (text and children) XML tags unsupported in this translation');
            end
            % just return string content
            xml.value = matml_xmlunsafe(part.data);
        case 1
            % 1 = opening tag
            % presence of children when we already have non-whitespace data is a
            % mixed XML tag
            if ~isempty(xml.value)
                f = find(xml.value ~= 9 & xml.value ~= 32 & xml.value ~= 10 & xml.value ~= 13);
                if ~isempty(f)
                    error('mixed type (text and children) XML tags unsupported in this translation');
                end
                % if data is whitespace, we'll just discard it
                xml.value = [];
            end
            % parse the content of the new element, up to its closing tag
            [xml_, nextpart] = matml_xmlparse(parts, nextpart, part);
            xml_.name = part.data;
            xml_.attr = part.attr;
            % add to children
            xml.children{end+1} = xml_;
        case 2
            % 2 = closing tag
            % at document level there is no open element to close; report
            % that explicitly rather than failing on parent.data
            if isempty(parent)
                error(['invalid nesting - "' part.data '" has no matching opening tag']);
            end
            if ~strcmp(part.data, parent.data)
                error(['invalid nesting - "' part.data '" ends "' parent.data '"']);
            end
            return;
        case 4
            % comments are ignored
        otherwise
            error('unrecognised part code');
    end
end
% running out of tokens is only legitimate at document level
if isempty(parent)
    % this is the root tag, and we should reach here
    % but we fix up the representation at this point
    if ~isempty(xml.value)
        error('document cannot have data outside root tag');
    end
    if length(xml.children) ~= 1
        error('document cannot have more than one root tag');
    end
    xml = xml.children{1};
else
    error('did not find closing tag');
end
function data = matml_xmlunsafe(data)
% Decode the five predefined XML entities in a string.
%
% "&amp;" is decoded last so that text such as "&amp;lt;" yields the
% literal "&lt;" rather than being decoded twice.
data = strrep(data, '&lt;', '<');
data = strrep(data, '&gt;', '>');
data = strrep(data, '&apos;', '''');
data = strrep(data, '&quot;', '"');
data = strrep(data, '&amp;', '&');
function data = matml_xmlsafe(data)
% Encode the five XML special characters in a string as entities.
%
% "&" is encoded first so that the ampersands introduced by the
% other replacements are not themselves re-encoded.
data = strrep(data, '&', '&amp;');
data = strrep(data, '"', '&quot;');
data = strrep(data, '''', '&apos;');
data = strrep(data, '>', '&gt;');
data = strrep(data, '<', '&lt;');
% matml_file2array(<filename>) returns a string containing the contents of
% filename with every carriage return (asc 13) removed, so that CRLF line
% endings become plain LF (asc 10)
%
% matml_file2array(<filename>, true) returns a cell array (column) of
% lines instead of one long text string; an empty file gives an empty
% (0x1) cell array
function out = matml_file2array(fname, in_lines)
fid = fopen(fname);
if fid == -1
    error('File not found');
end
bin = fread(fid);
% go through replacing crlf with just lf
if ~isempty(bin)
    notcrs = bin ~= 13;
    bin = bin(notcrs);
end
out = char(bin');
if fclose(fid)
    error('File not closed');
end
if nargin > 1 && in_lines
    str = out;
    % an empty file has no lines; return early rather than indexing
    % str(end) on an empty string
    if isempty(str)
        out = cell(0, 1);
        return
    end
    % make sure the last line is terminated, then cut at each LF
    if str(end) ~= 10
        str = [str 10];
    end
    lf = find(str == 10);
    st = [1 lf(1:end-1)+1];
    ed = lf - 1;
    N = length(st);
    out = cell(N, 1);
    for n = 1:N
        out{n} = str(st(n):ed(n));
    end
end
%% PARSE MATML
function value = matml_matmlparse(tag)
% Convert a parsed XML node (and its descendants) back into a
% matlab variable.
%
% tag    node with fields attr, value and children. the attributes
%        used are:
%          c  class code ('y' cell, 'z' struct, 'e' function handle,
%             or a one-letter numeric/char/logical code, optionally
%             followed by 'x' for complex). if absent, the node is a
%             simple string.
%          b  size, as space-separated numbers. if absent, the value
%             is scalar. a single number N means N-by-1.
%          a  ';'-terminated list of field names (struct only)
%          s  storage protocol; 'b' means the element text is the name
%             of a binary file holding the data
% value  the reconstructed variable
%
% size attributes are parsed with sscanf; text from the document is
% never evaluated as code.
if ~isfield(tag.attr, 'c')
    % it's a simple string
    value = tag.value;
elseif strcmp(tag.attr.c, 'e')
    % function handle, rebuilt from its text
    % NOTE(review): the handle text comes straight from the document;
    % be careful about invoking handles read from untrusted files.
    value = str2func(tag.value);
elseif strcmp(tag.attr.c, 'z')
    % create scalar structure
    fn = matml_explode(';', tag.attr.a);
    nfields = length(fn);
    value = struct();
    for n = 1:nfields
        value.(fn{n}) = [];
    end
    % expand it to required sz if not scalar
    if isfield(tag.attr, 'b')
        sz = sscanf(tag.attr.b, '%f')';
        % a single size N means N-by-1; pad it, because repmat(value, N)
        % would otherwise produce an N-by-N array
        while length(sz) < 2
            sz = [sz 1];
        end
        value = repmat(value, sz);
    end
    % for each <struct> element
    % children are stored element by element, fields in order
    for e = 0:length(tag.children)-1
        el = floor(e / nfields) + 1;
        n = mod(e, nfields) + 1;
        value(el).(fn{n}) = matml_matmlparse(tag.children{e+1});
    end
elseif strcmp(tag.attr.c, 'y')
    if isfield(tag.attr, 'b')
        sz = sscanf(tag.attr.b, '%f')';
        % a single size N means N-by-1; pad it, because cell(N) would
        % otherwise produce an N-by-N array
        while length(sz) < 2
            sz = [sz 1];
        end
    else
        % no sz attr means scalar
        sz = 1;
    end
    value = cell(sz);
    for n = 1:length(tag.children)
        value{n} = matml_matmlparse(tag.children{n});
    end
else
    if isfield(tag.attr, 'b')
        sz = sscanf(tag.attr.b, '%f')';
    else
        % no sz attr means scalar
        sz = 1;
    end
    % split the class code into base class and complex flag
    if length(tag.attr.c) == 2
        if tag.attr.c(2) ~= 'x'
            error('malformed MatML');
        end
        cls = tag.attr.c(1);
        cpx = true;
    else
        cls = tag.attr.c;
        cpx = false;
    end
    if isfield(tag.attr, 's')
        % data lives in a separate binary file
        if ~strcmp(tag.attr.s, 'b')
            error('cannot recognise storage protocol');
        end
        filename = tag.value;
        f = find('fdvutsponmlc' == tag.attr.c(1));
        if isempty(f)
            error('unrecognised numeric type')
        end
        % bytes per element and matlab class name for each class code
        g = [4 8 8 4 2 1 8 4 2 1 1 1];
        c = {'single', 'double', 'uint64', 'uint32', 'uint16', 'uint8', 'int64', 'int32', 'int16', 'int8', 'logical', 'char'};
        bytesperel = g(f);
        nels = prod(sz);
        expbytes = bytesperel * nels;
        if cpx
            expbytes = expbytes * 2;
        end
        % check the file exists and has exactly the expected size
        d = dir(filename);
        if isempty(d)
            error(['failed open "' filename '"']);
        end
        if expbytes ~= d.bytes
            error('binary file incorrect size');
        end
        fid = fopen(filename, 'r');
        if fid == -1
            error(['failed open "' filename '"']);
        end
        c = c{f};
        try
            if cpx
                % real and imaginary parts are interleaved one slab (all
                % dimensions but the last) at a time
                N = sz(end);
                C = prod(sz(1:end-1));
                value = zeros(C, N);
                for n = 1:N
                    re = fread(fid, C, ['*' c]);
                    im = fread(fid, C, ['*' c]);
                    value(:, n) = complex(re, im);
                end
            else
                value = fread(fid, inf, ['*' c]);
            end
        catch err
            % do not leak the file handle if a read fails
            fclose(fid);
            rethrow(err);
        end
        fclose(fid);
    else
        % data is inline text: all real parts, then all imaginary parts
        value = sscanf(tag.value, '%f');
        if cpx
            value = value(1:end/2) + 1i * value(end/2+1:end);
        end
    end
    if length(sz) < 2
        sz = [sz 1];
    end
    if length(sz) > 1
        value = reshape(value, sz);
    end
    % cast to the class named by the code
    switch cls
        case 'f', value = single(value);
        case 'd', value = double(value);
        case 'v', value = uint64(value);
        case 'u', value = uint32(value);
        case 't', value = uint16(value);
        case 's', value = uint8(value);
        case 'p', value = int64(value);
        case 'o', value = int32(value);
        case 'n', value = int16(value);
        case 'm', value = int8(value);
        case 'l', value = logical(value);
        case 'c', value = char(value);
        otherwise
            error(['uncoded MatML class "' cls '"']);
    end
end
function c = matml_explode(t, s)
% Split s into the pieces that are each terminated by the delimiter
% character t, returned as a cell array.
%
% any text after the last delimiter is not returned. if s contains no
% delimiter the result is an empty cell array.
c = {};
cuts = find(s == t);
from = 1;
for k = 1:length(cuts)
    % piece runs from just after the previous delimiter to just
    % before this one
    c{k} = s(from:cuts(k)-1);
    from = cuts(k) + 1;
end