Code covered by the BSD License

# mweka: Running Machine Learning Tool Weka from MATLAB

### Durga Lal Shrestha (view profile)

24 Jul 2009 (Updated )

Runs the Weka machine-learning tool from MATLAB.

```function [dataName,attributeName, attributeType, data]= arffread(fileName)
% ARFFREAD  Read a dense Weka ARFF file into MATLAB variables.
%
% USAGE:
%       [dataName,attributeName, attributeType, data] = arffread(fileName)
%
% INPUT:
%       fileName:       name of the ARFF file to be read
%
% OUTPUT:
%       dataName:       relation name of the arff file (surrounding quotes
%                       removed)
%       attributeName:  attribute names as cell array { 1 by nAttr };
%                       quotes around a name are kept as written in the file
%       attributeType:  attribute types as cell array { 1 by nAttr };
%                       a nominal specification is returned whole,
%                       e.g. '{a, b, c}'
%       data:           numeric data (nInstan by nAttr). Values that are not
%                       numeric (missing values '?', nominal labels, strings)
%                       are returned as NaN. If the file has no data rows,
%                       data is 0 by nAttr.
%
% ERRORS:
%       Raised when the file cannot be opened, when the @relation,
%       @attribute or @data declarations are missing, or when data rows do
%       not all contain the same number of values.
%
% NOTES:
%       Keywords are matched case-insensitively at the start of a line.
%       Blank lines and lines starting with '%' are ignored in the data
%       section. Rows are split on commas when a comma is present,
%       otherwise on blanks/tabs. The sparse ARFF row format ({idx val,...})
%       and escaped quotes inside quoted names are not supported.

% Copyright 2004-2004 by Durga Lal Shrestha.
% eMail: durgals@hotmail.com
% $Date: 2004/06/23
% $Revision: 3.2.0 $ $Date: 2004/08/16 $

% ***********************************************************************
if nargin < 1
    error('No input arguments!');
end
if nargin > 1
    error('Too many input arguments!');
end

atRelation  = '@relation';
atAttribute = '@attribute';
atData      = '@data';

% Read the whole file as an (nLines by 1) cell array of strings.
wholeData = readAllLines(fileName);
noOfLines = size(wholeData,1);

%************************************************************************
%% Finding data name
dataName = '';
lineAtRelation = 0;
for i = 1:noOfLines
    thisLine = strtrim(wholeData{i});
    if startsWithKeyword(thisLine, atRelation)
        lineAtRelation = i;
        % Quote-aware token: a quoted name may contain blanks.
        rawName  = takeToken(thisLine(length(atRelation)+1:end));
        dataName = stripQuotes(rawName);
        break
    end
end
if lineAtRelation == 0
    error('No @relation declaration found in file: %s', fileName);
end

%************************************************************************
%% Finding attribute names and types
attrVector     = {};
attrTypeVector = {};
lineAtData     = 0;
for i = lineAtRelation+1:noOfLines
    thisLine = strtrim(wholeData{i});
    if startsWithKeyword(thisLine, atAttribute)
        [attrName, remAttrType] = takeToken(thisLine(length(atAttribute)+1:end));
        attrVector{end+1}     = attrName;
        attrTypeVector{end+1} = takeType(remAttrType);
    elseif startsWithKeyword(thisLine, atData)
        lineAtData = i;
        break
    end
end
if isempty(attrVector)
    error('No @attribute declaration found in file: %s', fileName);
end
if lineAtData == 0
    error('No @data declaration found in file: %s', fileName);
end

attributeName = attrVector;
attributeType = attrTypeVector;

%************************************************************************
%% Reading data
% Collect the payload rows first so the matrix can be preallocated.
rows = {};
for i = lineAtData+1:noOfLines
    thisLine = strtrim(wholeData{i});
    if isempty(thisLine) || thisLine(1) == '%'
        continue        % blank line or comment
    end
    rows{end+1} = thisLine;
end

nRows = length(rows);
data  = zeros(0, length(attributeName));
for r = 1:nRows
    values = parseRow(rows{r});
    if r == 1
        % Width is taken from the first row, as the matrix is built from
        % the rows themselves rather than from the header.
        data = zeros(nRows, length(values));
    elseif length(values) ~= size(data,2)
        error('Data row %d has %d values, expected %d.', ...
              r, length(values), size(data,2));
    end
    data(r,:) = values;
end

%************************************************************************
function lines = readAllLines(fileName)
% Return every line of the text file as an (nLines by 1) cell array.
fid = fopen(fileName,'rt');
if fid == -1
    error('Cannot open file: %s', fileName);
end
lines = cell(0,1);
n = 0;
tline = fgetl(fid);
while ischar(tline)         % fgetl returns -1 (numeric) at end of file
    n = n + 1;
    lines{n,1} = tline;
    tline = fgetl(fid);
end
fclose(fid);

%************************************************************************
function tf = startsWithKeyword(lineText, keyword)
% True when lineText begins with keyword (any case) as a whole word.
n  = length(keyword);
tf = strncmpi(lineText, keyword, n) && ...
     (length(lineText) == n || isspace(lineText(n+1)));

%************************************************************************
function [tok, rest] = takeToken(str)
% Return the first token of str and the remainder. A token opening with a
% single or double quote extends to the matching closing quote (quotes are
% kept); otherwise the token is delimited by whitespace.
str  = strtrim(str);
tok  = '';
rest = '';
if isempty(str)
    return
end
q = str(1);
if q == '''' || q == '"'
    closing = find(str(2:end) == q);
    if ~isempty(closing)
        tok  = str(1:closing(1)+1);
        rest = str(closing(1)+2:end);
        return
    end
end
[tok, rest] = strtok(str);

%************************************************************************
function attrType = takeType(str)
% Return the attribute type: the whole '{...}' list for a nominal
% specification, otherwise the first whitespace-delimited word.
str = strtrim(str);
if ~isempty(str) && str(1) == '{'
    closing = find(str == '}');
    if ~isempty(closing)
        attrType = str(1:closing(1));
        return
    end
end
attrType = strtok(str);

%************************************************************************
function str = stripQuotes(str)
% Remove one leading and one trailing quote character, if present.
if ~isempty(str) && any(str(1) == '''"')
    str = str(2:end);
end
if ~isempty(str) && any(str(end) == '''"')
    str = str(1:end-1);
end

%************************************************************************
function values = parseRow(rowText)
% Convert one data row to a numeric row vector without evaluating the
% text as MATLAB code. Non-numeric fields become NaN.
if rowText(end) == ','
    rowText(end) = [];      % tolerate a trailing comma
end
commaAt = find(rowText == ',');
if ~isempty(commaAt)
    edges  = [0 commaAt length(rowText)+1];
    values = zeros(1, length(edges)-1);
    for c = 1:length(values)
        values(c) = toNumber(rowText(edges(c)+1:edges(c+1)-1));
    end
else
    values = [];
    rest   = rowText;
    while ~isempty(rest)
        [tok, rest] = strtok(rest);     % blanks and tabs delimit fields
        if isempty(tok)
            break
        end
        values(end+1) = toNumber(tok);
    end
end

%************************************************************************
function v = toNumber(txt)
% str2double yields NaN for non-numeric text; complex results (e.g. the
% label 'i') are also mapped to NaN so the data matrix stays real.
v = str2double(strtrim(txt));
if ~isreal(v)
    v = NaN;
end
```