function finalString = headline(varargin)
%HEADLINE Convert string to "headline" format (initial capitals on key words).
% B = HEADLINE(A) converts the initial letter of all words (except prepositions
% and conjunctions) in A to capitals. A must be a string. Any letters placed in
% curly braces "{}" will be left unaltered (i.e., in their original case), and
% the braces themselves are removed from the output. This allows one to easily
% sidestep some of the function's idiosyncrasies.
%
% The first character of A, and the first letter of the word following each
% period, colon or hyphen, is always capitalized.
%
% Note that the function is SLOW. Sorry about that. I assume that generally speed
% will not be a concern for text-processing stuff.
%
% B = HEADLINE, with no inputs, produces the default list of prepositions and
% conjunctions that are not capitalized. This list can be modified using the
% following syntax:
%
% B = HEADLINE(A,addList,delList) adds the words in cell array addList to the
% default list of words, and deletes the words in cell array delList from the
% list. addList and delList may each be a single string, a cell array of
% strings, or empty. Use empty brackets [] for addList if you wish only to
% specify delList. (The function does not check whether added words already
% exist on the list, since this would be unnecessary computation. Words to be
% deleted must match entries in the list exactly.)
%
% The default list of prepositions can also be modified simply by editing the
% M-file. The default list has been extracted from the Wikipedia entry for
% <a href="matlab:web('http://en.wikipedia.org/wiki/List_of_English_prepositions')">English Prepositions</a>.
%
% Errors are raised if A is not a string, if addList/delList have an
% unsupported type, or if the curly braces in A are unbalanced, closed before
% they are opened, or nested.
%
% Examples: A = 'life, don''t talk to me about life.'
% y = headline(A)
% y =
% Life, Don't Talk to Me about Life.
%
% y = headline(A,'don''t')
% y =
% Life, don't Talk to Me about Life.
%
% y = headline(A,'don''t','about')
% y =
% Life, don't Talk to Me About Life.
%
% B = '{l}ife, don''t talk to {me} about life.'
% y = headline(B)
% y =
% life, Don't Talk to me about Life.
%
% See also UPPER, LOWER, INITIALCAPS.

% Below is the DEFAULT list of prepositions and conjunctions that are not
% capitalized. If you EDIT this list, make sure you put new phrases in the
% correct cell array, with the appropriate number of words. For phrases
% with more than 3 words, it is probably best to use the 'add' and 'del'
% syntax, so that the terms are ordered correctly to be searched.
% Otherwise, be sure to put these phrases at the beginning of 'longList3'
% in order of descending word-length. The reason is that the function
% searches the lists in the order longList3,longList2,longList1 to ensure
% that a string like 'As Far As' is matched with preposition phrase 'as
% far as', rather than with the preposition 'as', which would then leave
% 'Far' remaining capitalized.
longList3 = {'as far as';'as well as';'by means of';'in accordance with';'in addition to';...
    'in front of';'in place of';'in spite of';'on account of';'on behalf of';'on top of';...
    'with regard to';'with respect to';'as it were'};
longList2 = {'according to';'ahead of';'as to';'aside from';'because of';'close to';'due to';...
    'far from';'in to';'inside of';'instead of';'on to';'out of';'outside of';'owing to';...
    'near to';'next to';'prior to';'subsequent to'};
longList1 = {'a';'aboard';'about';'above';'across';'after';'against';'ago';'along';'alongside';'amid';...
    'amidst';'among';'amongst';'apart from';'around';'as';'at';'atop';'before';'behind';...
    'below';'beneath';'beside';'besides';'between';'beyond';'but';'by';'concerning';...
    'considering';'despite';'down';'during';'except';'following';'for';'from';'in';'inside';...
    'into';'like';'minus';'near';'and';'nor';'or';'yet';'so';'although';'if';'unless';...
    'because';'notwithstanding';'of';'off';'on';'onto';'out';'outside';'over';'past';'plus';...
    'round';'since';'the';'through';'throughout';'till';'to';'toward';'towards';'under';'underneath';...
    'unlike';'until';'up';'upon';'versus';'via';'with';'within';'without';'regarding'};

Narg = nargin;
error(nargchk(0,5,Narg,'struct'))

% No inputs: return (or display) the default list and stop.
if Narg == 0
    if nargout == 1
        finalString = [longList3;longList2;longList1];
    elseif nargout == 0
        disp('------ Default list of prepositions ------')
        disp([longList3;longList2;longList1])
    end
    return
end

Astring = varargin{1};
if isempty(Astring)
    finalString = Astring;
    return
elseif ~ischar(Astring)
    error('First argument must be a string.')
end

% Parse the optional add/delete lists. An empty value ([] or {}) means
% "nothing to add/delete", as promised by the help text.
listToAdd = {};
listToDel = {};
if Narg >= 2
    addArg = varargin{2};
    if isempty(addArg)
        listToAdd = {};
    elseif ischar(addArg) % single string
        listToAdd = {addArg};
    elseif iscell(addArg)
        listToAdd = addArg;
    else
        error('Second argument must be either a string, or a cell array of strings, or [].')
    end
end
if Narg >= 3
    delArg = varargin{3};
    if isempty(delArg)
        listToDel = {};
    elseif ischar(delArg) % single string
        listToDel = {delArg};
    elseif iscell(delArg)
        listToDel = delArg;
    else
        error('Third argument must be either a string, or a cell array of strings, or [].')
    end
end

% First adjust the list by adding specified words, filing each phrase
% according to its word count (number of spaces + 1).
longListHigher = {};        % phrases with more than 3 words (column cell)
longListHigherLength = [];  % their word counts (column vector)
for i = 1:length(listToAdd)
    thisAddition = listToAdd{i};
    numWord = length(strfind(thisAddition,' ')) + 1;
    switch numWord
        case 1
            longList1{end+1,1} = thisAddition;
        case 2
            longList2{end+1,1} = thisAddition;
        case 3
            longList3{end+1,1} = thisAddition;
        otherwise
            longListHigher{end+1,1} = thisAddition;
            longListHigherLength(end+1,1) = numWord;
    end
end

% Order the HIGHER list by descending word count. The lengths are kept as a
% column vector so that sorting along dimension 1 really sorts them.
[sortedLengths,sortIx] = sort(longListHigherLength,1,'descend'); %#ok<ASGLU>
longListHigherSort = longListHigher(sortIx);

% Paste them all together, longest phrases first:
longList = [longListHigherSort;longList3;longList2;longList1];

% Next adjust the list by deleting specified words (exact matches only):
for i = 1:length(listToDel)
    longList(strcmp(listToDel{i},longList)) = [];
end

% Identify "no-change" letters, identified by {}. The brace counts are
% validated BEFORE the index vectors are subtracted, so that unbalanced
% braces yield the intended error message.
leftBracketIx = strfind(Astring,'{');
rightBracketIx = strfind(Astring,'}');
if numel(leftBracketIx) ~= numel(rightBracketIx)
    error('Unbalanced brackets.')
end
bracketedElements = [];
if ~isempty(leftBracketIx)
    bracketDiff = rightBracketIx - leftBracketIx;
    if any(bracketDiff < 0)
        error('Brackets not closed properly.')
    end
    % A second '{' that opens before the previous '}' closes means nesting.
    if numel(leftBracketIx) > 1 && any(diff(leftBracketIx) <= bracketDiff(1:end-1))
        error('Nested brackets not permitted.')
    end
    for i = 1:length(leftBracketIx)
        bracketedElements = [bracketedElements (leftBracketIx(i):rightBracketIx(i))]; %#ok<AGROW>
    end
end

% Identify must-cap letters: the first character of the string, and the
% first letter of the word following each period, colon or hyphen.
% The index vectors are rows of possibly different lengths, so they are
% concatenated horizontally.
punctuationIx = [strfind(Astring,'.') strfind(Astring,':') strfind(Astring,'-')];
postPunctuationElements = 1;
for i = 1:length(punctuationIx)
    % Next whitespace-delimited token after the punctuation mark...
    wordToLeave = strtok(Astring(punctuationIx(i)+1:end));
    % ...and the position of its first occurrence after that mark
    % (empty if the mark is followed only by whitespace or nothing).
    allOccurrencesIx = strfind(Astring,wordToLeave);
    firstLetterOfThisWordIx = min(allOccurrencesIx(allOccurrencesIx > punctuationIx(i)));
    postPunctuationElements = [postPunctuationElements firstLetterOfThisWordIx]; %#ok<AGROW>
end

% Capitalize all first letters...
% NOTE(review): INITIALCAPS is not defined in this file; it is assumed to
% return a string of the same length with word-initial letters capitalized.
AstringLowerCase = lower(Astring);
AstringUpperCase = upper(Astring);
finalString = initialcaps(AstringLowerCase);

%%%%%%%%%%%%% THE MAIN EVENT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Step through the list, replacing each capitalized preposition with its
% lower-case form. Note the order of longer phrases first, then 3-word,
% 2-word and 1-word prepositions in longList; this order is important for
% the way the search and replace is done.
% Each preposition is only matched when followed by one of these
% characters, to make sure it is not merely the prefix of another word:
punctuationList = '. ,:;-!?/&(){}[]''"';
for prepCount = 1:length(longList)
    for puncCount = 1:length(punctuationList)
        thisPrep = [longList{prepCount} punctuationList(puncCount)]; % different ending punctuation
        thisPrepCap = initialcaps(thisPrep);
        finalString = strrep(finalString,thisPrepCap,thisPrep);
    end
end

% Re-apply the forced capitals, restore the braced text verbatim (this takes
% precedence over forced capitals), and finally strip the braces themselves.
finalString(postPunctuationElements) = AstringUpperCase(postPunctuationElements);
finalString(bracketedElements) = Astring(bracketedElements);
finalString([leftBracketIx rightBracketIx]) = [];
end