No BSD License  

Highlights from
headline

from headline by David Fass
Convert a string to "headline" format (initial capitals on key words).

headline(varargin)
function finalString = headline(varargin)
%HEADLINE  Convert string to "headline" format (initial capitals on key words).
%   B = HEADLINE(A) converts the initial letter of all words (except prepositions
%   and conjunctions) in A to capitals.  A must be a string.  Any letters placed in
%   curly braces "{}" will be left unaltered (i.e., in their original case).  This
%   allows one to easily sidestep some of the function's idiosyncrasies.  The
%   braces themselves are removed from the output.
%
%   The first letter of the string, and the first letter of any word that
%   follows a period, colon or hyphen, is always capitalized.
%
%   Note that the function is SLOW.  Sorry about that.  I assume that generally speed
%   will not be a concern for text-processing stuff.
%
%   B = HEADLINE, with no inputs, produces the default list of prepositions and
%   conjunctions that are not capitalized (it is displayed instead if no output
%   is requested).  This list can be modified using the following syntax:
%
%   B = HEADLINE(...,addList,delList) adds the words in cell array addList to the
%   default list of words, and deletes the words in cell array delList from the
%   default list of words.  Each list may also be given as a single string.  Use
%   empty brackets [] for addList if you wish only to specify delList.  (The
%   function does not check whether added words already exist on the list, since
%   this would be unnecessary computation. Words to be deleted must match entries
%   in the list exactly.)  Up to five inputs are accepted; the fourth and fifth
%   are ignored.
%
%   The default list of prepositions can also be modified simply by editing the
%   M-file.  The default list has been extracted from the Wikipedia entry for
%   <a href="matlab:web('http://en.wikipedia.org/wiki/List_of_English_prepositions')">English Prepositions</a>.
%
%   Errors are raised if A is not a string, if addList/delList is not a
%   string, cell array or empty, or if curly braces are unbalanced, closed
%   before they are opened, or nested.
%
%   Examples:  A = 'life, don''t talk to me about life.'
%              y = headline(A)
%                  y =
%                      Life, Don't Talk to Me about Life.
%
%              y = headline(A,'don''t')
%                  y =
%                      Life, don't Talk to Me about Life.
%
%              y = headline(A,'don''t','about')
%                  y =
%                      Life, don't Talk to Me About Life.
%
%              B = '{l}ife, don''t talk to {me} about life.'
%              y = headline(B)
%                  y =
%                      life, Don't Talk to me about Life.
%
%   See also UPPER, LOWER, INITIALCAPS.


% Below is the DEFAULT list of prepositions and conjunctions that are not
% capitalized.  If you EDIT this list, make sure you put new phrases in the
% correct cell array, with the appropriate number of words.  For phrases
% with more than 3 words, it is probably best to use the 'add' and 'del'
% syntax, so that the terms are ordered correctly to be searched.
% Otherwise, be sure to put these phrases at the beginning of 'longList3'
% in order of descending word-length.  The reason is that the function
% searches the lists in the order longList3,longList2,longList1 to ensure
% that a string like 'As Far As' is matched with preposition phrase 'as
% far as', rather than with the preposition 'as', which would then leave
% 'Far' remaining capitalized.

longList3 = {'as far as';'as well as';'by means of';'in accordance with';'in addition to';...
    'in front of';'in place of';'in spite of';'on account of';'on behalf of';'on top of';...
    'with regard to';'with respect to';'as it were'};

longList2 = {'according to';'ahead of';'as to';'aside from';'because of';'close to';'due to';...
    'far from';'in to';'inside of';'instead of';'on to';'out of';'outside of';'owing to';...
    'near to';'next to';'prior to';'subsequent to'};

% NOTE: 'apart from' is a two-word phrase kept in the one-word list; it still
% works because it is listed before 'from' and is therefore searched first.
longList1 = {'a';'aboard';'about';'above';'across';'after';'against';'ago';'along';'alongside';'amid';...
    'amidst';'among';'amongst';'apart from';'around';'as';'at';'atop';'before';'behind';...
    'below';'beneath';'beside';'besides';'between';'beyond';'but';'by';'concerning';...
    'considering';'despite';'down';'during';'except';'following';'for';'from';'in';'inside';...
    'into';'like';'minus';'near';'and';'nor';'or';'yet';'so';'although';'if';'unless';...
    'because';'notwithstanding';'of';'off';'on';'onto';'out';'outside';'over';'past';'plus';...
    'round';'since';'the';'through';'throughout';'till';'to';'toward';'towards';'under';'underneath';...
    'unlike';'until';'up';'upon';'versus';'via';'with';'within';'without';'regarding'};

Narg = nargin;
% Manual upper-bound check (replaces the deprecated NARGCHK call).
if Narg > 5
    error('headline:tooManyInputs','Too many input arguments.')
end

% No inputs: return (or display) the default list and stop.
if Narg == 0
    if nargout == 1
        finalString = [longList3;longList2;longList1];
    elseif nargout == 0
        disp('------ Default list of prepositions ------')
        disp([longList3;longList2;longList1])
    end
    return
end

Astring = varargin{1};

% Empty input is passed straight through (before the type check, so that
% [] is returned as []).
if isempty(Astring)
    finalString = Astring;
    return
elseif ~ischar(Astring)
    error('First argument must be a string.')
end

% Optional add/delete lists.  An empty value ([] , '' or {}) means
% "nothing to add/delete", as promised in the help text.
listToAdd = {};
listToDel = {};
if Narg >= 2
    addArg = varargin{2};
    if isempty(addArg)
        listToAdd = {};
    elseif ischar(addArg)       % single string
        listToAdd = {addArg};
    elseif iscell(addArg)
        listToAdd = addArg;
    else
        error('Second argument must be either a string, or a cell array of strings, or [].')
    end
end
if Narg >= 3
    delArg = varargin{3};
    if isempty(delArg)
        listToDel = {};
    elseif ischar(delArg)       % single string
        listToDel = {delArg};
    elseif iscell(delArg)
        listToDel = delArg;
    else
        error('Third argument must be either a string, or a cell array of strings, or [].')
    end
end

% First adjust the list by adding specified words.  Phrases are filed by
% word count so that longer phrases are always searched before shorter ones.
longListHigher = {};
longListHigherLength = [];
for i = 1:length(listToAdd)
    thisAddition = listToAdd{i};
    numWord = length(strfind(thisAddition,' ')) + 1;  % num spaces + 1 = num words
    switch numWord
        case 1
            longList1{end+1,1} = thisAddition;
        case 2
            longList2{end+1,1} = thisAddition;
        case 3
            longList3{end+1,1} = thisAddition;
        otherwise
            longListHigher{end+1,1} = thisAddition;
            longListHigherLength(end+1,1) = numWord;
    end
end

% Order the 4+ word phrases by descending word count.  The length vector is
% a column and no DIM argument is given, so SORT really sorts the values.
[sortedLengths,sortIx] = sort(longListHigherLength,'descend'); %#ok<ASGLU>
longListHigherSort = longListHigher(sortIx);

% Paste them all together, longest phrases first:
longList = [longListHigherSort;longList3;longList2;longList1];

% Next adjust the list by deleting specified words (exact matches only):
for i = 1:length(listToDel)
    longList(strcmp(longList,listToDel{i})) = [];
end

% Identify "no-change" letters, identified by {}:
leftBracketIx = strfind(Astring,'{');
rightBracketIx = strfind(Astring,'}');
bracketedElements = [];

% Check the counts BEFORE subtracting the index vectors; otherwise a
% mismatch surfaces as an unhelpful size error.
if numel(leftBracketIx) ~= numel(rightBracketIx)
    error('Unbalanced brackets.')
end
if ~isempty(leftBracketIx)
    bracketDiff = rightBracketIx - leftBracketIx;
    if any(bracketDiff < 0)
        error('Brackets not closed properly.')
    end
    % Nested if the next '{' appears at or before the current '}'.
    leftBracketSpace = diff(leftBracketIx);
    if ~isempty(leftBracketSpace) && any(leftBracketSpace <= bracketDiff(1:end-1))
        error('Nested brackets not permitted.')
    end
    for i = 1:length(leftBracketIx)
        bracketedElements = [bracketedElements (leftBracketIx(i):rightBracketIx(i))]; %#ok<AGROW>
    end
end

% Identify must-cap letters: the first letter of the string and the first
% letter of each word following a period, colon or hyphen.  The index
% vectors are rows, so they are concatenated horizontally.
punctuationIx = [strfind(Astring,'.') strfind(Astring,':') strfind(Astring,'-')];
postPunctuationElements = 1;

for i = 1:length(punctuationIx)
    wordToLeave = strtok(Astring(punctuationIx(i)+1:end));
    if isempty(wordToLeave)
        continue    % nothing but whitespace after this punctuation mark
    end
    firstLetterOfAllWordIx = strfind(Astring,wordToLeave);
    % First occurrence of that word located after the punctuation mark:
    candidates = firstLetterOfAllWordIx(firstLetterOfAllWordIx > punctuationIx(i));
    postPunctuationElements = [postPunctuationElements min(candidates)]; %#ok<AGROW>
end

% Capitalize all first letters...
AstringLowerCase = lower(Astring);
AstringUpperCase = upper(Astring);
finalString = initialcaps(AstringLowerCase);

%%%%%%%%%%%%% THE MAIN EVENT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Step through the list, replacing the capitalized form of each preposition
% with its listed form.  Note the order of longer phrases before shorter ones
% in longList; this order is important for the way the search and replace is
% done.  Each preposition is matched together with one trailing punctuation
% character so that it is not mistaken for the prefix of another word.
punctuationList = '. ,:;-!?/&(){}[]''"';
for prepCount = 1:length(longList)
    for puncCount = 1:length(punctuationList)
        thisPrep = [longList{prepCount} punctuationList(puncCount)];  % different ending punctuation
        thisPrepCap = initialcaps(thisPrep);
        finalString = strrep(finalString,thisPrepCap,thisPrep);
    end
end

% Re-apply the forced capitals, restore the protected (braced) text in its
% original case, then strip the braces themselves.
finalString(postPunctuationElements) = AstringUpperCase(postPunctuationElements);
finalString(bracketedElements) = Astring(bracketedElements);
finalString([leftBracketIx rightBracketIx]) = [];


Contact us at files@mathworks.com