No BSD License  

image thumbnail

Baseball Pennant Race

by G.M. Boynton

 

05 Sep 2006 (Updated )

Chart your baseball division

PennantRace(league,division,year)
function PennantRace(league,division,year)
%PennantRace(league,division,[year])
%
%Plots a graph of games above .500 as a function of time for a given major
%league baseball division.  Provides a legend of the current standings along
%with the 'magic number' needed to clinch the title (if between 0 and 30).
%
%Data is pulled in using 'urlread' from the ESPN website and analyzed with
%a bunch of 'regexp' calls.
%
%Inputs:
%   league    string 'NL' or 'AL' (case-insensitive).  Default is 'NL'.
%   division  string 'West', 'Central', or 'East' (case-insensitive).
%             Default is 'West'.
%   year      Default is current year.  Can only go back to 2002
%  (I could have used 'retrosheet.org' for more historical data)
%
%Outputs:
%   None.  The chart is drawn into figure 1 (which is cleared first) and
%   each team name is echoed to the command window as its data is fetched.
%
%Errors:
%   PennantRace:badLeague    league is not 'NL' or 'AL'
%   PennantRace:badDivision  division is not 'West', 'Central' or 'East'
%   PennantRace:noData       no completed games could be parsed from the
%                            downloaded schedule pages
%
%The idea for these charts comes from John Warner Davenport who self-published
%'Baseball's Pennant Races: A Graphic View' in 1981
%
%See also http://alexreisner.com/baseball/history/race for more charts
%and http://www.pennant-race.com/ for (usually) current charts
%
%Written by G.M. Boynton 8/29/2006...
%Go Padres.

%Fill in defaults.  'nargin' is used rather than exist('name'): without
%the 'var' qualifier, exist() also reports functions on the path (e.g. a
%function called 'year'), which would silently skip the default.
if nargin < 1 || isempty(league)
    league = 'NL';
end
if nargin < 2 || isempty(division)
    division = 'West';
end
if nargin < 3 || isempty(year)
    tmp = clock;
    year = tmp(1);  %current year
end

if ~ischar(league)
    error('PennantRace:badLeague','league must be the string ''NL'' or ''AL''.');
end
if ~ischar(division)
    error('PennantRace:badDivision', ...
        'division must be the string ''West'', ''Central'', or ''East''.');
end

%Look up the ESPN team codes (used in the URL) and the display names
%(used in the legend) for the requested division.
switch upper(league)
    case 'NL'
        switch lower(division)
            case 'west'
                ESPNNames = {'sdg','lad','ari','sfo','col'};
                legendNames = {'San Diego','Los Angeles','Arizona','San Francisco','Colorado'};
            case 'central'
                ESPNNames = {'stl','cin','mil','hou','chc','pit'};
                legendNames = {'St. Louis','Cincinnati','Milwaukee','Houston','Chicago','Pittsburgh'};
            case 'east'
                ESPNNames = {'nym','phi','fla','atl','was'};
                legendNames = {'New York','Philadelphia','Florida','Atlanta','Washington'};
            otherwise
                error('PennantRace:badDivision', ...
                    'Unknown division ''%s''. Use ''West'', ''Central'', or ''East''.',division);
        end
    case 'AL'
        switch lower(division)
            case 'west'
                ESPNNames = {'oak','laa','tex','sea'};
                legendNames = {'Oakland','Los Angeles','Texas','Seattle'};
            case 'central'
                ESPNNames = {'det','min','chw','cle','kan'};
                legendNames = {'Detroit','Minnesota','Chicago','Cleveland','Kansas City'};
            case 'east'
                ESPNNames = {'nyy','bos','tor','bal','tam'};
                legendNames = {'New York','Boston','Toronto','Baltimore','Tampa Bay'};
            otherwise
                error('PennantRace:badDivision', ...
                    'Unknown division ''%s''. Use ''West'', ''Central'', or ''East''.',division);
        end
    otherwise
        error('PennantRace:badLeague','Unknown league ''%s''. Use ''NL'' or ''AL''.',league);
end

monthStr = {'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'};

nTeams = length(ESPNNames);
wl = zeros(366,nTeams);  %wins (>0) and losses (<0) for each day of the year
w = zeros(nTeams,1);     %total wins
l = zeros(nTeams,1);     %total losses
gl = zeros(nTeams,1);    %games left
monthsSeen = [];         %month numbers (1-12) found on any schedule page
startDate = datenum(year,1,1);

%monthExpr pulls out each month's worth of data
monthExpr = '">(Apr|May|Jun|Jul|Aug|Sep|Oct)..*?(<tr class="colhead">|<br clear=all>)';
%gameExpr pulls out the line for each game day within a month
gameExpr = '<td>(Mon|Tue|Wed|Thu|Fri|Sat|Sun).\s\d+</td>.*?<td.*?</td>.*?<td.*?</td>';
%WLExpr pulls out the date and outcome of each game obtained with gameExpr
WLExpr = '<td>(Mon|Tue|Wed|Thu|Fri|Sat|Sun).\s(\d+)?</td>.*?">(W|L)</font';

%loop through the teams, pulling in data from the ESPN website.
for teamNum = 1:nTeams

    disp(legendNames{teamNum});
    url = ['http://sports.espn.go.com/mlb/teams/schedule?team=',ESPNNames{teamNum},'&season=',num2str(year),'&seasonType=2&half='];

    s1 = urlread([url,num2str(1)]);  %1st half of season
    s2 = urlread([url,num2str(2)]);  %2nd half of season
    s = [s1,s2];

    [monthMat monthTok] = regexp(s,monthExpr,'match','tokens');
    gameMat = regexp(monthMat,gameExpr,'match');

    %loop through the months
    for month = 1:length(monthTok)
        %find the month number (1-12); the first token is the month name
        thisMonth = find(strcmp(monthTok{month}{1},monthStr));
        monthsSeen(end+1) = thisMonth; %#ok<AGROW>
        %loop through the games for this month
        for i = 1:length(gameMat{month})
            %find the 'W' or 'L' for this game
            [WLMat WLTok] = regexp(gameMat{month}{i},WLExpr,'match','tokens'); %#ok<ASGLU>
            if isempty(WLTok)
                continue  %no result on this line
            end
            %get the date
            dayOfMonth = str2double(WLTok{1}{2});
            if isnan(dayOfMonth)
                continue  %day could not be parsed; skip rather than mis-file it
            end
            dNum = datenum(year,thisMonth,dayOfMonth);
            x = dNum-startDate+1;  %day-of-year index into wl
            %increment w, l and wl (double headers accumulate on one day)
            switch(WLTok{1}{3})
                case 'W'
                    wl(x,teamNum) = wl(x,teamNum)+1;
                    w(teamNum) = w(teamNum)+1;
                case 'L'
                    wl(x,teamNum) = wl(x,teamNum)-1;
                    l(teamNum) = l(teamNum)+1;
            end
        end
    end
    %calculate the number of games left for this team by counting the
    %occurrences of the ticket-link marker in the page
    gl(teamNum,1) = length(strfind(s,'partner=ot">Tickets</a></td>'));
end
monthsSeen = unique(monthsSeen);

%truncate the wl matrix to range of days containing games.  One extra row
%before the first game is kept so every curve starts at zero.
playedDays = find(sum(abs(wl),2)~=0);
if isempty(playedDays)
    error('PennantRace:noData', ...
        'No completed games found for %s %s %d (page layout changed, or season not started?).', ...
        league,division,year);
end
firstGame = max(playedDays(1)-1,1);
lastGame = playedDays(end);
wl = wl(firstGame:lastGame,:);

%Here's something fun:  Uncomment the next two lines to generate a random season!
%g = find(wl~=0);
%wl(g) = sign(rand(size(g))-.5);

%Basic analysis.  cumsum(wl) is the accumulation of wins and losses, which
%is the basis for our chart.
cumwl = cumsum(wl);
%Find the current team order from best to worst.
[foo,id] = sort(cumwl(end,:),2,'descend'); %#ok<ASGLU>

%Calculate the 'magic number'.  For an easy reference, see:
%http://www.obsoletecomputermuseum.org/magic/magicexpo.shtml
%The season length is taken from the first listed team's schedule.
nGames = w(1)+l(1)+gl(1);
M = nGames+1 - w(id(1)) - l(id(2));

%Calculate current games behind and winning percentage.
final = cumwl(end,:);
gb = (max(final)-final)/2;
pct = w./(w+l);

%The rest is graphics stuff.
figure(1)
clf
hold on

nDays = size(wl,1);
xtick = datenum(year,monthsSeen,1)-startDate-firstGame+1;
colList = {'k','b','r','g','m','y'};
set(gca,'Color',.9*[1,1,1]);

%plot the data and generate the legend
legendStr = cell(1,nTeams);
for i = 1:nTeams
    plot(cumwl(:,id(i)),'-','LineWidth',2,'Color',colList{id(i)});
    if i==1
        if M<=0
            gbstr = '***';  %leading team has clinched the title
        else
            gbstr = '-';
        end
    else
        gbstr = sprintf('%4.1f',gb(id(i)));
    end
    %baseball convention drops the leading zero (.543); leave anything
    %else (e.g. 1.000) intact
    pctstr = sprintf('%3.3f',pct(id(i)));
    if pctstr(1)=='0'
        pctstr = pctstr(2:end);
    end
    legendStr{i} = sprintf('%13s %2d %2d %4s %4s',legendNames{id(i)},w(id(i)),l(id(i)),pctstr,gbstr);
end

plot([1,nDays],[0,0],'k:');
yLimits = get(gca,'YLim');  %named yLimits so the built-in ylim is not shadowed

%vertical dotted lines dividing each month
for i = 1:length(xtick)
    plot(xtick(i)*[1,1],yLimits,'k:');
end

%horizontal dotted lines every two games (solid at .500)
for i = ceil(yLimits(1)/2)*2:2:floor(yLimits(2)/2)*2
    if i==0
        plot([0,nDays+1],[i,i],'k-');
    else
        plot([0,nDays+1],[i,i],'k:');
    end
end

%month labels on x-axis, placed roughly mid-month
set(gca,'xTick',xtick+15);
set(gca,'XTickLabel',monthStr(monthsSeen));
set(gca,'XLim',[0,nDays+1]);

%ylabel, title and legend
ylabel('Games above .500');
if M>0 && M<30
    title(sprintf('%s %s, %d,  Magic Number %d',league,division,year,M));
else
    title(sprintf('%s %s, %d',league,division,year));
end
legend(legendStr,'FontName','Courier','Location','NorthWest');



Contact us