image thumbnail

POS Tag for first word in the sentence

by

 

18 Jul 2013 (Updated )

Finds the most probable part-of-speech tag for the first word in a sentence.

checker_first(string, Training_String, Training_Tag, string_index)
% Written by : Samyukta Ramnath
% June 2013
% BITS Pilani University
% function[] = checker_first(string, Training_String, Training_Tag,
% string_index)
% This function finds the most probable part-of-speech tag for the
% first word of a sentence, taking in the string, the training data, and the
% index of the word (in this case 1, because it is the first word).
% It has no output arguments; the chosen tag is stored in the global
% cell array Tag_sequences, at Tag_sequences{1}.


function[] = checker_first(string, Training_String, Training_Tag, string_index)
%CHECKER_FIRST Pick the most probable POS tag for the first word of a sentence.
%   checker_first(string, Training_String, Training_Tag, string_index)
%   has no output arguments. The chosen tag is written to the global cell
%   array Tag_sequences, at Tag_sequences{1}.
%
%   Inputs
%     string          - the sentence being tagged. The word to look up is
%                       string(string_index); a cell array of words is
%                       assumed (TODO confirm against callers). A 1x1 cell
%                       result is unwrapped before comparison.
%     Training_String - cell array of training sentences, each one a cell
%                       array of words.
%     Training_Tag    - cell array parallel to Training_String holding the
%                       tag of every training word.
%     string_index    - index of the word inside string (1 for the first
%                       word of the sentence).
%
%   Method
%     1. Count how often the word occurs in the training data under each
%        tag of the fixed tag set, and pick the most frequent tag.
%     2. If the word never occurs under a tag of the tag set, pick the tag
%        that most often starts a training sentence instead.
%     Ties are resolved in favour of the tag listed first in the tag set;
%     with no usable training data at all the first tag ('DD') is chosen.

%% Initial definitions

global Tag_sequences;

Tags = {'DD','NN','PP','PN','ADJ','ADV','VB','PNN'};
num_tags = numel(Tags);

% Word to look up. Indexing a cell array with () yields a 1x1 cell, so
% unwrap it to allow a vectorised strcmp against each training sentence.
word = string(string_index);
if iscell(word)
    word = word{1};
end

%% Count the tags under which the word appears in the training data
word_tag_count = zeros(1, num_tags);
for s = 1:numel(Training_String)
    hits = strcmp(word, Training_String{s});
    if any(hits(:))
        matched_tags = Training_Tag{s}(hits);
        for k = 1:numel(matched_tags)
            % Logical mask: tags outside the tag set match nothing and
            % therefore do not contribute to any count.
            tag_mask = strcmp(matched_tags{k}, Tags);
            word_tag_count(tag_mask) = word_tag_count(tag_mask) + 1;
        end
    end
end

%% Choose the tag
if any(word_tag_count)
    % The word is in the lexicon with at least one known tag. Dividing by
    % the total would not change the argmax, so the raw counts are used.
    [~, best] = max(word_tag_count);
else
    % Unknown word (or only seen with unknown tags): fall back to the
    % distribution of tags that start a training sentence.
    first_tag_count = zeros(1, num_tags);
    for s = 1:numel(Training_Tag)
        if isempty(Training_Tag{s})
            continue; % an empty sentence has no first tag
        end
        tag_mask = strcmp(Training_Tag{s}{1}, Tags);
        first_tag_count(tag_mask) = first_tag_count(tag_mask) + 1;
    end
    % With all-zero counts max returns index 1, i.e. Tags{1}.
    [~, best] = max(first_tag_count);
end

Tag_sequences{1} = Tags{best};

Contact us