Code covered by the BSD License  

Highlights from
POS Tag for first word in the sentence

image thumbnail

POS Tag for first word in the sentence

by

 

18 Jul 2013 (Updated )

Finds the most probable Parts Of Speech tag for the first word in a sentence

checker_first(string, Training_String, Training_Tag, string_index)
% Written by : Samyukta Ramnath
% June 2013
% BITS Pilani University
% function[] = checker_first(string, Training_String, Training_Tag,
% string_index)
% This function finds the most probable part-of-speech tag for the
% first word of a sentence, taking in the string, the training data, and the
% index of the word (in this case = 1, because it's the first word).
% It has no return value: the chosen tag is stored in the global
% Tag_sequences{1}.


function [] = checker_first(string, Training_String, Training_Tag, string_index)
%CHECKER_FIRST Choose the most probable POS tag for the first word of a sentence.
%
%   checker_first(string, Training_String, Training_Tag, string_index)
%
%   Inputs
%     string          - sentence being tagged; element string_index is the
%                       word that is looked up. NOTE(review): presumably a
%                       cell array of words - confirm against the caller.
%     Training_String - cell array of training sentences, each one a cell
%                       array of words.
%     Training_Tag    - cell array parallel to Training_String holding the
%                       tag of every training word.
%     string_index    - index of the word inside string.
%
%   Output
%     None. The selected tag is written to the global Tag_sequences{1}.
%
%   Behaviour
%     * If the word occurs in the training data, the tag it carries most
%       often there is selected.
%     * Otherwise the tag that most often starts a training sentence is
%       selected.
%     * Ties, and the degenerate case where nothing could be counted at
%       all, resolve to the earliest entry of the tag list.

%% Initial definitions
global Tag_sequences;

Tags = {'DD','NN','PP','PN','ADJ','ADV','VB','PNN'};
num_tags = numel(Tags);

%% Count how often the word carries each tag in the training data
word = string(string_index);        % loop-invariant, so look it up once
word_counts = zeros(1, num_tags);   % word_counts(k) <-> Tags{k}
found = false;

for i = 1:numel(Training_String)
    for j = 1:numel(Training_String{i})
        if strcmp(word, Training_String{i}{j})  % word is present in the lexicon
            found = true;
            % strcmp against the whole tag list yields a 1-by-num_tags
            % logical mask with at most one true entry (tags are unique);
            % tags outside the list contribute nothing.
            word_counts = word_counts + strcmp(Training_Tag{i}{j}, Tags);
        end
    end
end

%% Select the tag
if found
    counts = word_counts;
else
    % Unknown word: fall back to the distribution of sentence-initial tags.
    counts = zeros(1, num_tags);
    for i = 1:numel(Training_Tag)
        if isempty(Training_Tag{i})
            continue;               % an empty sentence has no first tag
        end
        counts = counts + strcmp(Training_Tag{i}{1}, Tags);
    end
end

% Dividing by the (positive) total would not change which entry is the
% largest, so the arg-max is taken on the raw counts. This avoids the 0/0
% that a normalisation produces when nothing was counted.
[best_count, best_index] = max(counts); %#ok<ASGLU>
Tag_sequences{1} = Tags{best_index};
end

Contact us