function pdfsearch()
% /////////////// automatic pdf search /////////////////////////
% Files needed to run this program
%///////// Before using this program, download these files into the current folder:
%///////// fontbox-1.1.0 http://archive.apache.org/dist/pdfbox/1.1.0/
%///////// pdfbox-1.1.0 http://archive.apache.org/dist/pdfbox/1.1.0/
%//////// jdk-7u10 http://jdk7.java.net/download.html
% 1. Create a directory named testing_folder and copy into it all the PDF
% files that you want to search by keyword ////////////////////////////
% 2. Create another directory named destination_folder; every matching
% file is copied into it (empty this folder before every search) ///////
% 3. Run pdfsearch.m, enter the keywords to search for in the files, and
% click "start search" to begin the search ////////////////////////////
% created by Er. Harjeet Singh on 11-6-2012
% Rad Innovations Sector 70 Mohali
% 0172-4667778, 9216497778
% Start with no other figures open and a clean command window.
% ("clear all" was removed: inside a function there are no variables to
% clear yet, and it needlessly flushes cached functions, globals and
% breakpoints of the whole MATLAB session.)
close all
clc
%/////////////////////////// GUI command set //////////////////////////////
%//////////////////////////////////////////////////////////////////////////
panel_bg = [.5 .5 .5];   % grey shared by the figure, the panel and the labels
button_bg = [.7 .7 .7];  % idle colour of the start/stop buttons

% main window and the titled panel that holds the keyword rows
figure_1 = figure('MenuBar','none','Name','PDF search (hary eye)','NumberTitle','off',...
    'Position',[100,100,390,500],'Color',panel_bg);
a1 = uipanel('Parent',figure_1,'BorderType','etchedin','ForegroundColor','Black','BackgroundColor',panel_bg,...
    'Units','characters','Title','controls','Position',[10 14 60 24],'HighlightColor',[0 0 0]);

% six static labels "keyword 1" .. "keyword 6", 40 pixels apart, top to bottom
label_y = 210:-40:10;
for row = 1:numel(label_y)
    uicontrol('Parent',a1,'Style','text','String',sprintf('keyword %d',row),...
        'Position',[5,label_y(row),100,30],'BackgroundColor',panel_bg,...
        'FontSize',11,'HorizontalAlignment','Center');
end

% push buttons; each one is wired to a nested callback of this function
sel_image_main9 = uicontrol('Parent',figure_1,'Style','Pushbutton','String','start search',...
    'Position',[52,140,100,30],'BackgroundColor',button_bg,...
    'FontSize',11,'HorizontalAlignment','Center','CallBack',@start_data_cap);
sel_image_main10 = uicontrol('Parent',figure_1,'Style','Pushbutton','String','stop search',...
    'Position',[52,90,100,30],'BackgroundColor',button_bg,...
    'FontSize',11,'HorizontalAlignment','Center','CallBack',@stop_data_cap);
sel_image_main11 = uicontrol('Parent',figure_1,'Style','Pushbutton','String','exit',...
    'Position',[250,90,100,50],'BackgroundColor',[.8 .5 .5],...
    'FontSize',11,'HorizontalAlignment','Center','CallBack',@exit_routine);

% keyword edit boxes; the handles world1..world6 are read by start_data_cap
make_edit = @(initial_text, y_pos) uicontrol('Parent',a1,'Style','Edit','String',initial_text,...
    'Position',[130,y_pos,150,20],'BackgroundColor',[1 1 1],...
    'FontSize',11,'HorizontalAlignment','Center');
world1 = make_edit('IEEE',220);
world2 = make_edit('2012',180);
world3 = make_edit('',140);
world4 = make_edit('',100);
world5 = make_edit('',60);
world6 = make_edit('',20);

% status line at the bottom of the window; updated while a search runs
searching_text = uicontrol('Parent',figure_1,'Style','text','String','Found: 000',...
    'Position',[20,40,350,30],'BackgroundColor',panel_bg,...
    'ForegroundColor',[.3 .8 .3],'FontSize',13,'HorizontalAlignment','Center');
drawnow;

% state shared with the nested callbacks below
found=0; % number of files copied by the current search
files=dir('testing_folder'); % listing of testing_folder taken when the GUI opens (includes '.' and '..')
brk_loop=0; % set to 1 by the stop button to abort a running search
%////////////// call back functions /////////////////////////////////////////
function start_data_cap(varargin) % callback for the "start search" pushbutton
% START_DATA_CAP  Copy every PDF that contains all entered keywords.
%   Reads the non-empty keyword boxes (world1..world6), extracts the text
%   of each file in <current folder>/testing_folder with Apache PDFBox and
%   copies a file to <current folder>/destination_folder when every
%   keyword occurs in its text at least once (case-sensitive strfind).
%   Files that cannot be loaded, are encrypted, or whose text cannot be
%   extracted are skipped. The status line shows the running count and
%   the stop button (brk_loop) aborts the loop between two files.
%   varargin receives the (unused) source handle and event data.
busy_color = [.9 .3 .3];
idle_color = [.7 .7 .7];
set(sel_image_main9,'BackgroundColor',busy_color); % red button = search in progress
drawnow
brk_loop = 0; % discard a stop request issued while no search was running
found = 0;

% ---- collect the non-empty keywords -------------------------------------
entered = get([world1, world2, world3, world4, world5, world6],'String');
key_words = entered(~cellfun('isempty',entered));
if isempty(key_words)
    % nothing to search for; previously this failed on an undefined variable
    set(searching_text,'String','Enter at least one keyword');
    set(sel_image_main9,'BackgroundColor',idle_color);
    drawnow
    return
end

% ---- make PDFBox available (only once per MATLAB session) ---------------
base_dir = pwd;
jar_names = {'pdfbox-1.1.0.jar','fontbox-1.1.0.jar'};
loaded_jars = javaclasspath('-dynamic');
for j = 1:numel(jar_names)
    jar_path = fullfile(base_dir,jar_names{j}); % javaaddpath expects a full path
    if ~any(strcmp(jar_path,loaded_jars))
        javaaddpath(jar_path);
    end
end
try
    stripper = org.apache.pdfbox.util.PDFTextStripper;
catch
    % jars missing or not loadable: report it instead of leaving the button red
    set(searching_text,'String','PDFBox jars not found');
    set(sel_image_main9,'BackgroundColor',idle_color);
    drawnow
    return
end

% ---- list the candidate files -------------------------------------------
% The folder is re-read on every search and directory entries ('.', '..',
% sub-folders) are removed by their isdir flag rather than by position.
% Absolute paths are used because the Java VM's working directory is not
% necessarily MATLAB's current folder.
source_dir = fullfile(base_dir,'testing_folder');
target_dir = fullfile(base_dir,'destination_folder');
listing = dir(source_dir);
listing = listing(~[listing.isdir]);
total = numel(listing);
if ~exist(target_dir,'dir')
    mkdir(target_dir);
end

checked = 0;
for n = 1:total
    set(searching_text,'String',sprintf('Found:%d. .evaluating:%d. .out of:%d',found,n,total));
    drawnow % also lets a pending stop/exit/close callback run
    if ~ishandle(searching_text)
        return % the window was closed while searching
    end
    if brk_loop == 1 % stop button pressed
        brk_loop = 0;
        break;
    end
    checked = n;
    src_path = fullfile(source_dir,listing(n).name);
    dst_path = fullfile(target_dir,listing(n).name);

    % extract the text; any failure simply skips the file
    pdf_text = '';
    text_ok = false;
    doc_open = false;
    try
        pdf_doc = org.apache.pdfbox.pdmodel.PDDocument.load(src_path);
        doc_open = true;
        if ~pdf_doc.isEncrypted()
            pdf_text = char(stripper.getText(pdf_doc));
            text_ok = true;
        end
    catch
        % not a PDF or unreadable
    end
    if doc_open
        % always release the document, also for encrypted/unreadable files
        try
            pdf_doc.close();
        catch
        end
    end
    if ~text_ok
        continue;
    end

    % a file matches when every keyword occurs at least once
    has_all = all(cellfun(@(kw) ~isempty(strfind(pdf_text,kw)),key_words));
    if has_all
        [copied,copy_msg] = copyfile(src_path,dst_path);
        if copied
            found = found + 1;
            disp('copied one')
        else
            warning('pdfsearch:copyFailed','Could not copy %s: %s',src_path,copy_msg);
        end
    end
end

% ---- final status and idle button colour --------------------------------
if ishandle(searching_text)
    set(searching_text,'String',sprintf('Found:%d. .evaluating:%d. .out of:%d',found,checked,total));
end
if ishandle(sel_image_main9)
    set(sel_image_main9,'BackgroundColor',idle_color);
end
drawnow
end
function stop_data_cap(varargin)
% STOP_DATA_CAP  Callback for the "stop search" pushbutton.
%   Raises the shared brk_loop flag; the search loop in start_data_cap
%   tests this flag and leaves the loop when it is 1.
%   varargin receives the (unused) callback arguments.
    brk_loop = 1;
end
function exit_routine(varargin)
% EXIT_ROUTINE  Callback for the "exit" pushbutton: close the search window.
%   The previous implementation called exit, which terminates the entire
%   MATLAB session (and discards the user's workspace) instead of just
%   leaving this tool. Now only the GUI figure is closed.
%   varargin receives the (unused) callback arguments.
brk_loop=1; % ask a search that may still be running to stop at its next check
delete(gcbf); % gcbf is the figure that owns the button whose callback is executing
end
end