image thumbnail

ARMADA Data Mining Tool version 1.4

by

 

13 Feb 2003 (Updated )

An association rule data mining tool for experimentation and analysis.

performMiningAnalysis(file_data,min_confidence,min_support,sup_type)
%-------------------------------------------------------------------------------------
%ARMADA - Association Rule Mining And Deduction Analysis
%Description:	Data Mining Tool for extraction of association rules and analysis
%					of deduction methods.
%Author &
%Copyright: 	James Malone
%Date:			14/04/03
%Version:		1.2
%-------------------------------------------------------------------------------------

%-------------------------------------------------------------------------
%performMiningAnalysis.m
%
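%Mines the given data set for all goals. Control module that calls the
%individual mining functions and returns the results (ordered rules,
%candidates, time taken, number of sets) to the caller for the mining
%report; displayRules is not called from this file.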
function mine_data = performMiningAnalysis(file_data,min_confidence,min_support,sup_type)
%PERFORMMININGANALYSIS Control module for association rule mining.
%   mine_data = performMiningAnalysis(file_data,min_confidence,min_support,sup_type)
%
%   Inputs:
%     file_data      - data set to mine; each row is counted as one set and
%                      is sorted before mining starts
%     min_confidence - minimum confidence, passed on to genRuleVariants and
%                      orderRules
%     min_support    - minimum support; converted from a percentage of the
%                      number of rows to a number when sup_type == 2
%     sup_type       - 2 means min_support is given as a percentage; any
%                      other value leaves min_support unchanged
%
%   Output (cell array with four elements):
%     mine_data{1} - ordered rules; [] when file_data is empty, when no
%                    rules were found or survived, or when an error occurred
%     mine_data{2} - candidates (0 when mining never produced any)
%     mine_data{3} - time taken in seconds, as measured by tic/toc
%     mine_data{4} - number of sets (rows of file_data)
%
%   mine_data is assigned on every path, including the early "no rules"
%   exits, so callers always receive a complete result.

%Start timer
tic;

%Size of the data set: rows are sets, columns give the maximum set length
no_sets = size(file_data,1);
max_length = size(file_data,2);

%If min support is as percentage convert to number for calc.
if sup_type == 2
   min_support = (no_sets/100)* min_support;
end

%Sort stored file to increase speed of mining----------------
for a = 1:no_sets
   file_data(a,:) = sort(file_data(a,:));
end
%------------------------------------------------------------

%Run the mining steps. The subfunction may stop early (empty data, no
%rules, error) but always returns both values, so the report data below
%is built in every case.
[ordered_rules,candidates] = mineOrderedRules(file_data,no_sets,max_length,min_confidence,min_support);

%Finish timing the mining process. No trailing semicolon: the elapsed
%time is echoed to the command window.
time_taken = toc

%Return the final rules data to produce mining report
mine_data{1} = ordered_rules;
mine_data{2} = candidates;
mine_data{3} = time_taken;
mine_data{4} = no_sets;

%-------------------------------------------------------------------------
%mineOrderedRules (private subfunction)
%
%Performs the mining steps and returns the ordered rules together with the
%candidates. Both outputs are initialised first so that every early return
%and the error handler hand back defined values.
function [ordered_rules,candidates] = mineOrderedRules(file_data,no_sets,max_length,min_confidence,min_support)

%Defaults used when mining stops early or an error occurs
candidates = 0;
ordered_rules = [];

%Nothing to mine in an empty data set - report the defaults
if isempty(file_data)
   return;
end

try
   fprintf('Beginning mining...\n')

   %Get first elements to begin comparisons - read in first line
   candidates = readFirstLine(file_data,max_length);
   %------------------------------------------------------------

   %Count instances of one set----------------------------------
   candidates = genOneCand(file_data,candidates,no_sets,max_length,min_support);
   cand_length = size(candidates,1);
   %If there are no candidates or only 1 then halt mining, as there
   %will be no rules from only one item. For an array-valued condition
   %"if" is true only when every element is true.
   if (candidates == 0 | cand_length == 1)
      fprintf('No rules');
      return;
   end
   %------------------------------------------------------------

   %For generating 2 item sets----------------------------------
   rules{1} = genTwoCand(file_data,candidates,cand_length,min_support);
   %Remove counts from end of array for next comparisons if there are any
   if (rules{1} ~= 0)
      new_candidates = rules{1}(:,1:2);
   else
      %Stop here if there are no two LHS rules because possible
      %RHS values do not need to be generated
      return;
   end
   %------------------------------------------------------------

   %For generating 3 and more item sets-------------------------
   if (max_length > 2)
      temp_rules = genMultiCand(file_data,new_candidates,max_length,min_support);
      %Perform initial test to see if temp_rules has been added to, and therefore
      %there are new rules to add and if so concatenate rules into rules variable
      if temp_rules{1} ~= 0
         rules = cat(2,rules,temp_rules);
      end
   end
   %------------------------------------------------------------
   fprintf('Finished Generating Rules:\n');

   %Generate rule variants for minimum support------------------
   fprintf('Beginning generation of rule variants..\n');
   final_rules = genRuleVariants(rules,candidates,min_confidence);
   %------------------------------------------------------------
   fprintf('Mining completed.');

   %Order rules as specified by user removing below min_confidence
   %rules for each set of rules starting with 1LHS & format for displaying
   empty_flag = 1;
   for no = 1:size(final_rules,2)
      if ~isempty(final_rules{no})
         ordered_rules{no} = orderRules(final_rules{no}{1},final_rules{no}{2},final_rules{no}{3},final_rules{no}{4},min_confidence);
         if ~isempty(ordered_rules{no})
            empty_flag = 0;
         end
      end
   end

   %If no rules have survived set ordered_rules to empty
   if empty_flag == 1
      ordered_rules = [];
   end
catch
   %Echo the error message, then warn the user
   lasterr
   fprintf('WARNING: Error occured while mining rules\n');
   %Set ordered_rules to empty, indicating an error to later functions
   ordered_rules = [];
end

%End----------------------------------------------------------------------

Contact us