Code covered by the BSD License  

Highlights from
OpenCL Toolbox v0.17

OpenCL Toolbox v0.17

by

 

20 Jan 2011 (Updated )

OpenCL Toolbox for MATLAB

clkernel
% clkernel is a class that represents an OpenCL kernel object.
% It is used to create a function out of the CL kernels that were compiled
% and sent to the GPGPU device using the opencl.addfile and opencl.build
% functions. 
% 
% An example usage:
%
%   ocl = opencl();
%   ocl.initialize();
%
%   ocl.addfile('cl/simple_add.cl');
%   ocl.build();
%
% % Create some data objects:
%
%   x = clobject(single(1:10));
%   y = clobject(single(11:20));
%   z = clobject(zeros(1,10, 'single'));
%
%   % z = x+y:
%   global_work_size = [10,0,0];
%   local_work_size = [10,0,0];
%
%   addkernel = clkernel('add', global_work_size, local_work_size);
%   addkernel(x,y,z, uint32(10));
%   
% % Fetch z values:
%   values = z.get();
%
% See also clkernel/clkernel, clkernel/subsref, clkernel/execute
%
% Author:Radford Ray Juang
%

% Copyright (C) 2011 by Radford Ray Juang
%
% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
% 
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
% 
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
% THE SOFTWARE.
classdef clkernel < handle
    % clkernel  Handle object wrapping a compiled OpenCL kernel.
    %
    % An instance is created from the name of a kernel that has already
    % been compiled (see the constructor help). The instance can then be
    % invoked like a function, f(arg1, arg2, ...), or explicitly through
    % f.execute(arg1, arg2, ...). All device work is delegated to the
    % external openclcmd function.

    properties
        % 1-based index of the device the kernel is executed on. execute
        % passes (device - 1) to openclcmd.
        device = 1
        % Kernel identifier returned by openclcmd('create_kernel', ...).
        id = [];
    end

    methods
        function self = clkernel(kernelname, global_dim, local_dim, target_device)
        % obj = clkernel(kernel_name)
        % obj = clkernel(kernel_name, global_work_size)
        % obj = clkernel(kernel_name, global_work_size, local_work_size)
        % obj = clkernel(kernel_name, global_work_size, local_work_size,
        %       target_device)
        %
        % Creates a kernel object that represents the compiled kernel
        % specified by kernel_name. This is the actual __kernel function
        % defined in the cl source files added using opencl.addfile and
        % compiled with opencl.build.
        %
        % kernel_name is a string containing the kernel function name in the
        % CL file.
        %
        % global_work_size is the number of global compute units you want to
        % use. This is a 1x3 array containing the number of compute units in
        % each direction. For example, if you have 256 processors, you can
        % have the processors divide up the workload into 4x4x16 blocks:
        %    global_work_size = [4,4,16]
        % Or, you can divide the data into 16x16 blocks:
        %    global_work_size = [16,16,0]
        % Or you can just divide the data into 256 blocks:
        %    global_work_size = [256,0,0]
        %
        % If this is unspecified or empty, the default is [128,0,0].
        %
        % local_work_size specifies the number of local work groups to
        % divide each global compute unit into. Think of this as being like
        % threads on a processor. The global_work_size specifies the number
        % of processors to spread the division of labor over, whereas the
        % local_work_size specifies the number of threads to divide the
        % work for each processor. Threads can communicate and share memory
        % with one another whereas global compute blocks cannot, and it is
        % possible for multiple threads within each global compute unit to
        % execute at a time.
        %
        % Again, if this is unspecified or empty, the default is [128,0,0].
        %
        % target_device is the index of the device to execute the kernel on.
        % If you initialized one device, you can safely ignore this
        % parameter. However, if you've initialized more than one device,
        % e.g.:
        %   ocl=opencl();
        %   ocl.initialize(1,[1,3,5]);
        %
        % then target_device=1 will execute the kernel on device 1
        %      target_device=2 will execute the kernel on device 3
        %      target_device=3 will execute the kernel on device 5
        %
        % If unspecified or empty, the first device index (1) is used.
        % NOTE: Use of multiple target devices has not been tested.
        %
        % Once a kernel has been created with, say:
        %   addkernel = clkernel('add', global_work_size, local_work_size);
        %
        % it can be executed by using the kernel object as a function:
        %
        %    addkernel(buffA, buffB, buffC, uint32(10));
        %
        % Constants must be cast to the type that the kernel requires.
        % Non-constant variables must be clbuffer or clobject instances.
        %
        % NOTE: kernel execution is non-blocking, so the call returns
        % regardless of whether kernel execution has completed.
        %

            % Missing and empty arguments are treated alike. The
            % short-circuit || keeps isempty from touching an argument that
            % was never supplied.
            % Automatically picking a work size is a bad idea in general;
            % callers should pass sizes that suit their kernel.
            if nargin < 2 || isempty(global_dim)
                global_dim = [128, 0, 0];
            end
            if nargin < 3 || isempty(local_dim)
                local_dim = [128, 0, 0];
            end
            if nargin < 4 || isempty(target_device)
                target_device = 1;
            end

            self.device = target_device;
            % Argument order expected by openclcmd here is local size
            % first, then global size, then the kernel name.
            self.id = openclcmd('create_kernel', uint32(local_dim), uint32(global_dim), kernelname);
        end

        function value = subsref(self, S)
            % Overrides MATLAB indexing so that obj(...) places the kernel
            % on the device queue via execute. For example, if a kernel is
            % created as follows:
            %    f = clkernel(kernelname, global_dims, local_dims);
            %
            % Place the execution of the kernel on the device queue by:
            %    f(arg1, arg2, arg3);
            %
            % To ensure the execution is complete, call opencl.wait:
            %
            %   ocl = opencl();
            %   ocl.initialize(1,1);
            %
            %      ...
            %
            %   ocl.wait();
            %
            % Dot references are handled here as well:
            %   f.method(args...)  calls the public method with those args
            %   f.method           calls the public method with no args
            %   f.property         returns the property value
            %
            % value is [] for kernel invocations and for methods that
            % declare no output. Any remaining subscripts in S are applied
            % to value afterwards.
            index = S(1);
            if strcmp(index.type, '.')
                name = index.subs;
                if ismethod(self, name)
                    % Method call: take the arguments from a directly
                    % following () subscript, if there is one.
                    if numel(S) > 1 && strcmp(S(2).type, '()')
                        args = S(2).subs;
                        S(1:2) = [];
                    else
                        args = {};
                        S(1) = [];
                    end

                    % Requesting an output from a method that declares none
                    % (for example execute) raises "Too many output
                    % arguments", so such methods are called without one.
                    if methodHasOutput(self, name)
                        value = feval(name, self, args{:});
                    else
                        feval(name, self, args{:});
                        value = [];
                    end
                else
                    % Property access.
                    S(1) = [];
                    value = self.(name);
                end
            else
                % Any non-dot subscript is treated as a kernel invocation
                % with the subscripts as kernel arguments.
                self.execute(index.subs{:});
                value = [];
                S = S(2:end);
            end

            if ~isempty(S)
                value = subsref(value, S);
            end
        end

        function execute(self, varargin)
            % obj.execute(arg1, arg2, ...)
            %
            % Place the execution of the kernel on the device queue
            % with the provided arguments.
            % arg1, ... are arguments to the kernel. Constant arguments must
            % be cast to the correct variable type before being passed.
            %
            % Non-constant arguments must be of type clbuffer or clobject.
            %
            % Raises an error if an argument is not a clbuffer, a clobject,
            % or one of the accepted built-in classes (double, single,
            % char, uint8/16/32/64, int8/16/32, logical).
            kernelid = self.id;

            for i = 1:numel(varargin)
                argnum = i - 1;          % openclcmd receives 0-based argument positions
                argval = varargin{i};

                % Defaults describe a by-value constant: no buffer, no
                % explicit byte count.
                bufferid = -1;
                data = [];
                nbytes = 0;

                % Exact class-name match, as before; subclasses are not
                % accepted.
                switch class(argval)
                    case 'clbuffer'
                        % Either a buffer with an id, or one with a negative
                        % id for which only a byte count is forwarded (the
                        % original author calls this the "local variable
                        % type").
                        bufferid = argval.id;
                        if bufferid < 0
                            nbytes = argval.num_bytes;
                        end
                    case 'clobject'
                        bufferid = argval.buffer.id;
                        if bufferid < 0
                            % NOTE(review): the id is read from
                            % argval.buffer but num_bytes from argval
                            % itself - confirm clobject exposes num_bytes,
                            % otherwise this should probably read
                            % argval.buffer.num_bytes.
                            nbytes = argval.num_bytes;
                        end
                    case {'double', 'single', 'char', ...
                          'uint8', 'uint16', 'uint32', 'uint64', ...
                          'int8', 'int16', 'int32', 'logical'}
                        % NOTE(review): int64 is not accepted although
                        % uint64 is - confirm whether openclcmd supports it
                        % before adding it here.
                        data = argval;
                    otherwise
                        error('clkernel:execute:invalidType', ...
                              'Invalid type: kernel argument %d has unsupported class ''%s''.', ...
                              i, class(argval));
                end

                openclcmd('set_kernel_args', kernelid, argnum, bufferid, data, int32(nbytes));
            end

            % self.device is 1-based; openclcmd is given device - 1.
            openclcmd('execute_kernel', self.device-1, self.id);
        end
    end

    methods (Access = private)
        function tf = methodHasOutput(self, name)
            % Returns false only when class metadata shows that the method
            % called name declares no output arguments. In every other case
            % (method not listed, metadata unavailable) it returns true,
            % which reproduces the earlier behaviour of always requesting
            % one output.
            tf = true;
            try
                mc = metaclass(self);
                list = mc.MethodList;
                for k = 1:numel(list)
                    if strcmp(list(k).Name, name)
                        tf = ~isempty(list(k).OutputNames);
                        return;
                    end
                end
            catch
                % Deliberately ignored: if the metadata query is not
                % supported, keep the default of requesting an output.
            end
        end
    end
end

Contact us