MATLAB Examples

Contents

Create Simple Deep Learning Network for Classification

and Generate Code to Run Algorithms on Raspberry Pi

This example shows how to create and train a simple convolutional neural network for defect detection, and how to generate code that runs the pre-/post-processing image algorithms and the convolutional neural network on a Raspberry Pi.

This example demonstrates how to: #1. Load and explore image data. #2. Define the network architecture. #3. Train the network. #4. Confirm that the trained network works well on new data. #5. Walk through the whole algorithm, which consists of pre-processing, CNN and post-processing. #6. Generate C++ code for the whole algorithm to test it on the host machine. #7. Generate C++ code for the ARM target (Raspberry Pi). #8. Build and run the executable on the Raspberry Pi.

% Start from a clean state: clear the workspace, close figures and Image
% Viewer windows, clear the command window, and reset the random number
% generator to its default settings.
clear;
close all;
imtool close all;
clc;
rng('default');

Load the sample data as an image datastore

% One sub-folder of ./images per class; the folder name is used as the label.
categ = {'ok', 'ng'};
imds = imageDatastore(fullfile(pwd, 'images', categ), ...
    'IncludeSubfolders', true, ...
    'LabelSource', 'foldernames')   % no semicolon: print the datastore summary
countEachLabel(imds)                % print the number of images per label
imds = 

  ImageDatastore with properties:

                       Files: {
                              '/home/keitaro/work/nutsDet/FileExchange/Submit/images/ok/frame1.png';
                              '/home/keitaro/work/nutsDet/FileExchange/Submit/images/ok/frame10.png';
                              '/home/keitaro/work/nutsDet/FileExchange/Submit/images/ok/frame100.png'
                               ... and 779 more
                              }
                      Labels: [ok; ok; ok ... and 779 more categorical]
    AlternateFileSystemRoots: {}
                    ReadSize: 1
                     ReadFcn: @readDatastoreImage


ans =

  2×2 table

    Label    Count
    _____    _____

     ng       509 
     ok       273 

Display some of the images of defective products (hexagon nuts) in the datastore

These nuts have scratches or scoring on the surface

% Show 20 randomly selected 'ng' images in a 4-by-5 grid.
figure;
idx = find(imds.Labels == 'ng');      % datastore indices of all 'ng' images
perm = randperm(numel(idx), 20);      % 20 distinct random positions in idx
for i = 1:numel(perm)
    subplot(4, 5, i);
    imshow(readimage(imds, idx(perm(i))));
end

Specify Training and Validation Sets

Separate the sets into training and validation data. Pick 90% of images from each set for the training data and the remainder, 10%, for the validation data

% Randomly assign 90% of the files of each label to trainingSet and the
% remaining files to validationSet.
[trainingSet, validationSet] = splitEachLabel(imds, 0.9, 'randomize');

Define Network Architecture

% CNN for 128x128 single-channel images: two convolution/ReLU/max-pooling
% stages followed by two fully connected layers and a 2-class softmax output.
layers = [
    imageInputLayer([128 128 1])

    convolution2dLayer(5, 20)
    reluLayer
    maxPooling2dLayer(2, 'Stride', 2)
    crossChannelNormalizationLayer(5, 'K', 1)

    convolution2dLayer(5, 20)
    reluLayer
    maxPooling2dLayer(2, 'Stride', 2)

    fullyConnectedLayer(512)
    fullyConnectedLayer(2)
    softmaxLayer
    classificationLayer];

Data Augmentation

% Augment the training images with random horizontal/vertical reflections
% and random rotations in the range [-180, 180] degrees.
augmenter = imageDataAugmenter( ...
    'RandXReflection', true, ...
    'RandYReflection', true, ...
    'RandRotation', [-180 180]);

% Training source that applies the augmenter and outputs 128x128x1 images.
datasource = augmentedImageSource([128 128 1], trainingSet, ...
    'DataAugmentation', augmenter);

Specify Training Options

% Train with stochastic gradient descent with momentum ('sgdm') for up to
% 1000 epochs. The network is evaluated on validationSet every 30
% iterations; 'ValidationPatience' of Inf means validation results never
% stop training early. Progress is shown in a plot instead of as text.
options = trainingOptions('sgdm', ...
    'MaxEpochs',1000, ...
    'InitialLearnRate',0.001, ...
    'ValidationData',validationSet, ...
    'ValidationFrequency',30, ...
    'ValidationPatience',Inf, ...
    'Verbose',false, ...
    'Plots','training-progress');

Train Network Using Training Data

% Set trainnet to false to skip training and load a saved network instead.
trainnet = true;
if trainnet
    convnet = trainNetwork(datasource,layers,options);
else
    % Load pre-trained network
    % NOTE(review): presumably NDNet3.mat defines the variable convnet that
    % the rest of this script uses - confirm.
    load('NDNet3.mat')
end

Predict the labels of the validation data using trained network

% Classify the validation images and compare against their true labels.
predictedLabels = classify(convnet, validationSet);
valLabels = validationSet.Labels;

% Fraction of validation images whose predicted label matches (displayed).
accuracy = mean(predictedLabels == valLabels)
accuracy =

    0.9744

Walk through the whole algorithm that consists of pre-processing,

CNN and post-processing.

Read a new image captured by the webcam. The images used to train the network were pre-processed to fit the network's input image size. The original image captured by the webcam looks like this:

% Read the full-size test frame and show it.
img = imread('testImg.png');
figure, imshow(img)

% The network input is 128x128x1, so we need to extract where the nuts are
% as ROIs before passing the image to the network.

Pre-processing for captured image from webCam

myNDNet_Preprocess extracts the ROIs using traditional image processing. Since the C code generated from this MATLAB code will be integrated into main.cpp, which uses the OpenCV image format, some format conversion needs to be considered.

% Frame geometry, later passed to the post-processing and code generation
% steps (presumably width, height and channel count of the camera frame).
wi = uint32(320);
he = uint32(240);
ch = uint32(3);

% Convert the MATLAB image with the project helper mat2ocv (the OpenCV
% format conversion mentioned above), then run the pre-processing.
% NOTE(review): both helpers are defined outside this file. The code below
% uses imgPacked as a flat buffer of up to four 128x128 patches, num as the
% number of detected nuts, and bbox as 16 values - confirm the rest.
img = mat2ocv(img);
[Iori, imgPacked, num, bbox] = myNDNet_Preprocess(img);

Display pre-processed image(extracted nuts)

Row-major to Column-major conversion to show the image on MATLAB

% imgPacked is read as four 128x128 patches stored back to back with the
% column index varying fastest (row-major). reshape fills its first
% dimension fastest, so reshaping to [128 128 4] yields each patch
% transposed; permute swaps the first two dimensions to obtain MATLAB's
% column-major layout. Only the first 128*128*4 elements are used and the
% result is uint8, exactly as with the element-by-element copy.
imgPacked2 = permute( ...
    reshape(uint8(imgPacked(1:128*128*4)), [128 128 4]), [2 1 3]);

% Show the num extracted patches side by side in a single row.
figure;
for i = 1:num
    subplot(1, num, i);
    imshow(imgPacked2(:, :, i));
end

Classify detected nuts by using pretrained network

% One column of class scores per patch. The array is preallocated for four
% patches; columns beyond num stay zero.
scores = zeros(2,4);
for i = 1:num
    % predict returns the two class scores for patch i
    scores(:,i) = predict(convnet, imgPacked2(:,:,i));
end
scores   % no semicolon: print the score matrix
scores =

    0.0000    1.0000         0         0
    1.0000    0.0000         0         0

Insert the labels in an image as a post-processing step

% Flatten the inputs to row vectors; these shapes match the fixed-size
% argument types ([1 x he*wi*ch], [1 x 16], [1 x 8]) that codegenAll
% declares for myNDNet_Postprocess.
Iori = reshape(Iori, [1, he*wi*ch]);
bbox = reshape(bbox, [1,16]);
scores = reshape(scores, [1, 8]);

out = myNDNet_Postprocess(Iori, num, bbox, scores, wi, he, ch);

Display post-processed image

% Convert the post-processing output back for display with the project
% helper ocv2mat (presumably the inverse of mat2ocv - confirm), using the
% frame size [height width channels].
sz = [he wi ch];
out = ocv2mat(out,sz);

figure, imshow(out)

% The hexagon nut on the right is defective. We can see that the whole
% algorithm works well on this result.

Codegen for pre-processing, CNN and post-processing

Now we are ready to generate C code. Before targeting Raspberry Pi, generate CUDA C to confirm that the generated code works well on the host machine.

% armtarget = false makes codegenAll use the 'cudnn' target library for the
% network, i.e. code for the host machine.
armtarget = false;
codegenAll(convnet, wi, he, ch, armtarget)

Build and Run on Host machine

running at 25FPS on GPU enabled laptop(GeForce 930M)

% Build the host executable with the Makefile's cudnn target, then run it
% on the test image (arguments: 3 testImg.png).
system(['make ','cudnn']);
system(['./nutsDet_exe ','3 ','testImg.png']);
nvcc -o nutsDet_exe main_nutsDet.cpp \
         -Icodegen codegen/cnnbuild.a \
         -Icodegen codegen/lib/myNDNet_Preprocess/myNDNet_Preprocess.a \
         -Icodegen codegen/lib/myNDNet_Postprocess/myNDNet_Postprocess.a \
         -lcublas \
         -I./codegen/lib/myNDNet_Preprocess \
         -I./codegen/lib/myNDNet_Postprocess \
         -I"/usr/local/cudnn7/include" -L"/usr/local/cudnn7/lib64" -lcudnn \
         -I"/extern/include" \
         -I"/usr/local/include" -L"/usr/local/lib" -lopencv_imgproc -lopencv_core -lopencv_highgui -lopencv_video -lopencv_videoio -lopencv_objdetect -lopencv_imgcodecs  \
         -Wno-deprecated-gpu-targets
init done 
opengl support available 

Display output image

% Read outImg.png (presumably written by the executable run above) and show it.
outImg = imread('outImg.png');
figure, imshow(outImg)

Generate Code for ARM Targets

Code generation for ARM processors using ARM Compute Library is done on host machine, but the build and execution is performed on the target platform by copying all the generated files to the platform

% armtarget = true makes codegenAll use the 'arm-compute' target library and
% generate source code only (no build on the host).
armtarget = true;
codegenAll(convnet, wi, he, ch, armtarget)
### Codegen Successfully Generated for arm device
Code generation successful: To view the report, open('codegen/lib/myNDNet_Preprocess/html/report.mldatx').
Code generation successful: To view the report, open('codegen/lib/myNDNet_Postprocess/html/report.mldatx').

Build the lib on Raspberry Pi3

Move the codegen folder and all the desired files from the host machine to the target platform. system('sshpass -p [password] scp (sourcefile) [username]@[hostname]:~/');

% Set cpfiles to true to copy the sources to the Raspberry Pi and build the
% generated libraries there (requires sshpass on the host).
cpfiles = false

if cpfiles
    % Connection settings for the target; adjust to your environment.
    hostinfo = 'kotsuka@172.18.91.247';
    password = 'password';
    basedir  = '/home/kotsuka/demos/nutsDet';

    copyfiles(hostinfo, password, basedir)
end

% You may need to set the ARM_COMPUTELIB environment variable on the target platform
% so that it points to the ARM Compute Library install path.
cpfiles =

  logical

   0

Build and run the exe on Raspberry Pi3

% make -C [basedir] arm_neon

% Run the executable with an input image file.

% ./nutsDet_exe 3 testImg.png

% Or if you have webCam connected to Raspberry Pi, you can use captured
% image from webCam by using the following command.

% ./nutsDet_exe 1 1

Supporting Functions

function codegenAll(convnet, wi, he, ch, armtarget)
% codegenAll  Generate code for the network and the pre-/post-processing.
%
%   codegenAll(convnet, wi, he, ch, armtarget) runs cnncodegen on the
%   network and MATLAB Coder on myNDNet_Preprocess and myNDNet_Postprocess.
%
%   Inputs:
%     convnet   - trained network passed to cnncodegen.
%     wi, he, ch - image width, height and number of channels. They size the
%                 image arguments of both functions and are passed to
%                 myNDNet_Postprocess as compile-time constants.
%     armtarget - true: use the 'arm-compute' target library and generate
%                 source code only; false: use the 'cudnn' target library.

    % Network code: ARM Compute Library or cuDNN.
    if armtarget
        cnncodegen(convnet,'targetlib','arm-compute');
    else
        cnncodegen(convnet,'targetlib','cudnn');
    end

    % Static-library configuration (Embedded Coder) shared by both functions.
    cfg = coder.config('lib','ecoder',true);
    cfg.GenerateCodeMetricsReport = true;
    cfg.GenerateReport = true;
    cfg.ReportPotentialDifferences = false;
    cfg.TargetLang = 'C++';
    cfg.EnableOpenMP = false;
    if armtarget
        % The ARM libraries are built on the target, not on the host.
        cfg.GenCodeOnly = true;
    end
    cfg.TargetLangStandard = 'C++03 (ISO)';
    % NOTE(review): the ARM hardware settings are applied to the host build
    % as well (armtarget == false) - confirm this is intended.
    cfg.HardwareImplementation.ProdHWDeviceType = 'ARM Compatible->ARM Cortex';
    cfg.HardwareImplementation.TargetHWDeviceType = 'ARM Compatible->ARM Cortex';

    % Derive the argument sizes from the inputs instead of hard-coding
    % 240x320x3. Convert to double first so the size arithmetic is not done
    % in the integer class of the inputs.
    imgSize = double([he, wi, ch]);
    numPix  = prod(imgSize);

    % Pre-processing takes one he-by-wi-by-ch uint8 image.
    ARGS = cell(1,1);
    ARGS{1} = cell(1,1);
    ARGS{1}{1} = coder.typeof(uint8(0), imgSize);

    % Invoke MATLAB Coder.
    codegen -config cfg myNDNet_Preprocess -args ARGS{1} -nargout 4

    % Post-processing: flattened uint8 image, a double scalar, a 1x16 and a
    % 1x8 double row vector, and the image dimensions as constants.
    ARGS{2} = cell(7,1);
    ARGS{2}{1} = coder.typeof(uint8(0),[1 numPix]);
    ARGS{2}{2} = coder.typeof(0);
    ARGS{2}{3} = coder.typeof(0,[1 16]);
    ARGS{2}{4} = coder.typeof(0,[1 8]);
    ARGS{2}{5} = coder.Constant(wi);
    ARGS{2}{6} = coder.Constant(he);
    ARGS{2}{7} = coder.Constant(ch);

    % Invoke MATLAB Coder.
    codegen -config cfg myNDNet_Postprocess -args ARGS{2} -nargout 1
end

function copyfiles(hostinfo, password, basedir)
% copyfiles  Copy the sources to the target and build the libraries there.
%
%   copyfiles(hostinfo, password, basedir) copies main_nutsDet_arm.cpp,
%   testImg.png, Makefile and the codegen folder to basedir on the target
%   with scp, then runs the generated makefiles on the target over ssh.
%
%   Inputs:
%     hostinfo - target login in the form 'user@host'.
%     password - password handed to sshpass. NOTE: it is placed on the
%                command line, so it is visible to other users of the host
%                while a command runs.
%     basedir  - destination directory on the target.
%
%   A command that exits with a non-zero status produces a warning; the
%   remaining commands are still attempted.

    sshpassPrefix = ['sshpass -p ' password ' '];
    remoteDir     = [hostinfo ':' basedir];
    remoteMake    = ['ssh ' hostinfo ' "make -C ' basedir];

    cmds = { ...
        % Copy the sources and the generated code to the target.
        ['scp main_nutsDet_arm.cpp ' remoteDir], ...
        ['scp testImg.png ' remoteDir], ...
        ['scp Makefile ' remoteDir], ...
        ['scp -r codegen ' remoteDir], ...
        % Build each library in the folder that contains its makefile.
        [remoteMake '/codegen/lib/myNDNet_Preprocess -f myNDNet_Preprocess_rtw.mk"'], ...
        [remoteMake '/codegen/lib/myNDNet_Postprocess -f myNDNet_Postprocess_rtw.mk"'], ...
        [remoteMake '/codegen -f cnnbuild_rtw.mk"']};

    for k = 1:numel(cmds)
        status = system([sshpassPrefix cmds{k}]);
        if status ~= 0
            % Report the command without the sshpass prefix so the password
            % does not end up in the warning text.
            warning('copyfiles:commandFailed', ...
                'Command failed with exit status %d: %s', status, cmds{k});
        end
    end

end
nvcc -c  -rdc=true  -Xcompiler -fPIC -Xcudafe "--diag_suppress=unsigned_compare_with_zero" -O0 -g -G -arch sm_35  -I"/home/keitaro/work/nutsDet/FileExchange/Submit/codegen" -I"/usr/local/cudnn7/include" -o "cnn_api.o" "cnn_api.cpp"
nvcc -c  -rdc=true  -Xcompiler -fPIC -Xcudafe "--diag_suppress=unsigned_compare_with_zero" -O0 -g -G -arch sm_35  -I"/home/keitaro/work/nutsDet/FileExchange/Submit/codegen" -I"/usr/local/cudnn7/include" -o "MWCNNLayerImpl.o" "MWCNNLayerImpl.cu"
nvcc -c  -rdc=true  -Xcompiler -fPIC -Xcudafe "--diag_suppress=unsigned_compare_with_zero" -O0 -g -G -arch sm_35  -I"/home/keitaro/work/nutsDet/FileExchange/Submit/codegen" -I"/usr/local/cudnn7/include" -o "MWTargetNetworkImpl.o" "MWTargetNetworkImpl.cu"
nvcc -c  -rdc=true  -Xcompiler -fPIC -Xcudafe "--diag_suppress=unsigned_compare_with_zero" -O0 -g -G -arch sm_35  -I"/home/keitaro/work/nutsDet/FileExchange/Submit/codegen" -I"/usr/local/cudnn7/include" -o "cnn_exec.o" "cnn_exec.cpp"
nvcc -lib -Xlinker -rpath,"/bin/glnxa64",-L"/bin/glnxa64" -lc -Xnvlink -w -Wno-deprecated-gpu-targets -g -G -arch sm_35  -o cnnbuild.a cnn_api.o MWCNNLayerImpl.o MWTargetNetworkImpl.o cnn_exec.o -L".." "/usr/local/cudnn7/lib64/libcudnn.so" -lcublas -lcudart -lcusolver 
### Created: cnnbuild.a
### Successfully generated all binary outputs.
Code generation successful: To view the report, open('codegen/lib/myNDNet_Preprocess/html/report.mldatx').
Code generation successful: To view the report, open('codegen/lib/myNDNet_Postprocess/html/report.mldatx').