function gutenberg_download_zip
% GUTENBERG_DOWNLOAD_ZIP opens the file gutenberg_files.mat produced by
% the function gutenberg_pages (a copy of the file is included in this
% package, updated to the 7th of January 2009) and begins to download all
% zip files which have been found. After the download, a database with
% information regarding the archive is generated.
%
% The function takes no input arguments and returns nothing. It reads the
% variables file_name_zip, file_address_zip and file_n_zip from
% gutenberg_files.mat, downloads every unique file whose name ends in
% 'zip' or 'ZIP' into the subfolder gutenberg_zip_files, and writes the
% tab-separated database gutenberg_files_zip.txt into that same subfolder.
%
% NOTE: the function changes the current folder to gutenberg_zip_files and
% does not change back when it finishes.
%
% An error is raised if the subfolder cannot be created or if the database
% file cannot be opened for writing. A failed download is not an error: the
% corresponding row of the database is flagged instead.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% EXAMPLE:
% gutenberg_pages
% gutenberg_download_zip
%
% OR, BETTER:
%
% load('gutenberg_files', 'file_name_zip', 'file_address_zip', 'file_n_zip');
% gutenberg_download_zip
%
% DOWNLOADING ALL THE ZIP ARCHIVES MIGHT TAKE --> SEVERAL HOURS <--
% AFTER SUCH A HUGE DOWNLOAD --> MANY GIGABYTES <-- PLEASE KEEP ONE COPY
% FOR YOU AND GIVE ONE TO A FRIEND, OR A LIBRARY, OR A SCHOOL, OR A
% PASSER-BY.
% CONTRIBUTE TO DISTRIBUTE.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% IMPORTANT: PLEASE, REMEMBER THAT GUTENBERG DVDS CAN BE DOWNLOADED FOR
% FREE FROM:
% http://www.gutenberg.org/wiki/Gutenberg:The_CD_and_DVD_Project
%
% THE DVDS DO NOT CONTAIN THE MP3 FILES
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% YOU CAN GET AN OFFLINE VERSION OF THE PROJECT GUTENBERG WEB SITE:
% http://www.gutenberg.org/wiki/Gutenberg:Feeds
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% YOU CAN GET THE PROJECT GUTENBERG CATALOG DATA:
% http://www.gutenberg.org/wiki/Gutenberg:Feeds
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This is a very rudimentary way of downloading files from the Project
% Gutenberg (http://www.gutenberg.org).
% If you know of a more elegant/efficient way to obtain the same result
% and you want to share I would greatly appreciate it. If you can show me
% how to do the same (or better) in a different programming language (C,
% Java, etc.), I will greatly appreciate it.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% Author: Francesco Pozzi %
% E-Mail: francesco.pozzi@anu.edu.au %
% Date: 6 January 2009 %
% %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load('gutenberg_files', 'file_name_zip', 'file_address_zip', 'file_n_zip')

% Exclude doubles: indexes(k) is the position in file_name_zip of the entry
% chosen to represent the k-th unique file name.
[file_name_unique, indexes] = unique(file_name_zip);

% A subdirectory for zip files. Requesting the status outputs keeps mkdir
% quiet when the folder already exists, while a real failure is reported
% instead of being silently swallowed.
[mkdir_ok, mkdir_msg] = mkdir('gutenberg_zip_files');
if ~mkdir_ok
    error('gutenberg_download_zip:mkdirFailed', ...
        'Cannot create folder gutenberg_zip_files: %s', mkdir_msg);
end
cd('gutenberg_zip_files')

% Start the timer
tic;

% downloaded(k) is true only when entry k of file_name_zip was actually
% fetched. Doubles, broken links and names that are not zip files all stay
% false.
downloaded = false(1, file_n_zip);

% Download all
for i = 1:numel(indexes)
    name = file_name_unique{i};
    % The length guard avoids an indexing error on names shorter than the
    % three-character extension.
    is_zip = numel(name) >= 3 && ...
        any(strcmp(name((end-2):end), {'zip', 'ZIP'}));
    if is_zip
        try
            % Download
            urlwrite(file_address_zip{indexes(i)}, name);
            downloaded(indexes(i)) = true;
        catch
            % Broken hyperlink: the entry simply stays flagged as not
            % downloaded.
        end
    end
    % Please, make a pause between a download and the next: on average no
    % more than one loop iteration every two seconds.
    a = toc;
    if a < 2 * i
        pause(2 * i - a);
    end
end

% After the download, generate a database containing address of file on the
% internet, date of download, size in bytes and if the file was
% successfully downloaded.
fid = fopen('gutenberg_files_zip.txt', 'w');
if fid == -1
    error('gutenberg_download_zip:fopenFailed', ...
        'Cannot open gutenberg_files_zip.txt for writing.');
end
try
    fprintf(fid, 'Address\tDate\tBytes\tDouble or broken link\n');
    for i = 1:file_n_zip
        info = [];
        if downloaded(i)
            info = dir(file_name_zip{i});
        end
        % Data is always passed through an explicit '%s' / '%d' format so
        % that '%' or '\' characters inside a URL are written literally and
        % are never interpreted as format directives.
        if ~isempty(info)
            fprintf(fid, '%s\t%s\t%d\t\n', ...
                file_address_zip{i}, info(1).date, info(1).bytes);
        else
            % Double, broken link, skipped (not a zip file) or missing on
            % disk.
            fprintf(fid, '%s\t\t\tYes\n', file_address_zip{i});
        end
    end
catch err
    % Do not leak the file handle when writing fails.
    fclose(fid);
    rethrow(err);
end
fclose(fid);