#! /usr/bin/perl

# Convert every not-yet-seen image in a source folder into a sequentially
# numbered JPEG in a destination folder.
#
# Usage: script sourcefolder destfolder
#
# Images are identified by the MD5 of their contents. Hashes of images that
# were converted successfully are appended to "<destfolder>/.hashes.txt"
# (one "hash|original_name" pair per line) so later runs can skip them.

use strict;
use warnings;

use Cwd qw(abs_path);
use Digest::MD5 qw(md5 md5_hex md5_base64);
use File::Basename;
use Image::Magick;
use List::Util qw(max);

# Allow stuff to be sent out immediately
BEGIN { $| = 1 }

# Return the hex MD5 digest of the contents of the file at the given path.
# Dies if the file cannot be opened.
sub md5_file {
    my ($path) = @_;

    # Three-argument open: the path is never interpreted as a mode or a pipe.
    open my $fh, '<', $path or die "can't open file $path\n";
    binmode $fh;
    my $hash = Digest::MD5->new->addfile($fh)->hexdigest;
    close $fh;

    return $hash;
}

# Return the sorted base names of the regular, non-hidden files in $folder
# (which must end in a slash) that carry one of the handled image extensions.
# The directory is read directly rather than globbed, so folder names that
# contain spaces or glob metacharacters work.
sub list_images {
    my ($folder) = @_;

    opendir my $dh, $folder or die "can't read folder $folder: $!\n";
    my @names = sort grep {
        !/\A\./
            && /\.(?:jpg|png|bmp|JPG|PNG|BMP)\z/
            && -f $folder . $_
    } readdir $dh;
    closedir $dh;

    return @names;
}

# Inspect a status value returned by an Image::Magick method. Prints any
# message it carries and returns true when the operation must be treated as
# failed. A message with an embedded code below 400 is only a warning.
sub magick_failed {
    my ($status, $action) = @_;

    return 0 unless defined $status && length "$status";

    warn "\n$action: $status\n";
    return 0 if "$status" =~ /(\d+)/ && $1 < 400;
    return 1;
}

# Get the two folders from the command line
if (@ARGV != 2) {
    print "Usage: ", __FILE__, " sourcefolder destfolder\n";
    exit 1;
}
my ($source_folder, $dest_folder) = @ARGV;

# Make sure trailing slashes are there
$source_folder .= '/' unless $source_folder =~ m{/$};
$dest_folder   .= '/' unless $dest_folder   =~ m{/$};

# Make sure they exist
die "Source folder ($source_folder) does not exist.\n"
    unless -d $source_folder;
die "Destination folder ($dest_folder) does not exist.\n"
    unless -d $dest_folder;

# Make sure they exist in a sane way. Resolved paths are compared so that
# different spellings of the same directory ("a/" vs "./a/") are caught too.
die "Source and destination folders cannot be the same, dumbfuck.\n"
    if abs_path($source_folder) eq abs_path($dest_folder);

# Gotta have write support
die "Destination folder is not writable\n" unless -w $dest_folder;

# Handle the list of hashes recorded by earlier runs
my $hash_list_file = $dest_folder . '.hashes.txt';
my %existing_hash_list;

if (-e $hash_list_file) {

    # It does exist; get contents
    open my $hash_in, '<:encoding(UTF-8)', $hash_list_file
        or die "can't read hash list $hash_list_file: $!\n";
    while (my $line = <$hash_in>) {
        chomp $line;

        # "<32 char hash>|<original file name>"; the name may contain any
        # character, including '$'.
        $existing_hash_list{$1} = $2
            if $line =~ m/^([a-z0-9]{32})\|(.+)$/;
    }
    close $hash_in;

    # Notif
    print "Loaded completed hash list with ",
        scalar(keys %existing_hash_list),
        " images to potentially skip\n";
}
else {

    # Create it if it doesn't exist
    open my $hash_new, '>:encoding(UTF-8)', $hash_list_file
        or die "can't create hash list $hash_list_file: $!\n";
    close $hash_new;

    # Notif
    print "Completed hash list not found; empty one created.\n";
}

# Gather images in the source folder
my @source_images = list_images($source_folder);

# Only numbered JPEGs in dest are useful; keep just their numeric stems
my @dest_stems =
    map { /\A(\d+)\.jpg\z/ ? $1 : () } list_images($dest_folder);

# Hash the source images; the first file seen for a hash wins, and anything
# already in the completed list is skipped.
my %source_hashes;
my $skip   = 0;
my $hashed = 0;
my $num    = scalar @source_images;

print "Hashing source images..\n";
for my $name (@source_images) {
    my $hash = md5_file($source_folder . $name);

    if (exists $source_hashes{$hash} || exists $existing_hash_list{$hash}) {
        $skip++;
    }
    else {
        $source_hashes{$hash} = $name;
    }

    $hashed++;
    print "\r$hashed/$num";
}

# See if we're skipping any
print "\nSkipping $skip verified images" if $skip > 0;

# Prepare for the iteration
my $num_work     = scalar keys %source_hashes;
my $name_length  = length $num_work;
my $name_iterate = 1;

# Start numbering after the highest existing number. The maximum is taken
# numerically: a lexical "last" would rank 99.jpg after 100.jpg and cause
# existing output to be overwritten.
if (@dest_stems) {
    $name_length  = max($name_length, map { length $_ } @dest_stems);
    $name_iterate = max(@dest_stems) + 1;
}

# Actually do the conversion
print "\nConverting/Moving..\n";

# Append known hashes, if the list can be written to. This is best effort:
# without it the conversion still runs, it just cannot be skipped next time.
my $hash_out;
if (-w $hash_list_file) {
    if (open my $fh, '>>:encoding(UTF-8)', $hash_list_file) {
        $hash_out = $fh;
    }
    else {
        warn "can't append to hash list $hash_list_file: $!\n";
    }
}

# Deal with each image, in source-name order so numbering is repeatable
my $processed = 0;
my $failed    = 0;
my @work_hashes =
    sort { $source_hashes{$a} cmp $source_hashes{$b} } keys %source_hashes;

for my $hash (@work_hashes) {
    my $current_name = $source_hashes{$hash};

    # Decide the new name: the running number, zero padded to the name width
    my $new_name = sprintf '%0*d.jpg', $name_length, $name_iterate;

    # Do the conversion
    my $image   = Image::Magick->new(quality => 90);
    my $is_fail = magick_failed(
        $image->Read("$source_folder$current_name"),
        "reading $source_folder$current_name"
    );
    $is_fail ||= magick_failed(
        $image->Write("$dest_folder$new_name"),
        "writing $dest_folder$new_name"
    );
    undef $image;

    $processed++;

    if ($is_fail) {

        # Do not record the hash and do not use up the number, so the image
        # is retried on the next run.
        $failed++;
    }
    else {
        $name_iterate++;

        # So we can skip this next time we run the script
        print {$hash_out} "$hash|$current_name\n" if $hash_out;
    }

    # Status
    print "$processed/$num_work\r";
}

if ($hash_out) {
    close $hash_out
        or warn "can't finish writing hash list $hash_list_file: $!\n";
}

warn "\n$failed image(s) could not be converted\n" if $failed > 0;

# Success message
print "\nDone\n";