#! /usr/bin/perl

# Allow stuff to be sent out immediately
BEGIN { $| = 1 }

# Load pragmas and required modules
use strict;
use Image::Magick;
use File::Basename;
use Digest::MD5 qw(md5 md5_hex md5_base64);

# Return the MD5 digest of a file's contents as a 32-character lowercase
# hexadecimal string.
#
# Argument: the path of the file to hash.
# Dies (with the operating system's error text) if the file cannot be opened.
sub md5_file {
	my ($path) = @_;

	# Three-argument open with an explicit read mode: the path is used
	# literally, so names with leading/trailing whitespace or characters such
	# as '>' or '|' are opened as files instead of being parsed as a mode.
	open my $fh, '<', $path or die "can't open file $path: $!\n";

	# Hash the raw bytes, with no newline translation or decoding layers.
	binmode $fh;
	my $hash = Digest::MD5->new->addfile($fh)->hexdigest;
	close $fh;
	return $hash;
}

# Resolve and validate the two folder arguments.
#
# After this section $source_folder and $dest_folder both end in '/', both
# name existing directories, they are not the same directory, and the
# destination is writable. Any violation ends the script with a message.
my $source_folder;
my $dest_folder;

# Exactly two non-empty arguments are required. An empty argument is rejected
# because the trailing-slash step below would silently turn it into "/".
if (scalar @ARGV != 2 || scalar(grep { $_ eq '' } @ARGV)) {
	print "Usage: ",__FILE__," sourcefolder destfolder\n";
	exit 1;
}
($source_folder, $dest_folder) = @ARGV;

# Make sure trailing slashes are there, so file names can simply be appended
$source_folder .= '/' unless $source_folder =~ m{/\z};
$dest_folder .= '/' unless $dest_folder =~ m{/\z};

# Make sure they exist
die "Source folder ($source_folder) does not exist.\n" unless -d $source_folder;
die "Destination folder ($dest_folder) does not exist.\n" unless -d $dest_folder;

# Make sure they are two different directories. Comparing the strings alone
# misses different spellings of one directory ("photos" vs "./photos", or a
# symlink), so the resolved absolute paths are compared as well.
use Cwd ();
my $source_real = Cwd::abs_path($source_folder);
my $dest_real = Cwd::abs_path($dest_folder);
my $same_folder = $source_folder eq $dest_folder
	|| (defined $source_real && defined $dest_real && $source_real eq $dest_real);
die "Source and destination folders cannot be the same.\n" if $same_folder;

# Gotta have write support
die "Destination folder is not writable\n" unless -w $dest_folder;

# Load the record of images converted by earlier runs.
#
# The record lives in "<destination>.hashes.txt", one "md5hex|source name"
# entry per line. %existing_hash_list maps each recorded hash to its name.
# The record is an optimisation, so failures here warn instead of aborting.
my $hash_list_file = $dest_folder.'.hashes.txt';
my %existing_hash_list;

if (-e $hash_list_file) {
	# It does exist; get contents
	if (open my $hash_in, '<:encoding(UTF-8)', $hash_list_file) {
		while (my $line = <$hash_in>) {
			# Drop the line ending so it does not become part of the name.
			$line =~ s/\r?\n\z//;

			# Everything after the first '|' is the file name; names may
			# contain any character, including '$'.
			$existing_hash_list{$1} = $2 if $line =~ m/\A([a-z0-9]{32})\|(.+)\z/;
		}
		close $hash_in;

		# Notif
		print "Loaded completed hash list with ",scalar(keys(%existing_hash_list))," images to potentially skip\n";
	}
	else {
		# Without the list nothing can be skipped; say so rather than
		# reporting a successful load of zero entries.
		warn "Could not read completed hash list $hash_list_file: $!\n";
	}
}
else {
	# Create it if it doesn't exist
	if (open my $hash_out, '>:encoding(UTF-8)', $hash_list_file) {
		close $hash_out;

		# Notif
		print "Completed hash list not found; empty one created.\n";
	}
	else {
		warn "Could not create completed hash list $hash_list_file: $!\n";
	}
}

# Gather the image file names (basenames only) found directly in a folder.
#
# The folder is read with opendir/readdir rather than glob, because glob
# splits its pattern on whitespace and interprets characters such as '[' or
# '{' in the folder path. Hidden files (leading '.') and anything that is
# not a regular file are ignored. Names are returned grouped by extension in
# the order jpg, png, bmp, JPG, PNG, BMP, sorted within each group; each file
# appears once, even on a case-insensitive filesystem.
my $list_images = sub {
	my ($folder) = @_;

	opendir my $dh, $folder or die "Can't read folder $folder: $!\n";
	my @names = grep { !/^\./ && -f $folder.$_ } readdir $dh;
	closedir $dh;

	my @images;
	foreach my $ext (qw(jpg png bmp JPG PNG BMP)) {
		my @matching = grep { /\.\Q$ext\E\z/ } @names;
		push @images, sort @matching;
	}
	return @images;
};

# Gather images in both
my @source_images = $list_images->($source_folder);

# Only look at useful files in dest: the numbered JPEGs this script writes
my @dest_images = grep { /^\d+\.jpg$/ } $list_images->($dest_folder);

# Hash them and don't include duplicates: %source_hashes maps each new MD5
# to the first source file name that produced it.
my %source_hashes;

# Get source hashes
my $i = 1;
my $skip = 0;
my $num = scalar @source_images;
print "Hashing source images..\n";
foreach my $name (@source_images) {
	my $hash = md5_file($source_folder.$name);

	# Skip content already seen in this run or recorded by an earlier run.
	if (defined($source_hashes{$hash}) || defined($existing_hash_list{$hash})) {
		$skip++;
	}
	else {
		$source_hashes{$hash} = $name;
	}

	# Progress counter, rewritten in place on one line
	print "\r$i/$num";
	$i++;
}

# See if we're skipping any
print "\nSkipping $skip verified images" if $skip > 0;

# Convert every new source image to a numbered JPEG in the destination and
# record its hash so later runs can skip it.

# Prepare for the iteration
my $num_work = scalar keys %source_hashes;
my $name_length = length $num_work;
my $name_iterate = 1;

# Continue numbering after the HIGHEST existing number. The list is ordered
# as text, where "9.jpg" comes after "10.jpg", so taking the last element
# could pick a number that is already in use and overwrite an existing file.
my $highest_existing = 0;
foreach my $existing (@dest_images) {
	next unless $existing =~ /^(\d+)\.jpg$/;
	$highest_existing = $1 if $1 > $highest_existing;

	# Keep at least the zero-padding width already used in the folder.
	$name_length = length $1 if length $1 > $name_length;
}
$name_iterate = $highest_existing + 1;

# Decide whether an Image::Magick call failed. PerlMagick methods return a
# value whose string form is empty on success and otherwise a message with an
# embedded numeric code; codes below 400 are warnings, 400 and up are errors.
my $magick_failed = sub {
	my ($status, $action, $file) = @_;
	return 0 unless defined $status && "$status" ne '';

	warn "\nProblem $action $file: $status\n";
	my ($code) = "$status" =~ /(\d+)/;
	return (!defined $code || $code >= 400) ? 1 : 0;
};

# Actually do the conversion
print "\nConverting/Moving..\n";
my $processed = 0;
my $failures = 0;

# Append known hashes (best effort: conversion still runs without the list)
my $do_hashes = 0;
my $hash_out;
if (-w $hash_list_file && open($hash_out, '>>:encoding(UTF-8)', $hash_list_file)) {
	$do_hashes = 1;

	# Unbuffer the list so an interrupted run keeps the entries for the
	# images it already converted.
	my $previous_handle = select $hash_out;
	$| = 1;
	select $previous_handle;
}

# Deal with each image, in source-name order so numbering is repeatable
foreach my $hash (sort { $source_hashes{$a} cmp $source_hashes{$b} } keys %source_hashes) {
	my $current_name = $source_hashes{$hash};

	# Decide the new name: the next number, zero-padded to $name_length
	my $new_name = sprintf '%0*d.jpg', $name_length, $name_iterate;

	# Do the conversion
	my $image = Image::Magick->new(quality=>90);
	my $status = $image->Read("$source_folder$current_name");
	my $failed = $magick_failed->($status, 'reading', "$source_folder$current_name");
	unless ($failed) {
		$status = $image->Write("$dest_folder$new_name");
		$failed = $magick_failed->($status, 'writing', "$dest_folder$new_name");
	}
	undef $image;

	$processed++;

	if ($failed) {
		# Do not consume the number and do not record the hash, so the
		# image is attempted again on the next run.
		$failures++;
	}
	else {
		# These are used for next pass around
		$name_iterate++;

		# So we can skip this next time we run the script
		if ($do_hashes) {
			print {$hash_out} "$hash|$current_name\n";
		}
	}

	# Status
	print "$processed/$num_work\r";
}

if ($do_hashes) {
	close $hash_out or warn "\nCould not finish writing $hash_list_file: $!\n";
}

warn "\n$failures image(s) could not be converted\n" if $failures > 0;

# Success message
print "\nDone\n";