#!/usr/bin/perl
# Filename:	AmazonCovers
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License/
# Description:	Gets amazon covers for a hierarchy of artist/album/
use strict;

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0; $PROGNAME =~ s|.*/||;
my ($BASENAME,$PROGNAME) = ($0 =~ m|(.*)/(.+)|) ? ($1?$1:'/',$2) : ('.',$0);

# Pick one:
#my $FETCH = "GET -H 'user-agent: Mozilla/5.0'";
my $FETCH = "lynx -source";

##################################################
# Signals
##################################################
sub all_done { exit; }
$SIG{INT} = \&all_done; $SIG{TERM} = \&all_done;
$SIG{HUP} = \&all_done; $SIG{QUIT} = \&all_done;
$SIG{EXIT} = \&all_done; $SIG{__DIE__} = \&all_done;

##################################################
# Usage
##################################################
sub fatal {
  foreach my $msg (@_) { print STDERR "[$PROGNAME] ERROR:  $msg\n"; }
  exit(-1);
}

sub usage {
  foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }
  print STDERR "\n";
  print STDERR "Usage:\t$PROGNAME [-d] <file>\n";
  print STDERR "\tDoes something to the given file\n";
  print STDERR "\t-d\tSet debug mode\n";
  print STDERR "\n";
  exit -1;
}

sub parse_args {
  my %opt;
  while (my $arg=shift(@ARGV)) {
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; }
    if ($arg =~ /^-D$/) { $MAIN::DEBUG=2; next; }
    if ($arg =~ /^-/) { usage("Unknown option: $arg"); }
    if ($opt{album}) {
      usage("Too many paths specified [$arg and $opt{path}]") if $opt{path};
    } elsif ($opt{artist}) {
      $opt{album} = $arg;
    } elsif ($opt{path}) {
      $opt{artist} = $arg;
      ($opt{artist},$opt{album}) = ($1,$2)
        if $opt{artist} =~ m|^([^/]+)/([^/]+)/?$|;
    } else {
      $opt{path}=$arg;
    }
  }

  $opt{path} = '.' unless $opt{path};
  #usage("No path defined") unless $opt{path};

  \%opt;
}

sub debug {
  return unless $MAIN::DEBUG;
  foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; }
}

##################################################
# Amazon code
##################################################
my $UA;

#use HTTP::Request;
#use HTTP::Request::Common;
#use LWP::UserAgent;
#sub agent {
#  $UA = LWP::UserAgent->new unless $UA;
#  $UA;
#}
#  # LWP technique from music search - doesn't seem to work?
#  my $base = 'http://www.amazon.com';
#  my $action = '/exec/obidos/search-handle-form/ref=s_sf_pm/102-3054195-5872102';
#  my $resp = $ua->request(POST "$base$action",
#    [
#    'page'			=> 1,
#    'index'			=> 'music',
#    'field-artist'		=> $artist,
#    'field-title'			=> $album,
#    'field-label'		=> '',
#    'field-binding'		=> '',
#    'field-is-available-used'	=> 1,
#    ]);


use Net::Amazon;
use Net::Amazon::Request::Artist;
sub agent {
  $UA = Net::Amazon->new(
    token     => '1F2GP1QDE4HKE8SYMT82',
    #max_pages => 1,
    max_pages => 20,	# 10/page, we want to cover >100 matches
    # This won't be enough for a few cases, such as, "Prince, Purple Rain"
    # (Though it's actually [Music from the Motion Picture "Purple Rain"])
    # It would be neat if we could ask for extra pages after looking at a few.
  ) unless $UA;
  $UA;
}

sub artist_info {
  my ($artist) = @_;

  my $ua = agent();

  my $req = Net::Amazon::Request::Artist->new(artist => $artist);
  my $resp = $ua->request($req);

  unless ($resp->is_success()) {
    print "Error looking up [$artist] ", $resp->message(),"\n";
    return;
  }

  my %info;
  foreach my $prop ( $resp->properties ) {
    my $album = $prop->ProductName();
    my $url = $prop->ImageUrlLarge();

    # Unfortunately we might have non-unique ProductNames,
    # For example, Mighty Mighty Bosstones, "Let's Face It [Explicit Lyrics]"
    # has the "[Explicit Lyrics]" stripped from the product name.
    # Unfortunately this case means we keep the URL that is bad..  :(
    my $count = 1;
    my $orig = $album;
    while ($info{$album}) { $album = $orig." ".$count++; }

    debug("Found: $artist - $album");
    $info{$album} = $url;
  }
  \%info;
}

# What about:  "Pinch Me [Import CD] [CD-SINGLE]" vs "Pinch Me CD Single"???
# Also consider stripping artist name from album if there's anything left
#   (example: Air Supply, Greatest Hits)
sub simplify {
  my ($str,$pass) = @_;

  # Parens
  #$str =~ s/\[import\]/import/ig;# if $pass<2;	# Don't use imports just yet..
  $str =~ s/\([^\)]+\)//g if $pass>1;
  $str =~ s/\[[^\]]+\]//g if $pass>1;
  # Punctuation
  $str =~ s/[\"\'\.\:\!\?]//g;

  # Simple words
  $str =~ s/the//ig if $pass>1;
  $str =~ s/\&/and/g;

  # Ending crap
  $str =~ s/volume/vol/i;
  $str =~ s/dis[ck] .*//i if $pass>2;
  $str =~ s/dis[ck] \d+//i if $pass>1;
  $str =~ s/(live|remix)//i if $pass>1;

  # Rip it up!
  $str =~ s/[^a-zA-Z0-9]//g if $pass>1;
  $str =~ s/[0-9]//g if $pass>2;
  $str =~ s/\s//g;
  lc($str);
}

# Generally amazon uses jpgs, and the error image is a gif
sub good_image {
  my ($url,$img) = @_;

  return 1 if -s $img && $url !~ /\.jpg$/;

  if (-s $img && open(IMG, $img)) {
    my $head;
    if (sysread(IMG,$head,5)==5) {
      close IMG;
      return 1 if $head ne 'GIF89';
    }
    close IMG;
  }

  # Remove the image
  debug("Bad image: [$img] from [$url]");
  unlink $img;
  return 0;
}

my %TRIED;
sub fetch {
  my ($artist,$album,$url,$to) = @_;

  return 0 unless $url;

  return 0 if $TRIED{$url}++;

  # Try to fetch
  system("$FETCH \Q$url\E > \Q$to\E");

  # Sometimes an error is because we don't have the large image.
  # We can usually get the small URL from the large.
  # Large: http://images.amazon.com/images/P/B000002B46.01.LZZZZZZZ.jpg
  # Small: http://images.amazon.com/images/P/B000002B46.01._SCMZZZZZZZ_.jpg
  unless (good_image($url,$to)) {
    $url =~ s/LZZZZZZZ/_SCMZZZZZZZ_/g;
    system("$FETCH \Q$url\E > \Q$to\E");

    return 0 unless good_image($url,$to);
  }

  print "$artist :: $album\n";
  print "  -> $url\n" if $MAIN::DEBUG>1;
  return 1;
}

sub geturl {
  my ($artist,$album,$info,$cover) = @_;

  my $Album = simplify($album);

  # First pass, look for match without ignoring parenthesis
  print "FIRST PASS\n" if $MAIN::DEBUG>1;
  foreach my $alb ( keys %$info ) {
    print "COMP: $Album eq ",simplify($alb),"\n" if $MAIN::DEBUG>1;
    return if $Album eq simplify($alb)
      && fetch($artist,$album,$info->{$alb},$cover);
  }

  # Second pass, strip parens and more..
  $Album = simplify($album,2);
  print "SECOND PASS\n" if $MAIN::DEBUG>1;
  foreach my $alb ( keys %$info ) {
    print "COMP: $Album eq ",simplify($alb,2),"\n" if $MAIN::DEBUG>1;
    return if $Album eq simplify($alb,2)
      && fetch($artist,$album,$info->{$alb},$cover);
  }

  # Third pass, find something!
  $Album = simplify($album,3);
  print "THIRD PASS\n" if $MAIN::DEBUG>1;
  foreach my $alb ( keys %$info ) {
    print "COMP: $Album eq ",simplify($alb,3),"\n" if $MAIN::DEBUG>1;
    return if $Album eq simplify($alb,3)
      && fetch($artist,$album,$info->{$alb},$cover);
  }

  print "NOT FOUND: $artist :: $album\n";
  return;
}

##################################################
# Main code
##################################################
sub getdir {
  my ($dir) = @_;
  opendir(DIR,$dir) || fatal("Couldn't read directory or empty [$dir]");
  my @dir = grep(!/^\.{1,2}$/ && -d "$dir/$_",readdir(DIR));
  closedir(DIR);
  fatal("Directory was empty [$dir]") unless @dir;
  @dir;
}

sub main {
  my $opt = parse_args();

  my $path = $opt->{path};

  foreach my $artist ( ($opt->{artist}) || getdir($path) ) {
    my $artist_path = $artist;

    $artist =~ s/_/ /g;
    $artist =~ s/ s$/'s/g;	# Hack: Loss of ['s]

    my $info = -1;
    foreach my $album ( ($opt->{album}) || getdir("$path/$artist_path") ) {
      my $cover = "$path/$artist_path/$album/Cover.jpg";
      debug("Found cover for $artist :: $album") if -f $cover;
      next if -f $cover;

      $album =~ s/^\d{4}-//;	# I store album as "year-album"
      $album =~ s/_/ /g;
      $info = artist_info($artist) if $info==-1;
      geturl($artist,$album,$info,$cover);
    }
  }
}
main();
