#!/usr/bin/perl # Filename: AmazonCovers # Author: David Ljung Madison # See License: http://MarginalHacks.com/License/ # Description: Gets amazon covers for a hierarchy of artist/album/ use strict; ################################################## # Setup the variables ################################################## my $PROGNAME = $0; $PROGNAME =~ s|.*/||; my ($BASENAME,$PROGNAME) = ($0 =~ m|(.*)/(.+)|) ? ($1?$1:'/',$2) : ('.',$0); # Pick one: #my $FETCH = "GET -H 'user-agent: Mozilla/5.0'"; my $FETCH = "lynx -source"; ################################################## # Signals ################################################## sub all_done { exit; } $SIG{INT} = \&all_done; $SIG{TERM} = \&all_done; $SIG{HUP} = \&all_done; $SIG{QUIT} = \&all_done; $SIG{EXIT} = \&all_done; $SIG{__DIE__} = \&all_done; ################################################## # Usage ################################################## sub fatal { foreach my $msg (@_) { print STDERR "[$PROGNAME] ERROR: $msg\n"; } exit(-1); } sub usage { foreach my $msg (@_) { print STDERR "ERROR: $msg\n"; } print STDERR "\n"; print STDERR "Usage:\t$PROGNAME [-d] \n"; print STDERR "\tDoes something to the given file\n"; print STDERR "\t-d\tSet debug mode\n"; print STDERR "\n"; exit -1; } sub parse_args { my %opt; while (my $arg=shift(@ARGV)) { if ($arg =~ /^-h$/) { usage(); } if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; } if ($arg =~ /^-D$/) { $MAIN::DEBUG=2; next; } if ($arg =~ /^-/) { usage("Unknown option: $arg"); } if ($opt{album}) { usage("Too many paths specified [$arg and $opt{path}]") if $opt{path}; } elsif ($opt{artist}) { $opt{album} = $arg; } elsif ($opt{path}) { $opt{artist} = $arg; ($opt{artist},$opt{album}) = ($1,$2) if $opt{artist} =~ m|^([^/]+)/([^/]+)/?$|; } else { $opt{path}=$arg; } } $opt{path} = '.' unless $opt{path}; #usage("No path defined") unless $opt{path}; \%opt; } sub debug { return unless $MAIN::DEBUG; foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; } } ################################################## # Amazon code ################################################## my $UA; #use HTTP::Request; #use HTTP::Request::Common; #use LWP::UserAgent; #sub agent { # $UA = LWP::UserAgent->new unless $UA; # $UA; #} # # LWP technique from music search - doesn't seem to work? # my $base = 'http://www.amazon.com'; # my $action = '/exec/obidos/search-handle-form/ref=s_sf_pm/102-3054195-5872102'; # my $resp = $ua->request(POST "$base$action", # [ # 'page' => 1, # 'index' => 'music', # 'field-artist' => $artist, # 'field-title' => $album, # 'field-label' => '', # 'field-binding' => '', # 'field-is-available-used' => 1, # ]); use Net::Amazon; use Net::Amazon::Request::Artist; sub agent { $UA = Net::Amazon->new( token => '1F2GP1QDE4HKE8SYMT82', #max_pages => 1, max_pages => 20, # 10/page, we want to cover >100 matches # This won't be enough for a few cases, such as, "Prince, Purple Rain" # (Though it's actually [Music from the Motion Picture "Purple Rain"]) # It would be neat if we could ask for extra pages after looking at a few. ) unless $UA; $UA; } sub artist_info { my ($artist) = @_; my $ua = agent(); my $req = Net::Amazon::Request::Artist->new(artist => $artist); my $resp = $ua->request($req); unless ($resp->is_success()) { print "Error looking up [$artist] ", $resp->message(),"\n"; return; } my %info; foreach my $prop ( $resp->properties ) { my $album = $prop->ProductName(); my $url = $prop->ImageUrlLarge(); # Unfortunately we might have non-unique ProductNames, # For example, Mighty Mighty Bosstones, "Let's Face It [Explicit Lyrics]" # has the "[Explicit Lyrics]" stripped from the product name. # Unfortunately this case means we keep the URL that is bad.. :( my $count = 1; my $orig = $album; while ($info{$album}) { $album = $orig." ".$count++; } debug("Found: $artist - $album"); $info{$album} = $url; } \%info; } # What about: "Pinch Me [Import CD] [CD-SINGLE]" vs "Pinch Me CD Single"??? # Also consider stripping artist name from album if there's anything left # (example: Air Supply, Greatest Hits) sub simplify { my ($str,$pass) = @_; # Parens #$str =~ s/\[import\]/import/ig;# if $pass<2; # Don't use imports just yet.. $str =~ s/\([^\)]+\)//g if $pass>1; $str =~ s/\[[^\]]+\]//g if $pass>1; # Punctuation $str =~ s/[\"\'\.\:\!\?]//g; # Simple words $str =~ s/the//ig if $pass>1; $str =~ s/\&/and/g; # Ending crap $str =~ s/volume/vol/i; $str =~ s/dis[ck] .*//i if $pass>2; $str =~ s/dis[ck] \d+//i if $pass>1; $str =~ s/(live|remix)//i if $pass>1; # Rip it up! $str =~ s/[^a-zA-Z0-9]//g if $pass>1; $str =~ s/[0-9]//g if $pass>2; $str =~ s/\s//g; lc($str); } # Generally amazon uses jpgs, and the error image is a gif sub good_image { my ($url,$img) = @_; return 1 if -s $img && $url !~ /\.jpg$/; if (-s $img && open(IMG, $img)) { my $head; if (sysread(IMG,$head,5)==5) { close IMG; return 1 if $head ne 'GIF89'; } close IMG; } # Remove the image debug("Bad image: [$img] from [$url]"); unlink $img; return 0; } my %TRIED; sub fetch { my ($artist,$album,$url,$to) = @_; return 0 unless $url; return 0 if $TRIED{$url}++; # Try to fetch system("$FETCH \Q$url\E > \Q$to\E"); # Sometimes an error is because we don't have the large image. # We can usually get the small URL from the large. # Large: http://images.amazon.com/images/P/B000002B46.01.LZZZZZZZ.jpg # Small: http://images.amazon.com/images/P/B000002B46.01._SCMZZZZZZZ_.jpg unless (good_image($url,$to)) { $url =~ s/LZZZZZZZ/_SCMZZZZZZZ_/g; system("$FETCH \Q$url\E > \Q$to\E"); return 0 unless good_image($url,$to); } print "$artist :: $album\n"; print " -> $url\n" if $MAIN::DEBUG>1; return 1; } sub geturl { my ($artist,$album,$info,$cover) = @_; my $Album = simplify($album); # First pass, look for match without ignoring parenthesis print "FIRST PASS\n" if $MAIN::DEBUG>1; foreach my $alb ( keys %$info ) { print "COMP: $Album eq ",simplify($alb),"\n" if $MAIN::DEBUG>1; return if $Album eq simplify($alb) && fetch($artist,$album,$info->{$alb},$cover); } # Second pass, strip parens and more.. $Album = simplify($album,2); print "SECOND PASS\n" if $MAIN::DEBUG>1; foreach my $alb ( keys %$info ) { print "COMP: $Album eq ",simplify($alb,2),"\n" if $MAIN::DEBUG>1; return if $Album eq simplify($alb,2) && fetch($artist,$album,$info->{$alb},$cover); } # Third pass, find something! $Album = simplify($album,3); print "THIRD PASS\n" if $MAIN::DEBUG>1; foreach my $alb ( keys %$info ) { print "COMP: $Album eq ",simplify($alb,3),"\n" if $MAIN::DEBUG>1; return if $Album eq simplify($alb,3) && fetch($artist,$album,$info->{$alb},$cover); } print "NOT FOUND: $artist :: $album\n"; return; } ################################################## # Main code ################################################## sub getdir { my ($dir) = @_; opendir(DIR,$dir) || fatal("Couldn't read directory or empty [$dir]"); my @dir = grep(!/^\.{1,2}$/ && -d "$dir/$_",readdir(DIR)); closedir(DIR); fatal("Directory was empty [$dir]") unless @dir; @dir; } sub main { my $opt = parse_args(); my $path = $opt->{path}; foreach my $artist ( ($opt->{artist}) || getdir($path) ) { my $artist_path = $artist; $artist =~ s/_/ /g; $artist =~ s/ s$/'s/g; # Hack: Loss of ['s] my $info = -1; foreach my $album ( ($opt->{album}) || getdir("$path/$artist_path") ) { my $cover = "$path/$artist_path/$album/Cover.jpg"; debug("Found cover for $artist :: $album") if -f $cover; next if -f $cover; $album =~ s/^\d{4}-//; # I store album as "year-album" $album =~ s/_/ /g; $info = artist_info($artist) if $info==-1; geturl($artist,$album,$info,$cover); } } } main();