#!/usr/bin/perl # Name: Toons # Author: David Ljung Madison # See License: http://MarginalHacks.com/License use strict; use Time::Local; my $DEBUG=0; my $VERBOSE=0; my $BROWSER = "browser"; my $USE_BROWSER=0; my $LOOP=0; # Do we use perlLWP 'HEAD' or do we try to do it ourselves? my $USE_HEAD=1; my $PATH="/var/tmp/cartoons"; my $OLD_DAYS=30; # Wipe anything past 30 days mkdir($PATH,0755) if (! -d $PATH); my $CATALOG="$PATH/catalog"; mkdir($CATALOG,0755) if (! -d $CATALOG); my $USE_DEBUG_PATH=0; my $DEBUG_PATH="$PATH/DEBUG"; # Run this command on images after download (before viewing) #my $POST_PROCESSING = "fix_img"; my $POST_PROCESSING = 0; # So we save to a specific place #chdir($PATH); chdir("/stuff/Cartoons"); my @ORIGINAL_ARGV=@ARGV; # Reap zombies $SIG{CHLD} = 'IGNORE'; # In army/decimal hours: my $WAKE_TIME=9.25; # 9:15 AM # See replacements below for explanation of %? strings # Each comic is an array: [ name, URL, days_offset, search, replace ] my @URLS = ( [ "Calvin_And_Hobbes", "http://images.ucomics.com/comics/ch/%Y/ch%y%m%d.gif", -11*365-3 ], #NO BACK:[ "Calvin_And_Hobbes", "http://www.uexpress.com/ups/comics/ch/ieindex.html", 0 ], #No more:#[ "Bizarro", "http://images.ucomics.com/comics/bz/%Y/bz%y%m%d.gif", 0 ], [ "Bizarro", "http://est.rbma.com/content/Bizarro?date=%Y%m%d", -14, 0,0, "http://www.kingfeatures.com/features/comics/bizarro/about.htm" ], #Nor:...#[ "Bizarro", "http://www.kingfeatures.com/features/comics/bizarro/about.htm", 0 ], #EXTRA: [ "Bizarro", "http://www.klean-jokes.com/comics/bz/archive/%Y/%m/pics/bz%y%m%d.gif", -2 ], [ "Duplex", "http://www.uexpress.com/ups/comics/dp/index.html", 0 ], # Archives - start viewing 10/14/93(thur) on 05/27/1999(thur) # No sat/sun - use %(0,j,j,j,j,j,0)upg kludge [ "Doctor_Fun_old", "http://www.ibiblio.org/Dave/Dr-Fun/df%z%m/df%z%m%d.%(j,j,j,j,j,0,0)upg", -2130 ], [ "Doctor_Fun", "http://www.ibiblio.org/Dave/Dr-Fun/df%Y%m/df%Y%m%d.%(j,j,j,j,j,0,0)upg", 0 ], #NO BACK:[ "Doonesbury", "http://www.doonesbury.com/strip/dailydose/index.cfm", 0, "comics/db" ], [ "Doonesbury", "http://images.ucomics.com/comics/db/%Y/db%y%m%d.gif", -1], #NO BACK:[ "Dilbert", "http://www.unitedmedia.com/comics/dilbert", 0 ], [ "Dilbert", "http://www.unitedmedia.com/comics/dilbert/archive/dilbert-%Y%m%d.html", 0, '/comics/dilbert/archive/images/dilbert\d+.gif' ], [ "Get_Fuzzy", "http://www.unitedmedia.com/comics/getfuzzy/archive/getfuzzy-%Y%m%d.html", 0, '/comics/getfuzzy/archive/images/getfuzzy\d+.gif'], #[ "Pearls_Before_Swine", "http://www.unitedmedia.com/comics/pearls/archive/pearls-%Y%m%d.html", 0, "archive/images/pearls" ], [ "Randolph_Itch", "http://www.comics.com/comics/randolphitch/archive/randolphitch-%Y%m%d.html", 0, "archive/images/randolphitch" ], #[ "Mister_Boffo", "http://www.uexpress.com/ups/comics/mb/index.html", 0 ], [ "Mister_Boffo", "http://images.ucomics.com/comics/mb/%Y/mb%y%m%d.gif", 0 ], [ "Willy_n_Ethel", "http://www.tmsfeatures.com/tmsfeatures/servlet/com.featureserv.util.Download?file=%Y%m%dcswil-a-p.jpg&code=cswil", 0 ], [ "Boondocks", "http://images.ucomics.com/comics/bo/%Y/bo%y%m%d.gif", 0 ], #[ "Foxtrot", "http://www.foxtrot.com/comics/strips/ft%y%m%d.gif", -14 ], [ "Foxtrot", "http://images.ucomics.com/comics/ft/%Y/ft%y%m%d.gif", 0 ], #[ "Not_Available", "http://www.famousdesigns.com/wfcomics/notavailable/%Y/%(na,0,0,0,0,0,0)u%Y%m%d.gif", 0 ], [ "Red_Meat", "http://www.redmeat.com/redmeat/%(0,0,current,0,0,0,0)u/", 0 ], [ "Bob_The_Angry_Flower", "http://angryflower.com/archive.html", 0, '.html', '.gif' ], ##[ "Robotman", "http://www.unitedmedia.com/comics/robotman/ab.html", 0 ], #[ "Robotman", "http://www.unitedmedia.com/comics/monty/archive/monty-%Y%m%d.html", 0 ], # Salon magazine provides: Mon: tomo (jpg), Tues: lay (gif), Wed: knig (gif), Thur: boll (gif) # Instead of this: #[ "Salon_Magazine", "http://www.salon.com/comics/%(tomo,lay,knig,boll)u/%Y/%m/%d/%(tomo,lay,knig,boll)u/story.%(jpg,gif,gif,gif)u", 0 ], # We'll just do jpg *and* gif, since tomo switches back and forth.. [ "Salon_Magazine", "http://www.salon.com/comics/%(tomo,lay,knig,boll)u/%Y/%m/%d/%(tomo,lay,knig,boll)u/story.gif", 0 ], [ "Salon_Magazine", "http://www.salon.com/comics/%(tomo,lay,knig,boll)u/%Y/%m/%d/%(tomo,lay,knig,boll)u/story.jpg", 0 ], # Ctoons is flaky? #[ "Quigmans", "http://www.ctoons.com/studio/quigmans/images/%y%m%d.gif", -7 ], #[ "Quigmans", "http://www.comicspage.com/daily/cpqgm/%Y%m%dcpqgm-a.gif", 0 ], [ "SinFest", "http://sinfest.net/comics/sf%Y%m%d.gif", 0 ], #[ "Mixed_Media", "http://www.ctoons.com/studio/mixed/%(0,0,0,0,0,0,images)u/%Y%m%d-01.gif", -7 ], [ "Mixed_Media", "http://www.comicspage.com/daily/csmix/%Y%m%dcsmix-%(a,a,a,a,a,a,s)u.gif", -7 ], # Hi-Res (img may need fix_img) [ "Penny_Arcade", "http://www.penny-arcade.com/images/%Y/%Y%m%dl.jpg", 0], #[ "Penny_Arcade", "http://www.penny-arcade.com/images/%Y/%Y%m%dl.gif", 0], [ "Dork_Tower", "http://www.gamespy.com/comics/dorktower/comics/gamespy/GameSpy%3D.jpg", -11428], [ "Parking_Lot_Is_Full", "http://www.plif.com/archive/%(0,0,0,0,0,0,wc)u%w.gif", -9590], #[ "Joy_Of_Tech", "http://www.joyoftech.com/joyoftech/joyimages/%3D.gif", -11710], #[ "Diesel_Sweeties", "http://images.clango.org/strips/sw%2D.gif", -11734], # Implemented some sort of security, based on http_referer? # Could get with: GET -H 'Referer: $first_page' $img > ... # Needs a referrer, but I'm not reading this anyways.. #[ "Liberty_Meadows", "http://www.creators.com/%(%m%d)S/lib/lib%m%dg.gif", 0 ], # Not that funny.. #[ "Real_Life_Adventures", "http://images.ucomics.com/comics/rl/%Y/rl%y%m%d.gif", 0 ], #[ "Mother_Goose_And_Grimm", "http://www.grimmy.com/images/%Y/%m%d00.gif", -7 ], #[ "Mother_Goose_And_Grimm", "http://aolsvc.toonville.aol.com/Content1/Grimmy/%m%d00.gif", 0 ], #[ "Speed_Bump", "http://www.creators.com/%(%m%d)S/bmp/bmp%m%dg.gif", 0 ], ##[ "Off_The_Mark", "http://cgi.mercurycenter.com/premium/comics/%m_%d/off_the_mark.gif", 0 ], #[ "Off_The_Mark", "http://offthemark.com/rotate/%y%m%d.gif", 0 ], #[ "Fusco_Bros", "http://images.ucomics.com/comics/fu/%Y/fu%y%m%d.gif", 0 ], ##[ "Close_To_Home", "http://www.uexpress.com/ups/comics/cl/index.html", 0 ], #[ "Close_To_Home", "http://images.ucomics.com/comics/cl/%Y/cl%y%m%d.gif", 0 ], #[ "Rubes", "http://www.creators.com/%(%m%d)S/rub/rub%m%dg.gif", 0 ], #[ "Non_Sequitur", "http://images.ucomics.com/comics/nq/%Y/nq%y%m%d.gif", 0, "ucomics.com/comics" ], #[ "Non_Sequitur", "http://www.ctoons.com/studio/nonse%(0,0,0,0,0,0,q)u/images/%Y%m%d-01.gif", -10 ], # Ick is gone # [ "Ick", "http://www.ctoons.com/studio/ick/images/%z%m%d.gif", -7 ], # Frumpy is gone. *sob* # [ "Frumpy_The_Clown", "http://www.creators.com/comics/compage/frc.asp", 0, "frumpy.gif" ], # Don't read these anymore, don't know if they still work. # [ "Editorial_Chuck_Assay", "http://editorial.ctoons.com/studio/%Y/%m/%Y%m%d-01.gif", 0 ], # [ "Editorial_Bob_Gorell", "http://editorial.ctoons.com/studio/%Y/%m/%Y%m%d-02.gif", 0 ], # [ "Shoe", "http://www.ctoons.com/studio/shoe/images/%z%m%d.gif", -7 ], # [ "General_Protection_Fault", "http://www.gpf-comics.com/comics/gpf%Y%m%d.gif", 0], # [ "Frankenstudent", "http://frankenstudent.com/comicsf/fnkst%Y%m%d.gif", 0], # [ "College_Roomies_From_Hell", "http://www.crfh.net/comics/crfh%Y%m%d.gif", 0], # [ "Bruno_The_Bandit", "http://www.brunothebandit.com/comics/%Y%m%da.gif", 0], # [ "Goats", "http://www.goats.com/comix/%y%m/goats%y%m%d.png", 0 ], # [ "Goat_old", "http://www.goats.com/comix/%y%m/goats%y%m%d.gif", -1541 ], # [ "Sluggy_Freelance", "http://pics.sluggy.com/comics/%y%m%d%(a,a,a,a,a,a,0)u.gif", 0 ], # # Sunday kludge.. # [ "Sluggy_Freelance_S", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u.jpg", 0 ], # [ "Sluggy_Freelance_1", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u1.jpg", 0 ], # [ "Sluggy_Freelance_2", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u2.jpg", 0 ], # [ "Sluggy_Freelance_3", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u3.jpg", 0 ], # [ "Sluggy_Freelance_4", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u4.jpg", 0 ], # [ "Sluggy_Freelance_5", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u5.jpg", 0 ], # [ "Sluggy_Freelance_6", "http://pics.sluggy.com/comics/%y%m%d%(0,0,0,0,0,0,b)u6.jpg", 0 ], ); # uclick needs cookies, and lynx -cookie doesn't seem to work? # [ "Doonesbury", "http://www2.uclick.com/feature/%Y/%m/%d/db.gif", 0 ], # [ "Close_To_Home", "http://www2.uclick.com/feature/%Y/%m/%d/cl.gif", 0 ], # [ "Foxtrot", "http://www2.uclick.com/feature/%Y/%m/%d/ft.gif", 0 ], # [ "Fusco_Bros", "http://www2.uclick.com/feature/%Y/%m/%d/fu.gif", 0 ], sub get_date { my ($offset) = @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time+$offset*60*60*24); sprintf("%4.4d-%2.2d-%2.2d",$year+1900,$mon+1,$mday); } # Replaces %? in URL with date and other information. The date # can be offset by a number of days sub do_replacements { my ($url,$offset) = @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time+$offset*60*60*24); $mday=sprintf("%2.2d",$mday); # Convert 0-31 to 00-31 $mon=sprintf("%2.2d",$mon+1); # Convert 0-11 to 1-12 my $fullyear=1900+$year; my $twoyear=sprintf("%0.2d",(($fullyear/100)-int($fullyear/100)+.001)*100); $wday=7 if (!$wday); # Sunday is actually 7 (see 'man date') # These date options are supported: (standards stolen from strftime) # %a Abbreviated weekday name. Example: wed. # %d Day of the month as a two-digit decimal number [01-31]. Example: 12 # %m Month as a decimal two-digit number [01-12]. Example: 01 # %u Weekday as a one-digit decimal number [1-7 (Monday-Sunday)]. # Example: 3 is Wednesday. # %z Year - 1900. Example: 93 == 1993, 102 == 2002. # %y Two digit year. Example: 02 # %Y Year with century as a four-digit decimal number [1970-2069]. # Example: 2002. # %w Week count since (unix epoch) time began. # %D Day count since (unix epoch) time began. # %(blah)S Do replacements on "blah" for last sunday # # We can also index into our own array using the day, year, whatever... # Example: %(M,T,W,R,F,S,Su)u [starts at index 1] while ($url =~ /%(\d)?(\([^\)]*\))?(.)/g) { my ($optional_width,$optional_array, $format) = ($1, $2, $3); my $replace; $replace=(qw(sun mon tues wed thurs fri sat))[$wday] if ($format eq "a"); $replace=$wday if ($format eq "u"); $replace=$mday if ($format eq "d"); $replace=$mday if ($format eq "d"); $replace=$mon if ($format eq "m"); $replace=$year if ($format eq "z"); $replace=$twoyear if ($format eq "y"); $replace=$fullyear if ($format eq "Y"); $replace=int((time/(60*60*24)+$offset)/7) if ($format eq "w"); $replace=int(time/(60*60*24)+$offset) if ($format eq "D"); $replace=sprintf("%${optional_width}.${optional_width}d",$replace) if (defined $optional_width); if ($format eq "S") { my ($pre,$post) = ($`,$'); $optional_array =~ s/^\(//; $optional_array =~ s/\)$//; my $wday_tmp = $wday; $wday_tmp = 0 if $wday_tmp == 7; $url = $pre.do_replacements($optional_array,$offset-$wday_tmp).$post; next; } if ($optional_array) { $optional_array =~ s/^\(//; $optional_array =~ s/\)$//; my @arr=split(/,/,$optional_array); return 0 if (!$arr[$replace-1]); $replace=$arr[$replace-1]; } $url="$`$replace$'"; } $url; } ################################################## # Does a file exist? ################################################## sub url_split { my ($url) = @_; # Break it down! $url =~ s|^https?://||; return ($1,$2) if ($url =~ m|^([^/]+)/(.*)$|); return ($url,""); } use FileHandle; use IPC::Open2; sub foundTelnet { my ($url) = @_; my ($domain,$page) = url_split($url); my $found=0; # KLUDGE: ASSUME http: method (https?) my $loc = "http://$domain/$page"; $loc = "/$page" if ($domain =~ /redmeat.com/); # TOTAL KLUDGE! my $pid = open2( \*READER, \*WRITER, "telnet $domain 80 2>/dev/null" ); WRITER->autoflush(); # default here, actually # KLUDGE: ASSUME http: method (https?) print WRITER "HEAD $loc HTTP/1.0\n\n"; while () { $found=1 if /Bad Request/; # Hack, ucomics.com doesn't like my HEAD for some reason.. $found=1 if /Authorization Required/; # Hack- redmeat.com doesn't like found() $found=1 if /HTTP.*[23]\d\d/; last if ($found); } close(READER); close(WRITER); my $wait=waitpid($pid,0); # So we don't collect defunct processes return $found; } sub foundHEAD { my ($url) = @_; my $found=0; my $saveCHLD = $SIG{CHLD}; undef $SIG{CHLD}; if (!open(HEAD,"HEAD $url|")) { $USE_HEAD = 0; return found(@_); } $SIG{CHLD} = $saveCHLD; while () { $found=1 if /200 OK/; last if ($found); } close(HEAD); return $found; } sub found { $USE_HEAD ? foundHEAD(@_) : foundTelnet(@_); } sub bad { print STDERR @_; 0; } ################################################## # Get a .gif URL ################################################## sub get_img { my ($url,$dir,$date,$refer) = @_; my $type = ($url =~ /\.([^\.\/]+)$/) ? $1 : "gif"; $type =~ s/&.*//g; exec("$BROWSER $url") if ($USE_BROWSER); return print "$url\n" if ($VERBOSE); print DEBUG "get_img($url)\n" if ($DEBUG); $dir="$PATH/$dir"; mkdir($dir,0755) if (! -d $dir); return bad("Couldn't make directory: $dir\n") if (! -d $dir); return bad("URL not found [$url]\n") if (!found($url)); my $path="$dir/${date}.$type"; my $CMD = "lynx -cookies -source"; $CMD = "GET -H 'Referer: $refer'" if $refer; system("$CMD \"$url\" > $path"); return bad("Couldn't fetch $url to $path\n") if ($?); return $path; } # Get a toon # This is either a gif (use get_img) or HTML with the # gif hidden in an tag sub get_toon { my ($url,$name,$date,$search_for,$replace,$refer) = @_; # It's just an img - get it. return get_img($url,$name,$date,$refer) #Hack: just get it if we specify a refer (bizarro hack.. urgh) if $url =~ /\.(gif|jpg|png)(&|$)/ || $refer; # Figure out the root path for the local URL my ($root_path) = $url =~ m|(http://[^/]+)|; if (!$root_path) { print STDERR "Couldn't figure out root path of $url\n"; return 0; } # Figure out current path my $cur_path = $url; $cur_path =~ s|[^/]+$||; if (!found($url)) { print STDERR "URL not found [$url]\n"; return 0; } # Read the page if (!open(HTML,"lynx -source \"$url\" |")) { print STDERR "Couldn't get $url\n"; return 0; } print DEBUG "Scan HTML: $url\n" if ($DEBUG); # Find an .gif that isn't in an images or widgets path (works for C&H) # Actually - some sites use 'images' - but they also have archive|comic in the path my $found_image; if ($search_for) { my $tag; while() { print STDERR "Couldn't find HTML $url\n" if (/HTTP.*Object Not Found/i); my $what = ($search_for =~ /\.html?$/) ? 'a[^>]* href' : 'img[^>]* src'; if ((undef,$tag)=/<$what=("|')?([^>'"]*$search_for[^>'"\s]*)/i) { print DEBUG "FOUND TAG: $tag\n" if ($DEBUG==2); $found_image=$tag; last unless $replace; # SHOULD BE A PERL ANON SUB (so we can do whatever) my $repl=eval { $tag =~ s/$search_for/$replace/e }; # returns number of replacements $found_image=$tag; last; } } } else { while() { my $img; # Stupid heuristic # Look for an image tag that doesn't contain 'images' or 'widgets' # unless we have 'archive' or 'comic' - but we need to have a number # somewhere in the gif filename # Also ignore 'banner' and 'logo' and 'header' print STDERR "Couldn't find HTML $url\n" if (/HTTP.*Object Not Found/i); print DEBUG "FOUND IMG: $img\n" if ($DEBUG==2 && ((undef,$img)=/]* SRC=("|'|)([^"']+\.gif)/i)); if (((undef,$img)=/]* SRC=("|'|)([^"']+\.(gif|jpg))/i) && ($img =~ m|[0-9][^/]*\.gif| || $img =~ /story/) && ($img !~ /(banner|logo|ads|header|nav|SendAStrip)/) && ($img !~ /(images|widgets)/ || $img =~ /(archive\/images|images\/comic)/)) { close(HTML); $found_image=$img; last; } # As a last resort, do we have any ? !$found_image && ((undef,$found_image) = /]* HREF=("|'|)([^"']+\.gif)("|'|>)/i); } } close(HTML); if ($found_image) { # Absolute URL? (i.e.: http://comics.com/funny.gif) return get_img($found_image,$name,$date,$refer) if ($found_image =~ /^http/); # Local-absolute URL? (i.e.: /comics/today.gif) return get_img("$root_path$found_image",$name,$date,$refer) if ($found_image =~ m|^/|); # Just a path (i.e.: today.gif) return get_img("$cur_path$found_image",$name,$date,$refer); } print DEBUG "Couldn't find an image tag\n"; return 0; } sub display_toons { my ($iconic,@toons) = @_; system("$POST_PROCESSING @toons") if $POST_PROCESSING; # Double the window except on Sundays (Sun comics are usually large) #my $expand= (localtime(time))[6] ? "-expand 2" : ""; # Nah, always expand my $expand= "-expand 2"; my $iconic=$iconic ? "-iconic" : ""; exec("xv $iconic -raw $expand @toons") unless fork; } sub gather_toons { my ($pattern,@back) = @_; $back[0]=0 unless @back; my (@got); clean_old_files(); ######################### # Get the toons ######################### my $url_L; foreach $url_L ( @URLS ) { my ($name,$url,$offset,$search_for,$replace,$refer) = @{$url_L}; next if ($pattern && $name !~ /$pattern/i); my $back; foreach $back ( @back ) { my $rep_url = do_replacements($url,$offset-$back); next unless ($rep_url); my $img=get_toon($rep_url,$name,get_date($offset-$back),$search_for,$replace,$refer); print DEBUG "Got toon: $img\n" if (($pattern || $DEBUG) && $img); push(@got,$img) if ($img); } } return if ($VERBOSE); # return if ($pattern); ######################### # Write the 'found' file ######################### unless (@back) { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time); my $date=sprintf("%2.2d%2.2d%2.2d",$year,$mon+1,$mday); open(NEW,">$CATALOG/$date") || die("Couldn't open $CATALOG/$date\n"); print NEW join("\n",@got)."\n"; close(NEW); } ######################### # Display the toons ######################### display_toons($pattern ? 0 : 1,@got); } ################################################## # Usage ################################################## sub usage { my $msg; foreach $msg (@_) { print "ERROR: $msg\n"; } print "\n"; print "Usage:\t$MAIN::PROGNAME [-d] [pattern]\n"; print "\tFetch toons every day from the web and display them\n"; print "\n"; print "\tpattern Only fetch toons with names that match this pattern\n"; print "\t-back Gather toons for a certain number of days back - won't work on some toons\n"; print "\t Lists are okay: \"-back 1-10\"\n"; print "\t-d Set debug mode\n"; print "\t-D Heavy debug mode\n"; print "\t-v Just print URLs of toons\n"; print "\t-b Run browser on the URLs found\n"; print "\n"; exit -1; } # Convert 1-10 to 1,2,..10 sub parse_list { my ($str) = @_; return $str if ($str =~ /^-?\d+$/); return parse_list(split(/,/,$str)) if ($str =~ /,/); die("[$MAIN::PROGNAME] Couldn't understand list of numbers [$str]\n") if ($str !~ /^(-?\d+)-(-?\d+)$/); return $1..$2 if ($1<$2); return reverse($2..$1); } sub parse_args { my ($pattern,@back); while ($#ARGV>=0) { my $arg=shift(@ARGV); if ($arg =~ /^-h$/) { usage(); } if ($arg =~ /^-D$/) { $DEBUG=2; next; } if ($arg =~ /^-d$/) { $DEBUG=1; next; } if ($arg =~ /^-v$/) { $VERBOSE=1; next; } if ($arg =~ /^-b$/) { $USE_BROWSER=1; next; } if ($arg =~ /^-back$/) { @back=parse_list(shift(@ARGV)); next; } if ($arg =~ /^-loop$/) { $LOOP=1; next; } if ($arg =~ /^-/) { usage("Unknown option: $arg"); } usage("Too many patterns specified [$arg and $pattern]") if (defined($pattern)); $pattern=$arg; } # Deal with debug if ($DEBUG) { if ($USE_DEBUG_PATH) { open(DEBUG,">$DEBUG_PATH") || die("Couldn't open: $DEBUG_PATH\n"); select((select(DEBUG), $| = 1) [0]); #Turn off buffering of output } else { open(DEBUG,">&STDERR") || die("Couldn't dup to STDERR\n"); } } ($pattern,@back); } sub clean_old_files { opendir(DIR,$PATH) || die("Couldn't open $PATH\n"); my @dirs=grep(!/\.{1,2}/ && -d "$PATH/$_",readdir(DIR)); closedir(DIR); foreach my $d ( @dirs ) { opendir(DIR,"$PATH/$d") || die("Couldn't open $PATH/$d"); unlink grep(-M $_ >= $OLD_DAYS, map("$PATH/$d/$_",readdir(DIR))); closedir(DIR); } } sub main { my ($pattern,@back)=parse_args(); return gather_toons($pattern,@back) if (@back || $pattern || !$LOOP); # Loop (stop if we ever see this script modified) while (-M $0>=0) { print DEBUG `date` if ($DEBUG); # Get all the cartoons gather_toons(undef,@back); # Wait until WAKE_TIME my $last_midnight=timelocal(0,0,0,(localtime(time))[3..5]); my $next_midnight=24*60*60+$last_midnight; my $till_next_midnight=$next_midnight-time; my $sleep_time=$till_next_midnight+$WAKE_TIME*60*60; print DEBUG "Sleep until $till_next_midnight + ", $WAKE_TIME*60*60, " = $sleep_time\n" if ($DEBUG); sleep($till_next_midnight+$WAKE_TIME*60*60); } close(DEBUG) if ($DEBUG); # The script was modified, restart print STDERR "RESTARTING..\n"; exec($0,@ORIGINAL_ARGV) if ($LOOP); } main();