#!/usr/bin/perl
# Filename:     whats_new
# Author:       David Ljung Madison
# See License:  http://MarginalHacks.com/License
# Description:  Creates an HTML "What's New" index
# Input:        Reads a file (or STDIN) of "domain<TAB>path"
# Bugs:         Can get caught in infinite loops due to symlinks,
#               use -ignore to avoid these paths.
#
# NOTE(review): this copy was recovered from an HTML rendering that had
# stripped everything that looked like a tag.  The HTML markup emitted by
# header(), footer() and main(), and the <placeholders> in usage(), are
# reconstructions -- confirm them against the canonical script.

use strict;
use warnings;
use Cwd;

umask 022;

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

# List of [domain,path] pairs, read from file/stdin
my @DOMAINS;

my $HTML        = "s?html?";        # Regex fragment: html, htm, shtml, shtm
my $DEFAULT     = "index.html";     # Directory default page
my $NO_INDEX    = ".no_index";      # Don't index these directories
my $NO_CONTENTS = ".no_contents";   # Don't index contents

my $MY_URL = "http://MarginalHacks.com/#whats_new";

my $DEBUG = 0;    # 0 = silent, 1 = debug(), 2+ = Debug() as well
my $QUIET = 0;    # -q: unreadable directories are skipped instead of fatal

my $IGNORE;       # -ignore: regexp of paths/files to skip
my $UPTO;         # -upto:   maximum age (in days) to list

my %URL_MOD;      # Hash of URLs to their age in days (from -M)
my %SAW_DIR;      # Hash of dir paths we've done (avoid symlink loops)

# Print each message to STDERR when debugging is enabled (-d).
sub debug {
    return unless $DEBUG;
    print STDERR "[$PROGNAME] $_\n" for @_;
    return;
}

# Print each message to STDERR only at the verbose debug level (-D or -d -d).
sub Debug {
    return unless $DEBUG > 1;
    print STDERR "[$PROGNAME] $_\n" for @_;
    return;
}

# Escape the characters that are special in HTML text and attribute values.
sub html_escape {
    my ($text) = @_;
    my %entity = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity{$1}/g;
    return $text;
}

#########################
# Usage
#########################

# Print any error messages followed by the usage text, then exit.
sub usage {
    foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }
    print STDERR "\n";
    print STDERR "Usage:\t$PROGNAME [-d] <file>\n";
    print STDERR "\tBuilds a \"What's New\" index\n";
    print STDERR "\n";
    print STDERR "Reads file (or '-' for STDIN) for domain info:\n";
    print STDERR "  domain	path\n";
    print STDERR "\n";
    print STDERR "-ignore <regexp>   Ignore these paths/files\n";
    print STDERR "-upto <days>       Don't show anything older than -upto days\n";
    print STDERR "-q                 No errors (for crontab)\n";
    print STDERR "\n";
    exit -1;
}

# Read the domain info into @DOMAINS.
#   $file - path of a file with one "domain<TAB>path" pair per line,
#           or '-' to read the pairs from STDIN.
# Calls usage() (which exits) when the file cannot be opened.
sub get_sites {
    my ($file) = @_;
    debug("get_sites($file)");

    # Three-arg open does not treat '-' as STDIN, so map it explicitly.
    my $fh;
    if ($file eq '-') {
        $fh = \*STDIN;
    }
    else {
        open($fh, '<', $file) or usage("Can't read input [$file]");
    }

    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ /\S/;
        my ($dom, $path) = split(/\t/, $line);
        unless (defined $dom && defined $path && length $path) {
            debug("Skipping malformed line [$line]");
            next;
        }
        push(@DOMAINS, [$dom, $path]);
        Debug("  add [$dom, $path]");
    }

    close($fh) unless $file eq '-';
    return;
}

# Parse @ARGV: set the option variables, then load the domain list from
# the single input file argument.  Any problem goes through usage().
sub parse_args {
    my $file;

    # 'defined' so that an argument of "0" does not end the loop early.
    while (defined(my $arg = shift(@ARGV))) {
        if ($arg =~ /^-h$/) { usage(); }
        if ($arg =~ /^-q$/) { $QUIET = 1;  next; }
        if ($arg =~ /^-d$/) { $DEBUG += 1; next; }
        if ($arg =~ /^-D$/) { $DEBUG += 2; next; }

        # Both value options accept "-opt=value" and "-opt value".
        if ($arg =~ /^-ignore(?:=(.+))?$/) {
            $IGNORE = defined $1 ? $1 : shift @ARGV;
            usage("-ignore requires a regexp") unless defined $IGNORE;
            usage("Bad -ignore regexp [$IGNORE]")
                unless defined eval { qr/$IGNORE/ };
            next;
        }
        if ($arg =~ /^-up_?to(?:=(.+))?$/) {
            $UPTO = defined $1 ? $1 : shift @ARGV;
            usage("-upto requires a number of days") unless defined $UPTO;
            next;
        }

        if ($arg =~ /^-./) { usage("Unknown option: $arg"); }

        usage("Too many inputs specified [$arg and $file]") if defined $file;
        $file = $arg;
    }
    usage("No input defined") unless defined $file;

    get_sites($file);
    return;
}

# Recursively collect page ages below one directory into %URL_MOD.
#   $domain - host name used to build the URLs
#   $dir    - filesystem root of that domain (replaced by http://$domain)
#   $path   - directory to scan on this call ($dir or something below it)
# Dies when a directory cannot be read, unless -q was given.
sub get_directory {
    my ($domain, $dir, $path) = @_;
    return unless $dir;
    Debug("get_directory($domain,..,$path)");

    # Skip it if we've done it already (symlink loops, duplicate roots).
    # This must be 'return': there is no loop in this sub, so 'next' would
    # abort the *caller's* loop iteration instead.
    my $abs = Cwd::abs_path($path);
    if (defined $abs) {
        return if $SAW_DIR{$abs}++;
    }

    return if $IGNORE && $path =~ /$IGNORE/;

    # Skip it if it's the path of one of our subdomains, let that index it
    foreach my $d (@DOMAINS) {
        my $sub_root = $d->[1];
        return if $sub_root =~ /^\Q$dir\E./ && $path =~ /^\Q$sub_root\E/;
    }

    # Read the directory
    my $dh;
    unless (opendir($dh, $path)) {
        return if $QUIET;
        die("[$PROGNAME] Couldn't read directory [$path]\n");
    }
    my @entries = grep { !/^\.{1,2}$/ } readdir($dh);
    closedir($dh);

    # Keep sub-directories and html files, except the default page (that
    # one is reported through its directory URL below).
    @entries = grep { -d "$path/$_" || (/\.$HTML$/ && !/\Q$DEFAULT\E/) } @entries;
    @entries = grep { "$path/$_" !~ /$IGNORE/ } @entries if $IGNORE;
    return unless @entries;

    # Paths are literal strings, so quote them when used as a pattern.
    my $url_path = $path;
    $url_path =~ s|^\Q$dir\E|http://$domain|;

    foreach my $file (@entries) {
        my $full = "$path/$file";

        if (!-d $full) {
            my $age = -M $full;
            $URL_MOD{"$url_path/$file"} = $age if defined $age;
        }
        elsif (!-f "$full/$NO_INDEX") {
            get_directory($domain, $dir, $full)
                unless -f "$full/$NO_CONTENTS";

            # The directory itself is listed by the age of its default page.
            my $def = "$full/$DEFAULT";
            if (-f $def && (!$IGNORE || $def !~ /$IGNORE/)) {
                my $age = -M $def;
                $URL_MOD{"$url_path/$file/"} = $age if defined $age;
            }
        }
    }
    return;
}

#########################
# Header/footer
#########################

# Return the HTML that opens the generated page.
# NOTE(review): tags reconstructed; only the text survived extraction.
sub header {
    my $upto = $UPTO ? "<br>Last $UPTO days of changes" : "";
    my $date = localtime;
    return <<"END_OF_HEADER";
<html>
<head><title>What's New?</title></head>
<body>
<h1>What's New?</h1>
What's New listing, generated by <a href='$MY_URL'>$PROGNAME</a>$upto on $date
<p>
END_OF_HEADER
}

# Return the HTML that closes the generated page.
# NOTE(review): tags reconstructed; only the text survived extraction.
sub footer {
    my $date = localtime;
    return <<"END_OF_FOOTER";
<p><hr>
Generated on $date;
</body>
</html>
END_OF_FOOTER
}

##################################################
# Main code
##################################################

# Parse the command line, scan every configured site and print the
# "What's New" page (newest first) on STDOUT.
sub main {
    parse_args();

    # Get the site index for each site
    my %did;
    foreach my $d (@DOMAINS) {
        my ($domain, $path) = @$d;
        next if $did{$path}++;
        debug("Getting: $domain [$path]");
        get_directory($domain, $path, $path);
    }

    # Sort it and print it..
    # NOTE(review): the table/link markup below is reconstructed.
    print header();
    print "<table>\n";

    foreach my $url (sort { $URL_MOD{$a} <=> $URL_MOD{$b} } keys %URL_MOD) {
        my $age = $URL_MOD{$url};
        last if $UPTO && $age > $UPTO;
        debug("Printing: $url");

        $url =~ s|^http://||;
        my @url = split('/', $url, -1);
        my $u   = "";    # "/domain/dir/..." printed so far

        print "<tr><td>\n";
        while (@url) {
            my $p = shift(@url);
            last unless defined $p && length $p;

            my $slash = @url ? "/" : "";

            # A single trailing empty component means a directory URL
            # ("dom/dir/"): this component is the last one to print.
            @url = () if @url == 1 && $url[0] eq '';

            my $n = $p;
            $n =~ s/_/ /g;
            $n =~ s/\.$HTML$//g;

            # Link the domain and the final page; inner directories are
            # shown as plain breadcrumb text.
            my $is_domain = @url && $u eq '';
            print "<a href='http://" . html_escape($p) . "/'>" if $is_domain;
            print "<a href='http:/" . html_escape("$u/$p$slash") . "'>"
                unless @url;
            print html_escape($n);
            print "</a>" if $is_domain || !@url;
            print @url ? " / " : "\n";

            $u .= "/$p";
        }

        my $old;
        if ($age > 7 * 52) {
            $old = int($age / (7 * 52)) . " years";
        }
        elsif ($age > 7) {
            $old = int($age / 7) . " weeks";
        }
        elsif ($age > 1) {
            $old = int($age) . " days";
        }
        else {
            $old = (int($age * 24) || 1) . " hours";
        }

        print "</td><td>&gt;$old</td></tr>\n";
    }

    print "</table>\n";
    print footer();
    return;
}

# Run only when executed as a script, so the file can also be loaded.
main() unless caller;