#!/usr/bin/perl
# Filename:	whats_new
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	Creates an HTML "What's New" index
# Input:	Reads a file (or STDIN) of "domain <tab> path"
# Bugs:		Can get caught in infinite loops due to symlinks,
# 		use -ignore to avoid these paths.
use strict;
use Cwd;

# Mask out group/other write permission on anything this process creates
umask 022;

##################################################
# Setup the variables
##################################################
# Program name with any leading directory stripped; tags debug/usage output
(my $PROGNAME = $0) =~ s|.*/||;

# [domain, path] pairs collected from the input file or STDIN
my @DOMAINS;

# Regexp fragment for page extensions: htm, html, shtm, shtml
my $HTML        = 's?html?';
# Default page of a directory; listed via the directory URL, not by name
my $DEFAULT     = 'index.html';
# Marker file: a directory containing it is left out completely
my $NO_INDEX    = '.no_index';
# Marker file: a directory containing it is listed but not descended into
my $NO_CONTENTS = '.no_contents';

# Home page of this tool, linked from the generated header
my $MY_URL      = 'http://MarginalHacks.com/#whats_new';

# Print each message on its own line to STDERR, tagged with the program
# name.  Silent unless $MAIN::DEBUG is true (raised by -d / -D).
sub debug {
  $MAIN::DEBUG or return;
  print STDERR "[$PROGNAME] $_\n" for @_;
}
# Verbose variant of debug(): same output format, but only printed when
# the debug level in $MAIN::DEBUG is above 1.
sub Debug {
  $MAIN::DEBUG>1 or return;
  print STDERR "[$PROGNAME] $_\n" for @_;
}

#########################
# Input and usage
#########################
# Read the domain info into @DOMAINS.
#
#   $file   Name of the input file, or '-' to read STDIN.
#
# Every input line is "domain <tab> path" and is appended to @DOMAINS as
# a [domain, path] pair.  Lines without a path (blank lines included) are
# skipped, since there is nothing to index for them.
# Calls usage(), which exits, if the file can't be opened.
sub get_sites {
  my ($file) = @_;
  debug("get_sites($file)");

  # Three-arg open keeps odd characters in the name from changing the
  # open mode, so '-' has to be mapped to STDIN explicitly.
  my $in;
  if ($file eq '-') {
    $in = \*STDIN;
  } else {
    open($in, '<', $file) or usage("Can't read input [$file]: $!");
  }

  while (my $line = <$in>) {
    chomp $line;
    my ($dom,$path) = split(/\t/,$line);
    next unless defined $path && length $path;
    push(@DOMAINS,[$dom,$path]);
    Debug("  add [$dom, $path]");
  }

  close $in unless $file eq '-';
}

# Print the given error messages (if any) followed by the help text to
# STDERR, then exit with status -1.  Never returns.
sub usage {
  print STDERR "ERROR:  $_\n" for @_;
  print STDERR
    "\n",
    "Usage:\t$PROGNAME [-d] <file>\n",
    "\tBuilds a \"What's New\" index\n",
    "\n",
    "Reads file (or '-' for STDIN) for domain info:\n",
    "  domain <tab> path\n",
    "\n",
    "-ignore <regexp>   Ignore these paths/files\n",
    "-upto   <days>     Don't show anything older than -upto days\n",
    "-q                 No errors (for crontab)\n",
    "\n";
  exit -1;
}

my $IGNORE;	# -ignore regexp; paths/files matching it are skipped
my $UPTO;	# -upto limit in days; older entries are not printed

# Return the value of an option that takes one: the inline "=value" part
# when present, otherwise the next command-line argument.
# Calls usage() (which exits) if no value is available.
sub _option_value {
  my ($opt, $inline) = @_;
  return $inline if defined $inline;
  usage("Option $opt needs a value") unless @ARGV;
  return shift @ARGV;
}

# Parse the command line (consuming @ARGV), then load the domain list.
#
#   -h              Show usage and exit
#   -q              Sets $MAIN::QUIET
#   -d / -D         Raise $MAIN::DEBUG by 1 / 2
#   -ignore <re>    Sets $IGNORE (also -ignore=<re>)
#   -upto <days>    Sets $UPTO (also -up_to, and -upto=<days>)
#   <file>          Input file passed to get_sites(); exactly one required
#
# Any problem is reported through usage(), which exits.
sub parse_args {
  my $file;
  # Test with defined() so an argument of "0" or "" doesn't end parsing
  while (defined(my $arg=shift(@ARGV))) {
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-q$/) { $MAIN::QUIET=1; next; }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG+=1; next; }
    if ($arg =~ /^-D$/) { $MAIN::DEBUG+=2; next; }
    # The inline value is the only capture group, i.e. $1
    if ($arg =~ /^-ignore(?:=(.+))?$/) { $IGNORE=_option_value($arg,$1); next; }
    if ($arg =~ /^-up_?to(?:=(.+))?$/) { $UPTO=_option_value($arg,$1); next; }
    # A lone "-" falls through: it names STDIN as the input
    if ($arg =~ /^-./) { usage("Unknown option: $arg"); }
    usage("Too many inputs specified [$arg and $file]") if (defined($file));
    $file=$arg;
  }
  usage("No input defined") unless defined($file) && length($file);

  get_sites($file);
}

my %URL_MOD;	# Hash of URLs to their MOD times (age in days, from -M)
my %SAW_DIR;	# Hash of dir paths we've done (avoid symlink loops)

# Recursively index one directory into %URL_MOD.
#
#   $domain   Host name used to build the URLs
#   $dir      Root path of this domain; replaced by "http://$domain"
#   $path     Directory to scan ($dir itself on the first call)
#
# Records every page matching \.$HTML (other than the default page) under
# its URL, and every subdirectory that has a $DEFAULT page under the
# subdirectory URL (with trailing slash).  Subdirectories holding a
# $NO_INDEX file are skipped, those holding $NO_CONTENTS are listed but
# not descended into.  A directory that resolves (via symlinks) to one
# already visited is skipped entirely.
# Dies if a directory can't be read, unless $MAIN::QUIET is set.
sub get_directory {
  my ($domain, $dir, $path) = @_;

  Debug("get_directory($domain,..,$path)");

  # Skip it if we've done it already
  my $abs = Cwd::abs_path($path);
  return if $SAW_DIR{$abs}++;

  return unless $dir;
  return if $IGNORE && $path =~ /$IGNORE/;

  # Skip it if it's the path of one of our subdomains, let that index it.
  # Paths are literal text, so quote them inside the patterns.
  foreach my $d ( @DOMAINS ) {
    my $other = $d->[1];
    return if $other =~ /^\Q$dir\E./ && $path =~ /^\Q$other\E/;
  }

  # Read the directory; the handle is lexical because we recurse
  my $dh;
  unless (opendir($dh, $path)) {
    return if $MAIN::QUIET;
    die("[$PROGNAME] Couldn't read directory [$path]: $!\n");
  }
  # Keep subdirectories, plus pages that aren't the default page
  my @dir = grep(-d "$path/$_" || /\.$HTML$/ && !/$DEFAULT/,
                 grep(!/^\.{1,2}$/, readdir($dh)) );
  closedir($dh);
  @dir = grep( "$path/$_" !~ /$IGNORE/, @dir) if $IGNORE;

  # Handle directories and html
  return unless (@dir);

  my $url_path = $path;
  $url_path =~ s|^\Q$dir\E|http://$domain|;

  foreach my $file ( @dir ) {
    my $full = "$path/$file";

    unless (-d $full) {
      $URL_MOD{"$url_path/$file"} = -M $full;
      next;
    }
    next if -f "$full/$NO_INDEX";

    unless (-f "$full/$NO_CONTENTS") {
      # An alias of a directory that is already indexed is left out
      # completely, default page included.
      next if $SAW_DIR{ Cwd::abs_path($full) };
      get_directory($domain, $dir, $full);
    }

    my $def = "$full/$DEFAULT";
    $URL_MOD{"$url_path/$file/"} = -M $def
      if -f $def && (!$IGNORE || $def !~ /$IGNORE/);
  }
}

#########################
# Header/footer
#########################
# Return the opening HTML of the generated page: title, stylesheet and a
# credit line with the generation time (plus the -upto range, if set).
sub header {
  my $stamp = localtime;
  my $range = "";
  $range = "<br>Last $UPTO days of changes" if $UPTO;
  return <<PAGE_TOP;
<html>
  <head>
    <title>
      What's New?
    </title>
    <style type='text/css'>
    <!--
      .parent { font-size: x-small }
    -->
    </style>
  </head>
  <body bgcolor='white'>
    What's New listing, generated by <a href='$MY_URL'>$PROGNAME</a>$range on $stamp<p>
    <p><hr><p>
PAGE_TOP
}

# Return the closing HTML of the generated page, stamped with the
# current local time.
sub footer {
  my $stamp = localtime;
  return join("\n",
    "    <p><hr><p>",
    "    Generated on $stamp;",
    "  </body>",
    "</html>",
    "");
}

##################################################
# Main code
##################################################
# Describe an age in days (as returned by -M) as a coarse "N units" text.
sub _age_label {
  my ($age) = @_;
  return int($age/(7*52))." years" if $age>7*52;
  return int($age/7)." weeks"      if $age>7;
  return int($age)." days"         if $age>1;
  # Under a day: whole hours, but never less than 1
  my $hours = int($age*24) || 1;
  return "$hours hours";
}

# Print one table row for $url: a breadcrumb of links (one per path
# component, the last one highlighted) followed by the age of the page.
# NOTE(review): names and URLs are printed without HTML escaping.
sub _print_row {
  my ($url, $age) = @_;

  $url =~ s|^http://||;
  # The -1 keeps the empty last field that marks a directory URL
  my @parts = split('/',$url,-1);
  my $is_dir = (@parts && !length($parts[-1]));
  # Test for length, not truth, so a component named "0" is kept
  @parts = grep { length } @parts;

  print "<tr valign='bottom'><td align='right'>\n";
  my $u = "";		# Path of the components printed so far
  my $in_span = 0;
  foreach my $i ( 0..$#parts ) {
    my $p = $parts[$i];
    my $last = ($i == $#parts);
    my $slash = ($last && !$is_dir) ? "" : "/";
    # Display name: underscores become spaces, page extension dropped
    my $n = $p; $n =~ s/_/ /g; $n =~ s/\.$HTML$//g;
    if ($i == 0 && !$last) {
      print "<span class='parent'>";
      $in_span = 1;
    }
    if ($last) {
      print "</span>" if $in_span;
      print "&nbsp;</td><td bgcolor='#ffff00'>";
    }
    # $u is empty or starts with '/', so this always yields "http://..."
    print "<a href='http:/$u/$p$slash'>$n</a>";
    print $last ? "\n" : " / ";
    $u.="/$p";
  }
  my $old = _age_label($age);
  print "</td><td><span style='white-space: nowrap'>&nbsp; &gt;$old</span></td></tr>\n";
}

# Entry point: parse the command line, index every domain path, then
# print the "What's New" page to STDOUT, newest pages first.
sub main {
  parse_args();

  # Get the site index for each site (each distinct path only once)
  my %did;
  foreach my $d ( @DOMAINS ) {
    my ($domain,$path) = @$d;
    next if $did{$path}++;
    debug("Getting: $domain [$path]");
    get_directory($domain, $path, $path);
  }

  # Newest first; equal ages fall back to URL order so that the page
  # comes out the same on every run.
  my @urls = sort { $URL_MOD{$a} <=> $URL_MOD{$b} || $a cmp $b }
             keys %URL_MOD;

  print header();
  print "<center>\n";
  print "<table cellspacing='0'>\n";
  foreach my $url ( @urls ) {
    my $age = $URL_MOD{$url};
    # Sorted by age, so everything after this one is too old as well
    last if $UPTO && $age>$UPTO;
    debug("Printing: $url");
    _print_row($url, $age);
  }
  print "</table>\n";
  print "</center>\n";
  print footer();
}
main();
