#!/usr/bin/perl
# Filename:	File_Browser.cgi
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	Browser for a hierarchial database
#		Specifically doesn't follow directory links for security reasons
#		To do directory links make a file called
#		  <name>=LINK:<database>:path:path:path..
#		To do URL links, make a file called:
#		  <name>=URL:<url>  where url has \ instead of /
#
# TODO:  Links should just open on the spot, as opposed to opening the
#        link elsewhere in the hierarchy.
use strict;

##################################################
# Paths
##################################################
my $DEFAULT_INDEX = "index.html";

my $LINE_GIF	= "line.gif";
my $LINE_WIDTH	= 420;
my $FOLDER_DIR	= "http://DavePics.com/Icons/Folders";
my $ORDER_FILE	= ".order";	# For ordering directory contents

# The directory we are running from
my $ROOT	= $0;
   $ROOT =~ s|/[^/]*$||;

# Try to convert this to a URL (Kludge - we don't know what httpd.conf says)
# Try using $SCRIPT_NAME first
my $ROOT_URL	= $ENV{'SCRIPT_NAME'};
if ($ROOT_URL) {
  $ROOT_URL =~ s|/[^/]*$||;
} else {
  # Old kludgy method:
  # Are we in a home directory?  Convert to ~user
  ($ROOT_URL=$ROOT) =~ s#/(user|users|home)/(.*)/public_html#/~$2#;
}

# What is the name of our script?
my $SCRIPT	= $0;
$SCRIPT =~ s|.*/||;
# If it's 'index.*' then use the name of the parent directory
$SCRIPT = "$1.cgi" if ($SCRIPT =~ /^index\./ && $0 =~ m|/([^/]+)/[^/]+$|);
$SCRIPT =~ s/Fringe.DaveSource.com/Fringe/ig;		# Kludge!

# If '_all' is in the name then take it out and turn on SHOW_ALL
my $SHOW_ALL= ($SCRIPT =~ /_all/);
$SCRIPT =~ s/_all//;

# What's my name?
my $NAME	= $SCRIPT;
$NAME =~ s|\.cgi$||;
my $MAIN_DIR=$NAME;
chdir($ROOT);

# Settings

# The browser can open up multiple folders at once, but this can
# lead to an exponentially growing number of possible "pages" that
# web crawlers waste lots of time in.
# Use rel=nofollow on links that open up disconnected folders:
my $NOFOLLOW_MULTIOPEN = 1;

# Do we allow symlinks in the database?  Generally this is a bad idea.
my $FOLLOW_SYMLINKS = 0;

# Global vars..  blagh
my %QUERY;
my @ITEMS;

##################################################
# The folder gifs
##################################################
my $BIG=0;	# Do we use the big icons?
my ($OPEN,$CLOSED,$DOC);
if ($BIG) {
  $OPEN="<img src=$FOLDER_DIR/Open_Folder.big.gif border=0 width=34 height=29>";
  $CLOSED="<img src=$FOLDER_DIR/Closed_Folder.big.gif border=0 width=32 height=25>";
  $DOC="<img src=$FOLDER_DIR/Doc.big.gif border=0 width=30 height=38>";
} else {
  $OPEN="<img src=$FOLDER_DIR/Open_Folder.gif alt=O border=0 width=14 height=11>";
  $CLOSED="<img src=$FOLDER_DIR/Closed_Folder.gif alt=0 border=0 width=14 height=11>";
  $DOC="<img src=$FOLDER_DIR/Doc.gif alt='-' border=0 width=9 height=11>";
}
my $LINK ="<img src=$FOLDER_DIR/Link.gif alt='>' border=0 width=17 height=16>";
my $L_LINE="<img src=$FOLDER_DIR/L-Line.gif alt='L' border=0 width=12 height=12>";
my $T_LINE="<img src=$FOLDER_DIR/T-Line.gif alt='+' border=0 width=12 height=12>";
my $BAR="<img src=$FOLDER_DIR/Bar.gif alt='|' border=0 width=12 height=12>";
my $NO_BAR="<img src=$FOLDER_DIR/Clear_Dot.gif alt='.' border=0 width=12 height=12>";

##################################################
# Draw a line
##################################################
sub line { print "\n<p><img src=$LINE_GIF alt='", "_"x50 , "'><p>\n\n"; }

##################################################
# Parse the query.
#
# The query is a set of actions.  Each action is
# either:  <action> or <action>=<string>
# The actions are separated by either '&' or '?'
#
# This pulls the actions apart and builds an associative
# array (called %QUERY) that contains the actions
#
# If a specific action is seen multiple times, the
# string is a set of strings separated by ':'
##################################################
sub parse_query {
  my (@actions,$i);
  ####################
  # Actions:
  # QUERY_STRING is of the form:  "action=QUERY{'search'}"
  # Where action is:  search or search_email
  ####################

  my @query = split(/[\&\?]/, $ENV{QUERY_STRING});
  foreach my $query ( @query ) {
    my ($action,$var);
    $action=$query if (!(($action,$var) = ($query =~ /([^=]*)=(.*)/)));
    if (!$QUERY{$action}) {
      $QUERY{$action}=$var;
    } else {
      $QUERY{$action}.=":$var";		# Add to the action list
    }
  }
}

##################################################
# Header and search form
##################################################
sub cgi_header {

  print <<HEADER;
Content-type: text/html


<html>
<head>
<title>$NAME Browser</title>
<link rel="SHORTCUT ICON" href="/favicon.ico">
</head>

<body>

HEADER
}

sub header {
  print "<table width=$LINE_WIDTH>\n";

  print "<td width=60% align=left>\n";

  if (open(TITLE,"${ROOT}/${NAME}.title")) {
    while(<TITLE>) {print;}  close(TITLE);
  } else {
    print "<h2><b><i><hr>\n";
    print "$NAME Browser";
    print "<hr></i></b></h2>\n";
  }

  print "</td>\n<td align=right>\n";

  print "<form action=$SCRIPT method=get>\n";
  print "<b>Search</b>\n";
  print "<input type=text";
  print " name='search' size=15 maxlength=45>\n";
  print "</form>\n";

  print "</td>\n";

  print "</table>\n";

#  print "<h3>Root path:  $ROOT</h3>\n";

  print "<h3>This is the full contents of the <a href=\"$SCRIPT\">$NAME</a></h3>\n"
    if ($SHOW_ALL);

  line();
}

##################################################
# Access
##################################################
sub access {
  my $ACCESS_COUNT_FILE="${ROOT}/${NAME}.access";
  if (!open(ACCESS,"<$ACCESS_COUNT_FILE")) {
    system("echo 0 > $ACCESS_COUNT_FILE");
    system("/bin/date +'%r, %a, %b %e, %Y' >> $ACCESS_COUNT_FILE");
    if (!open(ACCESS,"<$ACCESS_COUNT_FILE")) {
      print "ERROR:  Cannot open access count file [$ACCESS_COUNT_FILE]\n";
      return;
    }
  }
  my ($count,$access_date)=<ACCESS>;
  close(ACCESS);

  chop($count);  chop($access_date);

  $count++;
  print "Accessed $count times since $access_date<br>";

  if (!open(ACCESS,">$ACCESS_COUNT_FILE")) {
    print "ERROR:  Couldn't write access count file  [$ACCESS_COUNT_FILE]\n";
    return;
  }
  print ACCESS "$count\n$access_date\n";
  close(ACCESS);
}

##################################################
# Footer
##################################################
sub footer {
  line();
  print "<h4></b>\n";
  print "Questions, comments or corrections?<br>\n";
  print "<ul>\n";
  print "  <li>  Back to <a href=\"http://GetDave.com/\">GetDave</a> or <a href=\"http://DaveSource.com/\">DaveSource</a>\n";
  print "  <li>  Klunky source available at <a href=\"http://MarginalHacks.com/\">Marginal Hacks</a>\n";
#  print "  <li>  More software at: <a href=\"http://www.allworldsoft.com/\">www.AllWorldSoft.com</a>,\n";
#  print "        <a href=\"http://www.freedownloadscenter.com/\">Free Downloads Center</a>\n";
  print "  <li>  ";
  access();
  print "  <li>  <a href=\"/${NAME}\">Non-cgi database browsing</a>\n";
  print "  <li>  <a href=\"/Site_Index/\">Site Index</a>\n"
    if (-f "Site_Index.html");
#  print "  <li>  <a href=\"${NAME}_all.cgi\">open everything</a>",
#        "  (so search engines see all the contents)\n"
#    unless ($SHOW_ALL);
  print "  <li>  <a href=\"${NAME}.cgi?show_folders=1\">Show All Folders</a>\n"
    unless ($QUERY{'show_folders'});
  print "</ul>\n";

  print "\n</body>\n</html>\n";
}

#########################
# Is this path supposed to be open?
#
# It is if any of the $QUERY{'opens'} paths contain this path
#
# If so, then we return all the paths in $QUERY{'opens'} that don't contain
# this path.  (This is for making the close href).
# -- Unless we are doing a search - then we don't actually want to
#    close the folders we select - we want to open them, since a search
#    doesn't really fully open the folders it shows.  In that case
#    we check the 'opens' and just return the path components
#
# If this is the last folder to close then the paths to return
# will be null - we avoid this problem by putting a colon at the
# begin of the return string no matter what (this also makes it
# easier to build  :).  This is stripped off after the call to check_open
#########################
sub check_open {
  my ($path)=@_;

  return 1 if ($SHOW_ALL || $QUERY{'show_folders'});

  my ($without)=":";
  my ($found)=0;

  return 0 if (!$QUERY{'opens'});

  foreach my $op (split(/:/,$QUERY{'opens'})) {
    if ($op =~ /^$path$/) {
      $found++;
    } else {
      $without.="$op:";
    }
  }
  return 0 unless ($found);
  # Strip off the last colon if it's not the only colon
  $without =~ s/:$// unless ($without eq ":");
  return $without;
}

#########################
# This adds all the components for a path so it opens
# (Links use this)
#########################
sub path_components {
  my ($path) = @_;
  my ($new,$tmp);
  # Figure out all the pieces that make up this path
  foreach my $piece ( split(/\//,$path) ) {
    $new .= ":${tmp}$piece";
    $tmp .= "$piece/";
  }
  return $new;
}
sub force_open {
  my ($force_path) = @_;

  my $new_string = path_components($force_path);

  # Add anything else from QUERY{'opens'}
  foreach my $path ( split(/:/,$QUERY{'opens'}) ) {
    $new_string.=":$path" if (!($force_path =~ /^${path}(\/.*)*$/));
  }
  return $new_string;
}

#########################
# Draw the T-lines and L-Lines
#########################
sub do_bars {
  my ($level)=@_;

  return unless ($level);

  my $i;
  for($i=0; $i<$level-1; $i++) {
    if ($ITEMS[$i]<0) {
      print "$NO_BAR";
    } else {
      print "$BAR";
    }
  }
  if (--$ITEMS[$level-1]<0) {
    print "$L_LINE";
  } else {
    print "$T_LINE";
  }
}

#########################
# Show a database link (actually now only used for =LINK)
#########################
sub show_link {
  my ($link_name,$path,$level)=@_;

  # Convert : to /
  $path =~ s|:|/|g;

  # Put the database name at the front
  $path="${NAME}/$path";

  do_bars($level);

  print "<a href=\"$SCRIPT?opens=".force_open($path)."\">\n";
  print "$CLOSED <i>$link_name</i></a> -&gt; $path<br>\n";
}

#########################
# Show a url
#########################
sub show_url {
  my($name,$url,$level)=@_;

  $name =~ s|($QUERY{'search'})|<b><font color=red>$1</b></font>|ig
    if ($QUERY{'search'});

  # URL uses \ instead of / so it can be a filename
  $url =~ s|\\|/|g;

  do_bars($level);

  print "<a target=${NAME}-ext href=\"$url\">";
  print "$LINK $name</a><br>\n";
}

#########################
# Show a file
#########################
sub show_file {
  my ($path,$file,$level)=@_;

  # Don't show ordering file
  return $ITEMS[$level-1]-- if ($file eq $ORDER_FILE);

  # This only partially works (screws up T_LINE and L_LINE)
  return if ($QUERY{'search'} && !($file =~ /$QUERY{'search'}/i));

  if ($file =~ /=LINK:/) {
    if ($file =~ /^(.*)=LINK:${NAME}:(.*)$/) {
      # There should be code for NOFOLLOW_MULTIOPEN here.. No big deal
      show_link($1,$2,$level)
    } else {
      # Try not to screw up the L_LINE (it'll happen if this is the last item)
      $ITEMS[$level-1]--;
    }
    return;
  }

  if ($file =~ /^(.*)=URL:(.*)$/) {
    show_url($1,$2,$level);
    return;
  }

  my ($file_string) = $file;
  $file_string =~ s|($QUERY{'search'})|<b><font color=red>$1</b></font>|ig
    if ($QUERY{'search'});

  do_bars($level);

  my $url = $file eq $DEFAULT_INDEX ? "$ROOT_URL/$path/" : "$ROOT_URL/$path/$file";
  print "<a target=$NAME href=\"$url\">";
  print "$DOC " if ($file eq $DEFAULT_INDEX);
  print "$file_string</a><br>\n";
}

#########################
# Sort files
#########################
sub sort_files {
  my $full_path=shift(@_);

  my %order;	# Arbitrary ordering file
  if (open(ORDER,"$full_path/$ORDER_FILE")) {
    while(<ORDER>) {
      chomp;
      s/#.*$//;  next if (/^\s*$/);	# Ignore comments, blank lines
      $order{$_}=$.;
    }
  }
  close(ORDER);

  # index.html goes first
  $order{$DEFAULT_INDEX}=-2 if (!$order{$DEFAULT_INDEX});
  $order{"README"}=-1 if (!$order{"README"});

  sort {
      return $order{$a} <=> $order{$b} if ($order{$a} && $order{$b});
      return -1 if ($order{$a});
      return 1 if ($order{$b});
      return $a cmp $b;
    }
    @_;
}

#########################
# Check if opens have conflicting paths
# Return 1 if we have two paths in the list that aren't parent/children
#########################
sub nofollow_multiopen {
  my ($opens) = @_;
  return 0 unless $NOFOLLOW_MULTIOPEN;
  return 0 unless $opens;
  my @opens = split(/:/,$opens);
  my $longest = $opens[0];
  foreach my $open ( @opens ) {
    ($open,$longest) = ($longest,$open) if length($open)>length($longest);
    return 1 unless $longest =~ /^$open/;
  }
  return 0;
}

#########################
# Show a directories contents
#########################
sub show_dir {
  my ($full_path,$level)=@_;
  my $i;

  my ($dir)=($full_path =~ /([^\/]+)$/);
  my $dir_string=$dir;
  $dir_string =~ s|($QUERY{'search'})|<b><font color=red>$1</b></font>|ig
    if ($QUERY{'search'});
  $dir_string =~ s|_| |g;

  # Don't follow symbolic links
  return if (-l $full_path && !$FOLLOW_SYMLINKS);

  #########################
  # Deal with the level bars
  #########################
#  for($i=0;$i<$level-1;$i++) {
#    print "$BAR";
#  }
#  print "$T_LINE" if ($level);

  my $new_paths = check_open($full_path);
  if ($new_paths) {
    #########################
    # This directory is open - show it and do show_dir again
    #########################
    do_bars($level);

    # Pull off the first colon
    $new_paths=substr($new_paths,1);
    my $opens = $new_paths eq "" ? "" : "?opens=$new_paths";
    my $href = "<a href=\"${SCRIPT}$opens\">";
    if (-f "${full_path}/$DEFAULT_INDEX") {
      print "$href\n";
      print "$OPEN</a>\n";
      print "<a target=$NAME href=\"$ROOT_URL/$full_path/\">$DOC</a>\n";
      print "$href\n";
      print "$dir_string</a><br>\n";
    } else {
      print "$href\n";
      print "$OPEN $dir_string</a><br>\n";
    }
    if (opendir(DIR,$full_path)) {
      my (@files)=sort_files($full_path, grep(!/^\.\.?$/,readdir(DIR)));
      closedir(DIR);
      $ITEMS[$level]=$#files;	# Number of items for this level
      my ($file);
      for($i=0; $i<=$#files; $i++) {
        if (-d "$full_path/$files[$i]") {
          show_dir("$full_path/$files[$i]",$level+1);
        } else {
          show_file($full_path,$files[$i],$level+1)
            unless ($QUERY{'show_folders'});
        }
      }
    } else {
      print "<b>ERROR:  Couldn't read directory: $dir</b><br>\n";
    }
  } else {
    #########################
    # This directory is closed
    #########################
    # This only partially works (screws up T_LINE and L_LINE)
    if ($full_path eq $MAIN_DIR ||
        !$QUERY{'search'} || ($full_path =~ /$QUERY{'search'}/i)) {

      do_bars($level);

      my $opens=($QUERY{'opens'}?"$QUERY{'opens'}:":"").${full_path};
      my $href = nofollow_multiopen($opens) ?
        "<a rel=nofollow href=\"$SCRIPT?opens=$opens\">" :
        "<a href=\"$SCRIPT?opens=$opens\">";
      if (-f "${full_path}/$DEFAULT_INDEX") {
        print "$href\n";
        print "$CLOSED\n";
        print "<a target=$NAME href=\"$ROOT_URL/$full_path/\">$DOC</a>\n";
        print "$href\n";
        print "$dir_string</a>\n";
        print "<br>\n";
      } else {
        print "$href\n";
        print "$CLOSED $dir_string</a><br>\n";
      }
    }
  }
}

##################################################
# Search code
##################################################
sub do_search {
  my ($full_path)=@_;
  # Don't follow symbolic links
  return if (-l $full_path && !$FOLLOW_SYMLINKS);

  #Only needed if someone puts 'opens=' and 'search=' into a URL
  #$QUERY{'opens'}.=":" unless ($QUERY{'opens'} =~ /:$/);

  my $show_me=0;

  # Open the directory if it matches?
  #my($dir)=($full_path =~ /([^\/]+)$/);
  #$show_me=1 if ($dir =~ /$QUERY{'search'}/i);

  if (opendir(DIR,$full_path)) {
    my @files=sort grep(!/^\.\.?$/,readdir(DIR));
    closedir(DIR);
    foreach my $file ( @files ) {
      # Ignore database links
      next if ($file =~ /^LINK:/);
      # Check if we contain the string
      $show_me++ if ($file =~ /$QUERY{'search'}/i);
      # If it's a directory, go down
      $show_me+= do_search("$full_path/$file") if (-d "$full_path/$file");
    }
    $QUERY{'opens'}.="${full_path}:" if ($show_me);
  }
  return $show_me;
}

##################################################
# Main code
##################################################
sub main {
  cgi_header();

  print "<body bgcolor=FFFFFF link=0000FF alink=00FF00 vlink=0000FF>\n";

  parse_query() if (!$SHOW_ALL);

  header();

  my $search_success=do_search("$MAIN_DIR")
    if ($QUERY{'search'});

  print "<center><h3>No results found for search: $QUERY{'search'}</h3></center><p>\n"
    if ($QUERY{'search'} && !$search_success);

  show_dir("$MAIN_DIR",0);

  # Show the description if everything is closed
  # (What if people link to it with an open string and the directories
  #  change?  - I guess they'll just get a closed database..)
  if (!$QUERY{'opens'} && !$search_success && !$QUERY{'show_folders'}) {
    if (open(DESC,"${ROOT}/${NAME}.desc")) {
      while(<DESC>) {print;} close(DESC);
    }

  } else {
    # Close all - only show if something is open
    print "<p><a href=\"$SCRIPT\">Close</a> all directories<br>\n";
  }

  footer();
} main();
