#!/usr/bin/perl
# Filename:	dus
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	Disk Usage Sorter
use strict;

##################################################
# Setup the variables
##################################################
# Basename of this script; used to prefix progress output and in the usage text
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

# Ignore entries smaller than this size (in kilobytes, see -k in $DU_ARGS)
my $IGNORE_SIZE	= 40;
# Within one report table, ignore entries smaller than this percentage of
# the largest entry in that table
my $IGNORE_PERCENT = 20;

# Arguments handed to du:
#   -x  do not descend into other filesystems
#   -k  report sizes in 1024-byte units.  POSIX du defaults to 512-byte
#       blocks, which would not match the "k" used by $IGNORE_SIZE and by
#       the report text, so the unit is requested explicitly.
my $DU_ARGS	= "-xk";
# Alternative flag set kept from the original (presumably for a du whose
# same-filesystem flag is -d -- confirm before enabling)
#my $DU_ARGS	= "-dk";

# Path of the du binary that is run for each top-level directory
my $DU		= "/usr/bin/du";

# Number of directory levels to report by default (overridden by -l)
my $LEVELS	= 3;

##################################################
# Usage
##################################################
# Print any error messages followed by the usage summary, then exit.
#   @_  - zero or more error messages, each printed as "ERROR:  <msg>"
# With messages, everything goes to STDERR and the exit status is -1.
# With none (plain help request), it goes to STDOUT and the exit status is 0.
# Never returns.
sub usage {
  my @errors = @_;
  # Errors belong on STDERR so they are not mistaken for normal output
  my $fh = @errors ? \*STDERR : \*STDOUT;
  foreach my $msg (@errors) { print $fh "ERROR:  $msg\n"; }
  print $fh "\n";
  print $fh "Usage:\t$PROGNAME [-h] [-d] [-l <num>] [<path>]\n";
  print $fh "\tDisk Usage Sorter\n";
  print $fh "\t-h         Show this help\n";
  print $fh "\t-d         Sort by path depth\n";
  print $fh "\t-l <num>   Depth to display\n";
  print $fh "\t<path>     Directory to examine (default: current directory)\n";
  print $fh "\n";
  exit(@errors ? -1 : 0);
}

# Parse @ARGV (consuming it) and work out what to scan and where to report.
# Recognised arguments:
#   -h                   show usage and exit
#   -d                   group the report by path depth
#   -l <num>, -l=<num>   (also spelled -levels) number of levels to display;
#                        stored in the file-level $LEVELS
#   <path>               directory to examine, at most one (default ".")
# Returns the list ($dir, $out, $by_depth):
#   $dir       path of the directory as printed by pwd from inside it
#   $out       report file name: "<last path component>.usage", or "usage"
#              when the path has no last component
#   $by_depth  true if the report is to be grouped by depth
# Calls usage(), which exits, on an invalid argument or unreachable path.
sub parse_args {
  my $dir;
  my $by_depth=0;
  while (@ARGV) {
    my $arg=shift(@ARGV);
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-d$/) { $by_depth=1; next; }
    if ($arg =~ /^-l(?:evels)?(?:=(.+))?$/) {
      # Test definedness, not truth: "-l=0" carries the valid value "0"
      # and must not swallow the following argument.
      my $levels = defined($1) ? $1 : shift(@ARGV);
      usage("Option $arg needs a non-negative integer")
        unless (defined($levels) && $levels =~ /^\d+\z/);
      $LEVELS=$levels;
      next;
    }
    if ($arg =~ /^-/) { usage("Unknown option: $arg"); }
    usage("Too many paths specified [$arg and $dir]") if defined($dir);
    $dir=$arg;
  }

  # Any non-zero level count implies a depth-grouped report
  $by_depth=1 if ($LEVELS);

  # Default to the current directory; a directory literally named "0"
  # is a real path and must not trigger the default.
  $dir="." unless (defined($dir) && length($dir));

  #usage("Couldn't find directory: [$dir]") unless -d $dir;

  # Cheap way to get real path: step into the directory, ask pwd, step back
  my $pwd=`pwd`; chomp($pwd);
  chdir($dir) || usage("Couldn't find [$dir]");
  $dir=`pwd`;  chomp($dir);
  # The report is written relative to the working directory, so say so
  # if we could not get back to where we started.
  chdir($pwd) || warn("$PROGNAME: couldn't return to [$pwd]: $!\n");

  # Out file: named after the last path component
  my $out=$dir;
  $out =~ s|.*/||;
  $out = length($out) ? "$out.usage" : "usage";

  return ($dir,$out,$by_depth);
}

# Escape a string so that /bin/sh reads it back as one literal word.
#   $s  - raw string (here: a path taken from the command line or readdir)
# Returns the escaped copy; the caller's variable is not modified.
#
# Everything outside a small known-safe set is backslash-escaped, rather
# than escaping a list of known-bad characters, so that ; $ ` | ( ) < > *
# and the like cannot reach the shell unquoted.  Strings that only contain
# whitespace, backslash, &, ' or " as special characters come out exactly
# as before.
sub shell_safe {
  my ($s) = @_;
  # Bytes 0x80-0xff are left alone so multi-byte file names are not split
  # up by backslashes; newline is handled separately below.
  $s =~ s/([^A-Za-z0-9_.,:+=%\@\/\x80-\xff\n-])/\\$1/g;
  # Backslash-newline is a line continuation in sh (the newline vanishes),
  # so a literal newline has to be single-quoted instead.
  $s =~ s/\n/'\n'/g;
  $s;
}

# Name the filesystem a path lives on, as reported by df.
#   $path  - path to query, ALREADY escaped for the shell by the caller
#            (it is interpolated into the command line as-is)
# Returns the first column of the last line of df's output, or "" when
# df printed nothing.
sub filesystem {
  my ($path) = @_;
  # -P selects the POSIX output format: exactly one line per filesystem.
  # Without it some df implementations wrap a long device name onto a line
  # of its own, leaving the last line with an empty first column, so that
  # every path would appear to be on the same (empty-named) filesystem.
  my @report=`df -P $path`;
  return "" unless (@report);
  my ($fs)=split(' ',$report[-1]);
  return defined($fs) ? $fs : "";
}

# Write one table of "size path" lines to the OUT filehandle.
#   @_  - flattened hash mapping path => size
# Entries are listed largest first; the listing stops at the first entry
# smaller than $IGNORE_PERCENT percent of the largest one.  An empty hash
# produces a "[no values greater than ...]" note instead.  Every table is
# followed by a blank line.
sub show_hash(%) {
  my %size_of = @_;

  # Largest first
  my @ranked = sort { $size_of{$b} <=> $size_of{$a} } keys %size_of;
  if (!@ranked) {
    return print OUT "[no values greater than ${IGNORE_SIZE}k]\n\n";
  }

  # Threshold is relative to the biggest entry of this table
  my $cutoff = $size_of{$ranked[0]} * $IGNORE_PERCENT / 100;
  for my $name (@ranked) {
    my $size = $size_of{$name};
    last if $size < $cutoff;
    printf OUT "%-10d %s\n", $size, $name;
  }
  print OUT "\n";
}

# Write the report section for one depth level to the OUT filehandle.
#   $level   - index of the level to print
#   @levels  - list of hash references, indexed by level, each mapping
#              path => size
# Prints a heading and hands the selected hash to show_hash().
sub show_level($@) {
  my $level = shift;
  my @levels = @_;

  print OUT "Level $level Directories\n",
            "-------------------\n";
  show_hash(%{$levels[$level]});
}

##################################################
# Main code
##################################################
# Scan the directory chosen on the command line and write the usage report.
#
# Every immediate subdirectory that filesystem() reports as being on the
# same filesystem as the target is measured with $DU; other entries are
# skipped.  Each du line of at least $IGNORE_SIZE is recorded under its
# path relative to the target: either per depth (the number of "/" in the
# relative path, for depths up to $LEVELS) or in one flat table.  The
# report goes to the file named by parse_args(), through the global OUT
# handle that show_hash()/show_level() print to.  Progress (program name,
# then one dot per directory measured) goes to STDERR.
sub main {
  my ($dir,$out,$by_depth) = parse_args();

  my $dir_fs=filesystem(shell_safe($dir));

  # Start the progress line
  print STDERR "$PROGNAME:  ";

  opendir(my $dh,$dir) || usage("Couldn't opendir [$dir]??");
  my @contents=grep(!/^\.{1,2}$/,readdir($dh));
  closedir($dh);

  my (@level,%files);

  # du is run without a shell (list-form pipe open), so file names can
  # never be interpreted as shell syntax; $DU_ARGS may hold several
  # whitespace-separated options.
  my @du_cmd=($DU,split(' ',$DU_ARGS));

  foreach my $entry ( @contents ) {
    my $path="$dir/$entry";
    # filesystem() interpolates its argument into a shell command, so it
    # still needs the escaped form.
    next unless (-d $path && filesystem(shell_safe($path)) eq $dir_fs);

    open(my $du,'-|',@du_cmd,$path) || die("Couldn't open pipe to $DU\n");
    while(my $line=<$du>) {
      next unless ($line =~ /^(\d+)\s+(\S.+)/);
      my ($k,$p) = ($1,$2);
      next if ($k < $IGNORE_SIZE);
      # Make the path relative to the target.  \Q..\E: the directory name
      # is literal text, not a pattern (it may contain + ( . and so on).
      $p =~ s/^\Q$dir\E//;
      $p =~ s|^/||;
      if ($by_depth) {
        # Depth is the number of separators left in the relative path;
        # anything deeper than $LEVELS is never displayed, so drop it.
        my $depth = ($p =~ tr|/||);
        $level[$depth]{$p}=$k if ($depth <= $LEVELS);
      } else {
        $files{$p}=$k;
      }
    }
    close($du);
    print STDERR ".";
  }
  print STDERR "\n";

  print "Writing output [$out]\n";
  # Three-argument open: the file name is taken literally, whatever
  # characters the directory name contributed to it.
  open(OUT,'>',$out) || usage("Couldn't write output file [$out]");
  if ($by_depth) {
    foreach ( 0..$LEVELS ) {
      # Only levels that actually received entries get a section
      show_level($_,@level) if ($level[$_]);
    }
  } else {
    show_hash(%files);
  }
  # Buffered write errors (e.g. a full disk) only surface at close
  close(OUT) || die("Couldn't finish writing [$out]: $!\n");

}
# Script entry point: everything above only defines settings and subs
main();

