#!/usr/bin/perl
# Filename:     xags
# Author:       David Ljung Madison <DaveSource.com>
my $VERSION =   1.03;
# See License:  http://MarginalHacks.com/License
# Description:  GNU xargs 4.1 replacement.  See version()
# Written:      In 8 inspired hours on 7/5/2001
use strict;

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

##################################################
# Usage
##################################################
sub version {
	my $v = sprintf("%4.2f",$VERSION);
	print <<END_VERSION;

xags version $v

Author:   David Ljung Madison <DaveSource.com>
License:  http://MarginalHacks.com/License
          (This program is *not* a derivative work,
           I have not viewed/used/modified any of the GNU xargs source)

Black-box rewrite of GNU xargs version 4.1 except:
 0)  --version is different, obviously
 1)  Can properly shell quote input lines
 2)  Can handle "-i -n"
 3)  Additional option:  -L/--nosplit-lines
 4)  --max-chars is only bounded by memory (instead of 20k) if not set
 5)  Properly exits if size exceeded *only* when --exit specified
 6)  Poorly distinguishes between exit 126 and exit 127
 7)  Starts commands as soon as it's read enough input
     (as opposed to getting all of stdin first)
 8)  Ordering of options doesn't matter

END_VERSION
	exit 0;
}

sub usage {
	my $msg;

	foreach $msg (@_) { print STDERR "$PROGNAME: $msg\n"; }

	my $usage =<<END_USAGE;
Usage: $PROGNAME [-0prtx] [-e[eof-str]] [-i[replace-str]] [-l[max-lines]]
			 [-n max-args] [-s max-chars] [-P max-procs] [--null] [--eof[=eof-str]]
			 [--replace[=replace-str]] [--max-lines[=max-lines]] [--interactive]
			 [--max-chars=max-chars] [--verbose] [--exit] [--max-procs=max-procs]
			 [--max-args=max-args] [--no-run-if-empty] [--version] [--help]
			 [-L] [--nosplit-lines]
			 [command [initial-arguments]]
END_USAGE

	# Stupid xargs prints to STDERR,STDOUT depending on whether there's an error
	if (@_) {
		print STDERR $usage;
		exit 1;
	} else {
		print STDOUT $usage;
		exit 0;
	}
}

sub parse_args {
	my ($cmd,%opt);
	$opt{eof} = "_";	# Stupid xargs "feature"
	while ($#ARGV>=0) {
		my $arg=shift(@ARGV);
		# GNU xargs v4.1 options
		if ($arg =~ /^-(h|-help)$/) { usage(); }
		if ($arg =~ /^-(0|-null)$/) { $opt{null}=1; next; }
		if ($arg =~ /^-(e|-eof=?)(.*)$/) { $opt{eof}=$2; next; }
		if ($arg =~ /^-(i|-replace=?)(.+)?$/) { $opt{replace}=defined $2 ? $2 : "{}"; next; }
		if ($arg =~ /^-(l|-max-lines=?)(.+)?$/) { $opt{maxlines}=defined $2 ? $2 : 1; next; }
		if ($arg =~ /^-(n|-max-args=)(.+)?$/) { $opt{maxargs}=defined $2 ? $2 : shift @ARGV; next; }
		if ($arg =~ /^-(p|-interactive)$/) { $opt{interactive}=1; next; }
		if ($arg =~ /^-(r|-no-run-if-empty)$/) { $opt{norunifempty}=1; next; }
		if ($arg =~ /^-(s|-max-chars=)(.+)$/) { $opt{maxchars}=defined $2 ? $2 : shift @ARGV; next; }
		if ($arg =~ /^-(t|-verbose)$/) { $opt{verbose}=1; next; }
		if ($arg =~ /^--version$/) { version(); }
		if ($arg =~ /^-(x|-exit)$/) { $opt{exit}=1; next; }
		if ($arg =~ /^-(P|-max-procs=(.+))$/) { $opt{maxprocs}=defined $2 ? $2 : shift @ARGV; next; }

		# Additional options
		if ($arg =~ /^-(L|-nosplit-lines)$/) { $opt{nosplitlines}=1; next; }

		if ($arg =~ /^--/) { usage("unrecognized option \`$arg'"); }
		if ($arg =~ /^-(.)?/) { usage("invalid option -- $1"); }
		$cmd = join(" ",$arg,@ARGV);  undef @ARGV;
	}

	# Implications:

	# --replace implies --exit and --max-lines 1
	if (defined $opt{replace}) {
		$opt{nosplitlines}=1;
		$opt{maxlines}=1 unless $opt{maxargs};
	}

	# And maxlines implies exit
	$opt{exit}=1 if $opt{maxlines};

	# And interactive implies verbose
	$opt{verbose}=1 if $opt{interactive};

	# Default command
	$cmd = $cmd || "/bin/echo";

	# Error compatible
	if ($opt{maxchars} && $opt{maxchars} <= length $cmd) {
		print STDERR "$PROGNAME: can not fit single argument within argument list size limit\n";
		exit 1;
	}
	usage("value for -n option must be >= 1")
		if (defined $opt{maxargs} && $opt{maxargs}<1);
	usage("value for -l option must be >= 1")
		if (defined $opt{maxlines} && $opt{maxlines}<1);

	($cmd,\%opt);
}

##################################################
# Command code
##################################################
# Properly build the command array.
# Conceivably I could have made this easier with using a command string
# and perls \Q\E capabilities instead.
sub make_cmd {
	my ($opt,$cmd,@xag) = @_;

	my @cmd = split(/\s+/,$cmd);

	# Replace args in command if --replace
	if (defined $opt->{replace}) {
		my @copy = @cmd;
		undef @cmd;
		my $append=0;
		foreach ( @copy ) {
			if ($_ eq $opt->{replace}) {
				push(@cmd,@xag);
				next;
			}
			my @tmp = split($opt->{replace},$_);
			push(@cmd,shift @tmp);
			foreach (@tmp) {
				my @tmpxag = @xag;
				$tmpxag[-1] .= $_;
				$cmd[-1] .= shift @tmpxag;
				push(@cmd, @tmpxag) if @tmpxag;
			}
		}
	} else {
		push(@cmd, @xag);
	}

	@cmd;
}

sub cmd_len {
	my ($opt,$cmd,@xag) = @_;
	my @cmd = make_cmd($opt,$cmd,@xag);
	my $cmd = join(" ",@cmd);
	length $cmd;
}

my @pids;
sub do_cmd {
	my ($opt,$cmd,@xag) = @_;
	return unless @xag;

	my @leftover;
	if ($opt->{maxchars}) {
		while (@xag && $opt->{maxchars} <= cmd_len($opt,$cmd,@xag)) {
			unshift(@leftover,pop(@xag));
		}
		# Not bug compatible with GNU xargs:
		#push(@xag,shift(@leftover)) if (!@xag && !$opt->{exit});
		unless (@xag) {
			print STDERR "$PROGNAME: argument line too long\n";
			exit 1;
		}
	}

	my @cmd = make_cmd($opt,$cmd,@xag);

	print STDERR "@cmd" if ($opt->{verbose});

	# Prompt?
	if ($opt->{interactive}) {
		print STDERR "?...";
		$_ = <PROMPT>;
		return do_cmd($opt,$cmd,@leftover) unless /^y/;
	} elsif ($opt->{verbose}) {
		print STDERR " \n";	# For reasons unclear, xargs puts in an extra " "
	}

	my $pid = fork;
	die("[$PROGNAME] Couldn't fork a new process") if ($pid == -1);
	unless ($pid) {
		exec @cmd;
		# Cheat on error returned - guess as to 126/127, and technically
		# this would be indistinguishable from a program that returns the same
		exit(126) if ($! =~ /permission denied/i);
		exit(127);	# Cheat..
	}
	push(@pids,$pid);
	while ($#pids+1 >= $opt->{maxprocs}) {
		$pid = wait;
		last if ($pid == -1);
		my $ret  = $? >> 8;
		my $int  = $? & 127;
		my $core = $? & 128;
		if ($ret==255) {
			# 124 if the command exited with status 255
			print STDERR "$PROGNAME: $cmd[0]: exited with status 255; aborting\n";
			exit 124;
		}
		if ($int) {
			# 125 if the command is killed by a signal
			print STDERR "$PROGNAME: $cmd[0]: terminated by signal $?\n";
			exit 125;
		}
		if ($ret==126) {
			#(cheat)	#126 if the command cannot be run
			print STDERR "$PROGNAME: $cmd[0]: Permission denied\n";
			exit 126;
		}
		if ($ret==127) {
			#(cheat)	#127 if the command is not found
			print STDERR "$PROGNAME: $cmd[0]: No such file or directory\n";
			exit 127;
		}

		# 0 if it succeeds
		if ($?) {
			if ($ret>=1&&$ret<=125) {
				# 123 if any invocation of the command exited with status 1-125
				$opt->{EXIT}=123;
			} else {
				# 1 if some other error occurred.
				$opt->{EXIT}=1 unless $opt->{EXIT};
			}
		} else {
			$opt->{EXIT}=0 unless $opt->{EXIT};
		}

		@pids = grep($_ != $pid, @pids);
	}

	# Leftover?
	do_cmd($opt,$cmd,@leftover) if @leftover;
	
}

##################################################
# Main code
##################################################
sub main {
	my ($cmd,$opt) = parse_args();

	$/ = "\0" if $opt->{null};

	if ($opt->{interactive}) {
		usage("Can't do --interactive unless /dev/tty is available")
			unless -r "/dev/tty";
		open(PROMPT,"</dev/tty")
			|| die("Couldn't read /dev/tty for --interactive!\n");
	}

	my @xag;
	my $lines = 0;
	while (<STDIN>) {
		chomp unless $opt->{null};

		# If we are going by lines, then ignore blanks
		next if (/^\s*$/ && !$opt->{maxchars} && !$opt->{maxargs});

		$lines++;

		# Look for --eof
		last if (defined $opt->{eof} && /^\s*$opt->{eof}\s*$/);

		# xargs splits up according by args, regardless of whether we are doing
		# args, lines or chars, so we can quote space.  Annoying.
		my @args = ($opt->{nosplitlines} || $opt->{null}) ? ($_) : split(/(\s+)/);
		# Ignore leading space
		shift @args, shift @args if ($args[0] eq "");

		my ($quote,$quoted);
		while (@args) {
			my ($arg,$space) = (shift @args, shift @args);
			# Are we in a quote?
			if ($quote) {
				# Are we ending the quote?
				if ($arg =~ s/([^$quote]*)$quote(.*)//) {
					$quoted .= $1;
					# We may be starting a new quote
					unshift(@args,$2,$space);
					$quote = 0;
					next;
				}
				# Still in quotes
				$quoted .= $arg.$space;
				next;
			}

			# Are we starting a quote?  (-0 doesn't do quotes)
			if (!$opt->{null} && $arg =~ s/([^"']*)(["'])(.*)//) {
				$quoted .= $1;
				$quote = $2;
				# We may be ending the quote this word..
				unshift(@args,$3,$space);
				next;
			}

			# Not in quotes, not starting quotes (may have just ended them, though)
			push(@xag, $quoted.$arg);
			$quoted = "";


			# Trailing blank lines (but only if --max-lines set!?) will
			# logically continue
			if ($opt->{maxlines} && /\s$/) {
				# This should arguably go before quoting, but we want
				# to be bug compatible with xargs  :(
				$lines--;
				next;
			}

			# Have we collected enough args for maxargs?
			if ( ($opt->{maxargs}  && ($#xag+1 >= $opt->{maxargs})) ||
					 ($opt->{maxchars} && ($opt->{maxchars}-2 <= cmd_len($opt,$cmd,@xag))) ) {
				do_cmd($opt,$cmd,@xag);
				undef @xag;
				$lines = 0;
			}
		}

		# Error compatible, even if I don't like it
		die("[$PROGNAME] unmatched double quote\n") if $quote eq '"';
		die("[$PROGNAME] unmatched single quote\n") if $quote eq "\'";
		die("[$PROGNAME] leftover quote? $quoted\n") if $quoted;

		# Have we collected enough lines for maxlines?
		if ($opt->{maxlines}  && $lines >= $opt->{maxlines}) {
			do_cmd($opt,$cmd,@xag);
			undef @xag;
			$lines = 0;
		}

	}

	# Do at least one command unless --no-run-if-empty
	push(@xag,"") if (!defined $opt->{EXIT} && !@xag && !$opt->{norunifempty});

	# Do any args leftover
	do_cmd($opt,$cmd,@xag) if (@xag);

	close PROMPT if $opt->{interactive};

	# Finish waiting for children
	while (@pids) {
		my $pid = wait;
		last if ($pid == -1);
		@pids = grep($_ != $pid, @pids);
	}

	# Exit code
	exit($opt->{EXIT});
}
main();
