#!/usr/bin/perl
# Filename:	xml_parser
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	parses xml
use strict;

##################################################
# Setup the variables
##################################################
# Name of the running script with any leading directory path removed;
# interpolated into the usage text.
(my $PROGNAME = $0) =~ s|.*/||;

##################################################
# Usage
##################################################
# Print one "ERROR:" line per argument (if any), followed by the usage
# text, then terminate the program with exit status -1.  Never returns.
sub usage {
  print "ERROR:  $_\n" foreach @_;
  print "\n"
      . "Usage:\t$PROGNAME [-d] <file>\n"
      . "\tDoes something to the given file\n"
      . "\t-d\tSet debug mode\n"
      . "\n";
  exit -1;
}

# Consume @ARGV and return the single file name given on the command line.
#   -h        print the usage text (usage() exits)
#   -d        set $MAIN::DEBUG to 1
#   -<other>  reported through usage() as an unknown option
# More than one file, or no file at all, is also reported through usage().
sub parse_args {
  my $file;
  while (@ARGV) {
    my $arg = shift @ARGV;
    usage() if $arg =~ /^-h$/;
    if ($arg =~ /^-d$/) {
      $MAIN::DEBUG = 1;
      next;
    }
    usage("Unknown option: $arg") if $arg =~ /^-/;
    # Only one file is accepted; a second plain argument is an error
    if (defined $file) {
      usage("Too many files specified [$arg and $file]");
    }
    $file = $arg;
  }
  usage("No file defined") unless defined $file;

  return $file;
}

##################################################
# Utilities
##################################################
# Return the number of newline characters in the given string.
sub count_nl {
  my ($text) = @_;
  # tr with an empty replacement list only counts; it changes nothing
  my $newlines = ($text =~ tr/\n//);
  return $newlines;
}

# Split the text found between '<' and '>' into a tag name and attributes.
#   $line - current line number, only used in the error message
#   $str  - tag text, e.g. [image name=bob size="very big"]
# Returns (tag name, reference to a hash of attribute => value).
# Values may be unquoted, or wrapped in single or double quotes; quoted
# values may contain whitespace and '='.  One layer of surrounding quotes
# is removed from the stored value.
# Dies if an attribute is not of the form key=value.
sub parse_tag_attr {
  my ($line,$str) = @_;

  my %attr;

  $str =~ s/^\s+//;  $str =~ s/\s+$//;

  # First word is the tag name, everything after it is the attribute list
  my ($tag,$rest) = split(/\s+/,$str,2);
  $rest = '' unless defined $rest;

  while ($rest =~ /\S/) {
    # Take one key=value pair off the front.  The lookahead makes sure the
    # value ends at whitespace or at the end of the tag; when a quoted form
    # does not end there (k="a"b") the unquoted alternative takes the whole
    # word instead, which is what a plain whitespace split would produce.
    if ($rest =~ s/^\s*([^\s=]+)=("[^"]*"|'[^']*'|[^\s=]+)(?=\s|\z)//) {
      my ($k,$v) = ($1,$2);
      $v = $1 if $v =~ /\A'(.*)'\z/s;
      $v = $1 if $v =~ /\A"(.*)"\z/s;
      $attr{$k} = $v;
    } else {
      # Report the offending whitespace-delimited word
      my ($ta) = $rest =~ /^\s*(\S+)/;
      die("[line $line] Tag argument [$ta] should be of the form [key=value]\n");
    }
  }
  ($tag,\%attr);
}

##################################################
# Parse xml where data and tags are mixed (data can
# occur in non-leaf tags)
##################################################
# Append a piece of text to the array referenced by $arr, with leading
# and trailing whitespace removed.  Text that is empty or consists only
# of whitespace is ignored.
sub add_data {
  my ($arr,$data) = @_;
  return unless $data =~ /\S/;
  $data =~ s/^\s+|\s+$//g;
  return push(@{$arr},$data);
}

# Current line number in the document, used in error messages.  It lives
# at file scope so that recursive parse_xml() calls advance one counter.
my $line = 1;

# Return true if $tag_str (the text between '<' and '>') denotes a
# self-closing tag such as [br/] or [img src="x" /].  A trailing '/' only
# counts when it directly follows the tag name, whitespace or a closing
# quote, so an unquoted attribute value ending in '/' (dir=/tmp/) keeps
# its slash and the tag is treated as an ordinary start tag.
sub _is_self_closing {
  my ($tag_str) = @_;
  return 1 if $tag_str =~ m{^\s*[^\s=/]+/\s*$};
  return 1 if $tag_str =~ m{["'\s]/\s*$};
  return 0;
}

# Find the end tag that matches a start tag <$name> which has already
# been consumed from the front of $str, keeping track of nested tags of
# the same name.  Returns (text before the end tag, text after the end
# tag, the end tag itself), or an empty list if there is no matching end
# tag.  Comments inside the element are not skipped while searching.
sub _split_at_end_tag {
  my ($str,$name) = @_;
  $name = '' unless defined $name;

  my $depth = 1;
  # \Q..\E: the tag name is literal text, not a pattern.  The lookahead
  # stops <ab> from being counted when looking for <a>.
  while ($str =~ m{<(/?)\Q$name\E(?=[\s/>])([^>]*)>}g) {
    # Copy the match data before any other regex can overwrite it
    my ($closing,$rest) = ($1,$2);
    my ($from,$to) = ($-[0],$+[0]);
    if ($closing) {
      next if --$depth;
      return (substr($str,0,$from), substr($str,$to), substr($str,$from,$to-$from));
    }
    $depth++ unless _is_self_closing($name.$rest);
  }
  return;
}

# Parse a string of xml into a data structure and return a reference to
# an array holding, in document order, the text chunks (plain strings,
# whitespace-trimmed) and tags (hash references) found at the top level
# of $str.  Dies with a "[line N] ..." message on an unterminated comment
# or a start tag without a matching end tag.
sub parse_xml{
  my ($str) = @_;

  my @xml;
  # Each <tag>...</tag> gets pushed onto an array as a hash.
  #  Example:   <image name=bob size=big>.....</image>
  #  $tag{tag} = "image";
  #  $tag{attr} = HASH: ( name => bob, size => big );
  #  $tag{data} = array of all tags/data inside the tags
  my ($data,$tag_str);
  # Test the length rather than the truth of $str so that a chunk
  # consisting of just "0" is still parsed
  while (defined($str) && length($str)) {
    if ($str =~ /^([^<]*)<!--(.*)$/s) {
      # Ignore comments
      ($data,$str) = ($1,$2);

      # Push any data on from before the comment
      add_data(\@xml,$data);
      $line += count_nl($data);

      # Non-greedy: stop at the first "-->" so that text between two
      # comments is not thrown away with them
      die("[line $line] Couldn't find end of comment\n")
        unless ($str =~ s/^(.*?-->)//s);
      $line += count_nl($1);

    } elsif ($str =~ /^([^<]*)<([^>]+)>(.*)$/s) {
      # Deal with tags
      ($data,$tag_str,$str) = ($1,$2,$3);

      # Push any data on from before the tag
      add_data(\@xml,$data);

      # Line number
      $line += count_nl($data)+count_nl($tag_str);

      # Processing instructions (<?xml ...?>) and simple declarations
      # (<!DOCTYPE ...>) have no end tag and carry no data: skip them
      next if $tag_str =~ /^\s*(?:\?|![A-Za-z])/;

      # <tag/> has no content and no end tag; drop the slash so that it
      # does not end up in the tag name or in an attribute value
      my $self_closing = _is_self_closing($tag_str);
      $tag_str =~ s{/\s*$}{} if $self_closing;

      # Handle tag attr
      my %tag;
      ($tag{tag},$tag{attr}) = parse_tag_attr($line,$tag_str);

      if ($self_closing) {
        $tag{data} = [];
      } else {
        # Find the end tag and parse everything in between
        my ($inner,$rest,$end_tag) = _split_at_end_tag($str,$tag{tag});
        die("[line $line] Couldn't find end of tag <$tag{tag}>\n")
          unless defined($inner);
        $str = $rest;
        $tag{data} = parse_xml($inner);
        $line += count_nl($end_tag);
      }

      push(@xml,\%tag);

    } else {
      # No tags left, just push data
      add_data(\@xml,$str);
      $line += count_nl($str);
      undef $str;
    }
  }

  \@xml;
}

##################################################
# Display (for debug)
##################################################
# Print a parsed xml structure to the currently selected output handle,
# indented two spaces per nesting level.
#   $xml - reference to an array of text chunks (strings) and tags (hash
#          references with the keys tag, attr and data)
#   $lvl - nesting depth to start at; defaults to 0
# Attributes are listed in sorted key order so that the output is the
# same from one run to the next.
sub display_xml{
  my ($xml,$lvl) = @_;
  $lvl = 0 unless defined $lvl;
  my $indent = "  " x $lvl;

  # Debug - print it out
  foreach my $data ( @$xml ) {
    if (ref $data eq "HASH") {
      print $indent."<$data->{tag}>\n";
      my $attr = $data->{attr};
      if ($attr && %$attr) {
        my @pairs = map { $_."->".$attr->{$_} } sort keys %$attr;
        print $indent."  ATTR: ".join(",",@pairs)."\n";
      }
      display_xml($data->{data},$lvl+1);
      print $indent."</$data->{tag}>\n";
    } else {
      print $indent."[$data]\n";
    }
  }
}

##################################################
# Main code
##################################################
# Read the xml file named on the command line, parse it and print the
# resulting structure.
sub main {
  my $file = parse_args();

  # Three-argument open with an explicit read mode: the file name comes
  # from the command line and must never be interpreted as a mode or a
  # pipe (as in "cmd |")
  open(my $fh,'<',$file) || usage("Couldn't open xml: $file");
  # Slurp the whole file into a single string
  my $xml = do { local $/; <$fh> };
  close($fh);
  $xml = '' unless defined $xml;

  display_xml( parse_xml($xml), 0 );
}
main();
