#!/usr/bin/perl
# Filename:	submit
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	Attempts to submit URLs to a bunch of search engines
#		Very similar to 'SimpleSubmit.com'
use strict;

##################################################
# Setup the variables
##################################################
# Program name used as a prefix in messages: $0 with everything up to and
# including the last '/' removed.
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

# Replacements allowed in $EMAIL and @SITES information:
#  %s = Name of search site [i.e., Hotbot.com]
#  %u = Url we are submitting [i.e., http://DaveSource.com]
#  %e = Your email (required for some sites)
# submit() expands them in the order %e, %s, %u, which is why a %s (or %u)
# inside the email address is expanded as well.

# Submitter's email address; filled in by parse_args() from the first
# command-line argument that contains an '@'.
my $EMAIL;

# Table of search engines to submit to.  Each entry is an array reference
# laid out as shown in the commented template below; main() passes the
# elements of each entry straight to submit() after the URL.
#  - The success and failure strings are used by submit() as
#    case-insensitive regular expressions, tested against each line of the
#    fetched page (so ".*" in a string is a regex wildcard, not literal).
#  - Failure strings are optional.  An entry without any can only ever be
#    reported as "Success" or "Unknown results".
my @SITES = (
	#[ "Search Engine",
	#  "Add URL",
	#  "Success string",
	#  "Failure string", "failure string", ...
	#],
	[ "Lycos.com",
	  "http://www.lycos.com/cgi-bin/spider_now.pl?query=%u&email=%e",
	  "successfully spidered your page",
	  "unable to spider your page",
	],
	[ "Hotbot.com",
	  "http://hotbot.lycos.com/addurl.asp?MM=1&success_page=http://hotbot.lycos.com/addurl.asp&failure_page=http://hotbot.lycos.com/help/oops.asp&ACTION=subscribe&SOURCE=hotbot&ip=156.1.60.60&redirect=http://hotbot.lycos.com/addurl2.html&newurl=%u&email=%e&send=Submit+my+site",
	  "Got it.*Your Web site will be added",
	],
	[ "Google.com",
	  "http://www.google.com/addurl?dq=&q=%u",
	  "successfully added to google",
	  "error submitting",
	],
	[ "AltaVista.com",
	  "http://add-url.altavista.com/cgi-bin/newurl?ad=1&q=%u",
	  "Your URL will be submitted for processing",
	  "not seem to be a valid URL",
	  "could not connect to the host",
	],
	[ "Go.com",
	  "http://www.go.com/AddUrl/AddingURL?sv=IS&lk=noframes&url=%u&CAT=Add%2FUpdate+Site",
	  "successfully submitted",
	],
	[ "WhatUSeek.com",
	  "http://add.whatuseek.com/cgi-bin/addurl.cgi?submit=Add+This+URL&url=%u&email=%e",
	  "Thank you for submitting your site",
	  "Invalid URL",
	],
	[ "Excite.com",
	  "http://www.excite.com/cgi/add_url.cgi?look=excite&url=%u&email=%e",
	  "site has been submitted",
	],
	[ "NorthernLight.com",
	  "http://urls.northernlight.com/cgi-bin/urlsubmit.pl?page=%u&email=%e&contact=%e",
	  "Thank you for your URL",
	  "did not understand your URL",
	],
	[ "MixCat.com",
	  "http://www.mixcat.com/cgi-bin/search.cgi?Action=AddURL&Mode=AnonAdd&Realm=Mixcat+Search&URL=%u",
	  "this URL added",
	  "Error",
	],
	[ "InfoMak.com",
	  "http://url.infomak.com/index.html?url=%u&email=%e",
	  "Your URL has been added",
	],
	# Doesn't work - requires GET method  :(
	# NOTE(review): submit() fetches every URL with `lynx -source`, which
	# presumably issues a plain GET request, so "GET" above may be a slip
	# for "POST" - confirm before re-enabling this entry.
	#[ "AllTheWeb.com",
	#  "http://www.ussc.alltheweb.com/add_url.php3?Submit=%u&email=%e",
	#  "has been scheduled for addition",
	#],
	);

##################################################
# Usage
##################################################
# Print any error messages followed by the usage text, then exit.
#   @_ : zero or more error strings; each is shown on its own "ERROR:" line.
# Everything goes to standard output.  This sub never returns: it always
# ends the program via `exit -1`.
sub usage {
  my @report;

  # One "ERROR:" line per message handed in by the caller.
  push @report, "ERROR:  $_\n" for @_;

  push @report,
    "\n",
    "Usage:\t$PROGNAME [-d] <email> <url> ...\n",
    "\tAttempts to submit URLs to a bunch of search engines\n",
    "\n";

  # The reminder is only shown while no email address has been set.
  push @report, "\tYour email is required.\n" if !$EMAIL;

  push @report,
    "\t%s in your email will be replaced with the search engine name.\n",
    "\n",
    "\tSuccess/failure is printed for each search engine.\n",
    "\tSuccess only means the engine accepted the URL,\n",
    "\tnot that it is necessarily valid or will be indexed.\n",
    # The -d (debug) flag is accepted but deliberately left undocumented:
    #"\t-d\tSet debug mode\n",
    "\n";

  print @report;
  exit -1;
}

# Consume @ARGV and return the list of URLs to submit.
#   -h  : show the usage text (usage() exits the program)
#   -d  : switch on debug output by setting $MAIN::DEBUG
#   The first argument containing '@' (while $EMAIL is still unset) becomes
#   $EMAIL; any other argument starting with '-' is rejected through
#   usage(); everything else is collected as a URL.
# Calls usage() - which exits - when no email or no URL was given.
sub parse_args {
  my @urls;

  while (@ARGV) {
    my $arg = shift @ARGV;

    # The order of these tests matters: the email check comes before the
    # generic "unknown option" check.
    if ($arg =~ /^-h$/) {
      usage();
    }
    elsif ($arg =~ /^-d$/) {
      $MAIN::DEBUG = 1;
    }
    elsif (!$EMAIL && $arg =~ /\@/) {
      $EMAIL = $arg;
    }
    elsif ($arg =~ /^-/) {
      usage("Unknown option: $arg");
    }
    else {
      push @urls, $arg;
    }
  }

  usage("No email specified") if !$EMAIL;
  usage("No urls specified") if !@urls;

  return @urls;
}

##################################################
# Submit a url
##################################################
# Submit one URL to one search engine and print the outcome.
#   $url        : URL being submitted (substituted for %u)
#   $search     : display name of the search engine (substituted for %s)
#   $search_url : the engine's "add URL" template; %e, %s and %u are
#                 expanded in that order
#   $pass       : regex (matched case-insensitively) that signals success
#   @fail       : zero or more regexes (case-insensitive) signalling failure
# The expanded URL is fetched with `lynx -source` and scanned line by line;
# the first line matching $pass or any of @fail decides the verdict.  One
# line is printed to standard output: "Success", "Failure" or
# "Unknown results".  Dies if lynx cannot be started.
#
# NOTE(review): $url and $EMAIL are inserted into the query string verbatim
# (no URL-encoding), so a URL containing '&' or '#' changes the request.
sub submit {
  my ($url, $search, $search_url, $pass, @fail) = @_;

  # Replacements
  $search_url =~ s/%e/$EMAIL/g;
  $search_url =~ s/%s/$search/g;
  $search_url =~ s/%u/$url/g;

  print STDERR "Submit url to $search [$url]:\n  $search_url\n" if ($MAIN::DEBUG);

  # List-form pipe open: lynx is exec'd directly with the URL as a single
  # argument.  No shell is involved, so quotes or metacharacters in the URL
  # or email can neither break the command nor inject another one.
  open(my $lynx, '-|', 'lynx', '-source', $search_url) ||
    die("[$PROGNAME] Couldn't run lynx [$search]\n");

  my $verdict = 'Unknown results';
  LINE: while (my $line = <$lynx>) {
    if ($pass && $line =~ /$pass/i) {
      $verdict = 'Success';
      last LINE;
    }
    foreach my $fail ( @fail ) {
      # An empty pattern would make Perl reuse the last successful regex,
      # so skip missing/empty failure strings.
      next unless defined $fail && length $fail;
      if ($line =~ /$fail/i) {
        $verdict = 'Failure';
        last LINE;
      }
    }
    #print "-> $line" if ($MAIN::DEBUG);
  }

  # We may stop reading before lynx has finished writing, so close() can
  # report a non-zero child status; that is expected and ignored.
  close($lynx);
  print "[$PROGNAME]  $verdict:  $search  [$url]\n";
}

# Normalise a user-supplied URL so that it always carries a scheme.
#   $url : URL as typed on the command line, with or without a scheme
# Returns the URL unchanged when it already starts with "<scheme>://"
# (any scheme, any letter case); otherwise returns it with "http://"
# prepended.  Previously only a lowercase "http://" was recognised, so
# "https://host" was mangled into "http://https://host".
sub clean_url {
  my ($url) = @_;

  # Scheme syntax: a letter followed by letters, digits, '+', '.' or '-'.
  # Requiring "://" keeps "host:port/path" from being mistaken for a scheme.
  return $url if $url =~ m|^[A-Za-z][A-Za-z0-9+.-]*://|;

  return "http://$url";
}

##################################################
# Main code
##################################################
# Entry point: submit every URL named on the command line to every search
# engine listed in @SITES, one engine after another for each URL.
sub main {
  # Normalise each URL once up front instead of once per site.
  my @targets = map { clean_url($_) } parse_args();

  for my $target (@targets) {
    for my $site (@SITES) {
      # $site is [ name, add-url template, success pattern, failures... ]
      submit($target, @{$site});
    }
  }
}
main();

