#!/usr/bin/perl
# Filename: html2jpg
# Author: David Ljung Madison
my $VERSION= 1.00;	# Script version; version() prints it with a %4.2f format
# See License: http://MarginalHacks.com/License
# Description: Takes a screenshot of an HTML page (uses opera)
use strict;
##################################################
# Setup the variables
##################################################
# Name this script was invoked as, with any leading directory removed
(my $PROGNAME = $0) =~ s|.*/||;

# Command templates.  grab() substitutes %id (window id) and %out (output
# name) into $GRAB; find_window() reads the output of $XINFO.
my $XINFO   = 'xwininfo -tree -root';
my $GRAB    = 'xwd -silent -nobdrs -id %id | convert -quality 85 - %out';
my $BROWSER = 'opera';	# Required by find_window() - see usage()

# Default snapshot type: whatever follows the last "2" in the program name
# (so "html2gif" gives "gif"), falling back to "jpg".
my $TYPE = "jpg";
$TYPE = $1 if $PROGNAME =~ /2(.+)$/;
##################################################
# Usage
##################################################
# Print each argument as an "ERROR:" line, then the help text, to STDERR.
# Never returns: always exits with status -1.
sub usage {
  foreach my $msg (@_) { print STDERR "ERROR: $msg\n"; }
  # Interpolating heredoc: $PROGNAME and $TYPE are expanded in the text below
  print STDERR <<USAGE;

Usage:\t$PROGNAME <url> [-o <out>] [-s <secs>] [-g <geom>] [-d]
  Takes a screenshot of an HTML page and saves to an image

  -o <out>     postfix determines image type [$PROGNAME.$TYPE]
  -s <secs>    Time to sleep before window dump
  -g <geom>    Browser geometry
  -d           Set debug mode

Kludges:
  - We don't know when the page is finished loading
  - Rule of thumb for finding subwindow is guesswork!
    If you're getting the wrong window, comment out call to subwindow()
  - Browser geometry ignored if browser doesn't create a new window
    (Such as opera in "mdi" mode)
  - Dependent on xwd command and output of xwininfo and ...
  - Browser can't be iconified or partially off-screen
  - Only gets portion of html displayed in browser
  - Requires "opera" browser - update find_window() for other browsers
  - Opens up a bunch o' windows in your browser and leaves them there

Examples:
  % $PROGNAME MarginalHacks.com -o MH.gif      # Snapshot of MarginalHacks
  % $PROGNAME GetDave.com -o gif:- | xv -      # Pipe output, type gif

Heck - I just needed something to automate screenshots of HTML output.
Ask to add '-remote SaveAsImage(file)' if you don't like it.  :)

Author:    David Ljung Madison
License:   http://MarginalHacks.com/License
Please see! http://MarginalHacks.com/Pay

USAGE
  exit -1;
}
# Print the version/copyright banner to STDOUT.
# Never returns: always exits with status -1.
sub version {
  my @banner = (
    "\n",
    sprintf("This is $PROGNAME version %4.2f\n", $VERSION),
    "\n",
    "Copyright (c) 2002 David Ljung Madison \n",
    "\n",
  );
  print @banner;
  exit -1;
}
# Parse @ARGV (consuming it) into the run settings.
#
# Returns the list ($out, $url, $sleep, $geom).  Defaults are
# "$PROGNAME.$TYPE" for the output name, 5 for the sleep time and
# "800x600" for the geometry.  -d sets $MAIN::DEBUG as a side effect.
# Any problem (unknown option, option without its value, zero or several
# URLs) is reported through usage(), which exits.
sub parse_args {
  my $url;
  my ($out,$sleep,$geom) = ("$PROGNAME.$TYPE",5,"800x600");

  # Options that consume the following argument, and where it is stored
  my %takes_value = ('-o' => \$out, '-s' => \$sleep, '-g' => \$geom);

  # Test definedness, not truth: an argument of "0" or "" must not end
  # the loop early and silently discard whatever follows it.
  while (defined(my $arg = shift(@ARGV))) {
    if ($arg eq '-h') { usage(); }
    if ($arg eq '-v') { version(); }
    if ($arg eq '-d') { $MAIN::DEBUG=1; next; }
    if (my $dest = $takes_value{$arg}) {
      # Don't silently store undef when the option is the last argument
      usage("Option $arg requires a value") unless @ARGV;
      $$dest = shift(@ARGV);
      next;
    }
    if ($arg =~ /^-.+/) { usage("Unknown option: $arg"); }
    usage("Too many URLs specified [$arg and $url]") if defined $url;
    $url = $arg;
  }
  usage("No URLs specified!") unless defined $url && length $url;

  return ($out,$url,$sleep,$geom);
}
# Write each argument to STDERR as its own "[progname] message" line,
# but only when debug mode ($MAIN::DEBUG, set by -d) is on.
sub debug {
  return if !$MAIN::DEBUG;
  print STDERR map { "[$PROGNAME] $_\n" } @_;
}
##################################################
# Main code
##################################################
# Ask the browser to open $url in a new window of size $geom.
#
# The command is run with list-form system(), so no shell is involved and
# quotes or shell metacharacters in the URL or geometry cannot break or
# inject into the command line.  Returns system()'s status; a failure to
# start the browser at all (-1) is reported with a warning.
sub load {
  my ($url,$geom) = @_;
  # Allow $BROWSER to carry extra words (e.g. a wrapper plus the binary)
  my @browser = split(' ', $BROWSER);
  # -newwindow doesn't seem to work on my MDI opera setup
  my $status = system(@browser, '-geometry', $geom,
                      '-remote', "openURL($url,new-window)");
  # Without a shell nobody else reports "command not found" for us
  warn("Couldn't run [$BROWSER]: $!\n") if $status == -1;
  return $status;
}
# Geometry regexp for xwininfo (saves x and y and offset)
# Kept as a string and interpolated into larger patterns by subwindow()
# and find_window().  It adds four capture groups, in order: x size,
# y size, and the two signed numbers of the first +X+Y offset pair.
# A second +X+Y pair must follow but is not captured, and the trailing
# '$' anchors the whole thing at the end of the line.
my $GEOM_RE = '\s(\d+)x(\d+)\+(\-?\d+)\+(\-?\d+)\s+\+\-?\d+\+\-?\d+$';
# Find smallest subwindow that is at least 80%
# (We're trying to get rid of the scrollbars, menubars, etc...)
#
# Arguments: the indentation ($spacing) of the parent window's line, the
# parent's window id, and the parent's x and y size.
# Continues reading the already-open XINFO handle from where the caller
# stopped.  Returns the id of the smallest-area descendant that is at
# least 80% of the parent in both dimensions, or the id passed in if no
# descendant qualifies.
sub subwindow {
  my ($spacing,$window,$x,$y) = @_;
  my $smallest = $x*$y;
  my ($min_x,$min_y) = ($x*.8, $y*.8);

  while (<XINFO>) {
    # We're done traversing this window's tree when we find a new window
    # with the same or less spacing (or if we run out of xinfo).
    return $window if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing));

    # Copy the captures into named variables right away rather than
    # relying on $1..$3 surviving across later statements.
    my ($id,$w,$h) = /^\s+(0x[0-9a-f]+).*$GEOM_RE/ or next;
    next unless $w>=$min_x && $h>=$min_y && $w*$h<$smallest;

    $window = $id;
    $smallest = $w*$h;
    debug("Smaller subwindow: $window ${w}x$h",$_);
  }
  return $window;
}
# Locate the X window id to dump.
#
# Runs $XINFO and scans its output in three steps: find the top-level
# Opera window line and take the page title from the [...] part of its
# name; find the descendant window whose name is that title; then let
# subwindow() narrow it down further.  Returns the chosen window id, and
# dies if either the Opera window or the titled subwindow is not found.
sub find_window {
  # Bareword handle on purpose: subwindow() keeps reading from XINFO
  open(XINFO,"$XINFO|") || die("Couldn't run: [$XINFO]\n");

  # Find the opera window (and the current title of the top window)
  my ($spacing,$title,$x,$y);
  while (<XINFO>) {
    # This could easily break and is very opera specific (works on 6.03)
    last if (($spacing,$title,$x,$y) =
      (/^(\s+)0x[0-9a-f]+ "Opera .*\[(.+)\]": \("opera" "opera"\)\s*$GEOM_RE/));
  }
  die("Couldn't find window [Opera] in [$XINFO]\n") unless $title && $x && $y;

  # Now find the subwindow with the same title
  my $window;
  while (<XINFO>) {
    # A line indented no deeper than the Opera window means we left its tree
    die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
      if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing));
    # \Q..\E: the title is page text, so match it literally, not as a regexp
    if (/^(\s+)(0x[0-9a-f]+)\s+"\Q$title\E".*$GEOM_RE/) {
      ($spacing,$window,$x,$y) = ($1,$2,$3,$4);
      last;
    }
  }
  die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
    unless $window;
  debug("Found: $window [$title] ${x}x$y");

  $window = subwindow($spacing,$window,$x,$y);
  debug("Final window: $window");

  close XINFO;
  $window;
}
# Dump window $id to the image $out by running the $GRAB pipeline.
#
# $GRAB is a shell pipeline, so it has to go through the shell; the output
# name is therefore single-quoted before substitution so that spaces or
# shell metacharacters in it are passed to the command literally.
# Dies if the pipeline reports a non-zero status.
sub grab {
  my ($id,$out) = @_;

  # Shell-quote: wrap in '...' and turn each embedded ' into '\''
  (my $quoted_out = $out) =~ s/'/'\\''/g;
  $quoted_out = "'$quoted_out'";

  my $grab = $GRAB;
  $grab =~ s/%id/$id/g;
  $grab =~ s/%out/$quoted_out/g;

  system($grab);
  die("Trouble with grab [$?]:\n  $grab\n") if $?;
}
# Entry point: parse the command line, open the page in the browser, wait
# for it to render, locate the browser window and dump it to the image.
sub main {
  my %opt;
  @opt{qw(out url sleep geom)} = parse_args();

  load($opt{url}, $opt{geom});
  sleep($opt{sleep});	# We can't tell when the page has finished loading

  grab(scalar(find_window()), $opt{out});
}
main();