#!/usr/bin/perl
# Filename:	html2jpg
# Author:	David Ljung Madison
my $VERSION=	1.00;
# See License:	http://MarginalHacks.com/License
# Description:	Takes a screenshot of an HTML page (uses opera)

use strict;
use warnings;

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

# Shell pipeline used to dump a window; %id and %out are substituted by grab()
my $GRAB = 'xwd -silent -nobdrs -id %id | convert -quality 85 - %out';
# Command whose output find_window() and subwindow() parse
my $XINFO = 'xwininfo -tree -root';
my $BROWSER = 'opera';	# Required by find_window() - see usage()

# Default snapshot type (based off html2<type>)
my $TYPE = ($PROGNAME =~ /2(.+)$/) ? $1 : "jpg";

##################################################
# Usage
##################################################

# Print any error messages followed by the help text to STDERR, then exit.
#   @_ - optional error messages, each printed with an "ERROR: " prefix
# Never returns (exits with status -1).
sub usage {
  foreach my $msg (@_) { print STDERR "ERROR:  $msg\n"; }
  print STDERR <<USAGE;

Usage:  $PROGNAME <url> [-o <out>] [-s <sec>] [-g <geom>] [-d]
        Takes a screenshot of an HTML page and saves to an image
        -o <out>    postfix determines image type [$PROGNAME.$TYPE]
        -s <sec>    Time to sleep before window dump
        -g <geom>   Browser geometry
        -d          Set debug mode

Kludges:
  - We don't know when the page is finished loading
  - Rule of thumb for finding subwindow is guesswork!
    If you're getting the wrong window, comment out call to subwindow()
  - Browser geometry ignored if browser doesn't create a new window
    (Such as opera in "mdi" mode)
  - Dependent on xwd command and output of xwininfo and ...
  - Browser can't be iconified or partially off-screen
  - Only gets portion of html displayed in browser
  - Requires "opera" browser - update find_window() for other browsers
  - Opens up a bunch o' windows in your browser and leaves them there

Examples:
  % $PROGNAME MarginalHacks.com -o MH.gif      # Snapshot of MarginalHacks
  % $PROGNAME GetDave.com -o gif:- | xv -      # Pipe output, type gif

Heck - I just needed something to automate screenshots of HTML output.
Ask to add '-remote SaveAsImage(file)' if you don't like it.  :)

Author:    David Ljung Madison
License:   http://MarginalHacks.com/License

Please see!  http://MarginalHacks.com/Pay

USAGE
  exit -1;
}

# Print the version banner to STDOUT.
# Never returns (exits with status -1).
sub version {
  print "\n";
  printf "This is $PROGNAME version %4.2f\n",$VERSION;
  print "\n";
  print "Copyright (c) 2002 David Ljung Madison \n";
  print "\n";
  exit -1;
}

# Parse @ARGV (consuming it).
# Returns the list ($out,$url,$sleep,$geom).
# Calls usage() (which exits) on -h, an unknown option, an option that is
# missing its value, more than one URL, or no URL at all.
sub parse_args {
  my $url;
  my ($out,$sleep,$geom) = ("$PROGNAME.$TYPE",5,"800x600");

  # Test definedness, not truth, so an argument of "0" or "" does not
  # silently end parsing and drop everything after it.
  while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-v$/) { version(); }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; }
    if ($arg =~ /^-[osg]$/) {
      # Each of these options consumes the following argument
      usage("Option $arg requires a value") unless @ARGV;
      my $value = shift(@ARGV);
      if    ($arg eq '-o') { $out   = $value; }
      elsif ($arg eq '-s') { $sleep = $value; }
      else                 { $geom  = $value; }
      next;
    }
    if ($arg =~ /^-.+/) { usage("Unknown option: $arg"); }
    usage("Too many URLs specified [$arg and $url]") if defined $url;
    $url=$arg;
  }
  usage("No URLs specified!") unless $url;

  return ($out,$url,$sleep,$geom);
}

# Print each message to STDERR, prefixed with the program name, but only
# when debug mode (-d) has set $MAIN::DEBUG.
sub debug {
  return unless $MAIN::DEBUG;
  foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; }
}

##################################################
# Main code
##################################################

# Ask the browser to open $url in a new window with geometry $geom.
# Returns the status from system(); a failure is only reported via debug().
sub load {
  my ($url,$geom) = @_;

  # -newwindow doesn't seem to work on my MDI opera setup
  # List-form system() bypasses the shell, so characters in the URL or
  # geometry can never be interpreted as shell syntax.
  my @cmd = (split(' ',$BROWSER), '-geometry', $geom,
             '-remote', "openURL($url,new-window)");
  my $status = system(@cmd);
  debug("Browser command returned status [$status]: @cmd") if $status;
  return $status;
}

# Geometry regexp for xwininfo (saves x and y and offset)
# Captures, in order: width, height, x offset, y offset.
my $GEOM_RE = qr/\s(\d+)x(\d+)\+(\-?\d+)\+(\-?\d+)\s+\+\-?\d+\+\-?\d+$/;

# Find smallest subwindow that is at least 80%
# (We're trying to get rid of the scrollbars, menubars, etc...)
#   $spacing - leading whitespace of the parent window's xwininfo line
#   $window  - id of the parent window (returned if nothing better is found)
#   $x,$y    - width and height of the parent window
# Continues reading the already-open XINFO handle from wherever
# find_window() left it.  Returns the chosen window id.
sub subwindow {
  my ($spacing,$window,$x,$y) = @_;

  my $smallest = $x*$y;
  $x*=.8;
  $y*=.8;

  while (<XINFO>) {
    # We're done traversing this window's tree when we find a new window
    # with the same or less spacing (or if we run out of xinfo).
    return $window
      if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing));

    # Here $1 is the window id and $2/$3 are its width/height
    if (/^\s+(0x[0-9a-f]+).*$GEOM_RE/ &&
        # $4>=0 && $5>=0 &&	# Only look for positive offset windows?
        $2>=$x && $3>=$y && $2*$3<$smallest) {
      $window = $1;
      $smallest = $2*$3;
      debug("Smaller subwindow: $window ${2}x$3",$_);
    }
  }
  return $window;
}

# Run $XINFO and locate the id of the window to grab: first the top-level
# Opera window (whose title bar carries the current page title), then the
# child window with that title, then the best-fitting subwindow.
# Returns the window id; dies if the Opera window or subwindow is not found.
sub find_window {
  # Bareword handle kept on purpose: subwindow() reads from XINFO as well
  open(XINFO,'-|',$XINFO) || die("Couldn't run: [$XINFO]\n");

  # Find the opera window (and the current title of the top window)
  my ($spacing,$title,$x,$y);
  while (<XINFO>) {
    # This could easily break and is very opera specific (works on 6.03)
    last if (($spacing,$title,$x,$y) =
      (/^(\s+)0x[0-9a-f]+ "Opera .*\[(.+)\]": \("opera" "opera"\)\s*$GEOM_RE/));
  }
  die("Couldn't find window [Opera] in [$XINFO]\n")
    unless $title && $x && $y;

  # Now find the subwindow with the same title
  my $window;
  while (<XINFO>) {
    # A line indented no deeper than the Opera window means we left its tree
    die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
      if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing));
    # \Q..\E: the title is literal text, not a pattern - page titles
    # routinely contain regex metacharacters such as ( ) [ ] + ? |
    if (/^(\s+)(0x[0-9a-f]+)\s+"\Q$title\E".*$GEOM_RE/) {
      ($spacing,$window,$x,$y) = ($1,$2,$3,$4);
      last;
    }
  }
  die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n")
    unless $window;

  debug("Found: $window [$title] ${x}x$y");
  $window = subwindow($spacing,$window,$x,$y);
  debug("Final window: $window");

  close XINFO;
  return $window;
}

# Quote a string as a single shell word (POSIX single-quote rules).
sub shell_quote {
  my ($str) = @_;
  $str =~ s/'/'\\''/g;
  return "'$str'";
}

# Dump window $id to the image $out by running the $GRAB pipeline.
# Dies if the pipeline reports a non-zero status.
sub grab {
  my ($id,$out) = @_;

  # $GRAB goes through the shell (it contains a pipe), so quote the
  # user-supplied output name to survive spaces and metacharacters.
  my $quoted_out = shell_quote($out);

  my $grab = $GRAB;
  $grab =~ s/%id/$id/g;
  $grab =~ s/%out/$quoted_out/g;
  system("$grab");
  die("Trouble with grab [$?]:\n  $grab\n") if $?;
}

# Entry point: parse arguments, load the page, wait for it to render,
# then find the browser window and grab it.
sub main {
  my ($out,$url,$sleep,$geom) = parse_args();
  load($url,$geom);
  sleep($sleep);
  my $window = find_window();
  grab($window,$out);
}
main();