#!/usr/bin/perl # Filename: html2jpg # Author: David Ljung Madison my $VERSION= 1.03; # See License: http://MarginalHacks.com/License # Description: Takes a screenshot of an HTML page (uses opera) # # CHANGELOG # # 1.03 2004/09/23 # ---------------- # Updated to opera 9.01 # # 1.02 2004/09/23 # ---------------- # (Thanks Keith C. Ivey, keith at smokefreedc org) # + Added -t and -r args # # 1.01 2004/??/?? # ----------------- # + Added -opera browser select support # # 1.00 200?/??/?? # ---------------- # + Public release # use strict; ################################################## # Setup the variables ################################################## my $PROGNAME = $0; $PROGNAME =~ s|.*/||; my $GRAB = 'xwd -silent -nobdrs -id %id | convert -quality 85 - %out'; my $XINFO = 'xwininfo -tree -root'; my $BROWSER = 'mozilla'; # Must match find_window() code - see usage() # Default snapshot type (based off html2) my $TYPE = ($PROGNAME =~ /2(.+)$/) ? $1 : "jpg"; ################################################## # Usage ################################################## sub usage { foreach my $msg (@_) { print STDERR "ERROR: $msg\n"; } print STDERR < Takes a screenshot of an HTML page and saves to an image -o postfix determines image type [$PROGNAME.$TYPE] -s Time to sleep before window dump -g Browser geometry -t Size of thumbnail -r Reuse existing browser window -d Set debug mode -opera Use opera browser instead of $BROWSER Kludges: - We don't know when the page is finished loading - Rule of thumb for finding subwindow is guesswork! If you're getting the wrong window, comment out call to subwindow() - Browser geometry ignored if browser doesn't create a new window (Such as opera in "mdi" mode) - Dependent on xwd command and output of xwininfo and ... - Browser can't be iconified or partially off-screen - Only gets portion of html displayed in browser - Requires "Mozilla" or "Opera" browser (update find_window() code for other browsers) - Opens up a bunch o' windows in your browser and leaves them there Examples: % $PROGNAME MarginalHacks.com -o MH.gif # Snapshot of MarginalHacks % $PROGNAME GetDave.com -o gif:- | xv - # Pipe output, type gif Heck - I just needed something to automate screenshots of HTML output. Ask to add '-remote SaveAsImage(file)' if you don't like it. :) Author: David Ljung Madison License: http://MarginalHacks.com/License Please see! http://MarginalHacks.com/Pay USAGE exit -1; } sub version { print "\n"; printf "This is $PROGNAME version %4.2f\n",$VERSION; print "\n"; print "Copyright (c) 2002 David Ljung Madison \n"; print "\n"; exit -1; } sub parse_args { my $url; my ($out,$sleep,$geom,$thumb) = ("$PROGNAME.$TYPE",5,"800x600"); while (my $arg=shift(@ARGV)) { if ($arg =~ /^-h$/) { usage(); } if ($arg =~ /^-v$/) { version(); } if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; } if ($arg =~ /^-o$/) { $out = shift(@ARGV); next; } if ($arg =~ /^-s$/) { $sleep = shift(@ARGV); next; } if ($arg =~ /^-g$/) { $geom = shift(@ARGV); next; } if ($arg =~ /^-t$/) { $thumb = shift(@ARGV); next; } if ($arg =~ /^-r$/) { $MAIN::REUSE = 1; next; } if ($arg =~ /^-opera$/) { $BROWSER = "opera"; next; } if ($arg =~ /^-.+/) { usage("Unknown option: $arg"); } usage("Too many URLs specified [$arg and $url]") if defined $url; $url=$arg; } usage("No URLs specified!") unless $url; ($out,$url,$sleep,$geom,$thumb); } sub debug { return unless $MAIN::DEBUG; foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; } } ################################################## # Main code ################################################## sub load { my ($url,$geom) = @_; my $browser = "$BROWSER -geometry $geom -remote \'openURL($url"; $browser .= ",new-window" unless $MAIN::REUSE; $browser .= ")\'"; system($browser); } # Geometry regexp for xwininfo (saves x and y and offset) my $GEOM_RE = '\s(\d+)x(\d+)\+(\-?\d+)\+(\-?\d+)\s+\+\-?\d+\+\-?\d+$'; # Find smallest subwindow that is at least 80% # (We're trying to get rid of the scrollbars, menubars, etc...) sub subwindow { my ($spacing,$window,$x,$y) = @_; my $smallest = $x*$y; $x*=.8; $y*=.8; while () { # We're done traversing this window's tree when we find a new window # with the same or less spacing (or if we run out of xinfo). return $window if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing)); if (/^\s+(0x[0-9a-f]+).*$GEOM_RE/ && # $4>=0 && $5>=0 && # Only look for positive offset windows? $2>=$x && $3>=$y && $2*$3<$smallest) { $window = $1; $smallest = $2*$3; debug("Smaller subwindow: $window ${2}x$3",$_); } } return $window; } sub opera_find_window { open(XINFO,"$XINFO|") || die("Couldn't run: [$XINFO]\n"); # Find the opera window (and the current title of the top window) my ($spacing,$title,$x,$y); while() { # # This could easily break and is very opera specific (works on 6.03) # last if (($spacing,$title,$x,$y) = (/^(\s+)0x[0-9a-f]+ "Opera .*\[(.+)\]": \("opera" "opera"\)\s*$GEOM_RE$/)); # This could easily break and is very opera specific (works on 7.50 & 8.50) last if (($spacing,$title,$x,$y) = (/^(\s+)0x[0-9a-f]+ "(.+) - Opera \d+\.\d+ ?": \("opera" "Opera"\)\s*$GEOM_RE$/)); # This could easily break and is very opera specific (works on beta) last if (($spacing,$title,$x,$y) = (/^(\s+)0x[0-9a-f]+ "(.+) - Opera Beta": \("opera" "Opera"\)\s*$GEOM_RE$/)); # This could easily break and is very opera specific (works on 9.01) last if (($spacing,$title,$x,$y) = (/^(\s+)0x[0-9a-f]+ "(.+) - Opera": \("opera" "Opera"\)\s*$GEOM_RE$/)); } die("Couldn't find window [Opera] in [$XINFO]\n") unless $title && $x && $y; # Now find the subwindow with the same title my $window; while () { die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n") if (/^(\s+)0x[0-9a-f]+/ && length($1)<=length($spacing)); if (/^(\s+)(0x[0-9a-f]+)\s+"$title".*$GEOM_RE/) { ($spacing,$window,$x,$y) = ($1,$2,$3,$4); last; } } die("Couldn't find subwindow [$title] in Opera windows:\n[$XINFO]\n") unless $window; debug("Found: $window [$title] ${x}x$y"); $window = subwindow($spacing,$window,$x,$y); debug("Final window: $window"); close XINFO; $window; } sub mozilla_find_window { open(XINFO,"$XINFO|") || die("Couldn't run: [$XINFO]\n"); # Pick the first mozilla window. It's got the title in it, but # we have no way of knowing if that matches the URL, so we'll # hope this is the right one.. my ($spacing,$id,$title,$x,$y); while() { # This could easily break and is very mozilla specific (works on firefox) # Looks for [...("Mozilla" "navigator:browser") ..] # I've had this reported: 0x80002f "TITLE - Mozilla": ("Gecko" "Mozilla-bin") 889x687+0+22 +136+44 last if (($spacing,$id,$title,$x,$y) = (/^(\s+)(0x[0-9a-f]+) "(.*)\s*-\s*Mozilla.*": \("Mozilla" "navigator:browser"\)\s*$GEOM_RE$/)); # Submitted by Luca Deplano - ldeplano at it tiscali com # Mozilla 1.4.1 : last if (($spacing,$id,$title,$x,$y) = (/^(\s+)(0x[0-9a-f]+) "(.*)\s*-\s*Mozilla.*": \("mozilla-bin" "Mozilla-bin"\)\s*$GEOM_RE$/)); # Mozilla Firefox 1.0.4 last if (($spacing,$id,$title,$x,$y) = (/^(\s+)(0x[0-9a-f]+) "(.*)\s*-\s*Mozilla Firefox.*": \("Gecko" "Firefox-bin"\)\s*$GEOM_RE$/)); # Debian Mozilla Firefox last if (($spacing,$id,$title,$x,$y) = (/^(\s+)(0x[0-9a-f]+) "(.*)\s*-\s*Mozilla Firefox.*": \("firefox-bin" "Firefox-bin"\)\s*$GEOM_RE$/)); } die("Couldn't find window [Mozilla] in [$XINFO]\n") unless $title && $x && $y; $title =~ s/^\s*//; $title =~ s/\s*$//; print "Using mozilla window: \"$title\"\n"; $id = subwindow($spacing,$id,$x,$y); debug("Final window: $id"); close XINFO; $id; # # Find a subwindow with the same geometry minus a few percent off the top # while () { # my ($sp,$id,$x,$y) = (/^(\s+)(0x[0-9a-f]+)\s+".*".*$GEOM_RE/); # print "Got: $id and $x,$y\n"; #exit if length($sp) <= length($spacing); # } # debug("Found: $id [$title] ${x}x$y"); # # $window = subwindow($spacing,$window,$x,$y); # debug("Final window: $window"); # # close XINFO; # $window; } # I'm using mozilla.. sub find_window { $BROWSER eq "opera" ? opera_find_window(@_) : mozilla_find_window(@_); } sub grab { my ($id,$out,$thumb) = @_; my $grab = $GRAB; $grab =~ s/%id/$id/g; $grab =~ s/%out/$out/g; $grab =~ s/convert /convert -resize $thumb / if $thumb; # Kludgy, but.. system("$grab"); die("Trouble with grab [$?]:\n $grab\n") if $?; } sub main { my ($out,$url,$sleep,$geom,$thumb) = parse_args(); load($url,$geom); sleep($sleep); my $window = find_window(); grab($window,$out,$thumb); } main();