#!/usr/bin/perl
# Filename:	debed
# Author:	David Ljung Madison
# See License:	http://MarginalHacks.com/License/
# Description:	Unembeds quicktime movies
#
# Thanks to a slashdot posting by Pathwalker:
#   http://slashdot.org/comments.pl?sid=61803&cid=5798166
#
# Sorenson now available on linux!
#   http://slashdot.org/articles/02/06/20/2350241.shtml?tid=106
#
######################################################################
# When you look at the page, you want to find the EMBED tag that holds
# the movie, so try the following command:
#   curl http://www.apple.com/trailers/columbia/charliesangelsfullthrottle/large_trailer.html | grep EMBED
# Looking at the embed tag, you will see that it references two quick
# time movies, one as a SRC argument, and one as a HREF argument. If
# both are given, you want the HREF, otherwise, you want the SRC. here :
#   http://a772.g.akamai.net/5/772/51/52d5e54c5d8bec/1a1a1aaa2198c627970773d80669d84574a8d80d3cb12453c02589f25382f668c9329e0375e8177ae955ca3799026392f55dbe309c056add3eac1378e457bd329e0a7ee658c73baf1b80/ca2_fullthrottle-tlr_481.mov
# is the link from the HREF option. It is the reference file for the movie.
#
# Now, download it and run strings on it. You will see several
# instances of "url" on one line, and a url on the next line. These
# are the movies that the reference movie points to. Almost always,
# the first several will be to inform users that they have too old of a
# version of quick time (they usually have a quicktime version embedded
# in the name.) skip over them.
#
# at the end of the file, you will see this:
#   ca2_fullthrottle-tlr_m480.mov
# that is the relative path to the real movie file.
#
# Take the URL for the reference file, and replace
# ca2_fullthrottle-tlr_481.mov with ca2_fullthrottle-tlr_m480.mov
# and you will have
#   'http://a772.g.akamai.net/5/772/51/52d5e54c5d8bec/1a1a1aaa2198c627970773d80669d84574a8d80d3cb12453c02589f25382f668c9329e0375e8177ae955ca3799026392f55dbe309c056add3eac1378e457bd329e0a7ee658c73baf1b80/ca2_fullthrottle-tlr_m480.mov'
# the url for the actual movie data file.
#
# This technique should work, even if Apple changes their convention
# of just adding a m before the size of the movie, to indicate the real file.
#
# This also only works if the movie is being served over http. If it
# is over rtsp, you will need some extra tools: http://www.live.com/
######################################################################
# [Dave] Stuff I've learned:
#   .url or +url or )url is a relative path
######################################################################

use strict;
use warnings;

##################################################
# Setup the variables
##################################################
# Program name without its directory part, used as a prefix in messages.
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

# How to get URLs - pick one
# (the command must write the fetched document to stdout)
my $GET = 'GET';
#my $GET = 'curl';
#my $GET = 'lynx -source';

# How to yank strings from a binary
my $STRINGS = 'strings';

##################################################
# Usage
##################################################

# fatal(@msgs)
#   Print each message to STDERR, prefixed with the program name,
#   then terminate the program with exit(-1).  Never returns.
sub fatal {
  foreach my $msg (@_) { print STDERR "[$PROGNAME] ERROR: $msg\n"; }
  exit(-1);
}

# usage(@msgs)
#   Print any error messages followed by the usage text to STDERR,
#   then terminate with exit -1.  Never returns.
#   NOTE(review): the synopsis line and the "<EMBED>" word below were
#   lost from the copy this was restored from and have been
#   reconstructed - confirm the wording against the upstream script.
sub usage {
  foreach my $msg (@_) { print STDERR "ERROR: $msg\n"; }
  print STDERR <<USAGE;

Usage:\t$PROGNAME [-d] [-qt] [-fetch] <url>
\tFinds streaming movie files in <EMBED> tags for a given URL.
\tExtracts all movies listed in the stream and prints/downloads them.

\t-d\tShow debugging output
\t-qt\tShow quicktime version URLs as well
\t-fetch\tDownload the movies instead of printing

(Currently has no way to distinguish between streaming .movs and
normal .movs - if anyone knows how to do this, please let me know)

Examples:
  % $PROGNAME -fetch http://some.trailer.site/embedded_trailer_page.html
  % xine `$PROGNAME http://some.trailer.site/embedded_trailer_page.html`

Author:\tDavid Ljung Madison
Site:\thttp://MarginalHacks.com/

USAGE
  exit -1;
}

# parse_args()
#   Consume @ARGV.  Recognised switches set the package globals
#   $MAIN::DEBUG (-d), $MAIN::KEEP_QTS (-qt) and $MAIN::FETCH (-f/-fetch);
#   -h or any unknown switch prints the usage and exits.
#   Returns the single URL argument; exits via usage() when there is
#   none or more than one.
sub parse_args {
  my $url;
  # Loop on @ARGV itself so an argument of "0" or "" does not end parsing.
  while (@ARGV) {
    my $arg = shift(@ARGV);
    if ($arg =~ /^-h$/) { usage(); }
    if ($arg =~ /^-d$/) { $MAIN::DEBUG=1; next; }
    if ($arg =~ /^-qt$/) { $MAIN::KEEP_QTS=1; next; }
    if ($arg =~ /^-f(etch)?$/) { $MAIN::FETCH=1; next; }
    if ($arg =~ /^-/) { usage("Unknown option: $arg"); }
    usage("Too many urls specified [$arg and $url]") if defined $url;
    $url = $arg;
  }
  usage("No url defined") unless defined $url && length $url;
  return $url;
}

# debug(@msgs)
#   Print each message to STDERR, but only when -d was given.
sub debug {
  return unless $MAIN::DEBUG;
  foreach my $msg (@_) { print STDERR "[$PROGNAME] $msg\n"; }
}

#print "CAT HACK!\n"; #$GET = 'cat';	# for testing

##################################################
# Code
##################################################

# get_tag($tag, $str)
#   Return the value of attribute $tag (case-insensitive) found in the
#   tag text $str, or undef/empty list when it is not present.
#   Handles 'single quoted', "double quoted" and unquoted values.
sub get_tag {
  my ($tag,$str) = @_;
  return unless defined $str;
  return $1 if $str =~ /\W\Q$tag\E='([^']+)'/i;
  return $1 if $str =~ /\W\Q$tag\E="([^"]+)"/i;
  # An unquoted value runs up to whitespace or the end of the tag; \w+
  # alone would cut "foo.mov" down to "foo".
  return $1 if $str =~ /\W\Q$tag\E=([^\s"'>]+)/i;
  return;
}

# find_embed($url)
#   Fetch $url with $GET and scan the output for <embed ...> tags
#   (which may span several lines).  For each tag, collect the HREF
#   attribute if it names a .mov/.moov file, otherwise the SRC attribute
#   under the same condition.
#   Returns the list of collected movie URLs.  Calls fatal() if the
#   fetch command could not be run or failed without yielding anything,
#   and dies if no usable tag was found.
#   NOTE(review): the tag-scanning loop was reconstructed from a copy in
#   which the "<embed" match and the readline operators had been lost.
sub find_embed {
  my ($url) = @_;
  open(my $fh, '-|', "$GET \Q$url\E") || fatal("Couldn't get url: $url");
  my @found;
  while (defined(my $buf = <$fh>)) {
    # Each pass drops everything up to and including the next "<embed",
    # so the loop always makes progress.
    while ($buf =~ s/\A.*?<embed\b//is) {
      # Keep reading until the tag is closed (or input runs out).
      until ($buf =~ />/) {
        my $more = <$fh>;
        last unless defined $more;
        $buf .= $more;
      }
      # Attributes are everything before the first '>'; whatever follows
      # stays in the buffer in case the line holds another tag.
      my ($attrs, $rest) = split(/>/, $buf, 2);
      $buf = defined $rest ? $rest : '';

      my $src  = get_tag('src',  $attrs);
      my $href = get_tag('href', $attrs);
      undef $src  unless defined $src  && $src  =~ /\.moo?v$/;
      undef $href unless defined $href && $href =~ /\.moo?v$/;

      # Href is preferred
      if    ($href) { push(@found, $href); }
      elsif ($src)  { push(@found, $src); }
    }
  }
  # For a piped open, close() reports the command's exit status; without
  # this check a failed fetch looked like a page with no tags.
  my $closed_ok = close($fh);
  fatal("Couldn't get url: $url [exit status $?]") if !$closed_ok && !@found;
  die("No <EMBED> tags found in url:\n  $url\n\nMake sure you're using the URL for the page that shows the actual movie.\n") unless @found;
  @found;
}

# is_qtversion([$str])
#   True (1) when the string looks like one of the "quicktime version"
#   gateway movies, false (0) otherwise.  Tests its argument when one is
#   given, else $_.
sub is_qtversion {
  my $str = @_ ? $_[0] : $_;
  return 0 unless defined $str;
  return $str =~ m|/qt\dgateQT\d[^/]*moo?v[^/]*|i ? 1 : 0;
}

# parse_embed(@embed)
#   For each reference-movie URL, fetch it with $GET, pipe it through
#   $STRINGS, and collect every line that directly follows a line
#   consisting only of "url".  Lines matching is_qtversion() are skipped
#   unless -qt was given.  A line starting with '.', '+' or ')' is taken
#   as a path relative to the reference movie's directory.
#   Returns the unique URLs in the order they were first seen.
sub parse_embed {
  my (@embed) = @_;
  my %seen;
  my @urls;
  foreach my $embed ( @embed ) {
    # Directory part of the reference movie, for resolving relative paths.
    my $path = $embed;
    $path =~ s|/+[^/]+$||;
    debug("Parsing embed file: $embed");
    open(my $fh, '-|', "$GET \Q$embed\E | $STRINGS")
      || fatal("Couldn't pipe embedded url through strings: $embed");
    my $saw_url = 0;
    while (defined(my $line = <$fh>)) {
      chomp $line;
      if ($saw_url && ($MAIN::KEEP_QTS || !is_qtversion($line))) {
        my $url = $line;
        # .url or +url or )url is a relative path
        $url = "$path/$2" if $url =~ /^(\.|\+|\))(.+)$/;
        unless ($seen{$url}++) {
          debug("Saw url: $url\n");
          push(@urls, $url);
        }
      }
      # The line after a bare "url" marker is the one we want next time.
      $saw_url = ($line =~ /^\s*url\s*$/i) ? 1 : 0;
    }
    close($fh) || debug("Fetch pipeline for $embed ended with status $?");
  }
  @urls;
}

# handle($mov)
#   Without -fetch: print the movie URL on stdout.
#   With -fetch: download it with $GET into a file in the current
#   directory named after the last path component of the URL (anything
#   from the last '&' or '?' on is dropped; "index.html" is used when
#   nothing is left), then print that filename, or an error line if the
#   download command failed.
sub handle {
  my ($mov) = @_;
  return print "$mov\n" unless $MAIN::FETCH;

  my $f = $mov;
  $f =~ s|.*/||;
  $f = $1 if $f =~ /(.+)[\&\?].+/;
  $f = $f || "index.html";
  debug("GET: $mov > $f");
  system("$GET \Q$mov\E > \Q$f\E");
  return print "Error [$?]: $!\n" if $?;
  print "$f\n";
}

##################################################
# Main code
##################################################

# main()
#   Parse the command line, find the embedded reference movies on the
#   page, extract the real movie URLs from them, and print or download
#   each one.
sub main {
  my $url = parse_args();

  my (@embed) = find_embed($url);
  my @urls = parse_embed(@embed);

  # For now, just print or download them.
  # I suppose I could launch a movie player, but I'll let them do that...
  handle($_) foreach @urls;
}
main();