#!/usr/bin/perl -w # # hls-fetch - Download and decrypt HTTP Live Streaming videos. # Copyright (C) 2012 Oskar Liljeblad # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # use strict; use Getopt::Long; use HTML::Parser; use LWP::UserAgent; use JSON; use File::Temp qw(tempfile); use URI::URL; use constant READ_SIZE => 1024; my %opt = ('bandwidth' => 'max'); Getopt::Long::GetOptions(\%opt, 'embedded', 'svtplay', 'playlist', 'output|o=s', 'bandwidth|b=s', 'quiet|q', 'force|f', 'verbose|v', 'no-decrypt', 'version', 'help') || exit 1; if ($opt{'version'}) { print "hls-fetch 0.1\n"; print "Copyright (C) 2012 Oskar Liljeblad\n"; print "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n"; print "This is free software: you are free to change and redistribute it.\n"; print "There is NO WARRANTY, to the extent permitted by law.\n\n"; print "Written by Oskar Liljeblad.\n"; exit 0; } if ($opt{'help'}) { print "Usage: hls-fetch [OPTION]... URL\n"; print "Download and decrypt videos served by the HTTP Live Streaming (HLS) protocol.\n\n"; print " --embedded URL refers to a page with <video> tag (default)\n"; print " --playlist URL refers to an M3U (m3u8) playlist\n"; print " --svtplay URL refers to an SVT Play page (svtplay.se)\n"; print " -o, --output=FILE save video to FILE rather than \"video.ts\"\n"; print " -f, --force force overwriting existing output file\n"; print " -b, --bandwidth=SPEC pick video with specified bandwidth (bits/s),\n"; print " lowest (\"min\") or highest (\"max\") (default max)\n"; print " -v, --verbose explain what is being done\n"; print " -q, --quiet no output other than errors\n"; print " --no-decrypt skip decryption even if stream should be decrypted\n"; print " --help display this help and exit\n"; print " --version output version information and exit\n"; print "\nDecryption requires openssl.\n"; print "\nReport bugs to Oskar Liljeblad <oskar\@osk.mine.nu>.\n"; exit 0; } die "--embedded, --playlist and --svtplay are mutually exclusive\n" if (scalar grep { defined } @opt{'svtplay', 'playlist', 'embedded'}) > 1; $opt{'embedded'} = 1 if !$opt{'playlist'} && !$opt{'svtplay'}; die "--verbose and --quiet are mutually exclusive\n" if $opt{'verbose'} && $opt{'quiet'}; die "non-numeric --bandwidth specified\n" if $opt{'bandwidth'} !~ /^(min|max|\d+)$/; die "missing URL operand\n" if !@ARGV; if (!exists $opt{'output'}) { $opt{'output'} = 'video.ts'; warn "no output file specified, assuming video.ts\n" if !$opt{'quiet'}; } my ($url) = @ARGV; my $browser = LWP::UserAgent->new; $browser->cookie_jar({}); my $video_file = $opt{'output'}; die "$video_file: file exists, not overwriting without -f/--force\n" if !$opt{'force'} && -e $video_file; open(my $video_fh, '>', $video_file) || die "$video_file: cannot open file: $!\n"; if ($opt{'svtplay'}) { my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@; my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_svtplay_tag, 'tagname,@attr']); my ($json_path, $json_title); sub handle_svtplay_tag { my ($tag, %attr) = @_; if ($tag eq 'a' && exists $attr{'id'} && $attr{'id'} =~ /^player($|_)/ && exists $attr{'data-json-href'} && exists $attr{'data-title'}) { $json_path = $attr{'data-json-href'}; $json_title = $attr{'data-title'}; } } $parser->parse($data); die "$url: cannot find any video on page\n" if !defined $json_path; print "Title: $json_title\n" if $opt{'verbose'}; my $json_url = url($json_path, $url)->abs; $json_url->query('output=json'); $json_url = $json_url->as_string(); print "URL (JSON): $json_url\n" if $opt{'verbose'}; $data = eval { fetch_url($json_url) }; die "$json_url: cannot fetch JSON data: $@" if $@; my $json = decode_json($data) // die "$json_url: cannot parse JSON data\n"; die "$json_url: invalid JSON data\n" if !exists $json->{'video'}->{'videoReferences'}->[0]; ($url) = map { $_->{'url'} } grep { $_->{'playerType'} eq 'ios' } @{$json->{'video'}->{'videoReferences'}}; die "$json_url: missing video URL for 'ios' type in JSON data\n" if !defined $url; print "URL (master): $url\n" if $opt{'verbose'}; } elsif ($opt{'embedded'}) { my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@; my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_playlist_tag, 'tagname,@attr']); my $index_url; sub handle_playlist_tag { my ($tag, %attr) = @_; if ($tag eq 'video') { $index_url = $attr{'src'} if exists $attr{'src'}; } elsif ($tag eq 'source') { $index_url = $attr{'src'} if exists $attr{'src'} && exists $attr{'type'} && $attr{'type'} eq 'application/vnd.apple.mpegurl'; } } $parser->parse($data); die "$url: cannot find any video on page\n" if !defined $index_url; $url = url($index_url, $url)->abs()->as_string(); print "URL (master): $url\n" if $opt{'verbose'}; } my $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@; my @lines = split(/\r*\n|\r\n*/, $data); die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U'; if (!grep { /^#EXTINF:/ } @lines) { my (@streams, $last_stream); foreach my $line (@lines) { if ($line =~ /^#EXT-X-STREAM-INF:(.*)$/) { $last_stream = { parse_m3u_attribs($url, $1) }; push @streams, $last_stream; } elsif ($line !~ /^#EXT/) { die "$url: missing #EXT-X-STREAM-INF for URL: $line\n" if !defined $last_stream; $last_stream->{'URL'} = $line; $last_stream = undef; } } die "$url: no streams found in playlist\n" if !@streams; warn "$url: non-numeric bandwidth in playlist\n" if grep { $_->{'BANDWIDTH'} =~ /\D/ } @streams; my @bandwidths = sort { $a <=> $b } grep { /^\d+$/ } map { $_->{'BANDWIDTH'} } @streams; print "Bandwidths: ", join(', ', @bandwidths), "\n" if $opt{'verbose'}; my $stream; if ($opt{'bandwidth'} eq 'min') { ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[0] } @streams; } elsif ($opt{'bandwidth'} eq 'max') { ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[-1] } @streams; } else { ($stream) = grep { $opt{'bandwidth'} == $_->{'BANDWIDTH'} } @streams; die "$url: no streams with bandwidth $opt{'bandwidth'} in playlist\n" if !defined $stream; } print "Bandwidth (selected): $stream->{'BANDWIDTH'}\n" if $opt{'verbose'}; $url = url($stream->{'URL'}, $url)->abs()->as_string(); print "URL (index): $url\n" if $opt{'verbose'}; $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@; @lines = split(/\r?\n/, $data); die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U'; } my $sequence = 0; my (%segments, $cryptkey_url); foreach my $line (@lines) { if ($line =~ /^#EXT-X-MEDIA-SEQUENCE:(\d+)$/) { $sequence = $1; print "First sequence number: $sequence\n" if $opt{'verbose'}; } elsif ($line =~ /^#EXT-X-KEY:(.*)$/) { my %attr = parse_m3u_attribs($url, $1); die "$url: unsupported encryption method $attr{'METHOD'} in playlist\n" if exists $attr{'METHOD'} && $attr{'METHOD'} ne 'AES-128'; $cryptkey_url = $attr{'URI'}; die "$url: missing encryption key URI in playlist\n" if !defined $cryptkey_url; } elsif ($line !~ /^#EXT/) { $segments{$sequence} = { 'url' => $line, 'cryptkey_url' => $cryptkey_url }; $sequence++; } } die "$url: no segments in playlist\n" if !scalar keys %segments; my %cryptkeys; #my $cryptkey; #if (defined $cryptkey_url) { # print "URL (key): $cryptkey_url\n" if $opt{'verbose'}; # $cryptkey = eval { fetch_url($cryptkey_url) }; die "$cryptkey_url: cannot fetch encryption key: $@" if $@; # $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey)); # print "Key: $cryptkey\n" if $opt{'verbose'}; #} print "Segments: ", scalar keys %segments, "\n" if $opt{'verbose'}; $| = 1; foreach my $sequence (sort { $a <=> $b } keys %segments) { my $segment = $segments{$sequence}; my $segment_url = url($segment->{'url'}, $url)->abs()->as_string(); print "URL (segment $sequence): $segment_url\n" if $opt{'verbose'}; printf "\r%d/%d", $sequence, scalar keys %segments if !$opt{'quiet'} && !$opt{'verbose'}; if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'} && !exists $cryptkeys{$segment->{'cryptkey_url'}}) { print "URL (key): ", $segment->{'cryptkey_url'}, "\n" if $opt{'verbose'}; my $cryptkey = eval { fetch_url($segment->{'cryptkey_url'}) }; die "$segment->{'cryptkey_url'}: cannot fetch encryption key: $@" if $@; $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey)); print "Key: $cryptkey\n" if $opt{'verbose'}; $cryptkeys{$segment->{'cryptkey_url'}} = $cryptkey; } my ($segment_fh, $segment_file) = tempfile(); close $segment_fh; eval { eval { fetch_url($segment_url, $segment_file) }; die "$segment_url: cannot not fetch segment: $@" if $@; if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'}) { my ($decrypt_fh, $decrypt_file) = tempfile(); close $decrypt_fh; my $iv = sprintf('%032x', $sequence); my @cmd = ('openssl', 'aes-128-cbc', '-d', '-in', $segment_file, '-out', $decrypt_file, '-K', $cryptkeys{$segment->{'cryptkey_url'}}, '-iv', $iv); system @cmd; unlink $segment_file || warn "$segment_file: cannot remove file: $!\n"; $segment_file = $decrypt_file; die "$segment_file: openssl failed (status $?)\n" if $? != 0; } open ($segment_fh, '<', $segment_file) || die "$segment_file: cannot open file: $!\n"; for (;;) { my $size = sysread($segment_fh, $data, READ_SIZE); die "$segment_file: cannot read from file: $!\n" if !defined $size; last if $size == 0; die "$video_file: cannot write to file: $!\n" if !defined syswrite($video_fh, $data); } close $segment_fh; }; unlink $segment_file || warn "$segment_file: cannot remove file: $!\n"; die $@ if $@; } close $video_fh; sub parse_m3u_attribs { my ($url, $attr_str) = @_; my %attr; for (my $as = $attr_str; $as ne ''; ) { $as =~ s/^?([^=]*)=([^,"]*|"[^"]*")\s*(,\s*|$)// or die "$url: invalid attributes in playlist: $attr_str\n"; my ($key, $val) = ($1, $2); $val =~ s/^"(.*)"$/$1/; $attr{$key} = $val; } return %attr; } sub fetch_url { my ($url, $filename) = @_; if (defined $filename) { my $response = $browser->get($url, ":content_file" => $filename); die $response->status_line(), "\n" if !$response->is_success; return undef; } else { my $response = $browser->get($url); die $response->status_line(), "\n" if !$response->is_success; return $response->decoded_content(); } }