#!/usr/bin/env perl # This chunk of stuff was generated by App::FatPacker. To find the original # file's code, look for the end of this BEGIN block or the string 'FATPACK' BEGIN { my %fatpacked; $fatpacked{"CPAN/DistnameInfo.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'CPAN_DISTNAMEINFO'; package CPAN::DistnameInfo; $VERSION = "0.12"; use strict; sub distname_info { my $file = shift or return; my ($dist, $version) = $file =~ /^ ((?:[-+.]*(?:[A-Za-z0-9]+|(?<=\D)_|_(?=\D))* (?: [A-Za-z](?=[^A-Za-z]|$) | \d(?=-) )(? 6 and $1 & 1) or ($2 and $2 >= 50)) or $3; } elsif ($version =~ /\d\D\d+_\d/ or $version =~ /-TRIAL/) { $dev = 1; } } else { $version = undef; } ($dist, $version, $dev); } sub new { my $class = shift; my $distfile = shift; $distfile =~ s,//+,/,g; my %info = ( pathname => $distfile ); ($info{filename} = $distfile) =~ s,^(((.*?/)?authors/)?id/)?([A-Z])/(\4[A-Z])/(\5[-A-Z0-9]*)/,, and $info{cpanid} = $6; if ($distfile =~ m,([^/]+)\.(tar\.(?:g?z|bz2)|zip|tgz)$,i) { # support more ? $info{distvname} = $1; $info{extension} = $2; } @info{qw(dist version beta)} = distname_info($info{distvname}); $info{maturity} = delete $info{beta} ? 
'developer' : 'released'; return bless \%info, $class; } sub dist { shift->{dist} } sub version { shift->{version} } sub maturity { shift->{maturity} } sub filename { shift->{filename} } sub cpanid { shift->{cpanid} } sub distvname { shift->{distvname} } sub extension { shift->{extension} } sub pathname { shift->{pathname} } sub properties { %{ $_[0] } } 1; __END__ =head1 NAME CPAN::DistnameInfo - Extract distribution name and version from a distribution filename =head1 SYNOPSIS my $pathname = "authors/id/G/GB/GBARR/CPAN-DistnameInfo-0.02.tar.gz"; my $d = CPAN::DistnameInfo->new($pathname); my $dist = $d->dist; # "CPAN-DistnameInfo" my $version = $d->version; # "0.02" my $maturity = $d->maturity; # "released" my $filename = $d->filename; # "CPAN-DistnameInfo-0.02.tar.gz" my $cpanid = $d->cpanid; # "GBARR" my $distvname = $d->distvname; # "CPAN-DistnameInfo-0.02" my $extension = $d->extension; # "tar.gz" my $pathname = $d->pathname; # "authors/id/G/GB/GBARR/..." my %prop = $d->properties; =head1 DESCRIPTION Many online services that are centered around CPAN attempt to associate multiple uploads by extracting a distribution name from the filename of the upload. For most distributions this is easy as they have used ExtUtils::MakeMaker or Module::Build to create the distribution, which results in a uniform name. But sadly not all uploads are created in this way. C uses heuristics that have been learnt by L to extract the distribution name and version from filenames and also report if the version is to be treated as a developer release The constructor takes a single pathname, returning an object with the following methods =over =item cpanid If the path given looked like a CPAN authors directory path, then this will be the the CPAN id of the author. 
=item dist

The name of the distribution

=item distvname

The file name with any suffix and leading directory names removed

=item filename

If the path given looked like a CPAN authors directory path, then this will be
the path to the file relative to the detected CPAN author directory. Otherwise
it is the path that was passed in.

=item maturity

The maturity of the distribution. This will be either C<released> or C<developer>

=item extension

The extension of the distribution, often used to denote the archive type (e.g. 'tar.gz')

=item pathname

The pathname that was passed to the constructor when creating the object.

=item properties

This will return a list of key-value pairs, suitable for assigning to a hash,
for the known properties.

=item version

The extracted version

=back

=head1 AUTHOR

Graham Barr

=head1 COPYRIGHT

Copyright (c) 2003 Graham Barr. All rights reserved. This program is
free software; you can redistribute it and/or modify it under the same
terms as Perl itself.

=cut
CPAN_DISTNAMEINFO

$fatpacked{"Dist/Surveyor.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIST_SURVEYOR';
package Dist::Surveyor;

=head1 NAME

Dist::Surveyor - Survey installed modules and determine the specific distribution versions they came from

=head1 SYNOPSIS

    my $options = {
        opt_match => $opt_match,
        opt_perlver => $opt_perlver,
        opt_remnants => $opt_remnants,
        distro_key_mod_names => $distro_key_mod_names,
    };
    my @installed_releases = determine_installed_releases($options, \@libdirs);

=head1 DESCRIPTION

Surveys your huge ball of Perl modules, jammed together inside a directory,
and tells you exactly which module is installed there.

For quick start, and a fine example of this module usage, see L<dist_surveyor>.

This module has one exported function - determine_installed_releases

=cut

use strict;
use warnings;

use version;
use Carp; # core
use Data::Dumper; # core
use File::Find; # core
use File::Spec; # core
use List::Util qw(max sum); # core
use Dist::Surveyor::Inquiry; # internal
use Module::CoreList;
use Module::Metadata;

our $VERSION = '0.022';

# Platform flags consulted by the directory scanning code.
use constant ON_WIN32 => $^O eq 'MSWin32';
use constant ON_VMS => $^O eq 'VMS';

if (ON_VMS) {
    require File::Spec::Unix;
}

# $DEBUG and $VERBOSE are aliases of the main-package globals
# $::DEBUG and $::VERBOSE, so the calling script controls them.
our ($DEBUG, $VERBOSE);
*DEBUG = \$::DEBUG;
*VERBOSE = \$::VERBOSE;

require Exporter;
our @ISA = qw{Exporter};
our @EXPORT = qw{determine_installed_releases};

=head1 determine_installed_releases($options, $search_dirs)

$options includes:

=over

=item opt_match

A regex qr//. If given, modules that don't match this regex are ignored

=item opt_perlver

Skip modules that are included as core in this Perl version

=item opt_remnants

If true, output will include old distribution versions that have left old modules behind

=item distro_key_mod_names

A hash-ref, with a list of irregularly named releases. i.e. 'libwww-perl' => 'LWP'.

=back

$search_dirs is an array-ref containing the list of directories to survey.

Returns a list, where each element is a hashref representing one installed distribution.
This hashref is what MetaCPAN returns for
C<https://fastapi.metacpan.org/v1/release/$author/$release>,
with two additional keys:

=over

=item *

'url' - the same as 'download_url', but without the hostname. Can be used to
download the file from your favorite mirror

=item *

'dist_data' - Hashref containing info about the release, i.e. percent_installed.
(fully installed releases will have '100.00')

=back

=cut

# Survey the given directories and return one hashref per installed release.
#   $options     - hashref: opt_match, opt_perlver, opt_remnants,
#                  distro_key_mod_names (opt_perlver defaults to the
#                  running perl's version)
#   $search_dirs - arrayref of directories to scan
# Returns the concatenated results of refine_releases() for every
# distribution that at least one installed module was mapped to.
sub determine_installed_releases {
    my ($options, $search_dirs) = @_;
    $options->{opt_perlver} ||= version->parse( $] )->numify;

    my %installed_mod_info;

    warn "Finding modules in @$search_dirs\n";
    my ($installed_mod_files, $installed_meta) = find_installed_modules(@$search_dirs);

    # get the installed version of each installed module and related info
    warn "Finding candidate releases for the ".keys(%$installed_mod_files)." installed modules\n";
    foreach my $module ( sort keys %$installed_mod_files ) {
        my $mod_file = $installed_mod_files->{$module};

        if (my $opt_match = $options->{opt_match}) {
            # no /o here: the pattern must be re-read on every call, otherwise
            # a second survey with a different opt_match would silently reuse
            # the first regex
            if ($module !~ m/$opt_match/) {
                delete $installed_mod_files->{$module};
                next;
            }
        }

        module_progress_indicator($module) unless $VERBOSE;
        my $mi = get_installed_mod_info($options, $module, $mod_file);
        $installed_mod_info{$module} = $mi if $mi;
    }

    # Map modules to dists using the accumulated %installed_mod_info info

    warn "*** Mapping modules to releases\n";

    my %best_dist;
    foreach my $mod ( sort keys %installed_mod_info ) {
        my $mi = $installed_mod_info{$mod};

        module_progress_indicator($mod) unless $VERBOSE;

        # find best match among the cpan releases that included this module
        my $ccdr = $installed_mod_info{$mod}{candidate_cpan_dist_releases}
            or next; # no candidates, warned about above (for mods with a version)

        # modules sharing the same candidate set share the same answer
        my $best_dist_cache_key = join " ", sort keys %$ccdr;
        our %best_dist_cache;
        my $best = $best_dist_cache{$best_dist_cache_key}
            ||= pick_best_cpan_dist_release($ccdr, \%installed_mod_info);

        my $note = "";
        if ((@$best > 1) and $installed_meta->{perllocalpod}) {
            # try using perllocal.pod to narrow the options, if there is one
            # XXX TODO move this logic into the per-candidate-distro loop below
            # it doesn't make much sense to be here at the per-module level
            my @in_perllocal = grep {
                my $distname = $_->{distribution};
                # perllocal_distro_mod_version returns a single value (the
                # recorded version), so report the distribution name we asked for
                my $v = perllocal_distro_mod_version(
                    $options->{distro_key_mod_names}, $distname, $installed_meta->{perllocalpod});
                warn "$distname in perllocal.pod: ".($v ? "YES" : "NO")."\n"
                    if $DEBUG;
                $v;
            } @$best;
            if (@in_perllocal && @in_perllocal < @$best) {
                $note = sprintf "narrowed from %d via perllocal", scalar @$best;
                $best = \@in_perllocal;
            }
        }

        if (@$best > 1 or $note) { # note the poor match for this module
            # but not if there's no version (as that's common)
            my $best_desc = join " or ", map { $_->{release} } @$best;
            warn "$mod $mi->{version} odd best match: $best_desc $note ($best->[0]{fraction_installed})\n"
                if $note or $VERBOSE or ($mi->{version} and $best->[0]{fraction_installed} < 0.3);
            # if the module has no version and multiple best matches
            # then it's unlikely make a useful contribution, so ignore it
            # XXX there's a risk that we'd ignore all the modules of a release
            # where none of the modules has a version, but that seems unlikely.
            next if not $mi->{version};
        }

        for my $dist (@$best) {
            # two level hash to make it easier to handle versions
            my $di = $best_dist{ $dist->{distribution} }{ $dist->{release} } ||= { dist => $dist };
            push @{ $di->{modules} }, $mi;
            $di->{or}{$_->{release}}++ for grep { $_ != $dist } @$best;
        }

    }

    warn "*** Refining releases\n";

    # $best_dist{ Foo }{ Foo-1.23 }{ dist=>$dist_struct, modules=>, or=>{ Foo-1.22 => $dist_struct } }

    my @installed_releases; # Dist-Name => { ... }

    for my $distname ( sort keys %best_dist ) {
        my $releases = $best_dist{$distname};
        push @installed_releases, refine_releases($options, $distname, $releases);
    }

    # sorting into dependency order could be added later, maybe

    return @installed_releases;
}

# Decide which of the candidate releases of one distribution is installed.
#   $options  - hashref; only opt_remnants is consulted here
#   $distname - distribution name (used in messages only)
#   $releases - hashref: release name => { dist => ..., modules => [...] }
# Returns a list of release hashrefs (MetaCPAN release data plus 'url' and
# 'dist_data'); remnants come first when opt_remnants is set.
sub refine_releases {
    my ($options, $distname, $releases) = @_;

    my @dist_by_version = sort {
        $a->{dist}{version_obj} <=> $b->{dist}{version_obj} or
        $a->{dist}{fraction_installed} <=> $b->{dist}{fraction_installed}
    } values %$releases;
    my @dist_by_fraction = sort {
        $a->{dist}{fraction_installed} <=> $b->{dist}{fraction_installed} or
        $a->{dist}{version_obj} <=> $b->{dist}{version_obj}
    } values %$releases;

    my @remnant_dists = @dist_by_version;
    my $installed_dist = pop @remnant_dists;

    # is the most recent candidate dist version also the one with the
    # highest fraction_installed?
    if ($dist_by_version[-1] == $dist_by_fraction[-1]) {
        # this is the common case: we'll assume that's installed and the
        # rest are remnants of earlier versions
    }
    elsif ($dist_by_fraction[-1]{dist}{fraction_installed} >= 1) {
        # fraction_installed is a 0..1 fraction (percent_installed is the
        # 0..100 form), so "fully installed" means 1, not 100
        warn "Unsure which $distname is installed from among @{[ keys %$releases ]}\n";
        @remnant_dists = @dist_by_fraction;
        $installed_dist = pop @remnant_dists;
        warn "Selecting the one that apprears to be 100% installed\n";
    }
    else {
        # else grumble so the user knows to ponder the possibilities
        warn "Can't determine which $distname is installed from among @{[ keys %$releases ]}\n";
        warn Dumper([\@dist_by_version, \@dist_by_fraction]);
        warn "\tSelecting based on latest version\n";
    }

    if (@remnant_dists or $DEBUG) {
        # the heading is printed only once per process
        warn "Distributions with remnants (chosen release is first):\n"
            unless our $dist_with_remnants_warning++;
        warn "@{[ map { $_->{dist}{release} } reverse @dist_by_fraction ]}\n";
        for ($installed_dist, @remnant_dists) {
            my $modules = $_->{modules};
            my $mv_desc = join(", ", map { "$_->{module} $_->{version}" } @$modules);
            warn sprintf "\t%s\t%s%% installed: %s\n",
                $_->{dist}{release},
                $_->{dist}{percent_installed},
                (@$modules > 4 ? "(".@$modules." modules)" : $mv_desc),
        }
    }

    my @installed_releases;
    # note ordering: remnants first
    for (($options->{opt_remnants} ? @remnant_dists : ()), $installed_dist) {
        my ($author, $release)
            = @{$_->{dist}}{qw(author release)};

        my $release_data = get_release_info($author, $release);
        next unless $release_data;

        # shortcuts
        (my $url = $release_data->{download_url}) =~ s{ .*? \b authors/ }{authors/}x;

        push @installed_releases, {
            #
            %$release_data,
            # extra items mushed inhandy shortcuts
            url => $url,
            # raw data structures
            dist_data => $_->{dist},
        };
    }

    #die Dumper(\@installed_releases);
    return @installed_releases;
}

# for each installed module, get the list of releases that it exists in
# Parameters:
#   $options - uses only opt_perlver
#   $module - module name (i.e. 'Dist::Surveyor')
#   $mod_file - the location of this module on the filesystem
# Return:
#   undef if this module should be skipped
#   otherwise, a hashref containing:
#       file => $mod_file,
#       module => $module,
#       version => $mod_version,
#       version_obj => same as version, but as an object,
#       size => $mod_file_size,
#       # optional flags:
#       file_size_mismatch => 1,
#       cpan_dist_fallback => 1, # could not find this module/version on cpan,
#           # but found a release with that version, containing such module
#       version_not_on_cpan => 1, # can not find this file on CPAN.
#       # releases info
#       candidate_cpan_dist_releases => hashref,
#
#   candidate_cpan_dist_releases hashref contain a map of all the releases
#   that this module exists in. see get_candidate_cpan_dist_releases for more
#   info.
# Collect version, size and candidate CPAN releases for one installed module.
# Returns nothing when the module is supplied by the target perl
# ($options->{opt_perlver}), otherwise a hashref with file, module, version,
# version_obj and size, plus optional flags and candidate_cpan_dist_releases.
sub get_installed_mod_info {
    my ($options, $module, $mod_file) = @_;

    my $mod_version = do {
        # silence warnings about duplicate VERSION declarations
        # eg Catalyst::Controller::DBIC::API* 2.002001
        local $SIG{__WARN__} = sub { warn @_ if $_[0] !~ /already declared with version/ };
        my $mm = Module::Metadata->new_from_file($mod_file);
        # new_from_file can return undef; treat that like "no version"
        # instead of dying on a method call on undef
        $mm ? $mm->version : undef; # only one version for one package in file
    };
    $mod_version ||= 0; # XXX
    my $mod_file_size = -s $mod_file;

    # Eliminate modules that will be supplied by the target perl version
    if ( my $cv = $Module::CoreList::version{ $options->{opt_perlver} }->{$module} ) {
        $cv =~ s/ //g;
        if (version->parse($cv) >= version->parse($mod_version)) {
            warn "$module is core in perl $options->{opt_perlver} (lib: $mod_version, core: $cv) - skipped\n";
            return;
        }
    }

    my $mi = {
        file => $mod_file,
        module => $module,
        version => $mod_version,
        version_obj => version->parse($mod_version),
        size => $mod_file_size,
    };

    # ignore modules we know aren't indexed
    return $mi if $module =~ /^Moose::Meta::Method::Accessor::Native::/;

    # XXX could also consider file mtime: releases newer than the mtime
    # of the module file can't be the origin of that module file.
    # (assuming clocks and file times haven't been messed with)

    eval {
        # first try an exact match (name + version + file size), then relax
        my $ccdr = get_candidate_cpan_dist_releases($module, $mod_version, $mod_file_size);
        if (not %$ccdr) {
            $ccdr = get_candidate_cpan_dist_releases($module, $mod_version, 0);
            if (%$ccdr) {
                # probably either a local change/patch or installed direct from repo
                # but with a version number that matches a release
                warn "$module $mod_version on CPAN but with different file size (not $mod_file_size)\n"
                    if $mod_version or $VERBOSE;
                $mi->{file_size_mismatch}++;
            }
            elsif ($ccdr = get_candidate_cpan_dist_releases_fallback($module, $mod_version) and %$ccdr) {
                warn "$module $mod_version not on CPAN but assumed to be from @{[ sort keys %$ccdr ]}\n"
                    if $mod_version or $VERBOSE;
                $mi->{cpan_dist_fallback}++;
            }
            else {
                $mi->{version_not_on_cpan}++;
                # Possibly:
                # - a local change/patch or installed direct from repo
                #   with a version number that was never released.
                # - a private module never released on cpan.
                # - a build-time create module eg common/sense.pm.PL
                warn "$module $mod_version not found on CPAN\n"
                    if $mi->{version} # no version implies uninteresting
                    or $VERBOSE;
                # XXX could try finding the module with *any* version on cpan
                # to help with later advice. ie could select as candidates
                # the version above and the version below the number we have,
                # and set a flag to inform later logic.
            }
        }
        $mi->{candidate_cpan_dist_releases} = $ccdr if %$ccdr;
    };
    if ($@) {
        warn "Failed get_candidate_cpan_dist_releases($module, $mod_version, $mod_file_size): $@";
    }

    return $mi;
}

# pick_best_cpan_dist_release - memoized
# for each %$ccdr adds a fraction_installed based on %$installed_mod_info
# returns ref to array of %$ccdr values that have the max fraction_installed

sub pick_best_cpan_dist_release {
    my ($ccdr, $installed_mod_info) = @_;

    for my $release (sort keys %$ccdr) {
        my $release_info = $ccdr->{$release};
        $release_info->{fraction_installed}
            = dist_fraction_installed($release_info->{author}, $release, $installed_mod_info);
        $release_info->{percent_installed} # for informal use
            = sprintf "%.2f", $release_info->{fraction_installed} * 100;
    }

    my $max_fraction_installed = max( map { $_->{fraction_installed} } values %$ccdr );
    my @best = grep { $_->{fraction_installed} == $max_fraction_installed } values %$ccdr;

    return \@best;
}

# returns a number from 0 to 1 representing the fraction of the modules
# in a particular release match the coresponding modules in %$installed_mod_info
sub dist_fraction_installed {
    my ($author, $release, $installed_mod_info) = @_;

    my $tag = "$author/$release";
    my $mods_in_rel = get_module_versions_in_release($author, $release);
    my $mods_in_rel_count = keys %$mods_in_rel;
    # sum() returns undef for an empty list, hence the "|| 0"
    my $mods_inst_count = sum( map {
        my $mi = $installed_mod_info->{ $_->{name} };
        # XXX we stash the version_obj into the mods_in_rel hash
        # (though with little/no caching effect with current setup)
        $_->{version_obj} ||= eval { version->parse($_->{version}) };
        my $hit = ($mi && $mi->{version_obj} == $_->{version_obj}) ? 1 : 0;
        # demote to a low-scoring partial match if the file size differs
        # XXX this isn't good as the effect varies with the number of modules
        $hit = 0.1 if $mi && $mi->{size} != $_->{size};
        warn sprintf("%s %s %s %s: %s\n", $tag, $_->{name}, $_->{version_obj}, $_->{size},
                ($hit == 1) ? "matches"
                    : ($mi) ? "differs ($mi->{version_obj}, $mi->{size})"
                    : "not installed")
            if $DEBUG;
        $hit;
    } values %$mods_in_rel) || 0;

    my $fraction_installed = ($mods_in_rel_count) ? $mods_inst_count/$mods_in_rel_count : 0;
    warn "$author/$release:\tfraction_installed $fraction_installed ($mods_inst_count/$mods_in_rel_count)\n"
        if $VERBOSE or !$mods_in_rel_count;

    return $fraction_installed;
}

# Returns the modification time (epoch seconds) of $file.
sub get_file_mtime {
    my ($file) = @_;
    # try to find the time the file was 'installed'
    # by looking for the commit date in svn or git
    # else fallback to the file modification time
    return (stat($file))[9];
}

# Walk the given directories looking for *.pm files.
# Returns two hashrefs: module name => file path (first occurrence wins),
# and search metadata (currently only 'perllocalpod' => [paths]).
sub find_installed_modules {
    my (@dirs) = @_;

    ### File::Find uses follow_skip => 1 by default, which doesn't die
    ### on duplicates, unless they are directories or symlinks.
    ### Ticket #29796 shows this code dying on Alien::WxWidgets,
    ### which uses symlinks.
    ### File::Find doc says to use follow_skip => 2 to ignore duplicates
    ### so this will stop it from dying.
    my %find_args = ( follow_skip => 2 );

    ### File::Find uses lstat, which quietly becomes stat on win32
    ### it then uses -l _ which is not allowed by the statbuffer because
    ### you did a stat, not an lstat (duh!). so don't tell win32 to
    ### follow symlinks, as that will break badly
    # XXX disabled because we want the postprocess hook to work
    #$find_args{'follow_fast'} = 1 unless ON_WIN32;

    ### never use the @INC hooks to find installed versions of
    ### modules -- they're just there in case they're not on the
    ### perl install, but the user shouldn't trust them for *other*
    ### modules!
    ### XXX CPANPLUS::inc is now obsolete, remove the calls
    #local @INC = CPANPLUS::inc->original_inc;

    # sort @dirs to put longest first to make it easy to handle
    # elements that are within other elements (e.g., an archdir)
    my @dirs_ordered = sort { length $b <=> length $a } @dirs;

    my %seen_mod;
    my %dir_done;
    my %meta; # return metadata about the search
    for my $dir (@dirs_ordered) {
        next if $dir eq '.';

        ### not a directory after all
        ### may be coderef or some such
        next unless -d $dir;

        ### make sure to clean up the directories just in case,
        ### as we're making assumptions about the length
        ### This solves rt.cpan issue #19738

        ### John M. notes: On VMS cannonpath can not currently handle
        ### the $dir values that are in UNIX format.
        $dir = File::Spec->canonpath($dir) unless ON_VMS;

        ### have to use F::S::Unix on VMS, or things will break
        my $file_spec = ON_VMS ? 'File::Spec::Unix' : 'File::Spec';

        ### XXX in some cases File::Find can actually die!
        ### so be safe and wrap it in an eval.
        eval {
            File::Find::find(
                {   %find_args,
                    postprocess => sub {
                        $dir_done{$File::Find::dir}++;
                    },
                    wanted => sub {

                        unless (/\.pm$/i) {
                            # skip all dot-dirs (eg .git .svn)
                            $File::Find::prune = 1
                                if -d $File::Find::name and /^\.\w/;
                            # don't reenter a dir we've already done
                            $File::Find::prune = 1
                                if $dir_done{$File::Find::name};
                            # remember perllocal.pod if we see it
                            push @{$meta{perllocalpod}}, $File::Find::name
                                if $_ eq 'perllocal.pod';
                            return;
                        }
                        my $mod = $File::Find::name;

                        ### make sure it's in Unix format, as it
                        ### may be in VMS format on VMS;
                        $mod = VMS::Filespec::unixify($mod) if ON_VMS;

                        # strip the "$dir/" prefix and the ".pm" suffix
                        $mod = substr( $mod, length($dir) + 1, -3 );
                        $mod = join '::', $file_spec->splitdir($mod);

                        return if $seen_mod{$mod};
                        $seen_mod{$mod} = $File::Find::name;

                        ### ignore files that don't contain a matching package declaration
                        ### warn about those that do contain some kind of package declaration
                        #use File::Slurp;
                        #my $content = read_file($File::Find::name);
                        #unless ( $content =~ m/^ \s* package \s+ (\#.*\n\s*)? $mod \b/xm ) {
                        #warn "No 'package $mod' seen in $File::Find::name\n"
                        #if $VERBOSE && $content =~ /\b package \b/x;
                        #return;
                        #}

                    },
                },
                $dir
            );
            1;
        }
            or die "File::Find died: $@";

    }

    return (\%seen_mod, \%meta);
}

# Look up a distribution in perllocal.pod.
#   $distro_key_mod_names - hashref mapping irregular dist names to the
#                           module name recorded for them
#   $distname             - distribution name, e.g. 'Foo-Bar'
#   $perllocalpod         - arrayref of perllocal.pod paths (first is used)
# Returns the VERSION recorded for the distribution's key module, or undef.
# The perllocal.pod data is parsed once and kept for the life of the process.
sub perllocal_distro_mod_version {
    my ($distro_key_mod_names, $distname, $perllocalpod) = @_;

    ( my $dist_mod_name = $distname ) =~ s/-/::/g;
    my $key_mod_name = $distro_key_mod_names->{$distname} || $dist_mod_name;

    our $perllocal_distro_mod_version;
    if (not $perllocal_distro_mod_version) { # initial setup
        my $have_list = (ref($perllocalpod) eq 'ARRAY');
        warn "Only first perllocal.pod file will be processed: @$perllocalpod\n"
            if $have_list and @$perllocalpod > 1;

        $perllocal_distro_mod_version = {};
        # extract data from perllocal.pod
        # (read the first entry without shift: the list belongs to the caller)
        if (my $plp = $have_list ? $perllocalpod->[0] : undef) {
            # The VERSION isn't always the same as that in the distro file
            if (eval { require ExtUtils::Perllocal::Parser }) {
                my $p = ExtUtils::Perllocal::Parser->new;
                $perllocal_distro_mod_version = { map {
                    $_->name => $_->{data}{VERSION}
                } $p->parse_from_file($plp) };
                warn "Details of ".keys(%$perllocal_distro_mod_version)." distributions found in $plp\n";
            }
            else {
                warn "Wanted to use perllocal.pod but can't because ExtUtils::Perllocal::Parser isn't available\n";
            }
        }
        else {
            warn "No perllocal.pod found to aid disambiguation\n";
        }
    }

    return $perllocal_distro_mod_version->{$key_mod_name};
}

# Emit a progress line each time the top-level namespace of the modules
# being processed changes (e.g. "Foo::Bar" -> "Qux::Baz").
sub module_progress_indicator {
    my ($module) = @_;
    my $crnt = (split /::/, $module)[0];
    our $last ||= '';
    if ($last ne $crnt) {
        warn "\t$crnt...\n";
        $last = $crnt;
    }
}

=head1 OTHERS

This module checks $::DEBUG and $::VERBOSE for obvious purposes.

This module uses L<Dist::Surveyor::Inquiry> to communicate with MetaCPAN.
Check that module's documentation for options and caching.

You can use L<Dist::Surveyor::MakeCpan> to take the list of releases
and create a mini-cpan containing them.

=head1 AUTHOR

Written by Tim Bunce E<lt>Tim.Bunce@pobox.comE<gt>

Maintained by Fomberg Shmuel E<lt>shmuelfomberg@gmail.comE<gt>, Dan Book E<lt>dbook@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2011-2013 by Tim Bunce.
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1;
DIST_SURVEYOR

$fatpacked{"Dist/Surveyor/DB_File.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIST_SURVEYOR_DB_FILE';
package Dist::Surveyor::DB_File;
use strict;
use warnings;
use Storable qw(freeze thaw);

our $VERSION = '0.022';

# Inherit from whichever DBM backend is installed, preferring DB_File.
our @ISA;
if (eval { require DB_File; 1; }) {
    @ISA = ('DB_File');
} elsif (eval { require SDBM_File; 1; }) {
    @ISA = ('SDBM_File');
} else {
    die "Need either DB_file or SDBM_File installed to run";
}

# DB_File can store only strings as values, and not Perl structures
# this small wrapper fixes the problem

# Serialize $val with Storable before handing it to the DBM backend.
# Storable::freeze() croaks on anything that is not a reference, and the
# memoized functions may cache a plain undef (e.g. "release not found"),
# so non-reference values are frozen wrapped in a scalar reference.
# References are stored exactly as before, keeping existing caches readable.
sub STORE {
    my ($self, $key, $val) = @_;
    my $frozen = ref($val) ? freeze($val) : freeze(\$val);
    $self->SUPER::STORE($key, $frozen);
}

# Deserialize a stored value; values that STORE wrapped in a scalar
# reference are unwrapped again.
# NOTE(review): this assumes callers never cache a genuine scalar reference
# (the memoized functions in this file return hashrefs or undef).
sub FETCH {
    my ($self, $key) = @_;
    my $val = $self->SUPER::FETCH($key);
    my $thawed = thaw($val);
    return ref($thawed) eq 'SCALAR' ? $$thawed : $thawed;
}

return 1;
DIST_SURVEYOR_DB_FILE

$fatpacked{"Dist/Surveyor/Inquiry.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIST_SURVEYOR_INQUIRY';
package Dist::Surveyor::Inquiry;
use strict;
use warnings;
use Memoize; # core
use FindBin;
use Fcntl qw(:DEFAULT :flock); # core
use Dist::Surveyor::DB_File; # internal
use HTTP::Tiny;
use JSON::MaybeXS qw(JSON decode_json);
use Scalar::Util qw(looks_like_number); # core
use Data::Dumper;
use version;

our $VERSION = '0.022';

=head1 NAME

Dist::Surveyor::Inquiry - Handling the meta-cpan API access for Dist::Surveyor

=head1 DESCRIPTION

There are a few things that need to be known about this module:

=over

=item *

$metacpan_size - internally defined global to limit the maximum size of
every API call

=item *

$metacpan_calls - internally defined global counting how many API calls happen.

=item *

This module checks $::DEBUG and $::VERBOSE for obvious purposes.

=item *

For initiating cache-on-disk, call Dist::Surveyor::Inquiry->perma_cache()
(this should usually be done, except in a testing environment)

=back

=cut

# We have to limit the number of results when using MetaCPAN::API.
# We can't make it too large as it hurts the server (it preallocates)
# but need to make it large enough for worst case distros (eg eBay-API).
# TODO: switching to the ElasticSearch module, with cursor support, will
# probably avoid the need for this. Else we could dynamically adjust.
our $metacpan_size = 2500;
our $metacpan_calls = 0;

our ($DEBUG, $VERBOSE);
*DEBUG = \$::DEBUG;
*VERBOSE = \$::VERBOSE;

require Exporter;
our @ISA = qw{Exporter};
our @EXPORT = qw{
    get_candidate_cpan_dist_releases
    get_candidate_cpan_dist_releases_fallback
    get_module_versions_in_release
    get_release_info
};

my $agent_string = "dist_surveyor/$VERSION";

# Pick an HTTPS transport once at load time: HTTP::Tiny when it can do SSL,
# otherwise an external wget or curl binary.
my ($ua, $wget, $curl);
if (HTTP::Tiny->can_ssl) {
    $ua = HTTP::Tiny->new(
        agent => $agent_string,
        timeout => 10,
        keep_alive => 1,
    );
} else { # for fatpacking support
    require File::Which;
    require IPC::System::Simple;
    $wget = File::Which::which('wget');
    $curl = File::Which::which('curl');
}

# Perform one HTTPS request and return the response body.
#   $method  - HTTP verb (defaults to GET, upper-cased)
#   $url     - full URL
#   $headers - optional hashref of request headers
#   $content - optional request body
# Dies on transport or HTTP errors, whichever transport is in use.
sub _https_request {
    my ($method, $url, $headers, $content) = @_;
    $headers ||= {};
    $method = uc($method || 'GET');
    if (defined $ua) {
        my %options;
        $options{headers} = $headers if %$headers;
        $options{content} = $content if defined $content;
        my $response = $ua->request($method, $url, \%options);
        unless ($response->{success}) {
            # HTTP::Tiny reports internal/transport failures as status 599
            die "Transport error: $response->{content}\n" if $response->{status} == 599;
            die "HTTP error: $response->{status} $response->{reason}\n";
        }
        return $response->{content};
    } elsif (defined $wget) {
        my @args = ('-q', '-O', '-', '-U', $agent_string, '-T', 10, '--method', $method);
        push @args, '--header', "$_: $headers->{$_}" for keys %$headers;
        push @args, '--body-data', $content if defined $content;
        return IPC::System::Simple::capturex($wget, @args, $url);
    } elsif (defined $curl) {
        # -f makes curl exit non-zero on HTTP errors, so capturex dies here
        # just like the HTTP::Tiny and wget paths do
        my @args = ('-s', '-S', '-f', '-L', '-A', $agent_string, '--connect-timeout', 10, '-X', $method);
        push @args, '-H', "$_: $headers->{$_}" for keys %$headers;
        push @args, '--data-raw', $content if defined $content;
        return IPC::System::Simple::capturex($curl, @args, $url);
    } else {
        die "None of IO::Socket::SSL, wget, or curl are available; cannot make HTTPS requests.";
    }
}

# caching via persistent memoize

my %memoize_cache;
my $locking_file;

=head1 CLASS METHODS

=head2 Dist::Surveyor::Inquiry->perma_cache()

Enable caching to disk of all the MetaCPAN API requests.
This cache can grow to be quite big - 40MB in one case - but it is worth it:
if you need to run this program again, it will run much faster.

=cut

sub perma_cache {
    my $class = shift;
    my $db_generation = 3; # XXX increment on incompatible change
    # cache file is named after the running script, minus any extension
    my $pname = $FindBin::Script;
    $pname =~ s/\..*$//;
    my $memoize_file = "$pname-$db_generation.db";
    # the lock handle is kept in a file-scoped variable so the exclusive
    # lock is held for as long as the module is loaded
    open $locking_file, ">", "$memoize_file.lock"
        or die "Unable to open lock file: $!";
    flock ($locking_file, LOCK_EX) || die "flock: $!";
    tie %memoize_cache => 'Dist::Surveyor::DB_File', $memoize_file, O_CREAT|O_RDWR, 0640
        or die "Unable to use persistent cache: $!";
}

# Memoize the API-facing functions into the shared (optionally tied) cache.
# The sub name is part of the cache key so the functions don't collide.
my @memoize_subs = qw(
    get_candidate_cpan_dist_releases
    get_candidate_cpan_dist_releases_fallback
    get_module_versions_in_release
    get_release_info
);
for my $subname (@memoize_subs) {
    my %memoize_args = (
        SCALAR_CACHE => [ HASH => \%memoize_cache ],
        LIST_CACHE => 'FAULT',
        NORMALIZER => sub { return join("\034", $subname, @_) }
    );
    memoize($subname, %memoize_args);
}

=head1 FUNCTIONS

=head2 get_release_info($author, $release)

Receive release info, such as:

    get_release_info('SEMUELF', 'Dist-Surveyor-0.009')

Returns a hashref containing all that release meta information, returned by
C<https://fastapi.metacpan.org/v1/release/$author/$release>
(but not information on the files inside the module)

Dies on HTTP error, and warns on empty response.
=cut

sub get_release_info {
    my ($author, $release) = @_;
    $metacpan_calls++;
    my $response = _https_request(GET => "https://fastapi.metacpan.org/v1/release/$author/$release");
    my $release_data = decode_json $response;
    if (!$release_data or !$release_data->{release}) {
        warn "Can't find release details for $author/$release - SKIPPED!\n";
        return; # XXX could fake some of $release_data instead
    }
    return $release_data->{release};
}

=head2 get_candidate_cpan_dist_releases($module, $version, $file_size)

Return a hashref containing all the releases that contain this module
(with the specific version and file size combination)

The keys are the release name (i.e. 'Dist-Surveyor-0.009') and the value
is a hashref containing release information and file information:

    'Dist-Surveyor-0.009' => {
        # release information
        'date' => '2013-02-20T06:48:35.000Z',
        'version' => '0.009',
        'author' => 'SEMUELF',
        'version_numified' => '0.009',
        'release' => 'Dist-Surveyor-0.009',
        'distribution' => 'Dist-Surveyor',
        'version_obj' => <version object>,

        # File information
        'path' => 'lib/Dist/Surveyor/DB_File.pm',
        'stat.mtime' => 1361342736,
        'module.version' => '0.009',
        'module.version_numified' => '0.009',
    }

=cut

sub get_candidate_cpan_dist_releases {
    my ($module, $version, $file_size) = @_;

    my $funcstr = "get_candidate_cpan_dist_releases($module, $version, $file_size)";

    my $version_qual = _prepare_version_query(0, $version);

    my @and_quals = (
        {"term" => {"module.name" => $module }},
        (@$version_qual > 1 ? { "bool" => { "should" => $version_qual } } : $version_qual->[0]),
    );
    # a false $file_size (0/undef) means "don't filter on size"
    push @and_quals, {"term" => {"stat.size" => $file_size }}
        if $file_size;

    # XXX doesn't cope with odd cases like
    # http://explorer.metacpan.org/?url=/module/MLEHMANN/common-sense-3.4/sense.pm.PL
    return _search_candidate_files($funcstr, \@and_quals);
}

=head2 get_candidate_cpan_dist_releases_fallback($module, $version)

Similar to get_candidate_cpan_dist_releases, but called when
get_candidate_cpan_dist_releases fails to find a matching file and release.

Maybe the file was tampered with somehow, so the file size does not match anymore.

=cut

sub get_candidate_cpan_dist_releases_fallback {
    my ($module, $version) = @_;

    # fallback to look for distro of the same name as the module
    # for odd cases like
    # http://explorer.metacpan.org/?url=/module/MLEHMANN/common-sense-3.4/sense.pm.PL
    (my $distname = $module) =~ s/::/-/g;

    my $version_qual = _prepare_version_query(1, $version);

    my @and_quals = (
        {"term" => {"distribution" => $distname }},
        (@$version_qual > 1 ? { "bool" => { "should" => $version_qual } } : $version_qual->[0]),
    );

    # XXX doesn't cope with odd cases like
    return _search_candidate_files(
        "get_candidate_cpan_dist_releases_fallback($module, $version)", \@and_quals);
}

# Shared back end of the two candidate lookups above: POST a file search
# with the given filter clauses and return the processed
# { release name => fields } hashref. Counts as one MetaCPAN call.
sub _search_candidate_files {
    my ($funcstr, $and_quals) = @_;

    $metacpan_calls++;

    my $query = {
        "size" => $metacpan_size,
        "query" => { "bool" => {
            "filter" => $and_quals,
        }},
        "fields" => [qw(
            release _parent author version version_numified module.version
            module.version_numified date stat.mtime distribution path
        )]
    };

    my $response = _https_request(POST => 'https://fastapi.metacpan.org/v1/file',
        { 'Content-Type' => 'application/json;charset=UTF-8' },
        JSON->new->utf8->canonical->encode($query),
    );
    return _process_response($funcstr, $response);
}

# Build the list of "term" clauses matching a version.
#   $is_fallback - true: match the release-level version fields,
#                  false: match the module-level (module.version*) fields
#   $version     - version string or version object (undef is treated as 0)
# Returns an arrayref of { term => { field => value } } hashes.
sub _prepare_version_query {
    my ($is_fallback, $version) = @_;
    $version = 0 if not defined $version; # XXX
    my ($v_key, $num_key) =
        $is_fallback
        ? qw{ version version_numified }
        : qw{ module.version module.version_numified };

    # timbunce: So, the current situation is that: version_numified is a float
    # holding version->parse($raw_version)->numify, and version is a string
    # also holding version->parse($raw_version)->numify at the moment, and
    # that'll change to ->stringify at some point. Is that right now?
    # mo: yes, I already patched the indexer, so new releases are already
    # indexed ok, but for older ones I need to reindex cpan
    my $v = (ref $version && $version->isa('version')) ? $version : version->parse($version);
    # the hash de-duplicates the spellings; sorting keeps the generated
    # query stable from run to run
    my %v = map { $_ => 1 } "$version", $v->stringify, $v->numify;
    my @spellings = sort keys %v;
    my @version_qual;
    push @version_qual, { term => { $v_key => $_ } }
        for @spellings;
    push @version_qual, { term => { $num_key => $_ }}
        for grep { looks_like_number($_) } @spellings;
    return \@version_qual;
}

# Turn a raw JSON search response into { "Dist-Name-Version" => fields }.
# Dies when the response carries no hit list or hits the size limit.
sub _process_response {
    my ($funcname, $response) = @_;

    my $results = decode_json $response;

    # an error document from the API has no hits.hits array; say so clearly
    # instead of failing on an undefined array dereference
    my $hits = (ref $results eq 'HASH' && ref $results->{hits} eq 'HASH')
        ? $results->{hits}{hits}
        : undef;
    die "$funcname: unexpected response (no hits): $response\n"
        unless ref $hits eq 'ARRAY';
    die "$funcname: too many results (>$metacpan_size)"
        if @$hits >= $metacpan_size;
    warn "$funcname: ".Dumper($results)
        if grep { not $_->{fields}{release} } @$hits; # XXX temp, seen once but not since

    # filter out perl-like releases and files that won't get installed;
    # hits without a release name are dropped too, as they cannot be keyed
    @$hits =
        grep { $_->{fields}{path} !~ m!^(?:t|xt|tests?|inc|samples?|ex|examples?|bak|local-lib)\b! }
        grep { $_->{fields}{release} !~ /^(perl|ponie|parrot|kurila|SiePerl-)/ }
        grep { $_->{fields}{release} }
        @$hits;

    for my $hit (@$hits) {
        $hit->{release_id} = delete $hit->{_parent};
        # add version_obj for convenience (will fail and be undef for releases like "0.08124-TRIAL")
        $hit->{fields}{version_obj} = eval { version->parse($hit->{fields}{version}) };
    }

    # we'll return { "Dist-Name-Version" => { details }, ... }
    my %dists = map { $_->{fields}{release} => $_->{fields} } @$hits;
    warn "$funcname: @{[ sort keys %dists ]}\n"
        if $VERBOSE;

    return \%dists;
}

=head2 get_module_versions_in_release($author, $release)

Receive release info, such as:

    get_module_versions_in_release('SEMUELF', 'Dist-Surveyor-0.009')

And returns a hashref, that contains one entry for each module that exists
in the release.
Module information is in the format:

    'Dist::Surveyor' => {
        'version' => '0.009',
        'name' => 'Dist::Surveyor',
        'path' => 'lib/Dist/Surveyor.pm',
        'size' => 43879
    },

This function can be called for all sorts of releases that are only vague
possibilities and aren't actually installed, so generally it's quiet.

=cut

sub get_module_versions_in_release {
    my ($author, $release) = @_;

    $metacpan_calls++;
    my $results = eval {
        my $query = {
            "size" => $metacpan_size,
            "query" => { "bool" => {
                "filter" => [
                    {"term" => {"release" => $release }},
                    {"term" => {"author" => $author }},
                    {"term" => {"mime" => "text/x-script.perl-module"}},
                ],
            }},
            "fields" => ["path","name","stat.size"],
            "inner_hits" => {"module" => {"path" => {"module" => {}}}},
        };
        my $response = _https_request(POST => 'https://fastapi.metacpan.org/v1/file',
            { 'Content-Type' => 'application/json;charset=UTF-8' },
            JSON->new->utf8->canonical->encode($query),
        );
        decode_json $response;
    };
    if (not $results) {
        # request or decoding failed: stay quiet-ish and report "no modules"
        warn "Failed get_module_versions_in_release for $author/$release: $@";
        return {};
    }
    my $hits = $results->{hits}{hits};
    die "get_module_versions_in_release($author, $release): too many results"
        if @$hits >= $metacpan_size;

    my %modules_in_release;
    for my $hit (@$hits) {
        my $path = $hit->{fields}{path};

        # XXX try to ignore files that won't get installed
        # XXX should use META noindex!
        if ($path =~ m!^(?:t|xt|tests?|inc|samples?|ex|examples?|bak|local-lib)\b!) {
            warn "$author/$release: ignored non-installed module $path\n"
                if $DEBUG;
            next;
        }

        my $size = $hit->{fields}{"stat.size"};
        # the file's base name is the same for every package in the file
        (my $filebasename = $hit->{fields}{name}) =~ s/\.pm$//;

        # files can contain more than one package ('module')
        my $rel_mods = $hit->{inner_hits}{module}{hits}{hits} || [];
        for my $inner_hit (@$rel_mods) { # actually packages in the file
            my $mod = $inner_hit->{_source};

            # Some files may contain multiple packages. We want to ignore
            # all except the one that matches the name of the file.
            # We use a fairly loose (but still very effective) test because we
            # can't rely on $path including the full package name.
            # \Q..\E: the file name is data, not a pattern.
            if ($mod->{name} !~ m/\b\Q$filebasename\E$/) {
                warn "$author/$release: ignored $mod->{name} in $path\n"
                    if $DEBUG;
                next;
            }

            # warn if package previously seen in this release
            # with a different version or file size
            if (my $prev = $modules_in_release{$mod->{name}}) {
                my $version_obj = eval { version->parse($mod->{version}) };
                die "$author/$release: $mod->{name} $mod->{version}: $@" if $@;

                if ($VERBOSE) {
                    # XXX could add a show-only-once cache here
                    my $msg = "$mod->{name} $mod->{version} ($size) seen in $path after $prev->{path} $prev->{version} ($prev->{size})";
                    warn "$release: $msg\n"
                        if ($version_obj != version->parse($prev->{version}) or $size != $prev->{size});
                }
            }

            # keep result small as Storable thawing this is major runtime cost
            # (specifically we avoid storing a version_obj here)
            $modules_in_release{$mod->{name}} = {
                name => $mod->{name},
                path => $path,
                version => $mod->{version},
                size => $size,
            };
        }
    }

    warn "\n$author/$release contains: @{[ map { qq($_->{name} $_->{version}) } values %modules_in_release ]}\n"
        if $DEBUG;

    return \%modules_in_release;
}

=head1 License, Copyright

Please see L<Dist::Surveyor> for details

=cut

1;
DIST_SURVEYOR_INQUIRY

$fatpacked{"Dist/Surveyor/MakeCpan.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'DIST_SURVEYOR_MAKECPAN';
package Dist::Surveyor::MakeCpan;

use strict;
use warnings;
use Carp; # core
use Data::Dumper; # core
use File::Path; # core
use CPAN::DistnameInfo;
use File::Basename qw{dirname}; # core
use HTTP::Tiny;
use Dist::Surveyor::Inquiry;
use List::Util qw(max); # core
use version; # core; add_release calls version->parse

our $VERSION = '0.022';

our $verbose;
*verbose = \$::VERBOSE;

# Constructor: creates "$cpan_dir/modules" and "$cpan_dir/$progname", then
# opens releases.txt and _data_dump.perl.gz inside the latter for writing.
# Croaks if either file cannot be opened.
sub new {
    my ($class, $cpan_dir, $progname, $irregularities) = @_;

    require Compress::Zlib;
    mkpath("$cpan_dir/modules");

    # --- write extra data files that may be useful XXX may change
    # XXX these don't all (yet?) merge with existing data
    my $survey_datadump_dir = "$cpan_dir/$progname";
    mkpath($survey_datadump_dir);

    # Write list of releases, like default stdout
    my $releases_file = "$survey_datadump_dir/releases.txt";
    open my $rel_fh, ">", $releases_file
        or croak "Cannot open $releases_file for writing: $!";

    # dump the primary result data for additional info and debugging
    my $gzwrite = Compress::Zlib::gzopen("$survey_datadump_dir/_data_dump.perl.gz", 'wb')
        or croak "Cannot open $survey_datadump_dir/_data_dump.perl.gz for writing: " . $Compress::Zlib::gzerrno;
    $gzwrite->gzwrite("[\n");

    my $self = {
        errors => 0,
        cpan_dir => $cpan_dir,
        irregularities => $irregularities,
        pkg_ver_rel => {}, # for 02packages
        progname => $progname,
        rel_fh => $rel_fh,
        gzwrite => $gzwrite,
    };
    return bless $self, $class;
}

# Finish the mini-CPAN: rewrite the 02packages index (existing lines merged
# with the packages accumulated by add_release), write token_packages.txt
# and close the data files. Returns the error count.
sub close {
    my $self = shift;

    # --- write 02packages file

    my $pkg_lines = _readpkgs($self->{cpan_dir});
    my %packages;
    for my $line (@$pkg_lines, map { $_->{line} } values %{ $self->{pkg_ver_rel} }) {
        my ($pkg) = split(/\s+/, $line, 2);
        if ($packages{$pkg} and $packages{$pkg} ne $line) {
            warn "Old $packages{$pkg}\nNew $line\n" if $verbose;
        }
        $packages{$pkg} = $line;
    };
    _writepkgs($self->{cpan_dir}, [ sort { lc($a) cmp lc($b) } values %packages ] );

    # Write list of token packages - each should match only one release.
    # This makes it _much_ faster to do installs via cpanm because it
    # can skip the modules it knows are installed (whereas using a list of
    # distros it has to reinstall _all_ of them every time).
    # XXX maybe add as a separate option: "--mainpkgs mainpkgs.lst"
    my %dist_packages;
    while ( my ($pkg, $line) = each %packages) {
        my $distpath = (split /\s+/, $line)[2];
        $dist_packages{$distpath}{$pkg}++;
    }

    my %token_package;
    my %token_package_pri = ( # alter install order for some modules
        'Module::Build' => 100, # should be near first
        Moose => 50,

        # install distros that use Module::Install late so their dependencies
        # have already been resolved (else they try to fetch them directly,
        # bypassing our cpanm --mirror-only goal)
        'Olson::Abbreviations' => -90,

        # distros with special needs
        'Term::ReadKey' => -100, # tests hang if run in background
    );
    for my $distpath (sort keys %dist_packages) {
        my $dp = $dist_packages{$distpath};
        my $di = CPAN::DistnameInfo->new($distpath);
        #warn Dumper([ $distpath, $di->dist, $di]);
        (my $token_pkg = $di->dist) =~ s/-/::/g;
        if (!$dp->{$token_pkg}) {
            if (my $keypkg = $self->{irregularities}->{$di->dist}) {
                $token_pkg = $keypkg;
            }
            else {
                # XXX not good - may pick a dummy test package
                # sorted so the pick does not depend on hash ordering
                $token_pkg = (grep { $_ } sort keys %$dp)[0] || $token_pkg;
                warn "Picked $token_pkg as token package for ".$di->distvname."\n";
            }
        }
        $token_package{$token_pkg} = $token_package_pri{$token_pkg} || 0;
    }

    my @main_pkgs = sort { $token_package{$b} <=> $token_package{$a} or $a cmp $b } keys %token_package;
    my $token_file = join('/', $self->{cpan_dir}, $self->{progname}, "token_packages.txt");
    open my $key_pkg_fh, ">", $token_file
        or croak "Cannot open $token_file for writing: $!";
    print $key_pkg_fh "$_\n" for @main_pkgs;
    # CORE:: because this package defines its own close()
    CORE::close($key_pkg_fh)
        or warn "Error closing $token_file: $!\n";

    CORE::close($self->{rel_fh})
        or warn "Error closing releases.txt: $!\n";

    $self->{gzwrite}->gzwrite("]\n");
    $self->{gzwrite}->gzclose;

    warn $self->{cpan_dir}." updated.\n";
    return $self->{errors};
}

# Add one release ($ri hashref: download_url, author, name, distribution and
# optionally url): mirror its archive below authors/id/, record its packages
# for the 02packages index and append it to the data files.
sub add_release {
    my ($self, $ri) = @_;

    # --- get the file

    my $main_url = $ri->{download_url};
    my $di = distname_info_from_url($main_url);
    my $pathfile = "authors/id/".$di->pathname;
    my $destfile = $self->{cpan_dir}."/$pathfile";
    mkpath(dirname($destfile));

    my @urls = ($main_url);
    for my $mirror ('http://backpan.perl.org') {
        push @urls, "$mirror/$pathfile";
    }

    my $mirror_status;
    my $ua = HTTP::Tiny->new(agent => "dist_surveyor/$VERSION");
    for my $url (@urls) {
        $mirror_status = $ua->mirror($url, $destfile);
        last if $mirror_status->{success};
    }
    if (!$mirror_status->{success}) {
        # 599 is HTTP::Tiny's internal-error status; the text is in {content}
        my $err = $mirror_status->{status} == 599 ? $mirror_status->{content} : $mirror_status->{status};
        my $msg = "Error $err mirroring $main_url";
        if (-f $destfile) {
            warn "$msg - using existing file\n";
        }
        else {
            # better to keep going and add the packages to the index
            # than abort at this stage due to network/mirror problems
            # the user can drop the files in later
            warn "$msg - continuing, ADD FILE MANUALLY!\n";
            $self->{errors}++;
        }
    }
    else {
        warn "$mirror_status->{status} $main_url\n" if $verbose;
    }

    my $mods_in_rel = get_module_versions_in_release($ri->{author}, $ri->{name});

    if (!keys %$mods_in_rel) { # XXX hack for common::sense
        (my $dist_as_pkg = $ri->{distribution}) =~ s/-/::/g;
        warn "$ri->{author}/$ri->{name} has no modules! Adding fake module $dist_as_pkg ".$di->version."\n";
        $mods_in_rel->{$dist_as_pkg} = {
            name => $dist_as_pkg,
            version => $di->version,
            # undef when the version cannot be parsed (e.g. "-TRIAL" suffix)
            version_obj => scalar eval { version->parse($di->version) },
        };
    }

    # --- accumulate package info for 02packages file

    for my $pkg (sort keys %$mods_in_rel ) {
        # pi => { name=>, version=>, version_obj=> }
        my $pi = $mods_in_rel->{$pkg};

        # for selecting which dist a package belongs to
        # XXX should factor in authorization status
        my $p_r_match_score = p_r_match_score($pkg, $ri);

        if (my $pvr = $self->{pkg_ver_rel}->{$pkg}) {
            # already seen same package name in different distribution
            if ($p_r_match_score < $pvr->{p_r_match_score}) {
                warn "$pkg seen in $pvr->{ri}{name} so ignoring one in $ri->{name}\n";
                next;
            }
            warn "$pkg seen in $pvr->{ri}{name} - now overridden by $ri->{name}\n";
        }

        my $line = _fmtmodule($pkg, $di->pathname, $pi->{version});
        $self->{pkg_ver_rel}->{$pkg} = { line => $line, pi => $pi, ri => $ri, p_r_match_score => $p_r_match_score };
    }

    printf { $self->{rel_fh} } "%s\n", ( exists $ri->{url} ? $ri->{url} : "?url" );

    $self->{gzwrite}->gzwrite(Dumper($ri));
    $self->{gzwrite}->gzwrite(",");
}

# Number of leading name components shared by a package name and a release
# name, e.g. 'Foo::Bar' against 'Foo-Bar-1.0' scores 2.
sub p_r_match_score {
    my ($pkg_name, $ri) = @_;
    # \W+ so that the two colons of "::" count as ONE separator; splitting on
    # a single \W leaves an empty element after the first component, which
    # caps the score of every multi-part package name at 1.
    my @p = split /\W+/, $pkg_name;
    my @r = split /\W+/, $ri->{name};
    for my $i (0..max(scalar @p, scalar @r)) {
        return $i if not defined $p[$i]
                  or not defined $r[$i]
                  or $p[$i] ne $r[$i]
    }
    die; # unreached
}

# copied from CPAN::Mini::Inject and hacked

# Read the body lines (header skipped) of an existing 02packages file.
# Returns an arrayref of chomped lines, empty if the file does not exist.
sub _readpkgs {
    my ($cpandir) = @_;

    my $packages_file = $cpandir.'/modules/02packages.details.txt.gz';
    return [] if not -f $packages_file;

    my $gzread = Compress::Zlib::gzopen($packages_file, 'rb')
        or croak "Cannot open $packages_file: " . $Compress::Zlib::gzerrno . "\n";

    my $inheader = 1;
    my @packages;
    my $package;

    # gzreadline returns 0 at EOF and -1 on error: stop on both
    while ( $gzread->gzreadline( $package ) > 0 ) {
        if ( $inheader ) {
            # the header ends at the first blank line
            $inheader = 0 unless $package =~ /\S/;
            next;
        }
        chomp $package;
        push @packages, $package;
    }

    $gzread->gzclose;

    return \@packages;
}

# Write the 02packages file: header, blank line, then the lines in $pkgs.
sub _writepkgs {
    my ($cpandir, $pkgs) = @_;

    my $packages_file = $cpandir.'/modules/02packages.details.txt.gz';
    my $gzwrite = Compress::Zlib::gzopen($packages_file, 'wb')
        or croak "Cannot open $packages_file for writing: " . $Compress::Zlib::gzerrno;

    $gzwrite->gzwrite( "File: 02packages.details.txt\n" );
    $gzwrite->gzwrite(
        "URL: http://www.perl.com/CPAN/modules/02packages.details.txt\n"
    );
    $gzwrite->gzwrite(
        'Description: Package names found in directory $CPAN/authors/id/'
        . "\n" );
    $gzwrite->gzwrite( "Columns: package name, version, path\n" );
    $gzwrite->gzwrite(
        "Intended-For: Automated fetch routines, namespace documentation.\n"
    );
    $gzwrite->gzwrite( "Written-By: $0 0.001\n" ); # XXX TODO
    $gzwrite->gzwrite( "Line-Count: " . scalar( @$pkgs ) . "\n" );
    # Last-Updated: Sat, 19 Mar 2005 19:49:10 GMT
    my @date = split( /\s+/, scalar( gmtime ) );
    $gzwrite->gzwrite( "Last-Updated: $date[0], $date[2] $date[1] $date[4] $date[3] GMT\n\n" );

    $gzwrite->gzwrite( "$_\n" ) for ( @$pkgs );

    $gzwrite->gzclose;
}

# Strip everything up to and including "authors/id/" from $url (warns when
# that is absent) and return a CPAN::DistnameInfo object for the remainder.
sub distname_info_from_url {
    my ($url) = @_;
    $url =~ s{.* \b authors/id/ }{}x
        or warn "No authors/ in '$url'\n";
    my $di = CPAN::DistnameInfo->new($url);
    return $di;
}

# Format one 02packages line: module name padded so that name plus version
# fill 38 columns when they fit, then the version and the file path.
sub _fmtmodule {
    my ( $module, $file, $version ) = @_;
    $version = "undef" if not defined $version;
    my $fw = 38 - length $version;
    $fw = length $module if $fw < length $module;
    return sprintf "%-${fw}s %s %s", $module, $version, $file;
}

# Number of releases whose archive could neither be mirrored nor found on disk.
sub errors {
    my $self = shift;
    return $self->{errors};
}

1;

=head1 NAME

Dist::Surveyor::MakeCpan - Create a Mini-CPAN for the surveyed modules

=head1 SYNOPSIS

    use Dist::Surveyor::MakeCpan;
    my $cpan = Dist::Surveyor::MakeCpan->new(
        $cpan_dir, $progname, $irregularities);
    foreach my $rel (@releases) {
        $cpan->add_release($rel);
    }
$cpan->close(); say "There were ", $cpan->errors(), " errors"; =head1 DESCRIPTION Create a mini-CPAN for the surveyed modules, so you will be able to re-install the same setup on a new computer. =head1 CONSTRUCTOR my $cpan = Dist::Surveyor::MakeCpan->new( $cpan_dir, $progname, $irregularities); =over =item $cpan_dir The directory where the mini-cpan will be created =item $progname The name of the running program - will be used to create a subdirectory inside $cpan_dir, that will contain debug information. =item $irregularities A hashref that maps irregularly named distributions to the package that represents them, e.g. 'libwww-perl' => 'LWP'. =back =head1 METHODS =head2 $cpan->add_release($rel) Add one release to the mini-cpan. The $rel should be a hashref containing the following fields: $rel = { download_url => 'http://cpan.metacpan.org/authors/id/S/SE/SEMUELF/Dist-Surveyor-0.009.tar.gz', url => 'authors/id/S/SE/SEMUELF/Dist-Surveyor-0.009.tar.gz', author => 'SEMUELF', name => 'Dist-Surveyor-0.009', distribution => 'Dist-Surveyor', } =head2 $cpan->close() Close the mini-CPAN, and close all the debug data dump files. =head1 License, Copyright Please see L<Dist::Surveyor> for details =cut DIST_SURVEYOR_MAKECPAN $fatpacked{"Exporter.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER'; package Exporter; require 5.006; # Be lean.
#use strict;
#no strict 'refs';

# Package-level switches and state. import() hands the whole job to
# Exporter::Heavy when $Verbose or $Debug is true.
our $Debug = 0;
our $ExportLevel = 0;   # passed to caller() in import() to find the importing package
our $Verbose ||= 0;
our $VERSION = '5.72';
our (%Cache);           # per-package cache of exportable symbol names

# Load Exporter::Heavy and return a reference to its "heavy_<name>" sub,
# where <name> is the unqualified name of the sub that called as_heavy().
sub as_heavy {
    require Exporter::Heavy;
    # Unfortunately, this does not work if the caller is aliased as *name = \&foo
    # Thus the need to create a lot of identical subroutines
    my $c = (caller(1))[3];
    $c =~ s/.*:://;
    \&{"Exporter::Heavy::heavy_$c"};
}

# Delegates to Exporter::Heavy::heavy_export with the current @_.
sub export {
    goto &{as_heavy()};
}

# Default import method. Plain sub names are aliased into the calling
# package right here; everything else is passed on to export().
sub import {
    my $pkg = shift;
    my $callpkg = caller($ExportLevel);

    # "use Exporter 'import'": give the caller this import() and stop
    if ($pkg eq "Exporter" and @_ and $_[0] eq "import") {
        *{$callpkg."::import"} = \&import;
        return;
    }

    # We *need* to treat @{"$pkg\::EXPORT_FAIL"} since Carp uses it :-(
    my $exports = \@{"$pkg\::EXPORT"};
    # But, avoid creating things if they don't exist, which saves a couple of
    # hundred bytes per package processed.
    my $fail = ${$pkg . '::'}{EXPORT_FAIL} && \@{"$pkg\::EXPORT_FAIL"};
    return export $pkg, $callpkg, @_
        if $Verbose or $Debug or $fail && @$fail > 1;
    my $export_cache = ($Cache{$pkg} ||= {});
    # $args is the number of names requested; with none, @_ becomes @EXPORT
    my $args = @_ or @_ = @$exports;

    if ($args and not %$export_cache) {
        # fill the cache; note that s/^&// edits @EXPORT / @EXPORT_OK in place
        s/^&//, $export_cache->{$_} = 1
            foreach (@$exports, @{"$pkg\::EXPORT_OK"});
    }
    my $heavy;
    # Try very hard not to use {} and hence have to enter scope on the foreach
    # We bomb out of the loop with last as soon as heavy is set.
    if ($args or $fail) {
        ($heavy = (/\W/ or $args and not exists $export_cache->{$_}
                   or $fail and @$fail and $_ eq $fail->[0])) and last
            foreach (@_);
    } else {
        ($heavy = /\W/) and last
            foreach (@_);
    }
    return export $pkg, $callpkg, ($args ? @_ : ()) if $heavy;
    # while aliasing, route warnings through Carp::carp unless a handler is set
    local $SIG{__WARN__} =
        sub {require Carp; &Carp::carp} if not $SIG{__WARN__};
    # shortcut for the common case of no type character
    *{"$callpkg\::$_"} = \&{"$pkg\::$_"} foreach @_;
}

# Default methods

# Default export_fail: returns the symbol names it was given, unchanged.
sub export_fail {
    my $self = shift;
    @_;
}

# Unfortunately, caller(1)[3] "does not work" if the caller is aliased as
# *name = \&foo. Thus the need to create a lot of identical subroutines
# Otherwise we could have aliased them to export().
# Each stub below jumps to the sub returned by as_heavy(), keeping the
# current @_. They are separate subs on purpose: as_heavy() picks its target
# from the name of the sub that called it.
sub export_to_level {
    goto &{as_heavy()};
}

sub export_tags {
    goto &{as_heavy()};
}

sub export_ok_tags {
    goto &{as_heavy()};
}

sub require_version {
    goto &{as_heavy()};
}

1;
__END__

=head1 NAME

Exporter - Implements default import method for modules

=head1 SYNOPSIS

In module F<YourModule.pm>:

    package YourModule;
    require Exporter;
    @ISA = qw(Exporter);
    @EXPORT_OK = qw(munge frobnicate); # symbols to export on request

or

    package YourModule;
    use Exporter 'import'; # gives you Exporter's import() method directly
    @EXPORT_OK = qw(munge frobnicate); # symbols to export on request

In other files which wish to use C<YourModule>:

    use YourModule qw(frobnicate); # import listed symbols
    frobnicate ($left, $right) # calls YourModule::frobnicate

Take a look at L</Good Practices> for some variants
you will like to use in modern Perl code.

=head1 DESCRIPTION

The Exporter module implements an C<import> method which allows a module
to export functions and variables to its users' namespaces. Many modules
use Exporter rather than implementing their own C<import> method because
Exporter provides a highly flexible interface, with an implementation optimised
for the common case.

Perl automatically calls the C<import> method when processing a
C<use> statement for a module. Modules and C<use> are documented
in L<perlfunc> and L<perlmod>. Understanding the concept of
modules and how the C<use> statement operates is important to
understanding the Exporter.

=head2 How to Export

The arrays C<@EXPORT> and C<@EXPORT_OK> in a module hold lists of
symbols that are going to be exported into the users name space by
default, or which they can request to be exported, respectively. The
symbols can represent functions, scalars, arrays, hashes, or typeglobs.
The symbols must be given by full name with the exception that the
ampersand in front of a function is optional, e.g.
@EXPORT = qw(afunc $scalar @array); # afunc is a function @EXPORT_OK = qw(&bfunc %hash *typeglob); # explicit prefix on &bfunc If you are only exporting function names it is recommended to omit the ampersand, as the implementation is faster this way. =head2 Selecting What to Export Do B<not> export method names! Do B<not> export anything else by default without a good reason! Exports pollute the namespace of the module user. If you must export try to use C<@EXPORT_OK> in preference to C<@EXPORT> and avoid short or common symbol names to reduce the risk of name clashes. Generally anything not exported is still accessible from outside the module using the C<YourModule::item_name> (or C<< $blessed_ref->method >>) syntax. By convention you can use a leading underscore on names to informally indicate that they are 'internal' and not for public use. (It is actually possible to get private functions by saying: my $subref = sub { ... }; $subref->(@args); # Call it as a function $obj->$subref(@args); # Use it as a method However if you use them for methods it is up to you to figure out how to make inheritance work.) As a general rule, if the module is trying to be object oriented then export nothing. If it's just a collection of functions then C<@EXPORT_OK> anything but use C<@EXPORT> with caution. For function and method names use barewords in preference to names prefixed with ampersands for the export lists. Other module design guidelines can be found in L<perlmod>. =head2 How to Import In other files which wish to use your module there are three basic ways for them to load your module and import its symbols: =over 4 =item C<use YourModule;> This imports all the symbols from YourModule's C<@EXPORT> into the namespace of the C<use> statement. =item C<use YourModule ();> This causes perl to load your module but does not import any symbols. =item C<use YourModule qw(...);> This imports only the symbols listed by the caller into their namespace. All listed symbols must be in your C<@EXPORT> or C<@EXPORT_OK>, else an error occurs.
The advanced export features of Exporter are accessed like this, but with list entries that are syntactically distinct from symbol names. =back Unless you want to use its advanced features, this is probably all you need to know to use Exporter. =head1 Advanced Features =head2 Specialised Import Lists If any of the entries in an import list begins with !, : or / then the list is treated as a series of specifications which either add to or delete from the list of names to import. They are processed left to right. Specifications are in the form: [!]name This name only [!]:DEFAULT All names in @EXPORT [!]:tag All names in $EXPORT_TAGS{tag} anonymous array [!]/pattern/ All names in @EXPORT and @EXPORT_OK which match A leading ! indicates that matching names should be deleted from the list of names to import. If the first specification is a deletion it is treated as though preceded by :DEFAULT. If you just want to import extra names in addition to the default set you will still need to include :DEFAULT explicitly. e.g., F<Module.pm> defines: @EXPORT = qw(A1 A2 A3 A4 A5); @EXPORT_OK = qw(B1 B2 B3 B4 B5); %EXPORT_TAGS = (T1 => [qw(A1 A2 B1 B2)], T2 => [qw(A1 A2 B3 B4)]); Note that you cannot use tags in @EXPORT or @EXPORT_OK. Names in EXPORT_TAGS must also appear in @EXPORT or @EXPORT_OK. An application using Module can say something like: use Module qw(:DEFAULT :T2 !B3 A3); Other examples include: use Socket qw(!/^[AP]F_/ !SOMAXCONN !SOL_SOCKET); use POSIX qw(:errno_h :termios_h !TCSADRAIN !/^EXIT/); Remember that most patterns (using //) will need to be anchored with a leading ^, e.g., C</^EXIT/> rather than C</EXIT/>. You can say C<BEGIN { $Exporter::Verbose=1 }> to see how the specifications are being processed and what is actually being imported into modules. =head2 Exporting Without Using Exporter's import Method Exporter has a special method, 'export_to_level' which is used in situations where you can't directly call Exporter's import method.
The export_to_level method looks like: MyPackage->export_to_level( $where_to_export, $package, @what_to_export ); where C<$where_to_export> is an integer telling how far up the calling stack to export your symbols, and C<@what_to_export> is an array telling what symbols *to* export (usually this is C<@_>). The C<$package> argument is currently unused. For example, suppose that you have a module, A, which already has an import function: package A; @ISA = qw(Exporter); @EXPORT_OK = qw($b); sub import { $A::b = 1; # not a very useful import method } and you want to Export symbol C<$A::b> back to the module that called package A. Since Exporter relies on the import method to work, via inheritance, as it stands Exporter::import() will never get called. Instead, say the following: package A; @ISA = qw(Exporter); @EXPORT_OK = qw($b); sub import { $A::b = 1; A->export_to_level(1, @_); } This will export the symbols one level 'above' the current package - ie: to the program or module that used package A. Note: Be careful not to modify C<@_> at all before you call export_to_level - or people using your package will get very unexplained results! =head2 Exporting Without Inheriting from Exporter By including Exporter in your C<@ISA> you inherit an Exporter's import() method but you also inherit several other helper methods which you probably don't want. To avoid this you can do: package YourModule; use Exporter qw(import); which will export Exporter's own import() method into YourModule. Everything will work as before but you won't need to include Exporter in C<@YourModule::ISA>. Note: This feature was introduced in version 5.57 of Exporter, released with perl 5.8.3. =head2 Module Version Checking The Exporter module will convert an attempt to import a number from a module into a call to C<< $module_name->VERSION($value) >>. This can be used to validate that the version of the module being used is greater than or equal to the required version. 
For historical reasons, Exporter supplies a C<require_version> method that simply delegates to C<VERSION>. Originally, before C<UNIVERSAL::VERSION> existed, Exporter would call C<require_version>. Since the C<UNIVERSAL::VERSION> method treats the C<$VERSION> number as a simple numeric value it will regard version 1.10 as lower than 1.9. For this reason it is strongly recommended that you use numbers with at least two decimal places, e.g., 1.09. =head2 Managing Unknown Symbols In some situations you may want to prevent certain symbols from being exported. Typically this applies to extensions which have functions or constants that may not exist on some systems. The names of any symbols that cannot be exported should be listed in the C<@EXPORT_FAIL> array. If a module attempts to import any of these symbols the Exporter will give the module an opportunity to handle the situation before generating an error. The Exporter will call an export_fail method with a list of the failed symbols: @failed_symbols = $module_name->export_fail(@failed_symbols); If the C<export_fail> method returns an empty list then no error is recorded and all the requested symbols are exported. If the returned list is not empty then an error is generated for each symbol and the export fails. The Exporter provides a default C<export_fail> method which simply returns the list unchanged. Uses for the C<export_fail> method include giving better error messages for some symbols and performing lazy architectural checks (put more symbols into C<@EXPORT_FAIL> by default and then take them out if someone actually tries to use them and an expensive check shows that they are usable on that platform).
=head2 Tag Handling Utility Functions Since the symbols listed within C<%EXPORT_TAGS> must also appear in either C<@EXPORT> or C<@EXPORT_OK>, two utility functions are provided which allow you to easily add tagged sets of symbols to C<@EXPORT> or C<@EXPORT_OK>: %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]); Exporter::export_tags('foo'); # add aa, bb and cc to @EXPORT Exporter::export_ok_tags('bar'); # add aa, cc and dd to @EXPORT_OK Any names which are not tags are added to C<@EXPORT> or C<@EXPORT_OK> unchanged but will trigger a warning (with C<-w>) to avoid misspelt tags names being silently added to C<@EXPORT> or C<@EXPORT_OK>. Future versions may make this a fatal error. =head2 Generating Combined Tags If several symbol categories exist in C<%EXPORT_TAGS>, it's usually useful to create the utility ":all" to simplify "use" statements. The simplest way to do this is: %EXPORT_TAGS = (foo => [qw(aa bb cc)], bar => [qw(aa cc dd)]); # add all the other ":class" tags to the ":all" class, # deleting duplicates { my %seen; push @{$EXPORT_TAGS{all}}, grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}} foreach keys %EXPORT_TAGS; } F<CGI.pm> creates an ":all" tag which contains some (but not really all) of its categories. That could be done with one small change: # add some of the other ":class" tags to the ":all" class, # deleting duplicates { my %seen; push @{$EXPORT_TAGS{all}}, grep {!$seen{$_}++} @{$EXPORT_TAGS{$_}} foreach qw/html2 html3 netscape form cgi internal/; } Note that the tag names in C<%EXPORT_TAGS> don't have the leading ':'. =head2 C<AUTOLOAD>ed Constants Many modules make use of C<AUTOLOAD>ing for constant subroutines to avoid having to compile and waste memory on rarely used values (see L<perlsub> for details on constant subroutines). Calls to such constant subroutines are not optimized away at compile time because they can't be checked at compile time for constancy. Even if a prototype is available at compile time, the body of the subroutine is not (it hasn't been C<AUTOLOAD>ed yet).
perl needs to examine both the C<()> prototype and the body of a subroutine at compile time to detect that it can safely replace calls to that subroutine with the constant value. A workaround for this is to call the constants once in a C<BEGIN> block: package My ; use Socket ; foo( SO_LINGER ); ## SO_LINGER NOT optimized away; called at runtime BEGIN { SO_LINGER } foo( SO_LINGER ); ## SO_LINGER optimized away at compile time. This forces the C<AUTOLOAD> for C<SO_LINGER> to take place before SO_LINGER is encountered later in C<My> package. If you are writing a package that C<AUTOLOAD>s, consider forcing an C<AUTOLOAD> for any constants explicitly imported by other packages or which are usually used when your package is C<use>d. =head1 Good Practices =head2 Declaring C<@EXPORT_OK> and Friends When using C<Exporter> with the standard C<strict> and C<warnings> pragmas, the C<our> keyword is needed to declare the package variables C<@EXPORT_OK>, C<@EXPORT>, C<@ISA>, etc. our @ISA = qw(Exporter); our @EXPORT_OK = qw(munge frobnicate); If backward compatibility for Perls under 5.6 is important, one must write instead a C<use vars> statement. use vars qw(@ISA @EXPORT_OK); @ISA = qw(Exporter); @EXPORT_OK = qw(munge frobnicate); =head2 Playing Safe There are some caveats with the use of runtime statements like C<require Exporter> and the assignment to package variables, which can be very subtle for the unaware programmer. This may happen for instance with mutually recursive modules, which are affected by the time the relevant constructions are executed. The ideal (but a bit ugly) way to never have to think about that is to use C<BEGIN> blocks. So the first part of the L</SYNOPSIS> code could be rewritten as: package YourModule; use strict; use warnings; our (@ISA, @EXPORT_OK); BEGIN { require Exporter; @ISA = qw(Exporter); @EXPORT_OK = qw(munge frobnicate); # symbols to export on request } The C<BEGIN> will assure that the loading of F<Exporter.pm> and the assignments to C<@ISA> and C<@EXPORT_OK> happen immediately, leaving no room for something to get awry or just plain wrong.
With respect to loading C<Exporter> and inheriting, there are alternatives with the use of modules like C<base> and C<parent>. use base qw(Exporter); # or use parent qw(Exporter); Any of these statements are nice replacements for C<BEGIN { require Exporter; @ISA = qw(Exporter); }> with the same compile-time effect. The basic difference is that C<base> code interacts with declared C<fields> while C<parent> is a streamlined version of the older C<base> code to just establish the IS-A relationship. For more details, see the documentation and code of L<base> and L<parent>. Another thorough remedy to that runtime vs. compile-time trap is to use L<Exporter::Easy>, which is a wrapper of Exporter that allows all boilerplate code at a single gulp in the use statement. use Exporter::Easy ( OK => [ qw(munge frobnicate) ], ); # @ISA setup is automatic # all assignments happen at compile time =head2 What Not to Export You have been warned already in L</Selecting What to Export> to not export: =over 4 =item * method names (because you don't need to and that's likely to not do what you want), =item * anything by default (because you don't want to surprise your users... badly) =item * anything you don't need to (because less is more) =back There's one more item to add to this list. Do B<not> export variable names. Just because C<Exporter> lets you do that, it does not mean you should. @EXPORT_OK = qw($svar @avar %hvar); # DON'T! Exporting variables is not a good idea. They can change under the hood, provoking horrible effects at-a-distance that are too hard to track and to fix. Trust me: they are not worth it. To provide the capability to set/get class-wide settings, it is best instead to provide accessors as subroutines or class methods instead. =head1 SEE ALSO C<Exporter> is definitely not the only module with symbol exporter capabilities. At CPAN, you may find a bunch of them. Some are lighter. Some provide improved APIs and features. Pick the one that fits your needs. The following is a sample list of such modules.
Exporter::Easy Exporter::Lite Exporter::Renaming Exporter::Tidy Sub::Exporter / Sub::Installer Perl6::Export / Perl6::Export::Attrs

=head1 LICENSE

This library is free software. You can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

EXPORTER

$fatpacked{"Exporter/Heavy.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'EXPORTER_HEAVY';
package Exporter::Heavy;

use strict;
no strict 'refs';

# On one line so MakeMaker will see it.
require Exporter; our $VERSION = $Exporter::VERSION;

=head1 NAME

Exporter::Heavy - Exporter guts

=head1 SYNOPSIS

(internal use only)

=head1 DESCRIPTION

No user-serviceable parts inside.

=cut

#
# We go to a lot of trouble not to 'require Carp' at file scope,
# because Carp requires Exporter, and something has to give.
#

# Fill %$cache with a true entry for every name in @$exports and in the
# package's @EXPORT_OK. A leading '&' is stripped from the names in place,
# i.e. the package's own arrays are modified.
sub _rebuild_cache {
    my ($pkg, $exports, $cache) = @_;
    s/^&// foreach @$exports;
    @{$cache}{@$exports} = (1) x @$exports;
    my $ok = \@{"${pkg}::EXPORT_OK"};
    if (@$ok) {
        s/^&// foreach @$ok;
        @{$cache}{@$ok} = (1) x @$ok;
    }
}

# Full export implementation behind Exporter::export.
# Arguments: ($pkg, $callpkg, @imports) - exporting package, importing
# package and the import list. Handles :tag, /pattern/ and !negation
# specifications, leading-digit entries (passed to $pkg->VERSION),
# @EXPORT_FAIL, and symbols with a type character.
# Croaks when a requested symbol is not exportable.
sub heavy_export {

    # Save the old __WARN__ handler in case it was defined
    my $oldwarn = $SIG{__WARN__};

    # First make import warnings look like they're coming from the "use".
    local $SIG{__WARN__} = sub {
        # restore it back so proper stacking occurs
        local $SIG{__WARN__} = $oldwarn;
        my $text = shift;
        if ($text =~ s/ at \S*Exporter\S*.pm line \d+.*\n//) {
            require Carp;
            local $Carp::CarpLevel = 1; # ignore package calling us too.
            Carp::carp($text);
        }
        else {
            warn $text;
        }
    };
    # Add a hint about the offending @EXPORT to this one specific error.
    local $SIG{__DIE__} = sub {
        require Carp;
        local $Carp::CarpLevel = 1; # ignore package calling us too.
        Carp::croak("$_[0]Illegal null symbol in \@${1}::EXPORT")
            if $_[0] =~ /^Unable to create sub named "(.*?)::"/;
    };

    my($pkg, $callpkg, @imports) = @_;
    my($type, $sym, $cache_is_current, $oops);
    my($exports, $export_cache) = (\@{"${pkg}::EXPORT"},
                                   $Exporter::Cache{$pkg} ||= {});

    if (@imports) {
        if (!%$export_cache) {
            _rebuild_cache ($pkg, $exports, $export_cache);
            $cache_is_current = 1;
        }

        # Any entry starting with /, ! or : switches on specification
        # processing; %imports collects the resulting set of names.
        if (grep m{^[/!:]}, @imports) {
            my $tagsref = \%{"${pkg}::EXPORT_TAGS"};
            my $tagdata;
            my %imports;
            my($remove, $spec, @names, @allexports);
            # negated first item implies starting with default set:
            unshift @imports, ':DEFAULT' if $imports[0] =~ m/^!/;
            foreach $spec (@imports){
                $remove = $spec =~ s/^!//;

                if ($spec =~ s/^://){
                    if ($spec eq 'DEFAULT'){
                        @names = @$exports;
                    }
                    elsif ($tagdata = $tagsref->{$spec}) {
                        @names = @$tagdata;
                    }
                    else {
                        warn qq["$spec" is not defined in %${pkg}::EXPORT_TAGS];
                        ++$oops;
                        next;
                    }
                }
                elsif ($spec =~ m:^/(.*)/$:){
                    my $patn = $1;
                    @allexports = keys %$export_cache unless @allexports; # only do keys once
                    @names = grep(/$patn/, @allexports); # not anchored by default
                }
                else {
                    @names = ($spec); # is a normal symbol name
                }

                warn "Import ".($remove ? "del":"add").": @names "
                    if $Exporter::Verbose;

                if ($remove) {
                    foreach $sym (@names) { delete $imports{$sym} }
                }
                else {
                    @imports{@names} = (1) x @names;
                }
            }
            @imports = keys %imports;
        }

        # Check every requested name against the export cache.
        my @carp;
        foreach $sym (@imports) {
            if (!$export_cache->{$sym}) {
                if ($sym =~ m/^\d/) {
                    $pkg->VERSION($sym); # inherit from UNIVERSAL
                    # If the version number was the only thing specified
                    # then we should act as if nothing was specified:
                    if (@imports == 1) {
                        @imports = @$exports;
                        last;
                    }
                    # We need a way to emulate 'use Foo ()' but still
                    # allow an easy version check: "use Foo 1.23, ''";
                    if (@imports == 2 and !$imports[1]) {
                        @imports = ();
                        last;
                    }
                } elsif ($sym !~ s/^&// || !$export_cache->{$sym}) {
                    # Last chance - see if they've updated EXPORT_OK since we
                    # cached it.

                    unless ($cache_is_current) {
                        %$export_cache = ();
                        _rebuild_cache ($pkg, $exports, $export_cache);
                        $cache_is_current = 1;
                    }

                    if (!$export_cache->{$sym}) {
                        # accumulate the non-exports
                        push @carp,
                            qq["$sym" is not exported by the $pkg module\n];
                        $oops++;
                    }
                }
            }
        }
        if ($oops) {
            require Carp;
            Carp::croak("@{carp}Can't continue after import errors");
        }
    }
    else {
        @imports = @$exports;
    }

    my($fail, $fail_cache) = (\@{"${pkg}::EXPORT_FAIL"},
                              $Exporter::FailCache{$pkg} ||= {});

    if (@$fail) {
        if (!%$fail_cache) {
            # Build cache of symbols. Optimise the lookup by adding
            # barewords twice... both with and without a leading &.
            # (Technique could be applied to $export_cache at cost of memory)
            my @expanded = map { /^\w/ ? ($_, '&'.$_) : $_ } @$fail;
            warn "${pkg}::EXPORT_FAIL cached: @expanded" if $Exporter::Verbose;
            @{$fail_cache}{@expanded} = (1) x @expanded;
        }
        my @failed;
        foreach $sym (@imports) { push(@failed, $sym) if $fail_cache->{$sym} }
        if (@failed) {
            # the package's export_fail returns the symbols that still fail
            @failed = $pkg->export_fail(@failed);
            foreach $sym (@failed) {
                require Carp;
                Carp::carp(qq["$sym" is not implemented by the $pkg module ],
                           "on this architecture");
            }
            if (@failed) {
                require Carp;
                Carp::croak("Can't continue after import errors");
            }
        }
    }

    warn "Importing into $callpkg from $pkg: ",
        join(", ",sort @imports) if $Exporter::Verbose;

    # Install each symbol into the importing package's symbol table.
    foreach $sym (@imports) {
        # shortcut for the common case of no type character
        (*{"${callpkg}::$sym"} = \&{"${pkg}::$sym"}, next)
            unless $sym =~ s/^(\W)//;
        $type = $1;
        no warnings 'once';
        *{"${callpkg}::$sym"} =
            $type eq '&' ? \&{"${pkg}::$sym"} :
            $type eq '$' ? \${"${pkg}::$sym"} :
            $type eq '@' ? \@{"${pkg}::$sym"} :
            $type eq '%' ? \%{"${pkg}::$sym"} :
            $type eq '*' ? *{"${pkg}::$sym"} :
            do { require Carp; Carp::croak("Can't export symbol: $type$sym") };
    }
}

# Export into the package found $level frames up the call stack.
# Arguments: ($pkg, $level, <ignored>, @symbols).
sub heavy_export_to_level {
    my $pkg = shift;
    my $level = shift;
    (undef) = shift; # XXX redundant arg
    my $callpkg = caller($level);
    $pkg->export($callpkg, @_);
}

# Utility functions

# Push the symbols of the tags named in @$syms (or of all tags when @$syms
# is empty) onto the package array named $var. Names that are not tags are
# pushed unchanged and reported with carp when $^W is set.
sub _push_tags {
    my($pkg, $var, $syms) = @_;
    my @nontag = ();
    my $export_tags = \%{"${pkg}::EXPORT_TAGS"};
    push(@{"${pkg}::$var"},
        map { $export_tags->{$_} ? @{$export_tags->{$_}}
                                 : scalar(push(@nontag,$_),$_) }
            (@$syms) ? @$syms : keys %$export_tags);
    if (@nontag and $^W) {
        # This may change to a die one day
        require Carp;
        Carp::carp(join(", ", @nontag)." are not tags of $pkg");
    }
}

# Calls VERSION($wanted) on the package of $self (a class name or an object).
sub heavy_require_version {
    my($self, $wanted) = @_;
    my $pkg = ref $self || $self;
    return ${pkg}->VERSION($wanted);
}

# Add the symbols of the given tags to the calling package's @EXPORT.
sub heavy_export_tags {
    _push_tags((caller)[0], "EXPORT", \@_);
}

# Add the symbols of the given tags to the calling package's @EXPORT_OK.
sub heavy_export_ok_tags {
    _push_tags((caller)[0], "EXPORT_OK", \@_);
}

1;
EXPORTER_HEAVY

$fatpacked{"File/Path.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'FILE_PATH';
package File::Path;

use 5.005_04;
use strict;

use Cwd 'getcwd';
use File::Basename ();
use File::Spec ();

BEGIN {
    if ( $] < 5.006 ) {
        # can't say 'opendir my $dh, $dirname'
        # need to initialise $dh
        eval 'use Symbol';
    }
}

use Exporter ();
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION = '2.15';
$VERSION = eval $VERSION;
@ISA = qw(Exporter);
@EXPORT = qw(mkpath rmtree);
@EXPORT_OK = qw(make_path remove_tree);

BEGIN { for (qw(VMS MacOS MSWin32 os2)) { no strict 'refs'; *{"_IS_\U$_"} = $^O eq $_ ? sub () { 1 } : sub () { 0 }; } # These OSes complain if you want to remove a file that you have no # write permission to: *_FORCE_WRITABLE = ( grep { $^O eq $_ } qw(amigaos dos epoc MSWin32 MacOS os2) ) ? sub () { 1 } : sub () { 0 }; # Unix-like systems need to stat each directory in order to detect # race condition. MS-Windows is immune to this particular attack. *_NEED_STAT_CHECK = !(_IS_MSWIN32()) ?
sub () { 1 } : sub () { 0 }; } sub _carp { require Carp; goto &Carp::carp; } sub _croak { require Carp; goto &Carp::croak; } sub _error { my $arg = shift; my $message = shift; my $object = shift; if ( $arg->{error} ) { $object = '' unless defined $object; $message .= ": $!" if $!; push @{ ${ $arg->{error} } }, { $object => $message }; } else { _carp( defined($object) ? "$message for $object: $!" : "$message: $!" ); } } sub __is_arg { my ($arg) = @_; # If client code blessed an array ref to HASH, this will not work # properly. We could have done $arg->isa() wrapped in eval, but # that would be expensive. This implementation should suffice. # We could have also used Scalar::Util:blessed, but we choose not # to add this dependency return ( ref $arg eq 'HASH' ); } sub make_path { push @_, {} unless @_ and __is_arg( $_[-1] ); goto &mkpath; } sub mkpath { my $old_style = !( @_ and __is_arg( $_[-1] ) ); my $data; my $paths; if ($old_style) { my ( $verbose, $mode ); ( $paths, $verbose, $mode ) = @_; $paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' ); $data->{verbose} = $verbose; $data->{mode} = defined $mode ? $mode : oct '777'; } else { my %args_permitted = map { $_ => 1 } ( qw| chmod error group mask mode owner uid user verbose | ); my %not_on_win32_args = map { $_ => 1 } ( qw| group owner uid user | ); my @bad_args = (); my @win32_implausible_args = (); my $arg = pop @_; for my $k (sort keys %{$arg}) { if (! 
$args_permitted{$k}) { push @bad_args, $k; } elsif ($not_on_win32_args{$k} and _IS_MSWIN32) { push @win32_implausible_args, $k; } else { $data->{$k} = $arg->{$k}; } } _carp("Unrecognized option(s) passed to mkpath() or make_path(): @bad_args") if @bad_args; _carp("Option(s) implausible on Win32 passed to mkpath() or make_path(): @win32_implausible_args") if @win32_implausible_args; $data->{mode} = delete $data->{mask} if exists $data->{mask}; $data->{mode} = oct '777' unless exists $data->{mode}; ${ $data->{error} } = [] if exists $data->{error}; unless (@win32_implausible_args) { $data->{owner} = delete $data->{user} if exists $data->{user}; $data->{owner} = delete $data->{uid} if exists $data->{uid}; if ( exists $data->{owner} and $data->{owner} =~ /\D/ ) { my $uid = ( getpwnam $data->{owner} )[2]; if ( defined $uid ) { $data->{owner} = $uid; } else { _error( $data, "unable to map $data->{owner} to a uid, ownership not changed" ); delete $data->{owner}; } } if ( exists $data->{group} and $data->{group} =~ /\D/ ) { my $gid = ( getgrnam $data->{group} )[2]; if ( defined $gid ) { $data->{group} = $gid; } else { _error( $data, "unable to map $data->{group} to a gid, group ownership not changed" ); delete $data->{group}; } } if ( exists $data->{owner} and not exists $data->{group} ) { $data->{group} = -1; # chown will leave group unchanged } if ( exists $data->{group} and not exists $data->{owner} ) { $data->{owner} = -1; # chown will leave owner unchanged } } $paths = [@_]; } return _mkpath( $data, $paths ); } sub _mkpath { my $data = shift; my $paths = shift; my ( @created ); foreach my $path ( @{$paths} ) { next unless defined($path) and length($path); $path .= '/' if _IS_OS2 and $path =~ /^\w:\z/s; # feature of CRT # Logic wants Unix paths, so go with the flow. 
if (_IS_VMS) { next if $path eq '/'; $path = VMS::Filespec::unixify($path); } next if -d $path; my $parent = File::Basename::dirname($path); # Coverage note: It's not clear how we would test the condition: # '-d $parent or $path eq $parent' unless ( -d $parent or $path eq $parent ) { push( @created, _mkpath( $data, [$parent] ) ); } print "mkdir $path\n" if $data->{verbose}; if ( mkdir( $path, $data->{mode} ) ) { push( @created, $path ); if ( exists $data->{owner} ) { # NB: $data->{group} guaranteed to be set during initialisation if ( !chown $data->{owner}, $data->{group}, $path ) { _error( $data, "Cannot change ownership of $path to $data->{owner}:$data->{group}" ); } } if ( exists $data->{chmod} ) { # Coverage note: It's not clear how we would trigger the next # 'if' block. Failure of 'chmod' might first result in a # system error: "Permission denied". if ( !chmod $data->{chmod}, $path ) { _error( $data, "Cannot change permissions of $path to $data->{chmod}" ); } } } else { my $save_bang = $!; # From 'perldoc perlvar': $EXTENDED_OS_ERROR ($^E) is documented # as: # Error information specific to the current operating system. At the # moment, this differs from "$!" under only VMS, OS/2, and Win32 # (and for MacPerl). On all other platforms, $^E is always just the # same as $!. my ( $e, $e1 ) = ( $save_bang, $^E ); $e .= "; $e1" if $e ne $e1; # allow for another process to have created it meanwhile if ( ! -d $path ) { $! 
= $save_bang; if ( $data->{error} ) { push @{ ${ $data->{error} } }, { $path => $e }; } else { _croak("mkdir $path: $e"); } } } } return @created; } sub remove_tree { push @_, {} unless @_ and __is_arg( $_[-1] ); goto &rmtree; } sub _is_subdir { my ( $dir, $test ) = @_; my ( $dv, $dd ) = File::Spec->splitpath( $dir, 1 ); my ( $tv, $td ) = File::Spec->splitpath( $test, 1 ); # not on same volume return 0 if $dv ne $tv; my @d = File::Spec->splitdir($dd); my @t = File::Spec->splitdir($td); # @t can't be a subdir if it's shorter than @d return 0 if @t < @d; return join( '/', @d ) eq join( '/', splice @t, 0, +@d ); } sub rmtree { my $old_style = !( @_ and __is_arg( $_[-1] ) ); my ($arg, $data, $paths); if ($old_style) { my ( $verbose, $safe ); ( $paths, $verbose, $safe ) = @_; $data->{verbose} = $verbose; $data->{safe} = defined $safe ? $safe : 0; if ( defined($paths) and length($paths) ) { $paths = [$paths] unless UNIVERSAL::isa( $paths, 'ARRAY' ); } else { _carp("No root path(s) specified\n"); return 0; } } else { my %args_permitted = map { $_ => 1 } ( qw| error keep_root result safe verbose | ); my @bad_args = (); my $arg = pop @_; for my $k (sort keys %{$arg}) { if (! $args_permitted{$k}) { push @bad_args, $k; } else { $data->{$k} = $arg->{$k}; } } _carp("Unrecognized option(s) passed to remove_tree(): @bad_args") if @bad_args; ${ $data->{error} } = [] if exists $data->{error}; ${ $data->{result} } = [] if exists $data->{result}; # Wouldn't it make sense to do some validation on @_ before assigning # to $paths here? # In the $old_style case we guarantee that each path is both defined # and non-empty. We don't check that here, which means we have to # check it later in the first condition in this line: # if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) { # Granted, that would be a change in behavior for the two # non-old-style interfaces. 
$paths = [@_]; } $data->{prefix} = ''; $data->{depth} = 0; my @clean_path; $data->{cwd} = getcwd() or do { _error( $data, "cannot fetch initial working directory" ); return 0; }; for ( $data->{cwd} ) { /\A(.*)\Z/s; $_ = $1 } # untaint for my $p (@$paths) { # need to fixup case and map \ to / on Windows my $ortho_root = _IS_MSWIN32 ? _slash_lc($p) : $p; my $ortho_cwd = _IS_MSWIN32 ? _slash_lc( $data->{cwd} ) : $data->{cwd}; my $ortho_root_length = length($ortho_root); $ortho_root_length-- if _IS_VMS; # don't compare '.' with ']' if ( $ortho_root_length && _is_subdir( $ortho_root, $ortho_cwd ) ) { local $! = 0; _error( $data, "cannot remove path when cwd is $data->{cwd}", $p ); next; } if (_IS_MACOS) { $p = ":$p" unless $p =~ /:/; $p .= ":" unless $p =~ /:\z/; } elsif ( _IS_MSWIN32 ) { $p =~ s{[/\\]\z}{}; } else { $p =~ s{/\z}{}; } push @clean_path, $p; } @{$data}{qw(device inode)} = ( lstat $data->{cwd} )[ 0, 1 ] or do { _error( $data, "cannot stat initial working directory", $data->{cwd} ); return 0; }; return _rmtree( $data, \@clean_path ); } sub _rmtree { my $data = shift; my $paths = shift; my $count = 0; my $curdir = File::Spec->curdir(); my $updir = File::Spec->updir(); my ( @files, $root ); ROOT_DIR: foreach my $root (@$paths) { # since we chdir into each directory, it may not be obvious # to figure out where we are if we generate a message about # a file name. We therefore construct a semi-canonical # filename, anchored from the directory being unlinked (as # opposed to being truly canonical, anchored from the root (/). my $canon = $data->{prefix} ? File::Spec->catfile( $data->{prefix}, $root ) : $root; my ( $ldev, $lino, $perm ) = ( lstat $root )[ 0, 1, 2 ] or next ROOT_DIR; if ( -d _ ) { $root = VMS::Filespec::vmspath( VMS::Filespec::pathify($root) ) if _IS_VMS; if ( !chdir($root) ) { # see if we can escalate privileges to get in # (e.g. 
funny protection mask such as -w- instead of rwx) # This uses fchmod to avoid traversing outside of the proper # location (CVE-2017-6512) my $root_fh; if (open($root_fh, '<', $root)) { my ($fh_dev, $fh_inode) = (stat $root_fh )[0,1]; $perm &= oct '7777'; my $nperm = $perm | oct '700'; local $@; if ( !( $data->{safe} or $nperm == $perm or !-d _ or $fh_dev ne $ldev or $fh_inode ne $lino or eval { chmod( $nperm, $root_fh ) } ) ) { _error( $data, "cannot make child directory read-write-exec", $canon ); next ROOT_DIR; } close $root_fh; } if ( !chdir($root) ) { _error( $data, "cannot chdir to child", $canon ); next ROOT_DIR; } } my ( $cur_dev, $cur_inode, $perm ) = ( stat $curdir )[ 0, 1, 2 ] or do { _error( $data, "cannot stat current working directory", $canon ); next ROOT_DIR; }; if (_NEED_STAT_CHECK) { ( $ldev eq $cur_dev and $lino eq $cur_inode ) or _croak( "directory $canon changed before chdir, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting." ); } $perm &= oct '7777'; # don't forget setuid, setgid, sticky bits my $nperm = $perm | oct '700'; # notabene: 0700 is for making readable in the first place, # it's also intended to change it to writable in case we have # to recurse in which case we are better than rm -rf for # subtrees with strange permissions if ( !( $data->{safe} or $nperm == $perm or chmod( $nperm, $curdir ) ) ) { _error( $data, "cannot make directory read+writeable", $canon ); $nperm = $perm; } my $d; $d = gensym() if $] < 5.006; if ( !opendir $d, $curdir ) { _error( $data, "cannot opendir", $canon ); @files = (); } else { if ( !defined ${^TAINT} or ${^TAINT} ) { # Blindly untaint dir names if taint mode is active @files = map { /\A(.*)\z/s; $1 } readdir $d; } else { @files = readdir $d; } closedir $d; } if (_IS_VMS) { # Deleting large numbers of files from VMS Files-11 # filesystems is faster if done in reverse ASCIIbetical order. # include '.' to '.;' from blead patch #31775 @files = map { $_ eq '.' ? 
'.;' : $_ } reverse @files; } @files = grep { $_ ne $updir and $_ ne $curdir } @files; if (@files) { # remove the contained files before the directory itself my $narg = {%$data}; @{$narg}{qw(device inode cwd prefix depth)} = ( $cur_dev, $cur_inode, $updir, $canon, $data->{depth} + 1 ); $count += _rmtree( $narg, \@files ); } # restore directory permissions of required now (in case the rmdir # below fails), while we are still in the directory and may do so # without a race via '.' if ( $nperm != $perm and not chmod( $perm, $curdir ) ) { _error( $data, "cannot reset chmod", $canon ); } # don't leave the client code in an unexpected directory chdir( $data->{cwd} ) or _croak("cannot chdir to $data->{cwd} from $canon: $!, aborting."); # ensure that a chdir upwards didn't take us somewhere other # than we expected (see CVE-2002-0435) ( $cur_dev, $cur_inode ) = ( stat $curdir )[ 0, 1 ] or _croak( "cannot stat prior working directory $data->{cwd}: $!, aborting." ); if (_NEED_STAT_CHECK) { ( $data->{device} eq $cur_dev and $data->{inode} eq $cur_inode ) or _croak( "previous directory $data->{cwd} " . "changed before entering $canon, " . "expected dev=$ldev ino=$lino, " . "actual dev=$cur_dev ino=$cur_inode, aborting." ); } if ( $data->{depth} or !$data->{keep_root} ) { if ( $data->{safe} && ( _IS_VMS ? !&VMS::Filespec::candelete($root) : !-w $root ) ) { print "skipped $root\n" if $data->{verbose}; next ROOT_DIR; } if ( _FORCE_WRITABLE and !chmod $perm | oct '700', $root ) { _error( $data, "cannot make directory writeable", $canon ); } print "rmdir $root\n" if $data->{verbose}; if ( rmdir $root ) { push @{ ${ $data->{result} } }, $root if $data->{result}; ++$count; } else { _error( $data, "cannot remove directory", $canon ); if ( _FORCE_WRITABLE && !chmod( $perm, ( _IS_VMS ? 
VMS::Filespec::fileify($root) : $root ) ) ) { _error( $data, sprintf( "cannot restore permissions to 0%o", $perm ), $canon ); } } } } else { # not a directory $root = VMS::Filespec::vmsify("./$root") if _IS_VMS && !File::Spec->file_name_is_absolute($root) && ( $root !~ m/(?]+/ ); # not already in VMS syntax if ( $data->{safe} && ( _IS_VMS ? !&VMS::Filespec::candelete($root) : !( -l $root || -w $root ) ) ) { print "skipped $root\n" if $data->{verbose}; next ROOT_DIR; } my $nperm = $perm & oct '7777' | oct '600'; if ( _FORCE_WRITABLE and $nperm != $perm and not chmod $nperm, $root ) { _error( $data, "cannot make file writeable", $canon ); } print "unlink $canon\n" if $data->{verbose}; # delete all versions under VMS for ( ; ; ) { if ( unlink $root ) { push @{ ${ $data->{result} } }, $root if $data->{result}; } else { _error( $data, "cannot unlink file", $canon ); _FORCE_WRITABLE and chmod( $perm, $root ) or _error( $data, sprintf( "cannot restore permissions to 0%o", $perm ), $canon ); last; } ++$count; last unless _IS_VMS && lstat $root; } } } return $count; } sub _slash_lc { # fix up slashes and case on MSWin32 so that we can determine that # c:\path\to\dir is underneath C:/Path/To my $path = shift; $path =~ tr{\\}{/}; return lc($path); } 1; __END__ =head1 NAME File::Path - Create or remove directory trees =head1 VERSION 2.15 - released June 07 2017. 
=head1 SYNOPSIS use File::Path qw(make_path remove_tree); @created = make_path('foo/bar/baz', '/zug/zwang'); @created = make_path('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711, }); make_path('foo/bar/baz', '/zug/zwang', { chmod => 0777, }); $removed_count = remove_tree('foo/bar/baz', '/zug/zwang', { verbose => 1, error => \my $err_list, safe => 1, }); # legacy (interface promoted before v2.00) @created = mkpath('/foo/bar/baz'); @created = mkpath('/foo/bar/baz', 1, 0711); @created = mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711); $removed_count = rmtree('foo/bar/baz', 1, 1); $removed_count = rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1); # legacy (interface promoted before v2.06) @created = mkpath('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); $removed_count = rmtree('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); =head1 DESCRIPTION This module provides a convenient way to create directories of arbitrary depth and to delete an entire directory subtree from the filesystem. The following functions are provided: =over =item make_path( $dir1, $dir2, .... ) =item make_path( $dir1, $dir2, ...., \%opts ) The C<make_path> function creates the given directories if they do not already exist, much like the Unix command C<mkdir -p>. The function accepts a list of directories to be created. Its behaviour may be tuned by an optional hashref appearing as the last parameter on the call. The function returns the list of directories actually created during the call; in scalar context the number of directories created. The following keys are recognised in the option hash: =over =item mode => $num The numeric permissions mode to apply to each created directory (defaults to C<0777>), to be modified by the current C<umask>. If the directory already exists (and thus does not need to be created), the permissions will not be modified. C<mask> is recognised as an alias for this parameter. 
=item chmod => $num Takes a numeric mode to apply to each created directory (not modified by the current C<umask>). If the directory already exists (and thus does not need to be created), the permissions will not be modified. =item verbose => $bool If present, will cause C<make_path> to print the name of each directory as it is created. By default nothing is printed. =item error => \$err If present, it should be a reference to a scalar. This scalar will be made to reference an array, which will be used to store any errors that are encountered. See the L</"ERROR HANDLING"> section for more information. If this parameter is not used, certain error conditions may raise a fatal error that will cause the program to halt, unless trapped in an C<eval> block. =item owner => $owner =item user => $owner =item uid => $owner If present, will cause any created directory to be owned by C<$owner>. If the value is numeric, it will be interpreted as a uid; otherwise a username is assumed. An error will be issued if the username cannot be mapped to a uid, the uid does not exist or the process lacks the privileges to change ownership. Ownership of directories that already exist will not be changed. C<user> and C<uid> are aliases of C<owner>. =item group => $group If present, will cause any created directory to be owned by the group C<$group>. If the value is numeric, it will be interpreted as a gid; otherwise a group name is assumed. An error will be issued if the group name cannot be mapped to a gid, the gid does not exist or the process lacks the privileges to change group ownership. Group ownership of directories that already exist will not be changed. make_path '/var/tmp/webcache', {owner=>'nobody', group=>'nogroup'}; =back =item mkpath( $dir ) =item mkpath( $dir, $verbose, $mode ) =item mkpath( [$dir1, $dir2,...], $verbose, $mode ) =item mkpath( $dir1, $dir2,..., \%opt ) The C<mkpath()> function provides the legacy interface of C<make_path()> with a different interpretation of the arguments passed. 
The behaviour and return value of the function is otherwise identical to C<make_path()>. =item remove_tree( $dir1, $dir2, .... ) =item remove_tree( $dir1, $dir2, ...., \%opts ) The C<remove_tree> function deletes the given directories and any files and subdirectories they might contain, much like the Unix command C<rm -r> or the Windows commands C<rmdir /s> and C<rd /s>. The function accepts a list of directories to be removed. (In point of fact, it will also accept filesystem entries which are not directories, such as regular files and symlinks. But, as its name suggests, its intent is to remove trees rather than individual files.) C<remove_tree>'s behaviour may be tuned by an optional hashref appearing as the last parameter on the call. If an empty string is passed to C<remove_tree>, an error will occur. B<NOTE:> For security reasons, we strongly advise use of the hashref-as-final-argument syntax -- specifically, with a setting of the C<safe> element to a true value. remove_tree( $dir1, $dir2, ...., { safe => 1, ... # other key-value pairs }, ); The function returns the number of files successfully deleted. The following keys are recognised in the option hash: =over =item verbose => $bool If present, will cause C<remove_tree> to print the name of each file as it is unlinked. By default nothing is printed. =item safe => $bool When set to a true value, will cause C<remove_tree> to skip the files for which the process lacks the required privileges needed to delete files, such as delete privileges on VMS. In other words, the code will make no attempt to alter file permissions. Thus, if the process is interrupted, no filesystem object will be left in a more permissive mode. =item keep_root => $bool When set to a true value, will cause all files and subdirectories to be removed, except the initially specified directories. This comes in handy when cleaning out an application's scratch directory. remove_tree( '/tmp', {keep_root => 1} ); =item result => \$res If present, it should be a reference to a scalar. 
This scalar will be made to reference an array, which will be used to store all files and directories unlinked during the call. If nothing is unlinked, the array will be empty. remove_tree( '/tmp', {result => \my $list} ); print "unlinked $_\n" for @$list; This is a useful alternative to the C<verbose> key. =item error => \$err If present, it should be a reference to a scalar. This scalar will be made to reference an array, which will be used to store any errors that are encountered. See the L</"ERROR HANDLING"> section for more information. Removing things is a much more dangerous proposition than creating things. As such, there are certain conditions that C<remove_tree> may encounter that are so dangerous that the only sane action left is to kill the program. Use C<error> to trap all that is reasonable (problems with permissions and the like), and let it die if things get out of hand. This is the safest course of action. =back =item rmtree( $dir ) =item rmtree( $dir, $verbose, $safe ) =item rmtree( [$dir1, $dir2,...], $verbose, $safe ) =item rmtree( $dir1, $dir2,..., \%opt ) The C<rmtree()> function provides the legacy interface of C<remove_tree()> with a different interpretation of the arguments passed. The behaviour and return value of the function is otherwise identical to C<remove_tree()>. B<NOTE:> For security reasons, we strongly advise use of the hashref-as-final-argument syntax, specifically with a setting of the C<safe> element to a true value. rmtree( $dir1, $dir2, ...., { safe => 1, ... # other key-value pairs }, ); =back =head2 ERROR HANDLING =over 4 =item B<NOTE:> The following error handling mechanism is consistent throughout all code paths EXCEPT in cases where the ROOT node is nonexistent. In version 2.11 the maintainers attempted to rectify this inconsistency but too many downstream modules encountered problems. In such case, if you require root node evaluation or error checking prior to calling C<make_path> or C<remove_tree>, you should take additional precautions. 
=back If C<make_path> or C<remove_tree> encounters an error, a diagnostic message will be printed to C<STDERR> via C<carp> (for non-fatal errors) or via C<croak> (for fatal errors). If this behaviour is not desirable, the C<error> attribute may be used to hold a reference to a variable, which will be used to store the diagnostics. The variable is made a reference to an array of hash references. Each hash contains a single key/value pair where the key is the name of the file, and the value is the error message (including the contents of C<$!> when appropriate). If a general error is encountered the diagnostic key will be empty. An example usage looks like: remove_tree( 'foo/bar', 'bar/rat', {error => \my $err} ); if ($err && @$err) { for my $diag (@$err) { my ($file, $message) = %$diag; if ($file eq '') { print "general error: $message\n"; } else { print "problem unlinking $file: $message\n"; } } } else { print "No error encountered\n"; } Note that if no errors are encountered, C<$err> will reference an empty array. This means that C<$err> will always end up TRUE; so you need to test C<@$err> to determine if errors occurred. =head2 NOTES C<File::Path> blindly exports C<mkpath> and C<rmtree> into the current namespace. These days, this is considered bad style, but to change it now would break too much code. Nonetheless, you are invited to specify what it is you are expecting to use: use File::Path 'rmtree'; The routines C<make_path> and C<remove_tree> are B<not> exported by default. You must specify which ones you want to use. use File::Path 'remove_tree'; Note that a side-effect of the above is that C<mkpath> and C<rmtree> are no longer exported at all. This is due to the way the C<Exporter> module works. If you are migrating a codebase to use the new interface, you will have to list everything explicitly. But that's just good practice anyway. use File::Path qw(remove_tree rmtree); =head3 API CHANGES The API was changed in the 2.0 branch. For a time, C<mkpath> and C<rmtree> tried, unsuccessfully, to deal with the two different calling mechanisms. This approach was considered a failure. 
The new semantics are now only available with C<make_path> and C<remove_tree>. The old semantics are only available through C<mkpath> and C<rmtree>. Users are strongly encouraged to upgrade to at least 2.08 in order to avoid surprises. =head3 SECURITY CONSIDERATIONS There were race conditions in the 1.x implementations of File::Path's C<rmtree> function (although sometimes patched depending on the OS distribution or platform). The 2.0 version contains code to avoid the problem mentioned in CVE-2002-0435. See the following pages for more information: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286905 http://www.nntp.perl.org/group/perl.perl5.porters/2005/01/msg97623.html http://www.debian.org/security/2005/dsa-696 Additionally, unless the C<safe> parameter is set (or the third parameter in the traditional interface is TRUE), should a C<remove_tree> be interrupted, files that were originally in read-only mode may now have their permissions set to a read-write (or "delete OK") mode. The following CVE reports were previously filed against File-Path and are believed to have been addressed: =over 4 =item * L<http://cve.circl.lu/cve/CVE-2004-0452> =item * L<http://cve.circl.lu/cve/CVE-2005-0448> =back In February 2017 the cPanel Security Team reported an additional vulnerability in File-Path. The C<rmtree()> logic to make directories traversable can be abused to set the mode on an attacker-chosen file to an attacker-chosen value. This is due to the time-of-check-to-time-of-use (TOCTTOU) race condition (L<https://en.wikipedia.org/wiki/Time_of_check_to_time_of_use>) between the C<stat()> that decides the inode is a directory and the C<chmod()> that tries to make it user-rwx. CPAN versions 2.13 and later incorporate a patch provided by John Lightsey to address this problem. This vulnerability has been reported as CVE-2017-6512. =head1 DIAGNOSTICS FATAL errors will cause the program to halt (C<croak>), since the problem is so severe that it would be dangerous to continue. (This can always be trapped with C<eval>, but it's not a good idea. Under the circumstances, dying is the best thing to do). SEVERE errors may be trapped using the modern interface. 
If they are not trapped, or if the old interface is used, such an error will cause the program to halt. All other errors may be trapped using the modern interface, otherwise they will be C<carp>ed about. Program execution will not be halted. =over 4 =item mkdir [path]: [errmsg] (SEVERE) C<make_path> was unable to create the path. Probably some sort of permissions error at the point of departure or insufficient resources (such as free inodes on Unix). =item No root path(s) specified C<rmtree> was not given any paths to remove. This message is only emitted if the routine is called with the traditional interface. The modern interface will remain silent if given nothing to do. =item No such file or directory On Windows, if C<make_path> gives you this warning, it may mean that you have exceeded your filesystem's maximum path length. =item cannot fetch initial working directory: [errmsg] C<remove_tree> attempted to determine the initial directory by calling C<Cwd::getcwd>, but the call failed for some reason. No attempt will be made to delete anything. =item cannot stat initial working directory: [errmsg] C<remove_tree> attempted to stat the initial directory (after having successfully obtained its name via C<getcwd>), however, the call failed for some reason. No attempt will be made to delete anything. =item cannot chdir to [dir]: [errmsg] C<remove_tree> attempted to set the working directory in order to begin deleting the objects therein, but was unsuccessful. This is usually a permissions issue. The routine will continue to delete other things, but this directory will be left intact. =item directory [dir] changed before chdir, expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) C<remove_tree> recorded the device and inode of a directory, and then moved into it. It then performed a C<stat> on the current directory and detected that the device and inode were no longer the same. As this is at the heart of the race condition problem, the program will die at this point. 
=item cannot make directory [dir] read+writeable: [errmsg] C<remove_tree> attempted to change the permissions on the current directory to ensure that subsequent unlinkings would not run into problems, but was unable to do so. The permissions remain as they were, and the program will carry on, doing the best it can. =item cannot read [dir]: [errmsg] C<remove_tree> tried to read the contents of the directory in order to acquire the names of the directory entries to be unlinked, but was unsuccessful. This is usually a permissions issue. The program will continue, but the files in this directory will remain after the call. =item cannot reset chmod [dir]: [errmsg] C<remove_tree>, after having deleted everything in a directory, attempted to restore its permissions to the original state but failed. The directory may wind up being left behind. =item cannot remove [dir] when cwd is [dir] The current working directory of the program is F</some/path/to/here> and you are attempting to remove an ancestor, such as F</some/path>. The directory tree is left untouched. The solution is to C<chdir> out of the child directory to a place outside the directory tree to be removed. =item cannot chdir to [parent-dir] from [child-dir]: [errmsg], aborting. (FATAL) C<remove_tree>, after having deleted everything and restored the permissions of a directory, was unable to chdir back to the parent. The program halts to avoid a race condition from occurring. =item cannot stat prior working directory [dir]: [errmsg], aborting. (FATAL) C<remove_tree> was unable to stat the parent directory after having returned from the child. Since there is no way of knowing if we returned to where we think we should be (by comparing device and inode) the only way out is to C<croak>. =item previous directory [parent-dir] changed before entering [child-dir], expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) When C<remove_tree> returned from deleting files in a child directory, a check revealed that the parent directory it returned to wasn't the one it started out from. This is considered a sign of malicious activity. 
=item cannot make directory [dir] writeable: [errmsg] Just before removing a directory (after having successfully removed everything it contained), C attempted to set the permissions on the directory to ensure it could be removed and failed. Program execution continues, but the directory may possibly not be deleted. =item cannot remove directory [dir]: [errmsg] C attempted to remove a directory, but failed. This may be because some objects that were unable to be removed remain in the directory, or it could be a permissions issue. The directory will be left behind. =item cannot restore permissions of [dir] to [0nnn]: [errmsg] After having failed to remove a directory, C was unable to restore its permissions from a permissive state back to a possibly more restrictive setting. (Permissions given in octal). =item cannot make file [file] writeable: [errmsg] C attempted to force the permissions of a file to ensure it could be deleted, but failed to do so. It will, however, still attempt to unlink the file. =item cannot unlink file [file]: [errmsg] C failed to remove a file. Probably a permissions issue. =item cannot restore permissions of [file] to [0nnn]: [errmsg] After having failed to remove a file, C was also unable to restore the permissions on the file to a possibly less permissive setting. (Permissions given in octal). =item unable to map [owner] to a uid, ownership not changed C was instructed to give the ownership of created directories to the symbolic name [owner], but C did not return the corresponding numeric uid. The directory will be created, but ownership will not be changed. =item unable to map [group] to a gid, group ownership not changed C was instructed to give the group ownership of created directories to the symbolic name [group], but C did not return the corresponding numeric gid. The directory will be created, but group ownership will not be changed.
=back =head1 SEE ALSO =over 4 =item * L Allows files and directories to be moved to the Trashcan/Recycle Bin (where they may later be restored if necessary) if the operating system supports such functionality. This feature may one day be made available directly in C. =item * L When removing directory trees, if you want to examine each file to decide whether to delete it (and possibly leaving large swathes alone), F offers a convenient and flexible approach to examining directory trees. =back =head1 BUGS AND LIMITATIONS The following describes F limitations and how to report bugs. =head2 MULTITHREADED APPLICATIONS F C and C will not work with multithreaded applications due to its use of C. At this time, no warning or error is generated in this situation. You will certainly encounter unexpected results. The implementation that surfaces this limitation will not be changed. See the F module for functionality similar to F but which does not C. =head2 NFS Mount Points F is not responsible for triggering the automounts, mirror mounts, and the contents of network mounted filesystems. If your NFS implementation requires an action to be performed on the filesystem in order for F to perform operations, it is strongly suggested you assure filesystem availability by reading the root of the mounted filesystem. =head2 REPORTING BUGS Please report all bugs on the RT queue, either via the web interface: L or by email: bug-File-Path@rt.cpan.org In either case, please B patches to the bug report rather than including them inline in the web post or the body of the email. You can also send pull requests to the Github repository: L =head1 ACKNOWLEDGEMENTS Paul Szabo identified the race condition originally, and Brendan O'Dea wrote an implementation for Debian that addressed the problem. That code was used as a basis for the current code. Their efforts are greatly appreciated. 
package File::Which;

use strict;
use warnings;
use Exporter   ();
use File::Spec ();

# ABSTRACT: Perl implementation of the which utility as an API
our $VERSION = '1.22'; # VERSION

our @ISA       = 'Exporter';
our @EXPORT    = 'which';
our @EXPORT_OK = 'where';

# Platform switches, fixed at compile time from $^O.
use constant IS_VMS => ($^O eq 'VMS');
use constant IS_MAC => ($^O eq 'MacOS');
use constant IS_DOS => ($^O eq 'MSWin32' or $^O eq 'dos' or $^O eq 'os2');
use constant IS_CYG => ($^O eq 'cygwin' || $^O eq 'msys');

# For Win32 systems, stores the extensions used for
# executable files
# For others, the empty string is used
# because 'perl' . '' eq 'perl' => easier
#
# Element 0 is always '' (try the bare name first); elements 1.. are the
# platform's executable extensions.
my @PATHEXT = ('');
if ( IS_DOS ) {
  # WinNT. PATHEXT might be set on Cygwin, but not used.
  if ( $ENV{PATHEXT} ) {
    # Drop empty entries (e.g. from ".EXE;;.BAT"): an empty extension in
    # slots 1.. would make the "is in PATHEXT" test below match every file.
    push @PATHEXT, grep { length } split ';', $ENV{PATHEXT};
  } else {
    # Win9X or other: doesn't have PATHEXT, so needs hardcoded.
    push @PATHEXT, qw{.com .exe .bat};
  }
} elsif ( IS_VMS ) {
  push @PATHEXT, qw{.exe .com};
} elsif ( IS_CYG ) {
  # See this for more info
  # http://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-exe
  # NOTE(review): the module's POD (Cygwin section) talks about .EXE/.BAT
  # being discovered and .COM not; this list is .exe/.com -- confirm which
  # of the two is intended.
  push @PATHEXT, qw{.exe .com};
}

# which($exec)
#
# Search the directories returned by File::Spec->path (plus the current
# directory on DOS-ish systems, VMS and MacOS) for an executable called
# $exec, trying each extension in @PATHEXT.
#
# Scalar context: returns the first match, or undef when nothing is found.
# List context:   returns every match, or an empty list when nothing is
#                 found.  An undefined or empty $exec is "nothing found" in
#                 both contexts (it used to yield the one-element list
#                 (undef) in list context, which is true as a list and
#                 disagreed with the not-found path at the end of the sub).
sub which {
  my ($exec) = @_;

  # Bare return: undef in scalar context, empty list in list context.
  return unless defined $exec and $exec ne '';

  my $all = wantarray;
  my @results = ();

  # check for aliases first
  if ( IS_VMS ) {
    # NOTE(review): $exec is interpolated into a shell command here and in
    # the MacOS branch below; callers must not pass untrusted names on
    # those platforms.
    my $symbol = `SHOW SYMBOL $exec`;
    chomp($symbol);
    unless ( $? ) {
      return $symbol unless $all;
      push @results, $symbol;
    }
  }
  if ( IS_MAC ) {
    # $ENV{Aliases} may be unset; splitting undef would warn.
    my @aliases = defined $ENV{Aliases} ? split( /\,/, $ENV{Aliases} ) : ();
    foreach my $alias ( @aliases ) {
      # This has not been tested!!
      # PPT which says MPW-Perl cannot resolve `Alias $alias`,
      # let's just hope it's fixed
      if ( lc($alias) eq lc($exec) ) {
        chomp(my $file = `Alias $alias`);
        last unless $file;  # if it failed, just go on the normal way
        return $file unless $all;
        push @results, $file;
        # we can stop this loop as if it finds more aliases matching,
        # it'll just be the same result anyway
        last;
      }
    }
  }

  # On Unix-like systems a name containing a slash is a path, not a
  # command name: return it as-is when it is an executable plain file.
  return $exec
    if !IS_VMS and !IS_MAC and !IS_DOS and $exec =~ /\// and -f $exec and -x $exec;

  my @path = File::Spec->path;
  if ( IS_DOS or IS_VMS or IS_MAC ) {
    # These platforms implicitly search the current directory first.
    unshift @path, File::Spec->curdir;
  }

  foreach my $base ( map { File::Spec->catfile($_, $exec) } @path ) {
    for my $ext ( @PATHEXT ) {
      my $file = $base.$ext;

      # We don't want dirs (as they are -x)
      # (this -d also primes the "_" stat buffer used by -x / -e below)
      next if -d $file;

      if (
        # Executable, normal case
        -x _
        or (
          # MacOS doesn't mark as executable so we check -e
          IS_MAC
          ||
          (
            ( IS_DOS or IS_CYG )
            and
            grep {
              # \Q..\E: the extension is literal text, so the "." in
              # ".exe" must not match an arbitrary character.
              $file =~ /\Q$_\E\z/i
            } @PATHEXT[1..$#PATHEXT]
          )
          # DOSish systems don't pass -x on
          # non-exe/bat/com files. so we check -e.
          # However, we don't want to pass -e on files
          # that aren't in PATHEXT, like README.
          and -e _
        )
      ) {
        return $file unless $all;
        push @results, $file;
      }
    }
  }

  return @results if $all;
  return;
}

# where($exec)
#
# Same as which() called in list context: returns every match as a list.
sub where {
  # force wantarray
  my @res = which($_[0]);
  return @res;
}

1;
Under Win32 systems, which do not have a notion of directly executable files, but use special extensions such as C<.exe> and C<.bat> to identify them, C takes extra steps to assure that you will find the correct file (so for example, you might be searching for C, it'll try F, F, etc.) =head3 Linux, *BSD and other UNIXes There should not be any surprises here. The current directory will not be searched unless it is explicitly added to the path. =head3 Modern Windows (including NT, XP, Vista, 7, 8, 10 etc) Windows NT has a special environment variable called C, which is used by the shell to look for executable files. Usually, it will contain a list in the form C<.EXE;.BAT;.COM;.JS;.VBS> etc. If C finds such an environment variable, it parses the list and uses it as the different extensions. =head3 Cygwin Cygwin provides a Unix-like environment for Microsoft Windows users. In most ways it works like other Unix and Unix-like environments, but in a few key aspects it works like Windows. As with other Unix environments, the current directory is not included in the search unless it is explicitly included in the search path. Like on Windows, files with C<.EXE> or C<.BAT> extensions will be discovered even if they are not part of the query. C<.COM> or extensions specified using the C environment variable will NOT be discovered without the fully qualified name, however. =head3 Windows 95, 98, ME, MS-DOS, OS/2 This set of operating systems doesn't have the C variable, and usually you will find executable files there with the extensions C<.exe>, C<.bat> and (less likely) C<.com>. C uses this hardcoded list if it's running under Win32 but does not find a C variable. As of 2015 none of these platforms are tested frequently (or perhaps ever), but the current maintainer is determined not to intentionally remove support for older operating systems. =head3 VMS Same case as Windows 9x: uses C<.exe> and C<.com> (in that order).
As of 2015 the current maintainer does not test on VMS, and is in fact not certain it has ever been tested on VMS. If this platform is important to you and you can help me verify and/or support it on that platform please contact me. =head1 FUNCTIONS =head2 which my $path = which $short_exe_name; my @paths = which $short_exe_name; Exported by default. C<$short_exe_name> is the name used in the shell to call the program (for example, C). If it finds an executable with the name you specified, C will return the absolute path leading to this executable (for example, F or F). If it does I<not> find the executable, it returns C. If C is called in list context, it will return I<all> the matches. =head2 where my @paths = where $short_exe_name; Not exported by default. Same as L in array context. Same as the C utility, will return an array containing all the path names matching C<$short_exe_name>. =head1 CAVEATS This module has no non-core requirements for Perl 5.6.2 and better. This module is fully supported back to Perl 5.8.1. It may work on 5.8.0. It should work on Perl 5.6.x and I may even test on 5.6.2. I will accept patches to maintain compatibility for such older Perls, but you may need to fix it on 5.6.x / 5.8.0 and send me a patch. Not tested on VMS although there is platform specific code for those. Anyone who has a second would be very kind to send me a report of how it went. =head1 SUPPORT Bugs should be reported via the GitHub issue tracker L For other issues, contact the maintainer. =head1 SEE ALSO =over 4 =item L, L Command line interface to this module. =item L Comes with a C function with slightly different semantics than the traditional UNIX where. It will find executables in the current directory, even though the current directory is not searched for by default on Unix. =item L This module purports to "check that a command is available", but does not provide any documentation on how you might use it.
=back =head1 AUTHORS =over 4 =item * Per Einar Ellefsen =item * Adam Kennedy =item * Graham Ollis =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2002 by Per Einar Ellefsen . This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut FILE_WHICH $fatpacked{"Getopt/Long.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'GETOPT_LONG'; #! perl # Getopt::Long.pm -- Universal options parsing # Author : Johan Vromans # Created On : Tue Sep 11 15:00:12 1990 # Last Modified By: Johan Vromans # Last Modified On: Sat May 27 12:11:39 2017 # Update Count : 1715 # Status : Released ################ Module Preamble ################ use 5.004; use strict; use warnings; package Getopt::Long; use vars qw($VERSION); $VERSION = 2.50; # For testing versions only. use vars qw($VERSION_STRING); $VERSION_STRING = "2.50"; use Exporter; use vars qw(@ISA @EXPORT @EXPORT_OK); @ISA = qw(Exporter); # Exported subroutines. sub GetOptions(@); # always sub GetOptionsFromArray(@); # on demand sub GetOptionsFromString(@); # on demand sub Configure(@); # on demand sub HelpMessage(@); # on demand sub VersionMessage(@); # in demand BEGIN { # Init immediately so their contents can be used in the 'use vars' below. @EXPORT = qw(&GetOptions $REQUIRE_ORDER $PERMUTE $RETURN_IN_ORDER); @EXPORT_OK = qw(&HelpMessage &VersionMessage &Configure &GetOptionsFromArray &GetOptionsFromString); } # User visible variables. use vars @EXPORT, @EXPORT_OK; use vars qw($error $debug $major_version $minor_version); # Deprecated visible variables. use vars qw($autoabbrev $getopt_compat $ignorecase $bundling $order $passthrough); # Official invisible variables. use vars qw($genprefix $caller $gnu_compat $auto_help $auto_version $longprefix); # Really invisible variables. my $bundling_values; # Public subroutines. sub config(@); # deprecated name # Private subroutines. 
# Reset every configuration variable to its built-in default.
#
# When the environment variable POSIXLY_CORRECT is defined (any value),
# the POSIX flavour is selected: no '+' option starter, no automatic
# abbreviation, and option processing stops at the first non-option
# ($REQUIRE_ORDER).  Otherwise the permissive defaults apply ($PERMUTE).
sub ConfigDefaults() {
    my $posix = defined $ENV{"POSIXLY_CORRECT"};

    # Settings that depend on POSIX compliancy.
    $genprefix     = $posix ? "(--|-)"       : "(--|-|\\+)";
    $autoabbrev    = $posix ? 0              : 1;   # automatic abbrev of options
    $getopt_compat = $posix ? 0              : 1;   # allow '+' to start options
    $order         = $posix ? $REQUIRE_ORDER : $PERMUTE;
    $bundling      = 0;                             # off in both flavours

    # Settings that are the same in both flavours.
    $debug           = 0;       # debugging output off
    $error           = 0;       # error tally
    $ignorecase      = 1;       # option names are matched case-insensitively
    $passthrough     = 0;       # unrecognized options are not passed through
    $gnu_compat      = 0;       # require --opt=val if value is optional
    $longprefix      = "(--)";  # what a long prefix looks like
    $bundling_values = 0;       # no bundling of values
}

# Override import.
#
# Arguments before a literal ':config' are symbols handed to the parent
# class's import; arguments after it are configuration options handed to
# Configure().
sub import {
    my ($pkg, @args) = @_;

    my @syms;                   # symbols to import
    my @config;                 # configuration options
    my $in_config = 0;          # true once ':config' has been seen
    foreach my $arg ( @args ) {
	if ( $arg eq ':config' ) {
	    $in_config = 1;
	    next;
	}
	if ( $in_config ) {
	    push(@config, $arg);
	}
	else {
	    push(@syms, $arg);
	}
    }

    # Hide one level and call super.
    local $Exporter::ExportLevel = 1;
    # GetOptions is always exported, even with an explicit symbol list.
    push(@syms, qw(&GetOptions)) if @syms;
    $requested_version = 0;
    $pkg->SUPER::import(@syms);

    # And configure.
    Configure(@config) if @config;
}
package Getopt::Long::Parser;

# Object-oriented front end: each parser object carries its own
# configuration and the package it was created from, and swaps them in
# around a call to Getopt::Long::GetOptionsFromArray.

# Store a copy of the default configuration. Since ConfigDefaults has
# just been called, what we get from Configure is the default.
my $default_config = do {
    Getopt::Long::Configure ()
};

# new( [ config => \@options ] )
#
# Creates a parser.  The only recognised attribute is 'config', an array
# reference of Configure() options applied on top of the defaults.
# Dies on any other attribute.
sub new {
    my $that = shift;
    my $class = ref($that) || $that;
    my %atts = @_;

    # Register the callers package.
    my $self = { caller_pkg => (caller)[0] };

    bless ($self, $class);

    # Process config attributes.
    if ( defined $atts{config} ) {
	# Configure() returns the previous settings: the first call installs
	# defaults + requested options, the second call restores the old
	# global state and hands back the combination we want to keep.
	my $save = Getopt::Long::Configure ($default_config, @{$atts{config}});
	$self->{settings} = Getopt::Long::Configure ($save);
	delete ($atts{config});
    }
    # Else use default config.
    else {
	$self->{settings} = $default_config;
    }

    if ( %atts ) {		# Oops
	die(__PACKAGE__.": unhandled attributes: ".
	    join(" ", sort(keys(%atts)))."\n");
    }

    $self;
}

# configure( @options )
#
# Merges additional Configure() options into this parser's settings
# without disturbing the global configuration.
sub configure {
    my ($self) = shift;

    # Restore settings, merge new settings in.
    my $save = Getopt::Long::Configure ($self->{settings}, @_);

    # Restore orig config and save the new config.
    $self->{settings} = Getopt::Long::Configure ($save);
}

# getoptions( @optionlist )
#
# Parses @ARGV with this parser's settings.
sub getoptions {
    my ($self) = shift;

    return $self->getoptionsfromarray(\@ARGV, @_);
}

# getoptionsfromarray( \@args, @optionlist )
#
# Parses the given array with this parser's settings and returns whatever
# Getopt::Long::GetOptionsFromArray returns.  The global configuration is
# restored afterwards, also when parsing dies; the exception is then
# re-thrown.
sub getoptionsfromarray {
    my ($self) = shift;

    # Restore config settings.
    my $save = Getopt::Long::Configure ($self->{settings});

    # Tell the main routine which package created this parser.  This must
    # be localized: a plain assignment leaves the package behind in the
    # global after we return, and later non-OO calls (which use
    # "$caller || (caller)[0]") would then resolve against the wrong
    # package.
    local $Getopt::Long::caller = $self->{caller_pkg};

    # Call main routine.
    my $ret = 0;
    eval {
	# Locally set exception handler to default, otherwise it will
	# be called implicitly here, and again explicitly when we try
	# to deliver the messages.
	local ($SIG{__DIE__}) = 'DEFAULT';
	$ret = Getopt::Long::GetOptionsFromArray (@_);
    };
    # Capture the error before calling anything else that could touch $@.
    my $err = $@;

    # Restore saved settings.
    Getopt::Long::Configure ($save);

    # Handle errors and return value.
    die ($err) if $err;
    return $ret;
}
Search for 'hard-wired'. use constant CTL_TYPE => 0; #use constant CTL_TYPE_FLAG => ''; #use constant CTL_TYPE_NEG => '!'; #use constant CTL_TYPE_INCR => '+'; #use constant CTL_TYPE_INT => 'i'; #use constant CTL_TYPE_INTINC => 'I'; #use constant CTL_TYPE_XINT => 'o'; #use constant CTL_TYPE_FLOAT => 'f'; #use constant CTL_TYPE_STRING => 's'; use constant CTL_CNAME => 1; use constant CTL_DEFAULT => 2; use constant CTL_DEST => 3; use constant CTL_DEST_SCALAR => 0; use constant CTL_DEST_ARRAY => 1; use constant CTL_DEST_HASH => 2; use constant CTL_DEST_CODE => 3; use constant CTL_AMIN => 4; use constant CTL_AMAX => 5; # FFU. #use constant CTL_RANGE => ; #use constant CTL_REPEAT => ; # Rather liberal patterns to match numbers. use constant PAT_INT => "[-+]?_*[0-9][0-9_]*"; use constant PAT_XINT => "(?:". "[-+]?_*[1-9][0-9_]*". "|". "0x_*[0-9a-f][0-9a-f_]*". "|". "0b_*[01][01_]*". "|". "0[0-7_]*". ")"; use constant PAT_FLOAT => "[-+]?". # optional sign "(?=[0-9.])". # must start with digit or dec.point "[0-9_]*". # digits before the dec.point "(\.[0-9_]+)?". # optional fraction "([eE][-+]?[0-9_]+)?"; # optional exponent sub GetOptions(@) { # Shift in default array. unshift(@_, \@ARGV); # Try to keep caller() and Carp consistent. goto &GetOptionsFromArray; } sub GetOptionsFromString(@) { my ($string) = shift; require Text::ParseWords; my $args = [ Text::ParseWords::shellwords($string) ]; $caller ||= (caller)[0]; # current context my $ret = GetOptionsFromArray($args, @_); return ( $ret, $args ) if wantarray; if ( @$args ) { $ret = 0; warn("GetOptionsFromString: Excess data \"@$args\" in string \"$string\"\n"); } $ret; } sub GetOptionsFromArray(@) { my ($argv, @optionlist) = @_; # local copy of the option descriptions my $argend = '--'; # option list terminator my %opctl = (); # table of option specs my $pkg = $caller || (caller)[0]; # current context # Needed if linkage is omitted. 
my @ret = (); # accum for non-options my %linkage; # linkage my $userlinkage; # user supplied HASH my $opt; # current option my $prefix = $genprefix; # current prefix $error = ''; if ( $debug ) { # Avoid some warnings if debugging. local ($^W) = 0; print STDERR ("Getopt::Long $Getopt::Long::VERSION ", "called from package \"$pkg\".", "\n ", "argv: ", defined($argv) ? UNIVERSAL::isa( $argv, 'ARRAY' ) ? "(@$argv)" : $argv : "", "\n ", "autoabbrev=$autoabbrev,". "bundling=$bundling,", "bundling_values=$bundling_values,", "getopt_compat=$getopt_compat,", "gnu_compat=$gnu_compat,", "order=$order,", "\n ", "ignorecase=$ignorecase,", "requested_version=$requested_version,", "passthrough=$passthrough,", "genprefix=\"$genprefix\",", "longprefix=\"$longprefix\".", "\n"); } # Check for ref HASH as first argument. # First argument may be an object. It's OK to use this as long # as it is really a hash underneath. $userlinkage = undef; if ( @optionlist && ref($optionlist[0]) and UNIVERSAL::isa($optionlist[0],'HASH') ) { $userlinkage = shift (@optionlist); print STDERR ("=> user linkage: $userlinkage\n") if $debug; } # See if the first element of the optionlist contains option # starter characters. # Be careful not to interpret '<>' as option starters. if ( @optionlist && $optionlist[0] =~ /^\W+$/ && !($optionlist[0] eq '<>' && @optionlist > 0 && ref($optionlist[1])) ) { $prefix = shift (@optionlist); # Turn into regexp. Needs to be parenthesized! $prefix =~ s/(\W)/\\$1/g; $prefix = "([" . $prefix . "])"; print STDERR ("=> prefix=\"$prefix\"\n") if $debug; } # Verify correctness of optionlist. %opctl = (); while ( @optionlist ) { my $opt = shift (@optionlist); unless ( defined($opt) ) { $error .= "Undefined argument in option spec\n"; next; } # Strip leading prefix so people can specify "--foo=i" if they like. 
$opt = $+ if $opt =~ /^$prefix+(.*)$/s; if ( $opt eq '<>' ) { if ( (defined $userlinkage) && !(@optionlist > 0 && ref($optionlist[0])) && (exists $userlinkage->{$opt}) && ref($userlinkage->{$opt}) ) { unshift (@optionlist, $userlinkage->{$opt}); } unless ( @optionlist > 0 && ref($optionlist[0]) && ref($optionlist[0]) eq 'CODE' ) { $error .= "Option spec <> requires a reference to a subroutine\n"; # Kill the linkage (to avoid another error). shift (@optionlist) if @optionlist && ref($optionlist[0]); next; } $linkage{'<>'} = shift (@optionlist); next; } # Parse option spec. my ($name, $orig) = ParseOptionSpec ($opt, \%opctl); unless ( defined $name ) { # Failed. $orig contains the error message. Sorry for the abuse. $error .= $orig; # Kill the linkage (to avoid another error). shift (@optionlist) if @optionlist && ref($optionlist[0]); next; } # If no linkage is supplied in the @optionlist, copy it from # the userlinkage if available. if ( defined $userlinkage ) { unless ( @optionlist > 0 && ref($optionlist[0]) ) { if ( exists $userlinkage->{$orig} && ref($userlinkage->{$orig}) ) { print STDERR ("=> found userlinkage for \"$orig\": ", "$userlinkage->{$orig}\n") if $debug; unshift (@optionlist, $userlinkage->{$orig}); } else { # Do nothing. Being undefined will be handled later. next; } } } # Copy the linkage. If omitted, link to global variable. if ( @optionlist > 0 && ref($optionlist[0]) ) { print STDERR ("=> link \"$orig\" to $optionlist[0]\n") if $debug; my $rl = ref($linkage{$orig} = shift (@optionlist)); if ( $rl eq "ARRAY" ) { $opctl{$name}[CTL_DEST] = CTL_DEST_ARRAY; } elsif ( $rl eq "HASH" ) { $opctl{$name}[CTL_DEST] = CTL_DEST_HASH; } elsif ( $rl eq "SCALAR" || $rl eq "REF" ) { # if ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY ) { # my $t = $linkage{$orig}; # $$t = $linkage{$orig} = []; # } # elsif ( $opctl{$name}[CTL_DEST] == CTL_DEST_HASH ) { # } # else { # Ok. # } } elsif ( $rl eq "CODE" ) { # Ok. 
} else { $error .= "Invalid option linkage for \"$opt\"\n"; } } else { # Link to global $opt_XXX variable. # Make sure a valid perl identifier results. my $ov = $orig; $ov =~ s/\W/_/g; if ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY ) { print STDERR ("=> link \"$orig\" to \@$pkg","::opt_$ov\n") if $debug; eval ("\$linkage{\$orig} = \\\@".$pkg."::opt_$ov;"); } elsif ( $opctl{$name}[CTL_DEST] == CTL_DEST_HASH ) { print STDERR ("=> link \"$orig\" to \%$pkg","::opt_$ov\n") if $debug; eval ("\$linkage{\$orig} = \\\%".$pkg."::opt_$ov;"); } else { print STDERR ("=> link \"$orig\" to \$$pkg","::opt_$ov\n") if $debug; eval ("\$linkage{\$orig} = \\\$".$pkg."::opt_$ov;"); } } if ( $opctl{$name}[CTL_TYPE] eq 'I' && ( $opctl{$name}[CTL_DEST] == CTL_DEST_ARRAY || $opctl{$name}[CTL_DEST] == CTL_DEST_HASH ) ) { $error .= "Invalid option linkage for \"$opt\"\n"; } } $error .= "GetOptionsFromArray: 1st parameter is not an array reference\n" unless $argv && UNIVERSAL::isa( $argv, 'ARRAY' ); # Bail out if errors found. die ($error) if $error; $error = 0; # Supply --version and --help support, if needed and allowed. if ( defined($auto_version) ? $auto_version : ($requested_version >= 2.3203) ) { if ( !defined($opctl{version}) ) { $opctl{version} = ['','version',0,CTL_DEST_CODE,undef]; $linkage{version} = \&VersionMessage; } $auto_version = 1; } if ( defined($auto_help) ? $auto_help : ($requested_version >= 2.3203) ) { if ( !defined($opctl{help}) && !defined($opctl{'?'}) ) { $opctl{help} = $opctl{'?'} = ['','help',0,CTL_DEST_CODE,undef]; $linkage{help} = \&HelpMessage; } $auto_help = 1; } # Show the options tables if debugging. if ( $debug ) { my ($arrow, $k, $v); $arrow = "=> "; while ( ($k,$v) = each(%opctl) ) { print STDERR ($arrow, "\$opctl{$k} = $v ", OptCtl($v), "\n"); $arrow = " "; } } # Process argument list my $goon = 1; while ( $goon && @$argv > 0 ) { # Get next argument. 
$opt = shift (@$argv); print STDERR ("=> arg \"", $opt, "\"\n") if $debug; # Double dash is option list terminator. if ( defined($opt) && $opt eq $argend ) { push (@ret, $argend) if $passthrough; last; } # Look it up. my $tryopt = $opt; my $found; # success status my $key; # key (if hash type) my $arg; # option argument my $ctl; # the opctl entry ($found, $opt, $ctl, $arg, $key) = FindOption ($argv, $prefix, $argend, $opt, \%opctl); if ( $found ) { # FindOption undefines $opt in case of errors. next unless defined $opt; my $argcnt = 0; while ( defined $arg ) { # Get the canonical name. print STDERR ("=> cname for \"$opt\" is ") if $debug; $opt = $ctl->[CTL_CNAME]; print STDERR ("\"$ctl->[CTL_CNAME]\"\n") if $debug; if ( defined $linkage{$opt} ) { print STDERR ("=> ref(\$L{$opt}) -> ", ref($linkage{$opt}), "\n") if $debug; if ( ref($linkage{$opt}) eq 'SCALAR' || ref($linkage{$opt}) eq 'REF' ) { if ( $ctl->[CTL_TYPE] eq '+' ) { print STDERR ("=> \$\$L{$opt} += \"$arg\"\n") if $debug; if ( defined ${$linkage{$opt}} ) { ${$linkage{$opt}} += $arg; } else { ${$linkage{$opt}} = $arg; } } elsif ( $ctl->[CTL_DEST] == CTL_DEST_ARRAY ) { print STDERR ("=> ref(\$L{$opt}) auto-vivified", " to ARRAY\n") if $debug; my $t = $linkage{$opt}; $$t = $linkage{$opt} = []; print STDERR ("=> push(\@{\$L{$opt}, \"$arg\")\n") if $debug; push (@{$linkage{$opt}}, $arg); } elsif ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) { print STDERR ("=> ref(\$L{$opt}) auto-vivified", " to HASH\n") if $debug; my $t = $linkage{$opt}; $$t = $linkage{$opt} = {}; print STDERR ("=> \$\$L{$opt}->{$key} = \"$arg\"\n") if $debug; $linkage{$opt}->{$key} = $arg; } else { print STDERR ("=> \$\$L{$opt} = \"$arg\"\n") if $debug; ${$linkage{$opt}} = $arg; } } elsif ( ref($linkage{$opt}) eq 'ARRAY' ) { print STDERR ("=> push(\@{\$L{$opt}, \"$arg\")\n") if $debug; push (@{$linkage{$opt}}, $arg); } elsif ( ref($linkage{$opt}) eq 'HASH' ) { print STDERR ("=> \$\$L{$opt}->{$key} = \"$arg\"\n") if $debug; $linkage{$opt}->{$key} = 
$arg; } elsif ( ref($linkage{$opt}) eq 'CODE' ) { print STDERR ("=> &L{$opt}(\"$opt\"", $ctl->[CTL_DEST] == CTL_DEST_HASH ? ", \"$key\"" : "", ", \"$arg\")\n") if $debug; my $eval_error = do { local $@; local $SIG{__DIE__} = 'DEFAULT'; eval { &{$linkage{$opt}} (Getopt::Long::CallBack->new (name => $opt, ctl => $ctl, opctl => \%opctl, linkage => \%linkage, prefix => $prefix, ), $ctl->[CTL_DEST] == CTL_DEST_HASH ? ($key) : (), $arg); }; $@; }; print STDERR ("=> die($eval_error)\n") if $debug && $eval_error ne ''; if ( $eval_error =~ /^!/ ) { if ( $eval_error =~ /^!FINISH\b/ ) { $goon = 0; } } elsif ( $eval_error ne '' ) { warn ($eval_error); $error++; } } else { print STDERR ("Invalid REF type \"", ref($linkage{$opt}), "\" in linkage\n"); die("Getopt::Long -- internal error!\n"); } } # No entry in linkage means entry in userlinkage. elsif ( $ctl->[CTL_DEST] == CTL_DEST_ARRAY ) { if ( defined $userlinkage->{$opt} ) { print STDERR ("=> push(\@{\$L{$opt}}, \"$arg\")\n") if $debug; push (@{$userlinkage->{$opt}}, $arg); } else { print STDERR ("=>\$L{$opt} = [\"$arg\"]\n") if $debug; $userlinkage->{$opt} = [$arg]; } } elsif ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) { if ( defined $userlinkage->{$opt} ) { print STDERR ("=> \$L{$opt}->{$key} = \"$arg\"\n") if $debug; $userlinkage->{$opt}->{$key} = $arg; } else { print STDERR ("=>\$L{$opt} = {$key => \"$arg\"}\n") if $debug; $userlinkage->{$opt} = {$key => $arg}; } } else { if ( $ctl->[CTL_TYPE] eq '+' ) { print STDERR ("=> \$L{$opt} += \"$arg\"\n") if $debug; if ( defined $userlinkage->{$opt} ) { $userlinkage->{$opt} += $arg; } else { $userlinkage->{$opt} = $arg; } } else { print STDERR ("=>\$L{$opt} = \"$arg\"\n") if $debug; $userlinkage->{$opt} = $arg; } } $argcnt++; last if $argcnt >= $ctl->[CTL_AMAX] && $ctl->[CTL_AMAX] != -1; undef($arg); # Need more args? 
if ( $argcnt < $ctl->[CTL_AMIN] ) { if ( @$argv ) { if ( ValidValue($ctl, $argv->[0], 1, $argend, $prefix) ) { $arg = shift(@$argv); if ( $ctl->[CTL_TYPE] =~ /^[iIo]$/ ) { $arg =~ tr/_//d; $arg = $ctl->[CTL_TYPE] eq 'o' && $arg =~ /^0/ ? oct($arg) : 0+$arg } ($key,$arg) = $arg =~ /^([^=]+)=(.*)/ if $ctl->[CTL_DEST] == CTL_DEST_HASH; next; } warn("Value \"$$argv[0]\" invalid for option $opt\n"); $error++; } else { warn("Insufficient arguments for option $opt\n"); $error++; } } # Any more args? if ( @$argv && ValidValue($ctl, $argv->[0], 0, $argend, $prefix) ) { $arg = shift(@$argv); if ( $ctl->[CTL_TYPE] =~ /^[iIo]$/ ) { $arg =~ tr/_//d; $arg = $ctl->[CTL_TYPE] eq 'o' && $arg =~ /^0/ ? oct($arg) : 0+$arg } ($key,$arg) = $arg =~ /^([^=]+)=(.*)/ if $ctl->[CTL_DEST] == CTL_DEST_HASH; next; } } } # Not an option. Save it if we $PERMUTE and don't have a <>. elsif ( $order == $PERMUTE ) { # Try non-options call-back. my $cb; if ( defined ($cb = $linkage{'<>'}) ) { print STDERR ("=> &L{$tryopt}(\"$tryopt\")\n") if $debug; my $eval_error = do { local $@; local $SIG{__DIE__} = 'DEFAULT'; eval { # The arg to <> cannot be the CallBack object # since it may be passed to other modules that # get confused (e.g., Archive::Tar). Well, # it's not relevant for this callback anyway. &$cb($tryopt); }; $@; }; print STDERR ("=> die($eval_error)\n") if $debug && $eval_error ne ''; if ( $eval_error =~ /^!/ ) { if ( $eval_error =~ /^!FINISH\b/ ) { $goon = 0; } } elsif ( $eval_error ne '' ) { warn ($eval_error); $error++; } } else { print STDERR ("=> saving \"$tryopt\" ", "(not an option, may permute)\n") if $debug; push (@ret, $tryopt); } next; } # ...otherwise, terminate. else { # Push this one back and exit. unshift (@$argv, $tryopt); return ($error == 0); } } # Finish. 
if ( @ret && $order == $PERMUTE ) {
        #  Push back accumulated arguments
        print STDERR ("=> restoring \"", join('" "', @ret), "\"\n")
            if $debug;
        unshift (@$argv, @ret);
    }

    return ($error == 0);
}

# A readable representation of what's in an optbl.
#
# Argument: a reference to one option control array.
# Returns:  a string "[...]" holding the quoted type, canonical name and
#           default, a sigil ($ @ % &) selected by the CTL_DEST field, and
#           the CTL_AMIN / CTL_AMAX fields (shown as '' when false).
# Undefined fields are rendered as empty strings. FindOption uses this
# for its debug output.
sub OptCtl ($) {
    my ($v) = @_;
    # Work on a copy in which undefined fields are replaced, so the
    # interpolations below cannot trigger "uninitialized" warnings.
    my @v = map { defined($_) ? ($_) : ("") } @$v;
    "[".
      join(",",
           "\"$v[CTL_TYPE]\"",
           "\"$v[CTL_CNAME]\"",
           "\"$v[CTL_DEFAULT]\"",
           ("\$","\@","\%","\&")[$v[CTL_DEST] || 0],
           $v[CTL_AMIN] || '',
           $v[CTL_AMAX] || '',
#          $v[CTL_RANGE] || '',
#          $v[CTL_REPEAT] || '',
          ). "]";
}

# Parse an option specification and fill the tables.
#
# Arguments:
#   $opt   - the option specification string, e.g. "length|height=f".
#   $opctl - hash reference that receives one control entry per option
#            name (and per "no"/"no-" variant for negatable options).
# Returns:
#   (undef, $message) when the specification is malformed or
#                     inconsistent;
#   ($names[0], $orig) otherwise, i.e. the first name as stored in
#                     $opctl (possibly lowercased) and the primary name
#                     exactly as the user wrote it.
# Reads the package-level settings $bundling and $ignorecase.
# Every entry is a six-element array; presumably the positions
# correspond to the CTL_TYPE, CTL_CNAME, CTL_DEFAULT, CTL_DEST, CTL_AMIN
# and CTL_AMAX constants defined elsewhere in this file -- confirm there.
sub ParseOptionSpec ($$) {
    my ($opt, $opctl) = @_;

    # Match option spec.
    if ( $opt !~ m;^
                   (
                     # Option name
                     (?: \w+[-\w]* )
                     # Alias names, or "?"
                     (?: \| (?: \? | \w[-\w]* ) )*
                     # Aliases
                     (?: \| (?: [^-|!+=:][^|!+=:]* )? )*
                   )?
                   (
                     # Either modifiers ...
                     [!+]
                     |
                     # ... or a value/dest/repeat specification
                     [=:] [ionfs] [@%]? (?: \{\d*,?\d*\} )?
                     |
                     # ... or an optional-with-default spec
                     : (?: -?\d+ | \+ ) [@%]?
                   )?
                   $;x ) {
        return (undef, "Error in option spec: \"$opt\"\n");
    }

    # $1 is the list of names, $2 the (optional) argument specification.
    my ($names, $spec) = ($1, $2);
    $spec = '' unless defined $spec;

    # $orig keeps track of the primary name the user specified.
    # This name will be used for the internal or external linkage.
    # In other words, if the user specifies "FoO|BaR", it will
    # match any case combinations of 'foo' and 'bar', but if a global
    # variable needs to be set, it will be $opt_FoO in the exact case
    # as specified.
    my $orig;

    my @names;
    if ( defined $names ) {
        @names =  split (/\|/, $names);
        $orig = $names[0];
    }
    else {
        # A specification without any name is stored under the empty name.
        @names = ('');
        $orig = '';
    }

    # Construct the opctl entries.
    my $entry;
    if ( $spec eq '' || $spec eq '+' || $spec eq '!' ) {
        # Flag, incremental or negatable option: takes no argument.
        # Fields are hard-wired here.
        $entry = [$spec,$orig,undef,CTL_DEST_SCALAR,0,0];
    }
    elsif ( $spec =~ /^:(-?\d+|\+)([@%])?$/ ) {
        # Optional integer with an explicit default (":5") or with
        # increment semantics (":+", stored as type 'I').
        my $def = $1;
        my $dest = $2;
        my $type = $def eq '+' ? 'I' : 'i';
        $dest ||= '$';
        $dest = $dest eq '@' ? CTL_DEST_ARRAY
          : $dest eq '%' ? CTL_DEST_HASH : CTL_DEST_SCALAR;
        # Fields are hard-wired here.
        $entry = [$type,$orig,$def eq '+' ? undef : $def,
                  $dest,0,1];
    }
    else {
        # Typed argument, optionally with destination type and repeat
        # specifier. $4..$7 of this match are consumed by the statements
        # that follow, so nothing that runs a regex may be inserted
        # between the match and those uses.
        my ($mand, $type, $dest) =
          $spec =~ /^([=:])([ionfs])([@%])?(\{(\d+)?(,)?(\d+)?\})?$/;
        return (undef, "Cannot repeat while bundling: \"$opt\"\n")
          if $bundling && defined($4);
        # Minimum, the literal comma (if present) and maximum.
        my ($mi, $cm, $ma) = ($5, $6, $7);
        return (undef, "{0} is useless in option spec: \"$opt\"\n")
          if defined($mi) && !$mi && !defined($ma) && !defined($cm);

        # 'n' is accepted as a synonym for 'i'.
        $type = 'i' if $type eq 'n';
        $dest ||= '$';
        $dest = $dest eq '@' ? CTL_DEST_ARRAY
          : $dest eq '%' ? CTL_DEST_HASH : CTL_DEST_SCALAR;
        # Default minargs to 1/0 depending on mand status.
        $mi = $mand eq '=' ? 1 : 0 unless defined $mi;
        # Adjust mand status according to minargs.
        $mand = $mi ? '=' : ':';
        # Adjust maxargs.
        $ma = $mi ? $mi : 1 unless defined $ma || defined $cm;
        return (undef, "Max must be greater than zero in option spec: \"$opt\"\n")
          if defined($ma) && !$ma;
        return (undef, "Max less than min in option spec: \"$opt\"\n")
          if defined($ma) && $ma < $mi;

        # Fields are hard-wired here.
        # An undefined maximum (open-ended "{n,}") is stored as -1.
        $entry = [$type,$orig,undef,$dest,$mi,$ma||-1];
    }

    # Process all names. First is canonical, the rest are aliases.
    my $dups = '';
    foreach ( @names ) {

        # $_ aliases the element of @names, so lowercasing here also
        # changes the $names[0] that is returned below. Single-letter
        # names keep their case while bundling unless $ignorecase > 1.
        $_ = lc ($_)
          if $ignorecase > (($bundling && length($_) == 1) ? 1 : 0);

        if ( exists $opctl->{$_} ) {
            $dups .= "Duplicate specification \"$opt\" for option \"$_\"\n";
        }

        if ( $spec eq '!' ) {
            # The negated names share the '!' entry; the plain name gets
            # its own copy with an empty type, i.e. an ordinary flag.
            $opctl->{"no$_"} = $entry;
            $opctl->{"no-$_"} = $entry;
            $opctl->{$_} = [@$entry];
            $opctl->{$_}->[CTL_TYPE] = '';
        }
        else {
            $opctl->{$_} = $entry;
        }
    }

    # Duplicates are reported only when warnings ($^W) are enabled; the
    # later specification has already replaced the earlier one above.
    if ( $dups && $^W ) {
        foreach ( split(/\n+/, $dups) ) {
            warn($_."\n");
        }
    }
    ($names[0], $orig);
}

# Option lookup.
sub FindOption ($$$$$) {

    # returns (1, $opt, $ctl, $arg, $key) if okay,
    # returns (1, undef) if option in error,
    # returns (0) otherwise.
my ($argv, $prefix, $argend, $opt, $opctl) = @_;
    # $argv   - reference to the remaining argument list; values are
    #           shifted from it and unused pieces are pushed back onto it.
    # $prefix - pattern matching the option starter.
    # $argend - the option list terminator.
    # $opt    - the argument to examine.
    # $opctl  - the option control table filled by ParseOptionSpec.

    print STDERR ("=> find \"$opt\"\n") if $debug;

    return (0) unless defined($opt);
    return (0) unless $opt =~ /^($prefix)(.*)$/s;
    # A lone "-" is only an option if one was registered under the
    # empty name.
    return (0) if $opt eq "-" && !defined $opctl->{''};

    $opt = substr( $opt, length($1) ); # retain taintedness
    my $starter = $1;

    print STDERR ("=> split \"$starter\"+\"$opt\"\n") if $debug;

    my $optarg;                 # value supplied with --opt=value
    my $rest;                   # remainder from unbundling

    # If it is a long option, it may include the value.
    # With getopt_compat, only if not bundling.
    if ( ($starter=~/^$longprefix$/
          || ($getopt_compat && ($bundling == 0 || $bundling == 2)))
         && (my $oppos = index($opt, '=', 1)) > 0) {
        my $optorg = $opt;
        $opt = substr($optorg, 0, $oppos);
        $optarg = substr($optorg, $oppos + 1); # retain taintedness
        print STDERR ("=> option \"", $opt,
                      "\", optarg = \"$optarg\"\n") if $debug;
    }

    #### Look it up ###

    my $tryopt = $opt;          # option to try

    if ( ( $bundling || $bundling_values ) && $starter eq '-' ) {

        # To try overrides, obey case ignore.
        $tryopt = $ignorecase ? lc($opt) : $opt;

        # If bundling == 2, long options can override bundles.
        if ( $bundling == 2 && length($tryopt) > 1
             && defined ($opctl->{$tryopt}) ) {
            print STDERR ("=> $starter$tryopt overrides unbundling\n")
              if $debug;
        }

        # If bundling_values, option may be followed by the value.
        elsif ( $bundling_values ) {
            $tryopt = $opt;
            # Unbundle single letter option.
            $rest = length ($tryopt) > 0 ? substr ($tryopt, 1) : '';
            $tryopt = substr ($tryopt, 0, 1);
            $tryopt = lc ($tryopt) if $ignorecase > 1;
            print STDERR ("=> $starter$tryopt unbundled from ",
                          "$starter$tryopt$rest\n") if $debug;
            # Whatever remains may not be considered an option.
            $optarg = $rest eq '' ? undef : $rest;
            $rest = undef;
        }

        # Split off a single letter and leave the rest for
        # further processing.
        else {
            $tryopt = $opt;
            # Unbundle single letter option.
            $rest = length ($tryopt) > 0 ? substr ($tryopt, 1) : '';
            $tryopt = substr ($tryopt, 0, 1);
            $tryopt = lc ($tryopt) if $ignorecase > 1;
            print STDERR ("=> $starter$tryopt unbundled from ",
                          "$starter$tryopt$rest\n") if $debug;
            $rest = undef unless $rest ne '';
        }
    }

    # Try auto-abbreviation.
    elsif ( $autoabbrev && $opt ne "" ) {
        # Sort the possible long option names.
        my @names = sort(keys (%$opctl));
        # Downcase if allowed.
        $opt = lc ($opt) if $ignorecase;
        $tryopt = $opt;
        # Turn option name into pattern.
        my $pat = quotemeta ($opt);
        # Look up in option names.
        my @hits = grep (/^$pat/, @names);
        print STDERR ("=> ", scalar(@hits), " hits (@hits) with \"$pat\" ",
                      "out of ", scalar(@names), "\n") if $debug;

        # Check for ambiguous results.
        unless ( (@hits <= 1) || (grep ($_ eq $opt, @hits) == 1) ) {
            # See if all matches are for the same option.
            my %hit;
            foreach ( @hits ) {
                # Declared unconditionally: the behaviour of
                # "my $x = ... if COND" is unspecified when COND is
                # false. An undefined canonical name still yields undef.
                my $hit = $opctl->{$_}->[CTL_CNAME];
                $hit = "no" . $hit if $opctl->{$_}->[CTL_TYPE] eq '!';
                $hit{$hit} = 1;
            }
            # Remove auto-supplied options (version, help).
            if ( keys(%hit) == 2 ) {
                if ( $auto_version && exists($hit{version}) ) {
                    delete $hit{version};
                }
                elsif ( $auto_help && exists($hit{help}) ) {
                    delete $hit{help};
                }
            }
            # Now see if it really is ambiguous.
            unless ( keys(%hit) == 1 ) {
                return (0) if $passthrough;
                warn ("Option ", $opt, " is ambiguous (",
                      join(", ", @hits), ")\n");
                $error++;
                return (1, undef);
            }
            @hits = keys(%hit);
        }

        # Complete the option name, if appropriate.
        if ( @hits == 1 && $hits[0] ne $opt ) {
            $tryopt = $hits[0];
            $tryopt = lc ($tryopt)
              if $ignorecase > (($bundling && length($tryopt) == 1) ? 1 : 0);
            print STDERR ("=> option \"$opt\" -> \"$tryopt\"\n")
                if $debug;
        }
    }

    # Map to all lowercase if ignoring case.
    elsif ( $ignorecase ) {
        $tryopt = lc ($opt);
    }

    # Check validity by fetching the info.
    my $ctl = $opctl->{$tryopt};
    unless  ( defined $ctl ) {
        return (0) if $passthrough;
        # Pretend one char when bundling.
        if ( $bundling == 1 && length($starter) == 1 ) {
            $opt = substr($opt,0,1);
            unshift (@$argv, $starter.$rest) if defined $rest;
        }
        if ( $opt eq "" ) {
            warn ("Missing option after ", $starter, "\n");
        }
        else {
            warn ("Unknown option: ", $opt, "\n");
        }
        $error++;
        return (1, undef);
    }
    # Apparently valid.
    $opt = $tryopt;
    print STDERR ("=> found ", OptCtl($ctl),
                  " for \"", $opt, "\"\n") if $debug;

    #### Determine argument status ####

    # If it is an option w/o argument, we're almost finished with it.
    my $type = $ctl->[CTL_TYPE];
    my $arg;

    if ( $type eq '' || $type eq '!' || $type eq '+' ) {
        if ( defined $optarg ) {
            return (0) if $passthrough;
            warn ("Option ", $opt, " does not take an argument\n");
            $error++;
            undef $opt;
            undef $optarg if $bundling_values;
        }
        elsif ( $type eq '' || $type eq '+' ) {
            # Supply explicit value.
            $arg = 1;
        }
        else {
            $opt =~ s/^no-?//i; # strip NO prefix
            $arg = 0;           # supply explicit value
        }
        # Put the still unprocessed part of a bundle back for the next
        # round.
        unshift (@$argv, $starter.$rest) if defined $rest;
        return (1, $opt, $ctl, $arg);
    }

    # Get mandatory status and type info.
    my $mand = $ctl->[CTL_AMIN];

    # Check if there is an option argument available.
    if ( $gnu_compat ) {
        my $optargtype = 0; # none, 1 = empty, 2 = nonempty, 3 = aux
        if ( defined($optarg) ) {
            $optargtype = (length($optarg) == 0) ? 1 : 2;
        }
        elsif ( defined $rest || @$argv > 0 ) {
            # GNU getopt_long() does not accept the (optional)
            # argument to be passed to the option without = sign.
            # We do, since not doing so breaks existing scripts.
            $optargtype = 3;
        }
        if(($optargtype == 0) && !$mand) {
            my $val
              = defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT]
              : $type eq 's'                 ? ''
              :                                0;
            return (1, $opt, $ctl, $val);
        }
        return (1, $opt, $ctl, $type eq 's' ? '' : 0)
          if $optargtype == 1;  # --foo=  -> return nothing
    }

    # Check if there is an option argument available.
    if ( defined $optarg
         ? ($optarg eq '')
         : !(defined $rest || @$argv > 0) ) {
        # Complain if this option needs an argument.
#       if ( $mand && !($type eq 's' ? defined($optarg) : 0) ) {
        if ( $mand ) {
            return (0) if $passthrough;
            warn ("Option ", $opt, " requires an argument\n");
            $error++;
            return (1, undef);
        }
        if ( $type eq 'I' ) {
            # Fake incremental type.
            my @c = @$ctl;
            $c[CTL_TYPE] = '+';
            return (1, $opt, \@c, 1);
        }
        return (1, $opt, $ctl,
                defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] :
                $type eq 's' ? '' : 0);
    }

    # Get (possibly optional) argument.
    $arg = (defined $rest ? $rest
            : (defined $optarg ? $optarg : shift (@$argv)));

    # Get key if this is a "name=value" pair for a hash option.
    my $key;
    if ($ctl->[CTL_DEST] == CTL_DEST_HASH && defined $arg) {
        ($key, $arg) = ($arg =~ /^([^=]*)=(.*)$/s) ? ($1, $2)
          : ($arg, defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] :
             ($mand ? undef : ($type eq 's' ? "" : 1)));
        if (! defined $arg) {
            warn ("Option $opt, key \"$key\", requires a value\n");
            $error++;
            # Push back.
            unshift (@$argv, $starter.$rest) if defined $rest;
            return (1, undef);
        }
    }

    #### Check if the argument is valid for this option ####

    my $key_valid = $ctl->[CTL_DEST] == CTL_DEST_HASH ? "[^=]+=" : "";

    if ( $type eq 's' ) {       # string
        # A mandatory string takes anything.
        return (1, $opt, $ctl, $arg, $key) if $mand;

        # Same for optional string as a hash value
        return (1, $opt, $ctl, $arg, $key)
          if $ctl->[CTL_DEST] == CTL_DEST_HASH;

        # An optional string takes almost anything.
        return (1, $opt, $ctl, $arg, $key)
          if defined $optarg || defined $rest;
        return (1, $opt, $ctl, $arg, $key) if $arg eq "-"; # ??

        # Check for option or option list terminator.
        if ($arg eq $argend ||
            $arg =~ /^$prefix.+/) {
            # Push back.
            unshift (@$argv, $arg);
            # Supply empty value.
            $arg = '';
        }
    }

    elsif ( $type eq 'i'        # numeric/integer
            || $type eq 'I'     # numeric/integer w/ incr default
            || $type eq 'o' ) { # dec/oct/hex/bin value

        my $o_valid = $type eq 'o' ? PAT_XINT : PAT_INT;

        if ( $bundling && defined $rest
             && $rest =~ /^($key_valid)($o_valid)(.*)$/si ) {
            ($key, $arg, $rest) = ($1, $2, $+);
            chop($key) if $key;
            # Strip digit-group underscores before converting, exactly
            # as the non-bundled branch below does; otherwise the
            # numeric conversion stops at the first underscore.
            $arg =~ tr/_//d;
            $arg = ($type eq 'o' && $arg =~ /^0/) ? oct($arg) : 0+$arg;
            unshift (@$argv, $starter.$rest) if defined $rest && $rest ne '';
        }
        elsif ( $arg =~ /^$o_valid$/si ) {
            $arg =~ tr/_//d;
            $arg = ($type eq 'o' && $arg =~ /^0/) ? oct($arg) : 0+$arg;
        }
        else {
            if ( defined $optarg || $mand ) {
                if ( $passthrough ) {
                    unshift (@$argv, defined $rest ? $starter.$rest : $arg)
                      unless defined $optarg;
                    return (0);
                }
                warn ("Value \"", $arg, "\" invalid for option ",
                      $opt, " (",
                      $type eq 'o' ? "extended " : '',
                      "number expected)\n");
                $error++;
                # Push back.
                unshift (@$argv, $starter.$rest) if defined $rest;
                return (1, undef);
            }
            else {
                # Push back.
                unshift (@$argv, defined $rest ? $starter.$rest : $arg);
                if ( $type eq 'I' ) {
                    # Fake incremental type.
                    my @c = @$ctl;
                    $c[CTL_TYPE] = '+';
                    return (1, $opt, \@c, 1);
                }
                # Supply default value.
                $arg = defined($ctl->[CTL_DEFAULT]) ? $ctl->[CTL_DEFAULT] : 0;
            }
        }
    }

    elsif ( $type eq 'f' ) { # real number, int is also ok
        my $o_valid = PAT_FLOAT;
        if ( $bundling && defined $rest &&
             $rest =~ /^($key_valid)($o_valid)(.*)$/s ) {
            ($key, $arg, $rest) = ($1, $2, $+);
            chop($key) if $key;
            # Strip underscores from the value that was just extracted.
            # Doing this before the assignment above would only touch
            # the old contents of $arg, which are then discarded.
            $arg =~ tr/_//d;
            unshift (@$argv, $starter.$rest) if defined $rest && $rest ne '';
        }
        elsif ( $arg =~ /^$o_valid$/ ) {
            $arg =~ tr/_//d;
        }
        else {
            if ( defined $optarg || $mand ) {
                if ( $passthrough ) {
                    unshift (@$argv, defined $rest ? $starter.$rest : $arg)
                      unless defined $optarg;
                    return (0);
                }
                warn ("Value \"", $arg, "\" invalid for option ",
                      $opt, " (real number expected)\n");
                $error++;
                # Push back.
                unshift (@$argv, $starter.$rest) if defined $rest;
                return (1, undef);
            }
            else {
                # Push back.
                unshift (@$argv, defined $rest ? $starter.$rest : $arg);
                # Supply default value.
$arg = 0.0;
            }
        }
    }
    else {
        die("Getopt::Long internal error (Can't happen)\n");
    }
    return (1, $opt, $ctl, $arg, $key);
}

# Check whether $arg is acceptable as a (further) value for the option
# described by $ctl.
#
# Arguments: the option control array, the candidate value, a flag
# telling whether a value is mandatory, the option list terminator and
# the option prefix pattern.
# Returns a true value if the candidate can be used, a false value
# otherwise. For hash destinations only the part after the first "key="
# is checked, and a candidate without "=" is rejected.
sub ValidValue ($$$$$) {
    my ($ctl, $arg, $mand, $argend, $prefix) = @_;

    if ( $ctl->[CTL_DEST] == CTL_DEST_HASH ) {
        return 0 unless $arg =~ /[^=]+=(.*)/;
        $arg = $1;
    }

    my $type = $ctl->[CTL_TYPE];

    if ( $type eq 's' ) {       # string
        # A mandatory string takes anything.
        return (1) if $mand;

        return (1) if $arg eq "-";

        # Check for option or option list terminator.
        return 0 if $arg eq $argend || $arg =~ /^$prefix.+/;
        return 1;
    }

    elsif ( $type eq 'i'        # numeric/integer
            || $type eq 'I'     # numeric/integer w/ incr default
            || $type eq 'o' ) { # dec/oct/hex/bin value

        my $o_valid = $type eq 'o' ? PAT_XINT : PAT_INT;
        return $arg =~ /^$o_valid$/si;
    }

    elsif ( $type eq 'f' ) { # real number, int is also ok
        my $o_valid = PAT_FLOAT;
        return $arg =~ /^$o_valid$/;
    }
    die("ValidValue: Cannot happen\n");
}

# Getopt::Long Configuration.
#
# Arguments: a list of configuration keywords (case insensitive; a "no"
# or "no_" prefix negates a keyword). If the first argument is an array
# reference it is taken as a configuration previously returned by this
# routine and is restored before the keywords are applied.
# Returns a reference to an array holding the configuration that was in
# effect before the call.
# Dies on an unknown keyword or an invalid prefix pattern.
sub Configure (@) {
    my (@options) = @_;

    my $prevconfig =
      [ $error, $debug, $major_version, $minor_version, $caller,
        $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order,
        $gnu_compat, $passthrough, $genprefix, $auto_version, $auto_help,
        $longprefix, $bundling_values ];

    if ( ref($options[0]) eq 'ARRAY' ) {
        ( $error, $debug, $major_version, $minor_version, $caller,
          $autoabbrev, $getopt_compat, $ignorecase, $bundling, $order,
          $gnu_compat, $passthrough, $genprefix, $auto_version, $auto_help,
          $longprefix, $bundling_values ) = @{shift(@options)};
    }

    my $opt;
    foreach $opt ( @options ) {
        my $try = lc ($opt);
        my $action = 1;
        if ( $try =~ /^no_?(.*)$/s ) {
            $action = 0;
            $try = $+;
        }
        if ( ($try eq 'default' or $try eq 'defaults') && $action ) {
            ConfigDefaults ();
        }
        elsif ( ($try eq 'posix_default' or $try eq 'posix_defaults') ) {
            # The environment variable is overridden for the duration of
            # the ConfigDefaults call only.
            local $ENV{POSIXLY_CORRECT};
            $ENV{POSIXLY_CORRECT} = 1 if $action;
            ConfigDefaults ();
        }
        elsif ( $try eq 'auto_abbrev' or $try eq 'autoabbrev' ) {
            $autoabbrev = $action;
        }
        elsif ( $try eq 'getopt_compat' ) {
            $getopt_compat = $action;
            $genprefix = $action ? "(--|-|\\+)" : "(--|-)";
        }
        elsif ( $try eq 'gnu_getopt' ) {
            if ( $action ) {
                $gnu_compat = 1;
                $bundling = 1;
                $getopt_compat = 0;
                $genprefix = "(--|-)";
                $order = $PERMUTE;
                $bundling_values = 0;
            }
        }
        elsif ( $try eq 'gnu_compat' ) {
            $gnu_compat = $action;
            $bundling = 0;
            $bundling_values = 1;
        }
        elsif ( $try =~ /^(auto_?)?version$/ ) {
            $auto_version = $action;
        }
        elsif ( $try =~ /^(auto_?)?help$/ ) {
            $auto_help = $action;
        }
        elsif ( $try eq 'ignorecase' or $try eq 'ignore_case' ) {
            $ignorecase = $action;
        }
        elsif ( $try eq 'ignorecase_always' or $try eq 'ignore_case_always' ) {
            $ignorecase = $action ? 2 : 0;
        }
        elsif ( $try eq 'bundling' ) {
            $bundling = $action;
            $bundling_values = 0 if $action;
        }
        elsif ( $try eq 'bundling_override' ) {
            $bundling = $action ? 2 : 0;
            $bundling_values = 0 if $action;
        }
        elsif ( $try eq 'bundling_values' ) {
            $bundling_values = $action;
            $bundling = 0 if $action;
        }
        elsif ( $try eq 'require_order' ) {
            $order = $action ? $REQUIRE_ORDER : $PERMUTE;
        }
        elsif ( $try eq 'permute' ) {
            $order = $action ? $PERMUTE : $REQUIRE_ORDER;
        }
        elsif ( $try eq 'pass_through' or $try eq 'passthrough' ) {
            $passthrough = $action;
        }
        elsif ( $try =~ /^prefix=(.+)$/ && $action ) {
            $genprefix = $1;
            # Turn into regexp. Needs to be parenthesized!
            $genprefix = "(" . quotemeta($genprefix) . ")";
            eval { '' =~ /$genprefix/; };
            die("Getopt::Long: invalid pattern \"$genprefix\"\n") if $@;
        }
        elsif ( $try =~ /^prefix_pattern=(.+)$/ && $action ) {
            $genprefix = $1;
            # Parenthesize if needed.
            $genprefix = "(" . $genprefix . ")"
              unless $genprefix =~ /^\(.*\)$/;
            eval { '' =~ m"$genprefix"; };
            die("Getopt::Long: invalid pattern \"$genprefix\"\n") if $@;
        }
        elsif ( $try =~ /^long_prefix_pattern=(.+)$/ && $action ) {
            $longprefix = $1;
            # Parenthesize if needed.
            $longprefix = "(" . $longprefix . ")"
              unless $longprefix =~ /^\(.*\)$/;
            eval { '' =~ m"$longprefix"; };
            die("Getopt::Long: invalid long prefix pattern \"$longprefix\"\n") if $@;
        }
        elsif ( $try eq 'debug' ) {
            $debug = $action;
        }
        else {
            die("Getopt::Long: unknown or erroneous config parameter \"$opt\"\n")
        }
    }
    $prevconfig;
}

# Deprecated name.
sub config (@) {
    Configure (@_);
}

# Issue a standard message for --version.
#
# The arguments are mostly the same as for Pod::Usage::pod2usage:
#
#  - a number (exit value)
#  - a string (lead in message)
#  - a hash with options. See Pod::Usage for details.
#
# The message goes to -output if given, otherwise to STDOUT when the
# exit value is "NOEXIT" or less than 2, and to STDERR for higher exit
# values. Exits with the exit value unless that is "NOEXIT".
sub VersionMessage(@) {
    # Massage args.
    my $pa = setup_pa_args("version", @_);

    my $v = $main::VERSION;
    my $fh = $pa->{-output} ||
      ( ($pa->{-exitval} eq "NOEXIT" || $pa->{-exitval} < 2) ? \*STDOUT : \*STDERR );

    print $fh (defined($pa->{-message}) ? $pa->{-message} : (),
               $0, defined $v ? " version $v" : (),
               "\n",
               "(", __PACKAGE__, "::", "GetOptions",
               " version ",
               defined($Getopt::Long::VERSION_STRING)
                 ? $Getopt::Long::VERSION_STRING : $VERSION, ";",
               " Perl version ",
               $] >= 5.006 ? sprintf("%vd", $^V) : $],
               ")\n");
    exit($pa->{-exitval}) unless $pa->{-exitval} eq "NOEXIT";
}

# Issue a standard message for --help.
#
# The arguments are the same as for Pod::Usage::pod2usage:
#
#  - a number (exit value)
#  - a string (lead in message)
#  - a hash with options. See Pod::Usage for details.
#
# Dies if Pod::Usage cannot be loaded.
sub HelpMessage(@) {
    eval {
        require Pod::Usage;
        # Explicit method call instead of indirect-object syntax.
        Pod::Usage->import;
        1;
    } || die("Cannot provide help: cannot load Pod::Usage\n");

    # Note that pod2usage will issue a warning if -exitval => NOEXIT.
    pod2usage(setup_pa_args("help", @_));

}

# Helper routine to set up a normalized hash ref to be used as
# argument to pod2usage.
#
# Arguments: a tag naming the caller ("version" or "help"), followed by
# the caller's own arguments: nothing, a number (exit value), a string
# (message), a hash reference, or a key/value list.
# Returns a hash reference in which -verbose and -exitval are always
# present (both default to 0) and in which -msg has been folded into
# -message. A hash reference passed by the caller is modified in place.
sub setup_pa_args($@) {
    my $tag = shift;            # who's calling

    # If called by direct binding to an option, it will get the option
    # name and value as arguments. Remove these, if so.
    @_ = () if @_ == 2 && $_[0] eq $tag;

    my $pa;
    if ( @_ > 1 ) {
        $pa = { @_ };
    }
    else {
        $pa = shift || {};
    }

    # At this point, $pa can be a number (exit value), string
    # (message) or hash with options.

    if ( UNIVERSAL::isa($pa, 'HASH') ) {
        # Get rid of -msg vs. -message ambiguity.
        # -msg is always removed, but it is only used as a fallback: an
        # explicitly supplied -message must not be replaced by undef
        # just because -msg is absent.
        my $msg = delete($pa->{-msg});
        $pa->{-message} = $msg unless defined $pa->{-message};
    }
    elsif ( $pa =~ /^-?\d+$/ ) {
        $pa = { -exitval => $pa };
    }
    else {
        $pa = { -message => $pa };
    }

    # These are _our_ defaults.
    $pa->{-verbose} = 0 unless exists($pa->{-verbose});
    $pa->{-exitval} = 0 unless exists($pa->{-exitval});
    $pa;
}

# Sneak way to know what version the user requested.
# Records the requested version and then defers to the inherited
# VERSION method.
sub VERSION {
    $requested_version = $_[1];
    shift->SUPER::VERSION(@_);
}

package Getopt::Long::CallBack;

# Construct a callback object from a list of attribute name/value pairs.
sub new {
    my ($pkg, %atts) = @_;
    bless { %atts }, $pkg;
}

# Return the option name as a plain string.
sub name {
    my $self = shift;
    ''.$self->{name};
}

use overload
  # Treat this object as an ordinary string for legacy API.
  '""'     => \&name,
  fallback => 1;

1;

################ Documentation ################

=head1 NAME

Getopt::Long - Extended processing of command line options

=head1 SYNOPSIS

  use Getopt::Long;
  my $data   = "file.dat";
  my $length = 24;
  my $verbose;
  GetOptions ("length=i" => \$length,    # numeric
              "file=s"   => \$data,      # string
              "verbose"  => \$verbose)   # flag
  or die("Error in command line arguments\n");

=head1 DESCRIPTION

The Getopt::Long module implements an extended getopt function called
GetOptions(). It parses the command line from C<@ARGV>, recognizing
and removing specified options and their possible values.

This function adheres to the POSIX syntax for command line options,
with GNU extensions. In general, this means that options have long
names instead of single letters, and are introduced with a double dash
"--". Support for bundling of command line options, as was the case
with the more traditional single-letter approach, is provided but not
enabled by default.

=head1 Command Line Options, an Introduction

Command line operated programs traditionally take their arguments from
the command line, for example filenames or other information that the
program needs to know. Besides arguments, these programs often take
command line I<options> as well. Options are not necessary for the
program to work, hence the name 'option', but are used to modify its
default behaviour. For example, a program could do its job quietly,
but with a suitable option it could provide verbose information about
what it did.

Command line options come in several flavours. Historically, they are
preceded by a single dash C<->, and consist of a single letter.

    -l -a -c

Usually, these single-character options can be bundled:

    -lac

Options can have values, the value is placed after the option
character. Sometimes with whitespace in between, sometimes not:

    -s 24 -s24

Due to the very cryptic nature of these options, another style was
developed that used long names. So instead of a cryptic C<-l> one
could use the more descriptive C<--long>. To distinguish between a
bundle of single-character options and a long one, two dashes are used
to precede the option name. Early implementations of long options used
a plus C<+> instead. Also, option values could be specified either
like

    --size=24

or

    --size 24

The C<+> form is now obsolete and strongly deprecated.

=head1 Getting Started with Getopt::Long

Getopt::Long is the Perl5 successor of C<newgetopt.pl>. This was the
first Perl module that provided support for handling the new style of
command line options, in particular long option names, hence the Perl5
name Getopt::Long. This module also supports single-character options
and bundling.

To use Getopt::Long from a Perl program, you must include the
following line in your Perl program:

    use Getopt::Long;

This will load the core of the Getopt::Long module and prepare your
program for using it. Most of the actual Getopt::Long code is not
loaded until you really call one of its functions.

In the default configuration, options names may be abbreviated to
uniqueness, case does not matter, and a single dash is sufficient,
even for long option names. Also, options may be placed between
non-option arguments. See L</Configuring Getopt::Long> for more
details on how to configure Getopt::Long.

=head2 Simple options

The most simple options are the ones that take no values. Their mere
presence on the command line enables the option. Popular examples are:

    --all --verbose --quiet --debug

Handling simple options is straightforward:

    my $verbose = '';   # option variable with default value (false)
    my $all = '';       # option variable with default value (false)
    GetOptions ('verbose' => \$verbose, 'all' => \$all);

The call to GetOptions() parses the command line arguments that are
present in C<@ARGV> and sets the option variable to the value C<1> if
the option did occur on the command line. Otherwise, the option
variable is not touched. Setting the option value to true is often
called I<enabling> the option.

The option name as specified to the GetOptions() function is called
the option I<specification>. Later we'll see that this specification
can contain more than just the option name. The reference to the
variable is called the option I<destination>.

GetOptions() will return a true value if the command line could be
processed successfully. Otherwise, it will write error messages using
die() and warn(), and return a false result.

=head2 A little bit less simple options

Getopt::Long supports two useful variants of simple options:
I<negatable> options and I<incremental> options.

A negatable option is specified with an exclamation mark C<!> after the
option name:

    my $verbose = '';   # option variable with default value (false)
    GetOptions ('verbose!' => \$verbose);

Now, using C<--verbose> on the command line will enable C<$verbose>,
as expected. But it is also allowed to use C<--noverbose>, which will
disable C<$verbose> by setting its value to C<0>.
Using a suitable default value, the program can find out whether
C<$verbose> is false by default, or disabled by using C<--noverbose>.

An incremental option is specified with a plus C<+> after the
option name:

    my $verbose = '';   # option variable with default value (false)
    GetOptions ('verbose+' => \$verbose);

Using C<--verbose> on the command line will increment the value of
C<$verbose>. This way the program can keep track of how many times the
option occurred on the command line. For example, each occurrence of
C<--verbose> could increase the verbosity level of the program.

=head2 Mixing command line option with other arguments

Usually programs take command line options as well as other arguments,
for example, file names. It is good practice to always specify the
options first, and the other arguments last. Getopt::Long will,
however, allow the options and arguments to be mixed and 'filter out'
all the options before passing the rest of the arguments to the
program. To stop Getopt::Long from processing further arguments,
insert a double dash C<--> on the command line:

    --size 24 -- --all

In this example, C<--all> will I<not> be treated as an option, but
passed to the program unharmed, in C<@ARGV>.

=head2 Options with values

For options that take values it must be specified whether the option
value is required or not, and what kind of value the option expects.

Three kinds of values are supported: integer numbers, floating point
numbers, and strings.

If the option value is required, Getopt::Long will take the
command line argument that follows the option and assign this to the
option variable. If, however, the option value is specified as
optional, this will only be done if that value does not look like a
valid command line option itself.

    my $tag = '';       # option variable with default value
    GetOptions ('tag=s' => \$tag);

In the option specification, the option name is followed by an equals
sign C<=> and the letter C<s>.
The equals sign indicates that this
option requires a value. The letter C<s> indicates that this value is
an arbitrary string. Other possible value types are C<i> for integer
values, and C<f> for floating point values. Using a colon C<:> instead
of the equals sign indicates that the option value is optional. In
this case, if no suitable value is supplied, string valued options get
an empty string C<''> assigned, while numeric options are set to C<0>.

=head2 Options with multiple values

Options sometimes take several values. For example, a program could
use multiple directories to search for library files:

    --library lib/stdlib --library lib/extlib

To accomplish this behaviour, simply specify an array reference as the
destination for the option:

    GetOptions ("library=s" => \@libfiles);

Alternatively, you can specify that the option can have multiple
values by adding a "@", and pass a reference to a scalar as the
destination:

    GetOptions ("library=s@" => \$libfiles);

Used with the example above, C<@libfiles> c.q. C<@$libfiles> would
contain two strings upon completion: C<"lib/stdlib"> and
C<"lib/extlib">, in that order. It is also possible to specify that
only integer or floating point numbers are acceptable values.

Often it is useful to allow comma-separated lists of values as well as
multiple occurrences of the options. This is easy using Perl's split()
and join() operators:

    GetOptions ("library=s" => \@libfiles);
    @libfiles = split(/,/,join(',',@libfiles));

Of course, it is important to choose the right separator string for
each purpose.

Warning: What follows is an experimental feature.

Options can take multiple values at once, for example

    --coordinates 52.2 16.4 --rgbcolor 255 255 149

This can be accomplished by adding a repeat specifier to the option
specification. Repeat specifiers are very similar to the C<{...}>
repeat specifiers that can be used with regular expression patterns.
For example, the above command line would be handled as follows:

    GetOptions('coordinates=f{2}' => \@coor, 'rgbcolor=i{3}' => \@color);

The destination for the option must be an array or array reference.

It is also possible to specify the minimal and maximal number of
arguments an option takes. C<foo=s{2,4}> indicates an option that
takes at least two and at most 4 arguments. C<foo=s{1,}> indicates one
or more values; C<foo:s{,}> indicates zero or more option values.

=head2 Options with hash values

If the option destination is a reference to a hash, the option will
take, as value, strings of the form I<key>C<=>I<value>. The value will
be stored with the specified key in the hash.

    GetOptions ("define=s" => \%defines);

Alternatively you can use:

    GetOptions ("define=s%" => \$defines);

When used with command line options:

    --define os=linux --define vendor=redhat

the hash C<%defines> (or C<%$defines>) will contain two keys, C<"os">
with value C<"linux"> and C<"vendor"> with value C<"redhat">. It is
also possible to specify that only integer or floating point numbers
are acceptable values. The keys are always taken to be strings.

=head2 User-defined subroutines to handle options

Ultimate control over what should be done when (actually: each time)
an option is encountered on the command line can be achieved by
designating a reference to a subroutine (or an anonymous subroutine)
as the option destination. When GetOptions() encounters the option, it
will call the subroutine with two or three arguments. The first
argument is the name of the option. (Actually, it is an object that
stringifies to the name of the option.) For a scalar or array
destination, the second argument is the value to be stored. For a hash
destination, the second argument is the key to the hash, and the third
argument the value to be stored. It is up to the subroutine to store
the value, or do whatever it thinks is appropriate.

A trivial application of this mechanism is to implement options that
are related to each other.
For example:

    my $verbose = '';   # option variable with default value (false)
    GetOptions ('verbose' => \$verbose,
                'quiet'   => sub { $verbose = 0 });

Here C<--verbose> and C<--quiet> control the same variable
C<$verbose>, but with opposite values.

If the subroutine needs to signal an error, it should call die() with
the desired error message as its argument. GetOptions() will catch the
die(), issue the error message, and record that an error result must
be returned upon completion.

If the text of the error message starts with an exclamation mark C<!>
it is interpreted specially by GetOptions(). There is currently one
special command implemented: C<die("!FINISH")> will cause GetOptions()
to stop processing options, as if it encountered a double dash C<-->.

In version 2.37 the first argument to the callback function was
changed from string to object. This was done to make room for
extensions and more detailed control. The object stringifies to the
option name so this change should not introduce compatibility
problems.

Here is an example of how to access the option name and value from
within a subroutine:

    GetOptions ('opt=i' => \&handler);
    sub handler {
        my ($opt_name, $opt_value) = @_;
        print("Option name is $opt_name and value is $opt_value\n");
    }

=head2 Options with multiple names

Often it is user friendly to supply alternate mnemonic names for
options. For example C<--height> could be an alternate name for
C<--length>. Alternate names can be included in the option
specification, separated by vertical bar C<|> characters. To implement
the above example:

    GetOptions ('length|height=f' => \$length);

The first name is called the I<primary> name, the other names are
called I<aliases>. When using a hash to store options, the key will
always be the primary name.

Multiple alternate names are possible.

=head2 Case and abbreviations

Without additional configuration, GetOptions() will ignore the case of
option names, and allow the options to be abbreviated to uniqueness.
GetOptions ('length|height=f' => \$length, "head" => \$head); This call will allow C<--l> and C<--L> for the length option, but requires at least C<--hea> and C<--hei> for the head and height options. =head2 Summary of Option Specifications Each option specifier consists of two parts: the name specification and the argument specification. The name specification contains the name of the option, optionally followed by a list of alternative names separated by vertical bar characters. length option name is "length" length|size|l name is "length", aliases are "size" and "l" The argument specification is optional. If omitted, the option is considered boolean; a value of 1 will be assigned when the option is used on the command line. The argument specification can be =over 4 =item ! The option does not take an argument and may be negated by prefixing it with "no" or "no-". E.g. C<"foo!"> will allow C<--foo> (a value of 1 will be assigned) as well as C<--nofoo> and C<--no-foo> (a value of 0 will be assigned). If the option has aliases, this applies to the aliases as well. Using negation on a single letter option when bundling is in effect is pointless and will result in a warning. =item + The option does not take an argument and will be incremented by 1 every time it appears on the command line. E.g. C<"more+">, when used with C<--more --more --more>, will increment the value three times, resulting in a value of 3 (provided it was 0 or undefined at first). The C<+> specifier is ignored if the option destination is not a scalar. =item = I<type> [ I<desttype> ] [ I<repeat> ] The option requires an argument of the given type. Supported types are: =over 4 =item s String. An arbitrary sequence of characters. It is valid for the argument to start with C<-> or C<-->. =item i Integer. An optional leading plus or minus sign, followed by a sequence of digits. =item o Extended integer, Perl style.
This can be either an optional leading plus or minus sign, followed by a sequence of digits, or an octal string (a zero, optionally followed by '0', '1', .. '7'), or a hexadecimal string (C<0x> followed by '0' .. '9', 'a' .. 'f', case insensitive), or a binary string (C<0b> followed by a series of '0' and '1'). =item f Real number. For example C<3.14>, C<-6.23E24> and so on. =back The I<desttype> can be C<@> or C<%> to specify that the option is list or hash valued. This is only needed when the destination for the option value is not otherwise specified. It should be omitted when not needed. The I<repeat> specifies the number of values this option takes per occurrence on the command line. It has the format C<{> [ I<min> ] [ C<,> [ I<max> ] ] C<}>. I<min> denotes the minimal number of arguments. It defaults to 1 for options with C<=> and to 0 for options with C<:>, see below. Note that I<min> overrules the C<=> / C<:> semantics. I<max> denotes the maximum number of arguments. It must be at least I<min>. If I<max> is omitted, I<but the comma is not>, there is no upper bound to the number of argument values taken. =item : I<type> [ I<desttype> ] Like C<=>, but designates the argument as optional. If omitted, an empty string will be assigned to string-valued options, and the value zero to numeric options. Note that if a string argument starts with C<-> or C<-->, it will be considered an option in itself. =item : I<number> [ I<desttype> ] Like C<:i>, but if the value is omitted, the I<number> will be assigned. =item : + [ I<desttype> ] Like C<:i>, but if the value is omitted, the current value for the option will be incremented. =back =head1 Advanced Possibilities =head2 Object oriented interface Getopt::Long can be used in an object oriented way as well: use Getopt::Long; $p = Getopt::Long::Parser->new; $p->configure(...configuration options...); if ($p->getoptions(...options descriptions...)) ... if ($p->getoptionsfromarray( \@array, ...options descriptions...)) ...
Configuration options can be passed to the constructor: $p = Getopt::Long::Parser->new(config => [...configuration options...]); =head2 Thread Safety Getopt::Long is thread safe when using ithreads as of Perl 5.8. It is I<not> thread safe when using the older (experimental and now obsolete) threads implementation that was added to Perl 5.005. =head2 Documentation and help texts Getopt::Long encourages the use of Pod::Usage to produce help messages. For example: use Getopt::Long; use Pod::Usage; my $man = 0; my $help = 0; GetOptions('help|?' => \$help, man => \$man) or pod2usage(2); pod2usage(1) if $help; pod2usage(-exitval => 0, -verbose => 2) if $man; __END__ =head1 NAME sample - Using Getopt::Long and Pod::Usage =head1 SYNOPSIS sample [options] [file ...] Options: -help brief help message -man full documentation =head1 OPTIONS =over 8 =item B<-help> Prints a brief help message and exits. =item B<-man> Prints the manual page and exits. =back =head1 DESCRIPTION B<This program> will read the given input file(s) and do something useful with the contents thereof. =cut See L<Pod::Usage> for details. =head2 Parsing options from an arbitrary array By default, GetOptions parses the options that are present in the global array C<@ARGV>. A special entry C<GetOptionsFromArray> can be used to parse options from an arbitrary array. use Getopt::Long qw(GetOptionsFromArray); $ret = GetOptionsFromArray(\@myopts, ...); When used like this, options and their possible values are removed from C<@myopts>; the global C<@ARGV> is not touched at all. The following two calls behave identically: $ret = GetOptions( ... ); $ret = GetOptionsFromArray(\@ARGV, ... ); This also means that a first argument hash reference now becomes the second argument: $ret = GetOptions(\%opts, ... ); $ret = GetOptionsFromArray(\@ARGV, \%opts, ... ); =head2 Parsing options from an arbitrary string A special entry C<GetOptionsFromString> can be used to parse options from an arbitrary string.
use Getopt::Long qw(GetOptionsFromString); $ret = GetOptionsFromString($string, ...); The contents of the string are split into arguments using a call to C<Text::ParseWords::shellwords>. As with C<GetOptionsFromArray>, the global C<@ARGV> is not touched. It is possible that, upon completion, not all arguments in the string have been processed. C<GetOptionsFromString> will, when called in list context, return both the return status and an array reference to any remaining arguments: ($ret, $args) = GetOptionsFromString($string, ... ); If any arguments remain, and C<GetOptionsFromString> was not called in list context, a message will be given and C<GetOptionsFromString> will return failure. As with GetOptionsFromArray, a first argument hash reference now becomes the second argument. =head2 Storing options values in a hash Sometimes, for example when there are a lot of options, having a separate variable for each of them can be cumbersome. GetOptions() supports, as an alternative mechanism, storing options values in a hash. To obtain this, a reference to a hash must be passed I<as the first argument> to GetOptions(). For each option that is specified on the command line, the option value will be stored in the hash with the option name as key. Options that are not actually used on the command line will not be put in the hash; in other words, C<exists($h{option})> (or defined()) can be used to test if an option was used. The drawback is that warnings will be issued if the program runs under C<use warnings> and uses C<$h{option}> without testing with exists() or defined() first.
my %h = (); GetOptions (\%h, 'length=i'); # will store in $h{length} For options that take list or hash values, it is necessary to indicate this by appending an C<@> or C<%> sign after the type: GetOptions (\%h, 'colours=s@'); # will push to @{$h{colours}} To make things more complicated, the hash may contain references to the actual destinations, for example: my $len = 0; my %h = ('length' => \$len); GetOptions (\%h, 'length=i'); # will store in $len This example is fully equivalent with: my $len = 0; GetOptions ('length=i' => \$len); # will store in $len Any mixture is possible. For example, the most frequently used options could be stored in variables while all other options get stored in the hash: my $verbose = 0; # frequently referred my $debug = 0; # frequently referred my %h = ('verbose' => \$verbose, 'debug' => \$debug); GetOptions (\%h, 'verbose', 'debug', 'filter', 'size=i'); if ( $verbose ) { ... } if ( exists $h{filter} ) { ... option 'filter' was specified ... } =head2 Bundling With bundling it is possible to set several single-character options at once. For example if C, C and C are all valid options, -vax will set all three. Getopt::Long supports three styles of bundling. To enable bundling, a call to Getopt::Long::Configure is required. The simplest style of bundling can be enabled with: Getopt::Long::Configure ("bundling"); Configured this way, single-character options can be bundled but long options B always start with a double dash C<--> to avoid ambiguity. For example, when C, C, C and C are all valid options, -vax will set C, C and C, but --vax will set C. The second style of bundling lifts this restriction. It can be enabled with: Getopt::Long::Configure ("bundling_override"); Now, C<-vax> will set the option C. In all of the above cases, option values may be inserted in the bundle. For example: -h24w80 is equivalent to -h 24 -w 80 A third style of bundling allows only values to be bundled with options. 
It can be enabled with: Getopt::Long::Configure ("bundling_values"); Now, C<-h24> will set the option C to C<24>, but option bundles like C<-vxa> and C<-h24w80> are flagged as errors. Enabling C will disable the other two styles of bundling. When configured for bundling, single-character options are matched case sensitive while long options are matched case insensitive. To have the single-character options matched case insensitive as well, use: Getopt::Long::Configure ("bundling", "ignorecase_always"); It goes without saying that bundling can be quite confusing. =head2 The lonesome dash Normally, a lone dash C<-> on the command line will not be considered an option. Option processing will terminate (unless "permute" is configured) and the dash will be left in C<@ARGV>. It is possible to get special treatment for a lone dash. This can be achieved by adding an option specification with an empty name, for example: GetOptions ('' => \$stdio); A lone dash on the command line will now be a legal option, and using it will set variable C<$stdio>. =head2 Argument callback A special option 'name' C<< <> >> can be used to designate a subroutine to handle non-option arguments. When GetOptions() encounters an argument that does not look like an option, it will immediately call this subroutine and passes it one parameter: the argument name. Well, actually it is an object that stringifies to the argument name. For example: my $width = 80; sub process { ... } GetOptions ('width=i' => \$width, '<>' => \&process); When applied to the following command line: arg1 --width=72 arg2 --width=60 arg3 This will call C while C<$width> is C<80>, C while C<$width> is C<72>, and C while C<$width> is C<60>. This feature requires configuration option B, see section L. =head1 Configuring Getopt::Long Getopt::Long can be configured by calling subroutine Getopt::Long::Configure(). This subroutine takes a list of quoted strings, each specifying a configuration option to be enabled, e.g. 
C, or disabled, e.g. C. Case does not matter. Multiple calls to Configure() are possible. Alternatively, as of version 2.24, the configuration options may be passed together with the C statement: use Getopt::Long qw(:config no_ignore_case bundling); The following options are available: =over 12 =item default This option causes all configuration options to be reset to their default values. =item posix_default This option causes all configuration options to be reset to their default values as if the environment variable POSIXLY_CORRECT had been set. =item auto_abbrev Allow option names to be abbreviated to uniqueness. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. =item getopt_compat Allow C<+> to start options. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. =item gnu_compat C controls whether C<--opt=> is allowed, and what it should do. Without C, C<--opt=> gives an error. With C, C<--opt=> will give option C and empty value. This is the way GNU getopt_long() does it. Note that C<--opt value> is still accepted, even though GNU getopt_long() doesn't. =item gnu_getopt This is a short way of setting C C C C. With C, command line handling should be reasonably compatible with GNU getopt_long(). =item require_order Whether command line arguments are allowed to be mixed with options. Default is disabled unless environment variable POSIXLY_CORRECT has been set, in which case C is enabled. See also C, which is the opposite of C. =item permute Whether command line arguments are allowed to be mixed with options. Default is enabled unless environment variable POSIXLY_CORRECT has been set, in which case C is disabled. Note that C is the opposite of C. 
If C is enabled, this means that --foo arg1 --bar arg2 arg3 is equivalent to --foo --bar arg1 arg2 arg3 If an argument callback routine is specified, C<@ARGV> will always be empty upon successful return of GetOptions() since all options have been processed. The only exception is when C<--> is used: --foo arg1 --bar arg2 -- arg3 This will call the callback routine for arg1 and arg2, and then terminate GetOptions() leaving C<"arg3"> in C<@ARGV>. If C is enabled, options processing terminates when the first non-option is encountered. --foo arg1 --bar arg2 arg3 is equivalent to --foo -- arg1 --bar arg2 arg3 If C is also enabled, options processing will terminate at the first unrecognized option, or non-option, whichever comes first. =item bundling (default: disabled) Enabling this option will allow single-character options to be bundled. To distinguish bundles from long option names, long options I be introduced with C<--> and bundles with C<->. Note that, if you have options C, C and C, and auto_abbrev enabled, possible arguments and option settings are: using argument sets option(s) ------------------------------------------ -a, --a a -l, --l l -al, -la, -ala, -all,... a, l --al, --all all The surprising part is that C<--a> sets option C (due to auto completion), not C. Note: disabling C also disables C. =item bundling_override (default: disabled) If C is enabled, bundling is enabled as with C but now long option names override option bundles. Note: disabling C also disables C. B Using option bundling can easily lead to unexpected results, especially when mixing long options and bundles. Caveat emptor. =item ignore_case (default: enabled) If enabled, case is ignored when matching option names. If, however, bundling is enabled as well, single character options will be treated case-sensitive. With C, option specifications for options that only differ in case, e.g., C<"foo"> and C<"Foo">, will be flagged as duplicates. Note: disabling C also disables C. 
=item ignore_case_always (default: disabled) When bundling is in effect, case is ignored on single-character options also. Note: disabling C also disables C. =item auto_version (default:disabled) Automatically provide support for the B<--version> option if the application did not specify a handler for this option itself. Getopt::Long will provide a standard version message that includes the program name, its version (if $main::VERSION is defined), and the versions of Getopt::Long and Perl. The message will be written to standard output and processing will terminate. C will be enabled if the calling program explicitly specified a version number higher than 2.32 in the C or C statement. =item auto_help (default:disabled) Automatically provide support for the B<--help> and B<-?> options if the application did not specify a handler for this option itself. Getopt::Long will provide a help message using module L. The message, derived from the SYNOPSIS POD section, will be written to standard output and processing will terminate. C will be enabled if the calling program explicitly specified a version number higher than 2.32 in the C or C statement. =item pass_through (default: disabled) With C anything that is unknown, ambiguous or supplied with an invalid option will not be flagged as an error. Instead the unknown option(s) will be passed to the catchall C<< <> >> if present, otherwise through to C<@ARGV>. This makes it possible to write wrapper scripts that process only part of the user supplied command line arguments, and pass the remaining options to some other program. If C is enabled, options processing will terminate at the first unrecognized option, or non-option, whichever comes first and all remaining arguments are passed to C<@ARGV> instead of the catchall C<< <> >> if present. However, if C is enabled instead, results can become confusing. Note that the options terminator (default C<-->), if present, will also be passed through in C<@ARGV>. 
=item prefix The string that starts options. If a constant string is not sufficient, see C. =item prefix_pattern A Perl pattern that identifies the strings that introduce options. Default is C<--|-|\+> unless environment variable POSIXLY_CORRECT has been set, in which case it is C<--|->. =item long_prefix_pattern A Perl pattern that allows the disambiguation of long and short prefixes. Default is C<-->. Typically you only need to set this if you are using nonstandard prefixes and want some or all of them to have the same semantics as '--' does under normal circumstances. For example, setting prefix_pattern to C<--|-|\+|\/> and long_prefix_pattern to C<--|\/> would add Win32 style argument handling. =item debug (default: disabled) Enable debugging output. =back =head1 Exportable Methods =over =item VersionMessage This subroutine provides a standard version message. Its argument can be: =over 4 =item * A string containing the text of a message to print I printing the standard message. =item * A numeric value corresponding to the desired exit status. =item * A reference to a hash. =back If more than one argument is given then the entire argument list is assumed to be a hash. If a hash is supplied (either as a reference or as a list) it should contain one or more elements with the following keys: =over 4 =item C<-message> =item C<-msg> The text of a message to print immediately prior to printing the program's usage message. =item C<-exitval> The desired exit status to pass to the B function. This should be an integer, or else the string "NOEXIT" to indicate that control should simply be returned without terminating the invoking process. =item C<-output> A reference to a filehandle, or the pathname of a file to which the usage message should be written. The default is C<\*STDERR> unless the exit value is less than 2 (in which case the default is C<\*STDOUT>). 
=back You cannot tie this routine directly to an option, e.g.: GetOptions("version" => \&VersionMessage); Use this instead: GetOptions("version" => sub { VersionMessage() }); =item HelpMessage This subroutine produces a standard help message, derived from the program's POD section SYNOPSIS using L. It takes the same arguments as VersionMessage(). In particular, you cannot tie it directly to an option, e.g.: GetOptions("help" => \&HelpMessage); Use this instead: GetOptions("help" => sub { HelpMessage() }); =back =head1 Return values and Errors Configuration errors and errors in the option definitions are signalled using die() and will terminate the calling program unless the call to Getopt::Long::GetOptions() was embedded in C, or die() was trapped using C<$SIG{__DIE__}>. GetOptions returns true to indicate success. It returns false when the function detected one or more errors during option parsing. These errors are signalled using warn() and can be trapped with C<$SIG{__WARN__}>. =head1 Legacy The earliest development of C started in 1990, with Perl version 4. As a result, its development, and the development of Getopt::Long, has gone through several stages. Since backward compatibility has always been extremely important, the current version of Getopt::Long still supports a lot of constructs that nowadays are no longer necessary or otherwise unwanted. This section describes briefly some of these 'features'. =head2 Default destinations When no destination is specified for an option, GetOptions will store the resultant value in a global variable named CI, where I is the primary name of this option. When a program executes under C (recommended), these variables must be pre-declared with our() or C. our $opt_length = 0; GetOptions ('length=i'); # will store in $opt_length To yield a usable Perl variable, characters that are not part of the syntax for variables are translated to underscores. 
For example, C<--fpp-struct-return> will set the variable C<$opt_fpp_struct_return>. Note that this variable resides in the namespace of the calling program, not necessarily C<main>. For example: GetOptions ("size=i", "sizes=i@"); with command line "-size 10 -sizes 24 -sizes 48" will perform the equivalent of the assignments $opt_size = 10; @opt_sizes = (24, 48); =head2 Alternative option starters A string of alternative option starter characters may be passed as the first argument (or the first argument after a leading hash reference argument). my $len = 0; GetOptions ('/', 'length=i' => \$len); Now the command line may look like: /length 24 -- arg Note that terminating options processing still requires a double dash C<-->. GetOptions() will not interpret a leading C<< "<>" >> as option starters if the next argument is a reference. To force C<< "<" >> and C<< ">" >> as option starters, use C<< "><" >>. Confusing? Well, B<using a starter argument is strongly deprecated> anyway. =head2 Configuration variables Previous versions of Getopt::Long used variables for the purpose of configuring. Although manipulating these variables still works, it is strongly encouraged to use the C<Configure> routine that was introduced in version 2.17. Besides, it is much easier. =head1 Tips and Techniques =head2 Pushing multiple values in a hash option Sometimes you want to combine the best of hashes and arrays. For example, the command line: --list add=first --list add=second --list add=third where each successive 'list add' option will push the value of add into array ref $list->{'add'}. The result would be like $list->{add} = [qw(first second third)]; This can be accomplished with a destination routine: GetOptions('list=s%' => sub { push(@{$list->{$_[1]}}, $_[2]) }); =head1 Troubleshooting =head2 GetOptions does not return a false result when an option is not supplied That's why they're called 'options'. =head2 GetOptions does not split the command line correctly The command line is not split by GetOptions, but by the command line interpreter (CLI). On Unix, this is the shell. On Windows, it is COMMAND.COM or CMD.EXE. Other operating systems have other CLIs.
It is important to know that these CLIs may behave differently when the command line contains special characters, in particular quotes or backslashes. For example, with Unix shells you can use single quotes (C<'>) and double quotes (C<">) to group words together. The following alternatives are equivalent on Unix: "two words" 'two words' two\ words In case of doubt, insert the following statement in front of your Perl program: print STDERR (join("|",@ARGV),"\n"); to verify how your CLI passes the arguments to the program. =head2 Undefined subroutine &main::GetOptions called Are you running Windows, and did you write use GetOpt::Long; (note the capital 'O')? =head2 How do I put a "-?" option into a Getopt::Long? You can only obtain this using an alias, and Getopt::Long of at least version 2.13. use Getopt::Long; GetOptions ("help|?"); # -help and -? will both set $opt_help Other characters that can't appear in Perl identifiers are also supported as aliases with Getopt::Long of at least version 2.39. As of version 2.32 Getopt::Long provides auto-help, a quick and easy way to add the options --help and -? to your program, and handle them. See C<auto_help> in section L<Configuring Getopt::Long>. =head1 AUTHOR Johan Vromans =head1 COPYRIGHT AND DISCLAIMER This program is Copyright 1990,2015 by Johan Vromans. This program is free software; you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. If you do not have a copy of the GNU General Public License write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
=cut
GETOPT_LONG

$fatpacked{"HTTP/Tiny.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'HTTP_TINY';
# vim: ts=4 sts=4 sw=4 et:
package HTTP::Tiny;
use strict;
use warnings;
# ABSTRACT: A small, simple, correct HTTP/1.1 client

our $VERSION = '0.070';

# Raise an error via Carp::croak; Carp is loaded lazily, only when an
# error actually has to be reported.
sub _croak { require Carp; Carp::croak(@_) }

#pod =method new
#pod
#pod     $http = HTTP::Tiny->new( %attributes );
#pod
#pod This constructor returns a new HTTP::Tiny object.  Valid attributes include:
#pod
#pod =for :list
#pod * C<agent> — A user-agent string (defaults to 'HTTP-Tiny/$VERSION'). If
#pod   C<agent> — ends in a space character, the default user-agent string is
#pod   appended.
#pod * C<cookie_jar> — An instance of L<HTTP::CookieJar> — or equivalent class
#pod   that supports the C<add> and C<cookie_header> methods
#pod * C<default_headers> — A hashref of default headers to apply to requests
#pod * C<local_address> — The local IP address to bind to
#pod * C<keep_alive> — Whether to reuse the last connection (if for the same
#pod   scheme, host and port) (defaults to 1)
#pod * C<max_redirect> — Maximum number of redirects allowed (defaults to 5)
#pod * C<max_size> — Maximum response size in bytes (only when not using a data
#pod   callback).  If defined, responses larger than this will return an
#pod   exception.
#pod * C<http_proxy> — URL of a proxy server to use for HTTP connections
#pod   (default is C<$ENV{http_proxy}> — if set)
#pod * C<https_proxy> — URL of a proxy server to use for HTTPS connections
#pod   (default is C<$ENV{https_proxy}> — if set)
#pod * C<proxy> — URL of a generic proxy server for both HTTP and HTTPS
#pod   connections (default is C<$ENV{all_proxy}> — if set)
#pod * C<no_proxy> — List of domain suffixes that should not be proxied.  Must
#pod   be a comma-separated string or an array reference. (default is
#pod   C<$ENV{no_proxy}> —)
#pod * C<timeout> — Request timeout in seconds (default is 60) If a socket open,
#pod   read or write takes longer than the timeout, an exception is thrown.
#pod * C<verify_SSL> — A boolean that indicates whether to validate the SSL
#pod   certificate of an C<https> — connection (default is false)
#pod * C<SSL_options> — A hashref of C<SSL_*> — options to pass through to
#pod   L<IO::Socket::SSL>
#pod
#pod Passing an explicit C<undef> for C<proxy>, C<http_proxy> or C<https_proxy> will
#pod prevent getting the corresponding proxies from the environment.
#pod
#pod Exceptions from C<max_size>, C<timeout> or other errors will result in a
#pod pseudo-HTTP status code of 599 and a reason of "Internal Exception". The
#pod content field in the response will contain the text of the exception.
#pod
#pod The C<keep_alive> parameter enables a persistent connection, but only to a
#pod single destination scheme, host and port.  Also, if any connection-relevant
#pod attributes are modified, or if the process ID or thread ID change, the
#pod persistent connection will be dropped.  If you want persistent connections
#pod across multiple destinations, use multiple HTTP::Tiny objects.
#pod
#pod See L</SSL SUPPORT> for more on the C<verify_SSL> and C<SSL_options> attributes.
#pod
#pod =cut

# Names of the attributes that get a generated get/set accessor below and
# that new() copies verbatim from its arguments.
my @attributes;
BEGIN {
    @attributes = qw(
        cookie_jar default_headers http_proxy https_proxy keep_alive
        local_address max_redirect max_size proxy no_proxy
        SSL_options verify_SSL
    );
    # Attributes that may be changed without discarding the cached handle
    my %persist_ok = map {; $_ => 1 } qw(
        cookie_jar default_headers max_redirect max_size
    );
    no strict 'refs';
    no warnings 'uninitialized';
    for my $accessor ( @attributes ) {
        # With an argument: store it (dropping $self->{handle} first when a
        # non-persist_ok attribute changes value).  Without: return the value.
        *{$accessor} = sub {
            @_ > 1
                ? do {
                    delete $_[0]->{handle} if !$persist_ok{$accessor} && $_[1] ne $_[0]->{$accessor};
                    $_[0]->{$accessor} = $_[1]
                }
                : $_[0]->{$accessor};
        };
    }
}

# Get or set the user-agent string.  A new value that ends in a space has
# the default agent string ($self->_agent) appended to it.
sub agent {
    my($self, $agent) = @_;
    if( @_ > 1 ){
        $self->{agent} =
            (defined $agent && $agent =~ / $/) ? $agent . $self->_agent : $agent;
    }
    return $self->{agent};
}

# Get or set the timeout.  A new value is also passed on to the cached
# handle, when there is one.
sub timeout {
    my ($self, $timeout) = @_;
    if ( @_ > 1 ) {
        $self->{timeout} = $timeout;
        if ($self->{handle}) {
            $self->{handle}->timeout($timeout);
        }
    }
    return $self->{timeout};
}

# Constructor; see the "=method new" documentation above for the attributes.
# verify_SSL is also accepted under the spelling verify_ssl.
sub new {
    my($class, %args) = @_;

    my $self = {
        max_redirect => 5,
        timeout      => defined $args{timeout} ? $args{timeout} : 60,
        keep_alive   => 1,
        verify_SSL   => $args{verify_SSL} || $args{verify_ssl} || 0, # no verification by default
        no_proxy     => $ENV{no_proxy},
    };

    bless $self, $class;

    $class->_validate_cookie_jar( $args{cookie_jar} ) if $args{cookie_jar};

    # Explicitly passed attributes override the defaults set above
    for my $key ( @attributes ) {
        $self->{$key} = $args{$key} if exists $args{$key}
    }

    $self->agent( exists $args{agent} ? $args{agent} : $class->_agent );

    $self->_set_proxies;

    return $self;
}

# Fill in proxy, http_proxy, https_proxy and no_proxy from the environment
# where the caller did not supply them, validate each proxy URL through
# _split_proxy, and normalise no_proxy to an array reference.
sub _set_proxies {
    my ($self) = @_;

    # get proxies from %ENV only if not provided; explicit undef will disable
    # getting proxies from the environment

    # generic proxy
    if (! exists $self->{proxy} ) {
        $self->{proxy} = $ENV{all_proxy} || $ENV{ALL_PROXY};
    }

    if ( defined $self->{proxy} ) {
        $self->_split_proxy( 'generic proxy' => $self->{proxy} ); # validate
    }
    else {
        delete $self->{proxy};
    }

    # http proxy
    if (! exists $self->{http_proxy} ) {
        # under CGI, bypass HTTP_PROXY as request sets it from Proxy header
        local $ENV{HTTP_PROXY} if $ENV{REQUEST_METHOD};
        $self->{http_proxy} = $ENV{http_proxy} || $ENV{HTTP_PROXY} || $self->{proxy};
    }

    if ( defined $self->{http_proxy} ) {
        $self->_split_proxy( http_proxy => $self->{http_proxy} ); # validate
        $self->{_has_proxy}{http} = 1;
    }
    else {
        delete $self->{http_proxy};
    }

    # https proxy
    if (! exists $self->{https_proxy} ) {
        $self->{https_proxy} = $ENV{https_proxy} || $ENV{HTTPS_PROXY} || $self->{proxy};
    }

    if ( $self->{https_proxy} ) {
        $self->_split_proxy( https_proxy => $self->{https_proxy} ); # validate
        $self->{_has_proxy}{https} = 1;
    }
    else {
        delete $self->{https_proxy};
    }

    # Split no_proxy to array reference if not provided as such
    unless ( ref $self->{no_proxy} eq 'ARRAY' ) {
        $self->{no_proxy} =
            (defined $self->{no_proxy}) ? [ split /\s*,\s*/, $self->{no_proxy} ] : [];
    }

    return;
}

#pod =method get|head|put|post|delete
#pod
#pod     $response = $http->get($url);
#pod     $response = $http->get($url, \%options);
#pod     $response = $http->head($url);
#pod
#pod These methods are shorthand for calling C<request()> for the given method.  The
#pod URL must have unsafe characters escaped and international domain names encoded.
#pod See C<request()> for valid options and a description of the response.
#pod
#pod The C<success> field of the response will be true if the status code is 2XX.
#pod
#pod =cut

# Generate one thin wrapper per HTTP verb; each validates its arguments and
# delegates to request() with the upper-cased method name.
for my $sub_name ( qw/get head put post delete/ ) {
    my $req_method = uc $sub_name;
    no strict 'refs';
    eval <<"HERE"; ## no critic
    sub $sub_name {
        my (\$self, \$url, \$args) = \@_;
        \@_ == 2 || (\@_ == 3 && ref \$args eq 'HASH')
        or _croak(q/Usage: \$http->$sub_name(URL, [HASHREF])/ . "\n");
        return \$self->request('$req_method', \$url, \$args || {});
    }
HERE
}

#pod =method post_form
#pod
#pod     $response = $http->post_form($url, $form_data);
#pod     $response = $http->post_form($url, $form_data, \%options);
#pod
#pod This method executes a C<POST> request and sends the key/value pairs from a
#pod form data hash or array reference to the given URL with a C<content-type> of
#pod C<application/x-www-form-urlencoded>.  If data is provided as an array
#pod reference, the order is preserved; if provided as a hash reference, the terms
#pod are sorted on key and value for consistency.  See documentation for the
#pod C<www_form_urlencode> method for details on the encoding.
#pod
#pod The URL must have unsafe characters escaped and international domain names
#pod encoded.  See C<request()> for valid options and a description of the response.
#pod Any C<content-type> header or content in the options hashref will be ignored.
#pod
#pod The C<success> field of the response will be true if the status code is 2XX.
#pod
#pod =cut

sub post_form {
    my ($self, $url, $data, $args) = @_;
    (@_ == 3 || @_ == 4 && ref $args eq 'HASH')
        or _croak(q/Usage: $http->post_form(URL, DATAREF, [HASHREF])/ . "\n");

    $args ||= {};

    # Normalise header names to lower case so that the forced content-type
    # below reliably replaces any caller-supplied one.
    my $headers = {};
    while ( my ($key, $value) = each %{$args->{headers} || {}} ) {
        $headers->{lc $key} = $value;
    }

    # The caller's options hash is only read, never modified: its 'headers'
    # and 'content' entries are overridden by the later keys of the new hash.
    return $self->request('POST', $url, {
            %$args,
            content => $self->www_form_urlencode($data),
            headers => {
                %$headers,
                'content-type' => 'application/x-www-form-urlencoded'
            },
        }
    );
}

#pod =method mirror
#pod
#pod     $response = $http->mirror($url, $file, \%options)
#pod     if ( $response->{success} ) {
#pod         print "$file is up to date\n";
#pod     }
#pod
#pod Executes a C<GET> request for the URL and saves the response body to the file
#pod name provided.  The URL must have unsafe characters escaped and international
#pod domain names encoded.  If the file already exists, the request will include an
#pod C<If-Modified-Since> header with the modification timestamp of the file.  You
#pod may specify a different C<If-Modified-Since> header yourself in the C<<
#pod $options->{headers} >> hash.
#pod
#pod The C<success> field of the response will be true if the status code is 2XX
#pod or if the status code is 304 (unmodified).
#pod
#pod If the file was modified and the server response includes a properly
#pod formatted C<Last-Modified> header, the file modification time will
#pod be updated accordingly.
#pod
#pod =cut

sub mirror {
    my ($self, $url, $file, $args) = @_;
    @_ == 3 || (@_ == 4 && ref $args eq 'HASH')
      or _croak(q/Usage: $http->mirror(URL, FILE, [HASHREF])/ . "\n");

    # Lower-case caller-supplied header names so the if-modified-since
    # lookup below sees them regardless of the caller's capitalisation.
    if ( exists $args->{headers} ) {
        my $headers = {};
        while ( my ($key, $value) = each %{$args->{headers} || {}} ) {
            $headers->{lc $key} = $value;
        }
        $args->{headers} = $headers;
    }

    if ( -e $file and my $mtime = (stat($file))[9] ) {
        $args->{headers}{'if-modified-since'} ||= $self->_http_date($mtime);
    }

    # Download into a sibling temporary file; O_EXCL refuses to reuse an
    # existing file of the same name.
    my $tempfile = $file . int(rand(2**31));

    require Fcntl;
    sysopen my $fh, $tempfile, Fcntl::O_CREAT()|Fcntl::O_EXCL()|Fcntl::O_WRONLY()
       or _croak(qq/Error: Could not create temporary file $tempfile for downloading: $!\n/);
    binmode $fh;
    $args->{data_callback} = sub { print {$fh} $_[0] };
    my $response = $self->request('GET', $url, $args);

    unless ( close $fh ) {
        # Save the reason before unlink can overwrite $!, and do not leave
        # the partial download behind.
        my $error = "$!";
        unlink $tempfile;
        _croak(qq/Error: Caught error closing temporary file $tempfile: $error\n/);
    }

    if ( $response->{success} ) {
        unless ( rename $tempfile, $file ) {
            my $error = "$!";
            unlink $tempfile;
            _croak(qq/Error replacing $file with $tempfile: $error\n/);
        }
        my $lm = $response->{headers}{'last-modified'};
        if ( $lm and my $mtime = $self->_parse_http_date($lm) ) {
            utime $mtime, $mtime, $file;
        }
    }
    # A 304 means the existing file is already current
    $response->{success} ||= $response->{status} eq '304';
    unlink $tempfile;
    return $response;
}

#pod =method request
#pod
#pod     $response = $http->request($method, $url);
#pod     $response = $http->request($method, $url, \%options);
#pod
#pod Executes an HTTP request of the given method type ('GET', 'HEAD', 'POST',
#pod 'PUT', etc.) on the given URL.  The URL must have unsafe characters escaped and
#pod international domain names encoded.
#pod
#pod If the URL includes a "user:password" stanza, they will be used for Basic-style
#pod authorization headers.  (Authorization headers will not be included in a
#pod redirected request.) For example:
#pod
#pod     $http->request('GET', 'http://Aladdin:open sesame@example.com/');
#pod
#pod If the "user:password" stanza contains reserved characters, they must
#pod be percent-escaped:
#pod
#pod     $http->request('GET', 'http://john%40example.com:password@example.com/');
#pod
#pod A hashref of options may be appended to modify the request.
#pod
#pod Valid options are:
#pod
#pod =for :list
#pod * C<headers> —
#pod     A hashref containing headers to include with the request.  If the value for
#pod     a header is an array reference, the header will be output multiple times with
#pod     each value in the array.  These headers over-write any default headers.
#pod * C<content> —
#pod     A scalar to include as the body of the request OR a code reference
#pod     that will be called iteratively to produce the body of the request
#pod * C<trailer_callback> —
#pod     A code reference that will be called if it exists to provide a hashref
#pod     of trailing headers (only used with chunked transfer-encoding)
#pod * C<data_callback> —
#pod     A code reference that will be called for each chunk of the response
#pod     body received.
#pod * C<peer> —
#pod     Override host resolution and force all connections to go only to a
#pod     specific peer address, regardless of the URL of the request.  This will
#pod     include any redirections!  This option should be used with extreme
#pod     caution (e.g. debugging or very special circumstances).
#pod
#pod The C<Host> header is generated from the URL in accordance with RFC 2616.  It
#pod is a fatal error to specify C<Host> in the C<headers> option.  Other headers
#pod may be ignored or overwritten if necessary for transport compliance.
#pod
#pod If the C<content> option is a code reference, it will be called iteratively
#pod to provide the content body of the request.  It should return the empty
#pod string or undef when the iterator is exhausted.
#pod
#pod If the C<content> option is the empty string, no C<content-type> or
#pod C<content-length> headers will be generated.
#pod
#pod If the C<data_callback> option is provided, it will be called iteratively until
#pod the entire response body is received.  The first argument will be a string
#pod containing a chunk of the response body, the second argument will be the
#pod in-progress response hash reference, as described below.  (This allows
#pod customizing the action of the callback based on the C<status> or C<headers>
#pod received prior to the content body.)
#pod
#pod The C<request> method returns a hashref containing the response.  The hashref
#pod will have the following keys:
#pod
#pod =for :list
#pod * C<success> —
#pod     Boolean indicating whether the operation returned a 2XX status code
#pod * C<url> —
#pod     URL that provided the response.
#pod     This is the URL of the request unless
#pod     there were redirections, in which case it is the last URL queried
#pod     in a redirection chain
#pod * C<status> —
#pod     The HTTP status code of the response
#pod * C<reason> —
#pod     The response phrase returned by the server
#pod * C<content> —
#pod     The body of the response.  If the response does not have any content
#pod     or if a data callback is provided to consume the response body,
#pod     this will be the empty string
#pod * C<headers> —
#pod     A hashref of header fields.  All header field names will be normalized
#pod     to be lower case. If a header is repeated, the value will be an arrayref;
#pod     it will otherwise be a scalar string containing the value
#pod * C<redirects>
#pod     If this field exists, it is an arrayref of response hash references from
#pod     redirects in the same order that redirections occurred.  If it does
#pod     not exist, then no redirections occurred.
#pod
#pod On an exception during the execution of the request, the C<status> field will
#pod contain 599, and the C<content> field will contain the text of the exception.
#pod
#pod =cut

# Methods for which a request is retried once after a broken socket.
my %idempotent = map { $_ => 1 } qw/GET HEAD PUT DELETE OPTIONS TRACE/;

# Public entry point: validates arguments, runs _request (at most twice for
# idempotent methods) and converts any exception into a response hashref.
sub request {
    my ($self, $method, $url, $args) = @_;
    @_ == 3 || (@_ == 4 && ref $args eq 'HASH')
      or _croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/ . "\n");
    $args ||= {}; # we keep some state in this during _request

    # RFC 2616 Section 8.1.4 mandates a single retry on broken socket
    my $response;
    for ( 0 ..
1 ) {
        $response = eval { $self->_request($method, $url, $args) };
        # Retry only when the failure was a dropped connection AND the method
        # is idempotent; any other outcome (success or error) ends the loop.
        last unless $@ && $idempotent{$method}
            && $@ =~ m{^(?:Socket closed|Unexpected end)};
    }

    if (my $e = $@) {
        # maybe we got a response hash thrown from somewhere deep
        if ( ref $e eq 'HASH' && exists $e->{status} ) {
            $e->{redirects} = delete $args->{_redirects} if @{ $args->{_redirects} || []};
            return $e;
        }

        # otherwise, stringify it
        $e = "$e";
        # Synthesize a pseudo-response: status 599 with the exception text as body.
        $response = {
            url     => $url,
            success => q{},
            status  => 599,
            reason  => 'Internal Exception',
            content => $e,
            headers => {
                'content-type'   => 'text/plain',
                'content-length' => length $e,
            },
            ( @{$args->{_redirects} || []} ? (redirects => delete $args->{_redirects}) : () ),
        };
    }
    return $response;
}

#pod =method www_form_urlencode
#pod
#pod     $params = $http->www_form_urlencode( $data );
#pod     $response = $http->get("http://example.com/query?$params");
#pod
#pod This method converts the key/value pairs from a data hash or array reference
#pod into a C<x-www-form-urlencoded> string.  The keys and values from the data
#pod reference will be UTF-8 encoded and escaped per RFC 3986.  If a value is an
#pod array reference, the key will be repeated with each of the values of the array
#pod reference.  If data is provided as a hash reference, the key/value pairs in the
#pod resulting string will be sorted by key and value for consistent ordering.
#pod
#pod =cut

sub www_form_urlencode {
    my ($self, $data) = @_;
    (@_ == 2 && ref $data)
        or _croak(q/Usage: $http->www_form_urlencode(DATAREF)/ . "\n");
    (ref $data eq 'HASH' || ref $data eq 'ARRAY')
        or _croak("form data must be a hash or array reference\n");

    # Flatten either container into one key/value list.
    my @params = ref $data eq 'HASH' ? %$data : @$data;
    @params % 2 == 0
        or _croak("form data reference must have an even number of terms\n");

    my @terms;
    while( @params ) {
        my ($key, $value) = splice(@params, 0, 2);
        if ( ref $value eq 'ARRAY' ) {
            # Expand "key => [a, b]" into "key => a, key => b" at the front
            # of the queue so the values keep their relative position.
            unshift @params, map { $key => $_ } @$value;
        }
        else {
            push @terms, join("=", map { $self->_uri_escape($_) } $key, $value);
        }
    }

    # Array input keeps the caller's order; hash input is sorted.
    return join("&", (ref $data eq 'ARRAY') ?
(@terms) : (sort @terms) );
}

#pod =method can_ssl
#pod
#pod     $ok         = HTTP::Tiny->can_ssl;
#pod     ($ok, $why) = HTTP::Tiny->can_ssl;
#pod     ($ok, $why) = $http->can_ssl;
#pod
#pod Indicates if SSL support is available.  When called as a class method, it
#pod checks for the correct version of L<Net::SSLeay> and L<IO::Socket::SSL>.
#pod When called as an object method, if C<verify_SSL> is true or if
#pod C<SSL_verify_mode> is set in C<SSL_options>, it also checks that a CA file
#pod is available.
#pod
#pod In scalar context, returns a boolean indicating if SSL is available.
#pod In list context, returns the boolean and a (possibly multi-line) string of
#pod errors indicating why SSL isn't available.
#pod
#pod =cut

sub can_ssl {
    my ($self) = @_;

    my $available = 1;
    my $why       = '';

    # Do not pick the SSL modules up from the current directory.
    local @INC = @INC;
    pop @INC if $INC[-1] eq '.';

    # Need IO::Socket::SSL 1.42 for SSL_create_ctx_callback
    my $have_io_ssl = eval { require IO::Socket::SSL; IO::Socket::SSL->VERSION(1.42) };
    if ( !$have_io_ssl ) {
        $available = 0;
        $why .= qq/IO::Socket::SSL 1.42 must be installed for https support\n/;
    }

    # Need Net::SSLeay 1.49 for MODE_AUTO_RETRY
    my $have_ssleay = eval { require Net::SSLeay; Net::SSLeay->VERSION(1.49) };
    if ( !$have_ssleay ) {
        $available = 0;
        $why .= qq/Net::SSLeay 1.49 must be installed for https support\n/;
    }

    # For an instance that verifies certificates, a CA file must be locatable.
    my $needs_ca = ref($self)
        && ( $self->{verify_SSL} || $self->{SSL_options}{SSL_verify_mode} );
    if ( $needs_ca ) {
        my $probe = HTTP::Tiny::Handle->new(
            SSL_options => $self->{SSL_options},
            verify_SSL  => $self->{verify_SSL},
        );
        if ( !eval { $probe->_find_CA_file; 1 } ) {
            $available = 0;
            $why .= "$@";
        }
    }

    return wantarray ? ($available, $why) : $available;
}

#pod =method connected
#pod
#pod     $host = $http->connected;
#pod     ($host, $port) = $http->connected;
#pod
#pod Indicates if a connection to a peer is being kept alive, per the C<keep_alive>
#pod option.
#pod
#pod In scalar context, returns the peer host and port, joined with a colon, or
#pod C<undef> (if no peer is connected).
#pod In list context, returns the peer host and port or an empty list (if no peer
#pod is connected).
#pod
#pod B<Note>: This method cannot reliably be used to discover whether the remote
#pod host has closed its end of the socket.
#pod
#pod =cut

sub connected {
    my ($self) = @_;

    # No cached handle, or a handle without a socket: nothing is connected.
    my $handle = $self->{handle};
    return unless $handle && $handle->{fh};

    my $sock = $handle->{fh};
    return unless $sock->connected;

    # Peer host and port: as a list, or joined as "host:port" for scalar callers.
    my @peer = ($sock->peerhost, $sock->peerport);
    return wantarray ? @peer : join(':', @peer);
}

#--------------------------------------------------------------------------#
# private methods
#--------------------------------------------------------------------------#

# Port implied by each supported URL scheme.
my %DefaultPort = (
    http => 80,
    https => 443,
);

# Builds the default User-Agent string: the class name with "::" replaced
# by "-", followed by "/" and the class's version.
sub _agent {
    my ($invocant) = @_;
    my $class = ref($invocant) || $invocant;

    my $name = $class;
    $name =~ s{::}{-}g;

    return "$name/" . $class->VERSION;
}

sub _request {
    my ($self, $method, $url, $args) = @_;

    my ($scheme, $host, $port, $path_query, $auth) = $self->_split_url($url);

    my $request = {
        method    => $method,
        scheme    => $scheme,
        host      => $host,
        port      => $port,
        host_port => ($port == $DefaultPort{$scheme} ? $host : "$host:$port"),
        uri       => $path_query,
        headers   => {},
    };

    my $peer = $args->{peer} || $host;

    # We remove the cached handle so it is not reused in the case of redirect.
    # If all is well, it will be recached at the end of _request.
# We only reuse for the same scheme, host and port
    my $handle = delete $self->{handle};
    if ( $handle ) {
        unless ( $handle->can_reuse( $scheme, $host, $port, $peer ) ) {
            $handle->close;
            undef $handle;
        }
    }
    $handle ||= $self->_open_handle( $request, $scheme, $host, $port, $peer );

    $self->_prepare_headers_and_cb($request, $args, $url, $auth);
    $handle->write_request($request);

    # Skip over any 1xx interim responses; only the final response matters.
    my $response;
    do { $response = $handle->read_response_header }
        until (substr($response->{status},0,1) ne '1');

    $self->_update_cookie_jar( $url, $response ) if $self->{cookie_jar};
    # Non-empty when this response should be followed: (method, location).
    my @redir_args = $self->_maybe_redirect($request, $response, $args);

    my $known_message_length;
    if ($method eq 'HEAD' || $response->{status} =~ /^[23]04/) {
        # response has no message body
        $known_message_length = 1;
    }
    else {
        # Ignore any data callbacks during redirection.
        my $cb_args = @redir_args ? +{} : $args;
        my $data_cb = $self->_prepare_data_cb($response, $cb_args);
        $known_message_length = $handle->read_body($data_cb, $response);
    }

    # Cache the handle for reuse only when keep-alive is enabled, the body
    # length was known, the server speaks HTTP/1.1 and did not ask to close.
    if ( $self->{keep_alive}
        && $known_message_length
        && $response->{protocol} eq 'HTTP/1.1'
        && ($response->{headers}{connection} || '') ne 'close'
    ) {
        $self->{handle} = $handle;
    }
    else {
        $handle->close;
    }

    $response->{success} = substr( $response->{status}, 0, 1 ) eq '2';
    $response->{url} = $url;

    # Push the current response onto the stack of redirects if redirecting.
    if (@redir_args) {
        push @{$args->{_redirects}}, $response;
        return $self->_request(@redir_args, $args);
    }

    # Copy the stack of redirects into the response before returning.
    $response->{redirects} = delete $args->{_redirects}
      if @{$args->{_redirects}};
    return $response;
}

# Creates a new HTTP::Tiny::Handle configured from this object's settings and
# connects it, either through the configured proxy or directly to the peer.
sub _open_handle {
    my ($self, $request, $scheme, $host, $port, $peer) = @_;

    my $handle  = HTTP::Tiny::Handle->new(
        timeout         => $self->{timeout},
        SSL_options     => $self->{SSL_options},
        verify_SSL      => $self->{verify_SSL},
        local_address   => $self->{local_address},
        keep_alive      => $self->{keep_alive}
    );

    # Use the proxy unless the host ends with one of the no_proxy entries.
    if ($self->{_has_proxy}{$scheme} && !
grep { $host =~ /\Q$_\E$/ } @{$self->{no_proxy}}) {
        return $self->_proxy_connect( $request, $handle );
    }
    else {
        return $handle->connect($scheme, $host, $port, $peer);
    }
}

# Connects $handle to the proxy configured for the request's scheme.
# For https requests a CONNECT tunnel is established; for plain http the
# request URI is rewritten to absolute form.  Returns the handle.
sub _proxy_connect {
    my ($self, $request, $handle) = @_;

    my @proxy_vars;
    if ( $request->{scheme} eq 'https' ) {
        _croak(qq{No https_proxy defined}) unless $self->{https_proxy};
        @proxy_vars = $self->_split_proxy( https_proxy => $self->{https_proxy} );
        if ( $proxy_vars[0] eq 'https' ) {
            _croak(qq{Can't proxy https over https: $request->{uri} via $self->{https_proxy}});
        }
    }
    else {
        _croak(qq{No http_proxy defined}) unless $self->{http_proxy};
        @proxy_vars = $self->_split_proxy( http_proxy => $self->{http_proxy} );
    }

    my ($p_scheme, $p_host, $p_port, $p_auth) = @proxy_vars;

    # Credentials embedded in the proxy URL are used only when the caller
    # did not supply a Proxy-Authorization header.
    if ( length $p_auth && ! defined $request->{headers}{'proxy-authorization'} ) {
        $self->_add_basic_auth_header( $request, 'proxy-authorization' => $p_auth );
    }

    $handle->connect($p_scheme, $p_host, $p_port, $p_host);

    if ($request->{scheme} eq 'https') {
        $self->_create_proxy_tunnel( $request, $handle );
    }
    else {
        # non-tunneled proxy requires absolute URI
        $request->{uri} = "$request->{scheme}://$request->{host_port}$request->{uri}";
    }

    return $handle;
}

# Splits a proxy URL into (scheme, host, port, auth).
# $type names the setting being parsed (e.g. "http_proxy") and is used only
# in the error message.  Croaks unless the URL parses and has no path beyond
# "/".
sub _split_proxy {
    my ($self, $type, $proxy) = @_;

    # A parse failure leaves every field undefined and is reported below.
    my ($scheme, $host, $port, $path_query, $auth) = eval { $self->_split_url($proxy) };

    unless(
        defined($scheme) && length($scheme) && length($host) && length($port)
        && $path_query eq '/'
    ) {
        # Spell out the host and port placeholders so the expected format
        # is actually usable as a hint.
        _croak(qq{$type URL must be in format http[s]://[auth@]<host>:<port>/\n});
    }

    return ($scheme, $host, $port, $auth);
}

sub _create_proxy_tunnel {
    my ($self, $request, $handle) = @_;

    $handle->_assert_ssl;

    my $agent = exists($request->{headers}{'user-agent'})
        ?
$request->{headers}{'user-agent'} : $self->{agent};

    my $connect_request = {
        method    => 'CONNECT',
        uri       => "$request->{host}:$request->{port}",
        headers   => {
            host => "$request->{host}:$request->{port}",
            'user-agent' => $agent,
        }
    };

    # Proxy credentials go on the CONNECT request and are removed from the
    # request that will travel through the tunnel.
    if ( $request->{headers}{'proxy-authorization'} ) {
        $connect_request->{headers}{'proxy-authorization'} =
            delete $request->{headers}{'proxy-authorization'};
    }

    $handle->write_request($connect_request);
    # Skip 1xx interim responses.
    my $response;
    do { $response = $handle->read_response_header }
        until (substr($response->{status},0,1) ne '1');

    # if CONNECT failed, throw the response so it will be
    # returned from the original request() method;
    unless (substr($response->{status},0,1) eq '2') {
        die $response;
    }

    # tunnel established, so start SSL handshake
    $handle->start_ssl( $request->{host} );

    return;
}

# Fills in $request->{headers} (default headers, then per-request headers,
# then Host / User-Agent / Connection), sets up the body callback and its
# content-length or chunked framing, and adds cookies and Basic auth.
sub _prepare_headers_and_cb {
    my ($self, $request, $args, $url, $auth) = @_;

    # Later sources override earlier ones; header_case remembers the
    # caller's original spelling of each header name.
    for ($self->{default_headers}, $args->{headers}) {
        next unless defined;
        while (my ($k, $v) = each %$_) {
            $request->{headers}{lc $k} = $v;
            $request->{header_case}{lc $k} = $k;
        }
    }

    if (exists $request->{headers}{'host'}) {
        die(qq/The 'Host' header must not be provided as header option\n/);
    }

    $request->{headers}{'host'}         = $request->{host_port};
    $request->{headers}{'user-agent'} ||= $self->{agent};
    $request->{headers}{'connection'}   = "close"
        unless $self->{keep_alive};

    if ( defined $args->{content} ) {
        if (ref $args->{content} eq 'CODE') {
            # Body produced by a callback: length unknown, so default to
            # chunked unless the caller set a length or encoding.
            $request->{headers}{'content-type'} ||= "application/octet-stream";
            $request->{headers}{'transfer-encoding'} = 'chunked'
              unless $request->{headers}{'content-length'}
                  || $request->{headers}{'transfer-encoding'};
            $request->{cb} = $args->{content};
        }
        elsif ( length $args->{content} ) {
            my $content = $args->{content};
            if ( $] ge '5.008' ) {
                # The body must be bytes; fail on characters above 0xFF.
                utf8::downgrade($content, 1)
                    or die(qq/Wide character in request message body\n/);
            }
            $request->{headers}{'content-type'} ||= "application/octet-stream";
            $request->{headers}{'content-length'} = length $content
              unless
$request->{headers}{'content-length'}
                  || $request->{headers}{'transfer-encoding'};
            # One-shot callback: hands over the whole body, then the empty string.
            $request->{cb} = sub { substr $content, 0, length $content, '' };
        }
        $request->{trailer_cb} = $args->{trailer_callback}
            if ref $args->{trailer_callback} eq 'CODE';
    }

    ### If we have a cookie jar, then maybe add relevant cookies
    if ( $self->{cookie_jar} ) {
        my $cookies = $self->cookie_jar->cookie_header( $url );
        $request->{headers}{cookie} = $cookies if length $cookies;
    }

    # if we have Basic auth parameters, add them
    if ( length $auth && ! defined $request->{headers}{authorization} ) {
        $self->_add_basic_auth_header( $request, 'authorization' => $auth );
    }

    return;
}

# Stores "Basic <base64 of $auth>" under $header in the request's headers.
sub _add_basic_auth_header {
    my ($self, $request, $header, $auth) = @_;
    require MIME::Base64;
    # The empty second argument suppresses the trailing line break.
    my $encoded = MIME::Base64::encode_base64($auth, "");
    $request->{headers}{$header} = "Basic $encoded";
    return;
}

# Picks the callback that will receive response body chunks and resets
# $response->{content} to the empty string.  The caller's data_callback is
# used only for 2xx responses; otherwise the body is accumulated in
# $response->{content}, subject to max_size when that is defined.
sub _prepare_data_cb {
    my ($self, $response, $args) = @_;

    $response->{content} = '';

    my $user_cb = $args->{data_callback};
    return $user_cb
      if $user_cb && $response->{status} =~ /^2/;

    # Unlimited accumulation when no size cap is configured.
    return sub { $_[1]->{content} .= $_[0] }
      unless defined $self->{max_size};

    # Capped accumulation: append first, then enforce the limit.
    return sub {
        my $res = $_[1];
        $res->{content} .= $_[0];
        die(qq/Size of response body exceeds the maximum allowed of $self->{max_size}\n/)
          if length $res->{content} > $self->{max_size};
    };
}

sub _update_cookie_jar {
    my ($self, $url, $response) = @_;

    my $cookies = $response->{headers}->{'set-cookie'};
    return unless defined $cookies;

    my @cookies = ref $cookies ?
@$cookies : $cookies;

    $self->cookie_jar->add( $url, $_ ) for @cookies;

    return;
}

# Croaks unless $jar is an object providing the 'add' and 'cookie_header'
# methods.
sub _validate_cookie_jar {
    my ($class, $jar) = @_;

    # duck typing
    for my $method ( qw/add cookie_header/ ) {
        _croak(qq/Cookie jar must provide the '$method' method\n/)
            unless ref($jar) && ref($jar)->can($method);
    }

    return;
}

# Decides whether $response should be followed as a redirect.
# Returns (method, location) for the follow-up request, or an empty list.
# A 303 is always followed with GET; 301/302/307/308 are followed only for
# GET and HEAD requests.  A Location header must be present and the number
# of redirects already followed must be below max_redirect.
sub _maybe_redirect {
    my ($self, $request, $response, $args) = @_;
    my $headers = $response->{headers};
    my ($status, $method) = ($response->{status}, $request->{method});
    $args->{_redirects} ||= [];

    # The alternation is grouped so that both anchors apply to both method
    # names; ungrouped, /^GET|HEAD$/ would also accept e.g. "GETX" or "XHEAD".
    if (($status eq '303' or ($status =~ /^30[1278]/ && $method =~ /\A(?:GET|HEAD)\z/))
        and $headers->{location}
        and @{$args->{_redirects}} < $self->{max_redirect}
    ) {
        # A Location starting with "/" is resolved against the current origin.
        my $location = ($headers->{location} =~ /^\//)
            ? "$request->{scheme}://$request->{host_port}$headers->{location}"
            : $headers->{location} ;
        return (($status eq '303' ? 'GET' : $method), $location);
    }
    return;
}

sub _split_url {
    my $url = pop;

    # URI regex adapted from the URI module
    my ($scheme, $host, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)>
      or die(qq/Cannot parse URL: '$url'\n/);

    $scheme     = lc $scheme;
    $path_query = "/$path_query" unless $path_query =~ m<\A/>;

    my $auth = '';
    if ( (my $i = index $host, '@') != -1 ) {
        # user:pass@host
        $auth = substr $host, 0, $i, ''; # take up to the @ for auth
        substr $host, 0, 1, '';          # knock the @ off the host

        # userinfo might be percent escaped, so recover real auth info
        $auth =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
    }
    my $port = $host =~ s/:(\d*)\z// && length $1 ? $1
             : $scheme eq 'http'                  ? 80
             : $scheme eq 'https'                 ? 443
             : undef;

    return ($scheme, (length $host ?
lc $host : "localhost") , $port, $path_query, $auth);
}

# Date conversions adapted from HTTP::Date
# Both strings are lookup tables of three-letter names with a four-character
# stride (name plus separator), indexed as offset = n * 4.
my $DoW = "Sun|Mon|Tue|Wed|Thu|Fri|Sat";
my $MoY = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec";

# Formats an epoch time (second argument) as "Sun, 06 Nov 1994 08:49:37 GMT".
sub _http_date {
    my ($self, $epoch) = @_;
    my ($ss, $mi, $hh, $day, $month, $yr, $weekday) = gmtime($epoch);

    my $day_name   = substr($DoW, 4 * $weekday, 3);
    my $month_name = substr($MoY, 4 * $month, 3);

    return sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
        $day_name, $day, $month_name, $yr + 1900, $hh, $mi, $ss
    );
}

# Parses a date in one of three layouts ("Sun, 06 Nov 1994 08:49:37 GMT",
# "Sunday, 06-Nov-94 08:49:37 GMT" or "Sun Nov  6 08:49:37 1994") and returns
# the epoch time.  Returns undef when the string matches none of them, when
# the result is negative, or when Time::Local rejects the values.
sub _parse_http_date {
    my ($self, $str) = @_;
    require Time::Local;

    my ($mday, $mon_name, $year, $hour, $min, $sec);
    if ($str =~ /^[SMTWF][a-z]+, +(\d{1,2}) ($MoY) +(\d\d\d\d) +(\d\d):(\d\d):(\d\d) +GMT$/) {
        ($mday, $mon_name, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
    }
    elsif ($str =~ /^[SMTWF][a-z]+, +(\d\d)-($MoY)-(\d{2,4}) +(\d\d):(\d\d):(\d\d) +GMT$/ ) {
        ($mday, $mon_name, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
    }
    elsif ($str =~ /^[SMTWF][a-z]+ +($MoY) +(\d{1,2}) +(\d\d):(\d\d):(\d\d) +(?:[^0-9]+ +)?(\d\d\d\d)$/ ) {
        ($mon_name, $mday, $hour, $min, $sec, $year) = ($1, $2, $3, $4, $5, $6);
    }

    # The month name is set exactly when one of the patterns matched.
    my @tl_parts = defined $mon_name
        ? ($sec, $min, $hour, $mday, (index($MoY, $mon_name)/4), $year)
        : ();

    # timegm dies on out-of-range values; the eval turns that into undef.
    return eval {
        my $t = @tl_parts ? Time::Local::timegm(@tl_parts) : -1;
        $t < 0 ? undef : $t;
    };
}

# URI escaping adapted from URI::Escape
# c.f.
# http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1
# perl 5.6 ready UTF-8 encoding adapted from JSON::PP
# Byte => escape table: every byte maps to %XX, except space which maps to "+".
my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
$escapes{' '}="+";
my $unsafe_char = qr/[^A-Za-z0-9\-\._~]/;

# Returns $str UTF-8 encoded, with every byte outside A-Z a-z 0-9 - . _ ~
# replaced by its entry in %escapes.
sub _uri_escape {
    my ($self, $str) = @_;
    if ( $] ge '5.008' ) {
        utf8::encode($str);
    }
    else {
        $str = pack("U*", unpack("C*", $str)) # UTF-8 encode a byte string
            if ( length $str == do { use bytes; length $str } );
        $str = pack("C*", unpack("C*", $str)); # clear UTF-8 flag
    }
    $str =~ s/($unsafe_char)/$escapes{$1}/ge;
    return $str;
}

package
    HTTP::Tiny::Handle; # hide from PAUSE/indexers
use strict;
use warnings;

use Errno      qw[EINTR EPIPE];
use IO::Socket qw[SOCK_STREAM];
use Socket     qw[SOL_SOCKET SO_KEEPALIVE];

# PERL_HTTP_TINY_IPV4_ONLY is a private environment variable to force old
# behavior if someone is unable to bootstrap CPAN from a new perl install; it is
# not intended for general, per-client use and may be removed in the future
my $SOCKET_CLASS =
    $ENV{PERL_HTTP_TINY_IPV4_ONLY} ? 'IO::Socket::INET' :
    eval { require IO::Socket::IP; IO::Socket::IP->VERSION(0.25) } ? 'IO::Socket::IP' :
    'IO::Socket::INET';

sub BUFSIZE () { 32768 } ## no critic

# Renders a string for error messages: CR, LF and TAB become \r, \n and \t,
# and any other character outside printable ASCII becomes \xHH.
my $Printable = sub {
    my ($text) = @_;
    $text =~ s/\r/\\r/g;
    $text =~ s/\n/\\n/g;
    $text =~ s/\t/\\t/g;
    $text =~ s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge;
    return $text;
};

my $Token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
my $Field_Content = qr/[[:print:]]+ (?: [\x20\x09]+ [[:print:]]+ )*/x;

# Constructor: built-in defaults, overridden by any key in %args.
sub new {
    my ($class, %args) = @_;
    my %defaults = (
        rbuf             => '',
        timeout          => 60,
        max_line_size    => 16384,
        max_header_lines => 64,
        verify_SSL       => 0,
        SSL_options      => {},
    );
    return bless { %defaults, %args }, $class;
}

# Accessor for the timeout.  With an argument, stores it and forwards it to
# the socket when the socket supports a timeout method.  Returns the current
# value.
sub timeout {
    my ($self, $timeout) = @_;
    return $self->{timeout} unless @_ > 1;

    $self->{timeout} = $timeout;
    my $fh = $self->{fh};
    $fh->timeout($timeout) if $fh && $fh->can('timeout');

    return $self->{timeout};
}

sub connect {
    @_ == 5 || die(q/Usage: $handle->connect(scheme, host, port, peer)/ .
"\n"); my ($self, $scheme, $host, $port, $peer) = @_; if ( $scheme eq 'https' ) { $self->_assert_ssl; } elsif ( $scheme ne 'http' ) { die(qq/Unsupported URL scheme '$scheme'\n/); } $self->{fh} = $SOCKET_CLASS->new( PeerHost => $peer, PeerPort => $port, $self->{local_address} ? ( LocalAddr => $self->{local_address} ) : (), Proto => 'tcp', Type => SOCK_STREAM, Timeout => $self->{timeout}, ) or die(qq/Could not connect to '$host:$port': $@\n/); binmode($self->{fh}) or die(qq/Could not binmode() socket: '$!'\n/); if ( $self->{keep_alive} ) { unless ( defined( $self->{fh}->setsockopt( SOL_SOCKET, SO_KEEPALIVE, 1 ) ) ) { CORE::close($self->{fh}); die(qq/Could not set SO_KEEPALIVE on socket: '$!'\n/); } } $self->start_ssl($host) if $scheme eq 'https'; $self->{scheme} = $scheme; $self->{host} = $host; $self->{peer} = $peer; $self->{port} = $port; $self->{pid} = $$; $self->{tid} = _get_tid(); return $self; } sub start_ssl { my ($self, $host) = @_; # As this might be used via CONNECT after an SSL session # to a proxy, we shut down any existing SSL before attempting # the handshake if ( ref($self->{fh}) eq 'IO::Socket::SSL' ) { unless ( $self->{fh}->stop_SSL ) { my $ssl_err = IO::Socket::SSL->errstr; die(qq/Error halting prior SSL connection: $ssl_err/); } } my $ssl_args = $self->_ssl_args($host); IO::Socket::SSL->start_SSL( $self->{fh}, %$ssl_args, SSL_create_ctx_callback => sub { my $ctx = shift; Net::SSLeay::CTX_set_mode($ctx, Net::SSLeay::MODE_AUTO_RETRY()); }, ); unless ( ref($self->{fh}) eq 'IO::Socket::SSL' ) { my $ssl_err = IO::Socket::SSL->errstr; die(qq/SSL connection failed for $host: $ssl_err\n/); } } sub close { @_ == 1 || die(q/Usage: $handle->close()/ . "\n"); my ($self) = @_; CORE::close($self->{fh}) or die(qq/Could not close socket: '$!'\n/); } sub write { @_ == 2 || die(q/Usage: $handle->write(buf)/ . 
"\n"); my ($self, $buf) = @_; if ( $] ge '5.008' ) { utf8::downgrade($buf, 1) or die(qq/Wide character in write()\n/); } my $len = length $buf; my $off = 0; local $SIG{PIPE} = 'IGNORE'; while () { $self->can_write or die(qq/Timed out while waiting for socket to become ready for writing\n/); my $r = syswrite($self->{fh}, $buf, $len, $off); if (defined $r) { $len -= $r; $off += $r; last unless $len > 0; } elsif ($! == EPIPE) { die(qq/Socket closed by remote server: $!\n/); } elsif ($! != EINTR) { if ($self->{fh}->can('errstr')){ my $err = $self->{fh}->errstr(); die (qq/Could not write to SSL socket: '$err'\n /); } else { die(qq/Could not write to socket: '$!'\n/); } } } return $off; } sub read { @_ == 2 || @_ == 3 || die(q/Usage: $handle->read(len [, allow_partial])/ . "\n"); my ($self, $len, $allow_partial) = @_; my $buf = ''; my $got = length $self->{rbuf}; if ($got) { my $take = ($got < $len) ? $got : $len; $buf = substr($self->{rbuf}, 0, $take, ''); $len -= $take; } while ($len > 0) { $self->can_read or die(q/Timed out while waiting for socket to become ready for reading/ . "\n"); my $r = sysread($self->{fh}, $buf, $len, length $buf); if (defined $r) { last unless $r; $len -= $r; } elsif ($! != EINTR) { if ($self->{fh}->can('errstr')){ my $err = $self->{fh}->errstr(); die (qq/Could not read from SSL socket: '$err'\n /); } else { die(qq/Could not read from socket: '$!'\n/); } } } if ($len && !$allow_partial) { die(qq/Unexpected end of stream\n/); } return $buf; } sub readline { @_ == 1 || die(q/Usage: $handle->readline()/ . 
"\n"); my ($self) = @_; while () { if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) { return $1; } if (length $self->{rbuf} >= $self->{max_line_size}) { die(qq/Line size exceeds the maximum allowed size of $self->{max_line_size}\n/); } $self->can_read or die(qq/Timed out while waiting for socket to become ready for reading\n/); my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf}); if (defined $r) { last unless $r; } elsif ($! != EINTR) { if ($self->{fh}->can('errstr')){ my $err = $self->{fh}->errstr(); die (qq/Could not read from SSL socket: '$err'\n /); } else { die(qq/Could not read from socket: '$!'\n/); } } } die(qq/Unexpected end of stream while looking for line\n/); } sub read_header_lines { @_ == 1 || @_ == 2 || die(q/Usage: $handle->read_header_lines([headers])/ . "\n"); my ($self, $headers) = @_; $headers ||= {}; my $lines = 0; my $val; while () { my $line = $self->readline; if (++$lines >= $self->{max_header_lines}) { die(qq/Header lines exceeds maximum number allowed of $self->{max_header_lines}\n/); } elsif ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) { my ($field_name) = lc $1; if (exists $headers->{$field_name}) { for ($headers->{$field_name}) { $_ = [$_] unless ref $_ eq "ARRAY"; push @$_, $2; $val = \$_->[-1]; } } else { $val = \($headers->{$field_name} = $2); } } elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) { $val or die(qq/Unexpected header continuation line\n/); next unless length $1; $$val .= ' ' if length $$val; $$val .= $1; } elsif ($line =~ /\A \x0D?\x0A \z/x) { last; } else { die(q/Malformed header line: / . $Printable->($line) . "\n"); } } return $headers; } sub write_request { @_ == 2 || die(q/Usage: $handle->write_request(request)/ . 
"\n"); my($self, $request) = @_; $self->write_request_header(@{$request}{qw/method uri headers header_case/}); $self->write_body($request) if $request->{cb}; return; } # Standard request header names/case from HTTP/1.1 RFCs my @rfc_request_headers = qw( Accept Accept-Charset Accept-Encoding Accept-Language Authorization Cache-Control Connection Content-Length Expect From Host If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since Max-Forwards Pragma Proxy-Authorization Range Referer TE Trailer Transfer-Encoding Upgrade User-Agent Via ); my @other_request_headers = qw( Content-Encoding Content-MD5 Content-Type Cookie DNT Date Origin X-XSS-Protection ); my %HeaderCase = map { lc($_) => $_ } @rfc_request_headers, @other_request_headers; # to avoid multiple small writes and hence nagle, you can pass the method line or anything else to # combine writes. sub write_header_lines { (@_ >= 2 && @_ <= 4 && ref $_[1] eq 'HASH') || die(q/Usage: $handle->write_header_lines(headers, [header_case, prefix])/ . "\n"); my($self, $headers, $header_case, $prefix_data) = @_; $header_case ||= {}; my $buf = (defined $prefix_data ? $prefix_data : ''); # Per RFC, control fields should be listed first my %seen; for my $k ( qw/host cache-control expect max-forwards pragma range te/ ) { next unless exists $headers->{$k}; $seen{$k}++; my $field_name = $HeaderCase{$k}; my $v = $headers->{$k}; for (ref $v eq 'ARRAY' ? @$v : $v) { $_ = '' unless defined $_; $buf .= "$field_name: $_\x0D\x0A"; } } # Other headers sent in arbitrary order while (my ($k, $v) = each %$headers) { my $field_name = lc $k; next if $seen{$field_name}; if (exists $HeaderCase{$field_name}) { $field_name = $HeaderCase{$field_name}; } else { if (exists $header_case->{$field_name}) { $field_name = $header_case->{$field_name}; } else { $field_name =~ s/\b(\w)/\u$1/g; } $field_name =~ /\A $Token+ \z/xo or die(q/Invalid HTTP header field name: / . $Printable->($field_name) . 
"\n"); $HeaderCase{lc $field_name} = $field_name; } for (ref $v eq 'ARRAY' ? @$v : $v) { # unwrap a field value if pre-wrapped by user s/\x0D?\x0A\s+/ /g; die(qq/Invalid HTTP header field value ($field_name): / . $Printable->($_). "\n") unless $_ eq '' || /\A $Field_Content \z/xo; $_ = '' unless defined $_; $buf .= "$field_name: $_\x0D\x0A"; } } $buf .= "\x0D\x0A"; return $self->write($buf); } # return value indicates whether message length was defined; this is generally # true unless there was no content-length header and we just read until EOF. # Other message length errors are thrown as exceptions sub read_body { @_ == 3 || die(q/Usage: $handle->read_body(callback, response)/ . "\n"); my ($self, $cb, $response) = @_; my $te = $response->{headers}{'transfer-encoding'} || ''; my $chunked = grep { /chunked/i } ( ref $te eq 'ARRAY' ? @$te : $te ) ; return $chunked ? $self->read_chunked_body($cb, $response) : $self->read_content_body($cb, $response); } sub write_body { @_ == 2 || die(q/Usage: $handle->write_body(request)/ . "\n"); my ($self, $request) = @_; if ($request->{headers}{'content-length'}) { return $self->write_content_body($request); } else { return $self->write_chunked_body($request); } } sub read_content_body { @_ == 3 || @_ == 4 || die(q/Usage: $handle->read_content_body(callback, response, [read_length])/ . "\n"); my ($self, $cb, $response, $content_length) = @_; $content_length ||= $response->{headers}{'content-length'}; if ( defined $content_length ) { my $len = $content_length; while ($len > 0) { my $read = ($len > BUFSIZE) ? BUFSIZE : $len; $cb->($self->read($read, 0), $response); $len -= $read; } return length($self->{rbuf}) == 0; } my $chunk; $cb->($chunk, $response) while length( $chunk = $self->read(BUFSIZE, 1) ); return; } sub write_content_body { @_ == 2 || die(q/Usage: $handle->write_content_body(request)/ . 
"\n");
    my ($self, $request) = @_;

    # (Tail of write_content_body; its opening lines precede this span.)
    # Pull data from the request callback until it yields undef or an empty
    # string, counting what $self->write() reports for each piece.
    my ($len, $content_length) = (0, $request->{headers}{'content-length'});
    while () {
        my $data = $request->{cb}->();

        defined $data && length $data
          or last;

        if ( $] ge '5.008' ) {
            utf8::downgrade($data, 1)
                or die(qq/Wide character in write_content()\n/);
        }

        $len += $self->write($data);
    }

    $len == $content_length
      or die(qq/Content-Length mismatch (got: $len expected: $content_length)\n/);

    return $len;
}

# Read a body sent with chunked transfer-coding.
#
#   $cb       - data callback, handed on to read_content_body()
#   $response - in-progress response hashref
#
# Each chunk head must begin with a hexadecimal length.  A length of zero
# ends the loop, after which the trailer is read with read_header_lines()
# into $response->{headers}.  Returns 1.  Dies on a malformed chunk head or
# when the CRLF after the chunk data is missing.
sub read_chunked_body {
    @_ == 3 || die(q/Usage: $handle->read_chunked_body(callback, $response)/ . "\n");
    my ($self, $cb, $response) = @_;

    while () {
        my $head = $self->readline;

        $head =~ /\A ([A-Fa-f0-9]+)/x
          or die(q/Malformed chunk head: / . $Printable->($head) . "\n");

        # hex() of the last-chunk marker is 0, which terminates the loop.
        my $len = hex($1)
          or last;

        $self->read_content_body($cb, $response, $len);

        $self->read(2) eq "\x0D\x0A"
          or die(qq/Malformed chunk: missing CRLF after chunk data\n/);
    }
    $self->read_header_lines($response->{headers});
    return 1;
}

# Write a request body using chunked transfer-coding.
#
# Data is pulled from $request->{cb} until it returns undef or an empty
# string.  Every piece is sent as "<hex length> CRLF <data> CRLF".  The body
# is terminated with "0 CRLF" followed either by the headers returned from
# $request->{trailer_cb} (when that is a code ref) or by a bare CRLF.
#
# Returns the number of payload bytes (framing excluded).  Dies if a piece
# contains wide characters that cannot be downgraded.
sub write_chunked_body {
    @_ == 2 || die(q/Usage: $handle->write_chunked_body(request)/ . "\n");
    my ($self, $request) = @_;

    my $len = 0;
    while () {
        my $data = $request->{cb}->();

        defined $data && length $data
          or last;

        if ( $] ge '5.008' ) {
            utf8::downgrade($data, 1)
                or die(qq/Wide character in write_chunked_body()\n/);
        }

        $len += length $data;

        my $chunk  = sprintf '%X', length $data;
           $chunk .= "\x0D\x0A";
           $chunk .= $data;
           $chunk .= "\x0D\x0A";

        $self->write($chunk);
    }
    $self->write("0\x0D\x0A");
    if ( ref $request->{trailer_cb} eq 'CODE' ) {
        $self->write_header_lines($request->{trailer_cb}->())
    }
    else {
        $self->write("\x0D\x0A");
    }
    return $len;
}

# Read and parse the Status-Line plus the header block of a response.
#
# Returns a hashref with the keys status, reason, headers and protocol.
# Dies on a malformed Status-Line or when the protocol is not HTTP/1.0 or
# HTTP/1.1 (leading zeros in either version component are tolerated).
sub read_response_header {
    @_ == 1 || die(q/Usage: $handle->read_response_header()/ . "\n");
    my ($self) = @_;

    my $line = $self->readline;

    $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x
      or die(q/Malformed Status-Line: / . $Printable->($line). "\n");

    my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4);

    # The whole version string must match.  Without the anchors a version
    # such as "21.0" or "1.12" would be accepted because it merely contains
    # "1.0" / "1.1" as a substring.
    die (qq/Unsupported HTTP protocol: $protocol\n/)
        unless $version =~ /\A 0*1 \. 0*[01] \z/x;

    return {
        status       => $status,
        reason       => $reason,
        headers      => $self->read_header_lines,
        protocol     => $protocol,
    };
}

# Write the Request-Line ("METHOD request-uri HTTP/1.1" CRLF) followed by the
# headers.  The actual formatting is delegated to write_header_lines(), whose
# return value is passed back to the caller.
sub write_request_header {
    @_ == 5 || die(q/Usage: $handle->write_request_header(method, request_uri, headers, header_case)/ . "\n");
    my ($self, $method, $request_uri, $headers, $header_case) = @_;

    return $self->write_header_lines($headers, $header_case, "$method $request_uri HTTP/1.1\x0D\x0A");
}

# Wait with select() until the handle is ready for $type ('read', anything
# else means write) or the timeout runs out.
#
# $timeout falls back to $self->{timeout} when it is undefined or negative.
# Returns the value of select(): 0 on timeout, non-zero when ready.  Dies if
# the filehandle has no valid descriptor or select() fails with anything
# other than EINTR.  $! is reset to 0 before returning.
sub _do_timeout {
    my ($self, $type, $timeout) = @_;
    $timeout = $self->{timeout}
        unless defined $timeout && $timeout >= 0;

    my $fd = fileno $self->{fh};
    defined $fd && $fd >= 0
      or die(qq/select(2): 'Bad file descriptor'\n/);

    my $initial = time;
    my $pending = $timeout;
    my $nfound;

    vec(my $fdset = '', $fd, 1) = 1;

    while () {
        $nfound = ($type eq 'read')
            ? select($fdset, undef, undef, $pending)
            : select(undef, $fdset, undef, $pending) ;
        if ($nfound == -1) {
            $! == EINTR
              or die(qq/select(2): '$!'\n/);
            # Interrupted by a signal: retry with whatever time is left
            # (a false timeout always retries).
            redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0;
            $nfound = 0;
        }
        last;
    }
    $! = 0;
    return $nfound;
}

# True when the handle can be read within the optional timeout.  For an
# IO::Socket::SSL handle, data already pending in the SSL layer counts as
# readable without consulting select().
sub can_read {
    @_ == 1 || @_ == 2 || die(q/Usage: $handle->can_read([timeout])/ . "\n");
    my $self = shift;
    if ( ref($self->{fh}) eq 'IO::Socket::SSL' ) {
        return 1 if $self->{fh}->pending;
    }
    return $self->_do_timeout('read', @_)
}

# True when the handle can be written within the optional timeout.
sub can_write {
    @_ == 1 || @_ == 2 || die(q/Usage: $handle->can_write([timeout])/ . "\n");
    my $self = shift;
    return $self->_do_timeout('write', @_)
}

# Die with the reason reported by HTTP::Tiny->can_ssl() unless SSL is usable.
sub _assert_ssl {
    my($ok, $reason) = HTTP::Tiny->can_ssl();
    die $reason unless $ok;
}

# Decide whether this handle may be reused for a request to the given
# scheme/host/port/peer.  Returns 0 when the process or thread id changed,
# when unread data is buffered, when any of the connection coordinates
# differ, when the socket reports readable, or when probing it threw;
# otherwise returns 1.
# NOTE(review): a readable idle socket presumably means EOF or unsolicited
# data from the peer -- confirm.
sub can_reuse {
    my ($self,$scheme,$host,$port,$peer) = @_;
    return 0 if
        $self->{pid} != $$
        || $self->{tid} != _get_tid()
        || length($self->{rbuf})
        || $scheme ne $self->{scheme}
        || $host ne $self->{host}
        || $port ne $self->{port}
        || $peer ne $self->{peer}
        || eval { $self->can_read(0) }
        || $@ ;
    return 1;
}

# Try to find a CA bundle to validate the SSL cert,
# prefer Mozilla::CA or fallback to a system file
#
# Lookup order: SSL_options->{SSL_ca_file}, then $ENV{SSL_CERT_FILE} (either
# one must be readable or this dies), then Mozilla::CA, then a list of
# well-known system locations.  Dies when nothing is found.
sub _find_CA_file {
    my $self = shift();

    my $ca_file =
      defined( $self->{SSL_options}->{SSL_ca_file} )
      ? $self->{SSL_options}->{SSL_ca_file}
      : $ENV{SSL_CERT_FILE};

    if ( defined $ca_file ) {
        unless ( -r $ca_file ) {
            die qq/SSL_ca_file '$ca_file' not found or not readable\n/;
        }
        return $ca_file;
    }

    # Do not load Mozilla::CA from the current directory.
    local @INC = @INC;
    pop @INC if $INC[-1] eq '.';
    return Mozilla::CA::SSL_ca_file()
        if eval { require Mozilla::CA; 1 };

    # cert list copied from golang src/crypto/x509/root_unix.go
    foreach my $ca_bundle (
        "/etc/ssl/certs/ca-certificates.crt",     # Debian/Ubuntu/Gentoo etc.
        "/etc/pki/tls/certs/ca-bundle.crt",       # Fedora/RHEL
        "/etc/ssl/ca-bundle.pem",                 # OpenSUSE
        "/etc/openssl/certs/ca-certificates.crt", # NetBSD
        "/etc/ssl/cert.pem",                      # OpenBSD
        "/usr/local/share/certs/ca-root-nss.crt", # FreeBSD/DragonFly
        "/etc/pki/tls/cacert.pem",                # OpenELEC
        "/etc/certs/ca-certificates.crt",         # Solaris 11.2+
    ) {
        return $ca_bundle if -e $ca_bundle;
    }

    die qq/Couldn't find a CA bundle with which to verify the SSL certificate.\n/
      . qq/Try installing Mozilla::CA from CPAN\n/;
}

# for thread safety, we need to know thread id if threads are loaded
# (returns 0 when the threads module is not loaded)
sub _get_tid {
    no warnings 'reserved'; # for 'threads'
    return threads->can("tid") ? threads->tid : 0;
}

# Build the argument hashref for the SSL layer for a connection to $host.
# Settings derived from verify_SSL come first; any key in
# $self->{SSL_options} that starts with "SSL_" then overrides them.
sub _ssl_args {
    my ($self, $host) = @_;

    my %ssl_args;

    # This test reimplements IO::Socket::SSL::can_client_sni(), which wasn't
    # added until IO::Socket::SSL 1.84
    if ( Net::SSLeay::OPENSSL_VERSION_NUMBER() >= 0x01000000 ) {
        $ssl_args{SSL_hostname} = $host;          # Sane SNI support
    }

    if ($self->{verify_SSL}) {
        $ssl_args{SSL_verifycn_scheme}  = 'http'; # enable CN validation
        $ssl_args{SSL_verifycn_name}    = $host;  # set validation hostname
        $ssl_args{SSL_verify_mode}      = 0x01;   # enable cert validation
        $ssl_args{SSL_ca_file}          = $self->_find_CA_file;
    }
    else {
        $ssl_args{SSL_verifycn_scheme}  = 'none'; # disable CN validation
        $ssl_args{SSL_verify_mode}      = 0x00;   # disable cert validation
    }

    # user options override settings from verify_SSL
    for my $k ( keys %{$self->{SSL_options}} ) {
        $ssl_args{$k} = $self->{SSL_options}{$k} if $k =~ m/^SSL_/;
    }

    return \%ssl_args;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

HTTP::Tiny - A small, simple, correct HTTP/1.1 client

=head1 VERSION

version 0.070

=head1 SYNOPSIS

    use HTTP::Tiny;

    my $response = HTTP::Tiny->new->get('http://example.com/');

    die "Failed!\n" unless $response->{success};

    print "$response->{status} $response->{reason}\n";

    while (my ($k, $v) = each %{$response->{headers}}) {
        for (ref $v eq 'ARRAY' ? @$v : $v) {
            print "$k: $_\n";
        }
    }

    print $response->{content} if length $response->{content};

=head1 DESCRIPTION

This is a very simple HTTP/1.1 client, designed for doing simple
requests without the overhead of a large framework like L<LWP::UserAgent>.

It is more correct and more complete than L<HTTP::Lite>.  It supports
proxies and redirection.  It also correctly resumes after EINTR.

If L<IO::Socket::IP> 0.25 or later is installed, HTTP::Tiny will use it instead
of L<IO::Socket::INET> for transparent support for both IPv4 and IPv6.

Cookie support requires L<HTTP::CookieJar> or an equivalent class.

=head1 METHODS

=head2 new

    $http = HTTP::Tiny->new( %attributes );

This constructor returns a new HTTP::Tiny object.
Valid attributes include:

=over 4

=item *

C<agent> — A user-agent string (defaults to 'HTTP-Tiny/$VERSION'). If C<agent> — ends in a space character, the default user-agent string is appended.

=item *

C<cookie_jar> — An instance of L<HTTP::CookieJar> — or equivalent class that supports the C<add> and C<cookie_header> methods

=item *

C<default_headers> — A hashref of default headers to apply to requests

=item *

C<local_address> — The local IP address to bind to

=item *

C<keep_alive> — Whether to reuse the last connection (if for the same scheme, host and port) (defaults to 1)

=item *

C<max_redirect> — Maximum number of redirects allowed (defaults to 5)

=item *

C<max_size> — Maximum response size in bytes (only when not using a data callback).  If defined, responses larger than this will return an exception.

=item *

C<http_proxy> — URL of a proxy server to use for HTTP connections (default is C<$ENV{http_proxy}> — if set)

=item *

C<https_proxy> — URL of a proxy server to use for HTTPS connections (default is C<$ENV{https_proxy}> — if set)

=item *

C<proxy> — URL of a generic proxy server for both HTTP and HTTPS connections (default is C<$ENV{all_proxy}> — if set)

=item *

C<no_proxy> — List of domain suffixes that should not be proxied.  Must be a comma-separated string or an array reference. (default is C<$ENV{no_proxy}> —)

=item *

C<timeout> — Request timeout in seconds (default is 60) If a socket open, read or write takes longer than the timeout, an exception is thrown.

=item *

C<verify_SSL> — A boolean that indicates whether to validate the SSL certificate of an C<https> — connection (default is false)

=item *

C<SSL_options> — A hashref of C<SSL_*> — options to pass through to L<IO::Socket::SSL>

=back

Passing an explicit C<undef> for C<proxy>, C<http_proxy> or C<https_proxy> will
prevent getting the corresponding proxies from the environment.

Exceptions from C<max_size>, C<timeout> or other errors will result in a
pseudo-HTTP status code of 599 and a reason of "Internal Exception". The
content field in the response will contain the text of the exception.

The C<keep_alive> parameter enables a persistent connection, but only to a
single destination scheme, host and port.
Also, if any connection-relevant attributes are modified, or if the process ID or thread ID change, the persistent connection will be dropped. If you want persistent connections across multiple destinations, use multiple HTTP::Tiny objects. See L for more on the C and C attributes. =head2 get|head|put|post|delete $response = $http->get($url); $response = $http->get($url, \%options); $response = $http->head($url); These methods are shorthand for calling C for the given method. The URL must have unsafe characters escaped and international domain names encoded. See C for valid options and a description of the response. The C field of the response will be true if the status code is 2XX. =head2 post_form $response = $http->post_form($url, $form_data); $response = $http->post_form($url, $form_data, \%options); This method executes a C request and sends the key/value pairs from a form data hash or array reference to the given URL with a C of C. If data is provided as an array reference, the order is preserved; if provided as a hash reference, the terms are sorted on key and value for consistency. See documentation for the C method for details on the encoding. The URL must have unsafe characters escaped and international domain names encoded. See C for valid options and a description of the response. Any C header or content in the options hashref will be ignored. The C field of the response will be true if the status code is 2XX. =head2 mirror $response = $http->mirror($url, $file, \%options) if ( $response->{success} ) { print "$file is up to date\n"; } Executes a C request for the URL and saves the response body to the file name provided. The URL must have unsafe characters escaped and international domain names encoded. If the file already exists, the request will include an C header with the modification timestamp of the file. You may specify a different C header yourself in the C<< $options->{headers} >> hash. 
The C field of the response will be true if the status code is 2XX or if the status code is 304 (unmodified). If the file was modified and the server response includes a properly formatted C header, the file modification time will be updated accordingly. =head2 request $response = $http->request($method, $url); $response = $http->request($method, $url, \%options); Executes an HTTP request of the given method type ('GET', 'HEAD', 'POST', 'PUT', etc.) on the given URL. The URL must have unsafe characters escaped and international domain names encoded. If the URL includes a "user:password" stanza, they will be used for Basic-style authorization headers. (Authorization headers will not be included in a redirected request.) For example: $http->request('GET', 'http://Aladdin:open sesame@example.com/'); If the "user:password" stanza contains reserved characters, they must be percent-escaped: $http->request('GET', 'http://john%40example.com:password@example.com/'); A hashref of options may be appended to modify the request. Valid options are: =over 4 =item * C — A hashref containing headers to include with the request. If the value for a header is an array reference, the header will be output multiple times with each value in the array. These headers over-write any default headers. =item * C — A scalar to include as the body of the request OR a code reference that will be called iteratively to produce the body of the request =item * C — A code reference that will be called if it exists to provide a hashref of trailing headers (only used with chunked transfer-encoding) =item * C — A code reference that will be called for each chunks of the response body received. =item * C — Override host resolution and force all connections to go only to a specific peer address, regardless of the URL of the request. This will include any redirections! This options should be used with extreme caution (e.g. debugging or very special circumstances). 
=back The C header is generated from the URL in accordance with RFC 2616. It is a fatal error to specify C in the C option. Other headers may be ignored or overwritten if necessary for transport compliance. If the C option is a code reference, it will be called iteratively to provide the content body of the request. It should return the empty string or undef when the iterator is exhausted. If the C option is the empty string, no C or C headers will be generated. If the C option is provided, it will be called iteratively until the entire response body is received. The first argument will be a string containing a chunk of the response body, the second argument will be the in-progress response hash reference, as described below. (This allows customizing the action of the callback based on the C or C received prior to the content body.) The C method returns a hashref containing the response. The hashref will have the following keys: =over 4 =item * C — Boolean indicating whether the operation returned a 2XX status code =item * C — URL that provided the response. This is the URL of the request unless there were redirections, in which case it is the last URL queried in a redirection chain =item * C — The HTTP status code of the response =item * C — The response phrase returned by the server =item * C — The body of the response. If the response does not have any content or if a data callback is provided to consume the response body, this will be the empty string =item * C — A hashref of header fields. All header field names will be normalized to be lower case. If a header is repeated, the value will be an arrayref; it will otherwise be a scalar string containing the value =item * C If this field exists, it is an arrayref of response hash references from redirects in the same order that redirections occurred. If it does not exist, then no redirections occurred. 
=back On an exception during the execution of the request, the C field will contain 599, and the C field will contain the text of the exception. =head2 www_form_urlencode $params = $http->www_form_urlencode( $data ); $response = $http->get("http://example.com/query?$params"); This method converts the key/value pairs from a data hash or array reference into a C string. The keys and values from the data reference will be UTF-8 encoded and escaped per RFC 3986. If a value is an array reference, the key will be repeated with each of the values of the array reference. If data is provided as a hash reference, the key/value pairs in the resulting string will be sorted by key and value for consistent ordering. =head2 can_ssl $ok = HTTP::Tiny->can_ssl; ($ok, $why) = HTTP::Tiny->can_ssl; ($ok, $why) = $http->can_ssl; Indicates if SSL support is available. When called as a class object, it checks for the correct version of L and L. When called as an object methods, if C is true or if C is set in C, it checks that a CA file is available. In scalar context, returns a boolean indicating if SSL is available. In list context, returns the boolean and a (possibly multi-line) string of errors indicating why SSL isn't available. =head2 connected $host = $http->connected; ($host, $port) = $http->connected; Indicates if a connection to a peer is being kept alive, per the C option. In scalar context, returns the peer host and port, joined with a colon, or C (if no peer is connected). In list context, returns the peer host and port or an empty list (if no peer is connected). B: This method cannot reliably be used to discover whether the remote host has closed its end of the socket. =for Pod::Coverage SSL_options agent cookie_jar default_headers http_proxy https_proxy keep_alive local_address max_redirect max_size no_proxy proxy timeout verify_SSL =head1 SSL SUPPORT Direct C connections are supported only if L 1.56 or greater and L 1.49 or greater are installed. 
An exception will be thrown if new enough versions of these modules are not installed or if the SSL encryption fails. You can also use C utility function that returns boolean to see if the required modules are installed. An C connection may be made via an C proxy that supports the CONNECT command (i.e. RFC 2817). You may not proxy C via a proxy that itself requires C to communicate. SSL provides two distinct capabilities: =over 4 =item * Encrypted communication channel =item * Verification of server identity =back B. Server identity verification is controversial and potentially tricky because it depends on a (usually paid) third-party Certificate Authority (CA) trust model to validate a certificate as legitimate. This discriminates against servers with self-signed certificates or certificates signed by free, community-driven CA's such as L. By default, HTTP::Tiny does not make any assumptions about your trust model, threat level or risk tolerance. It just aims to give you an encrypted channel when you need one. Setting the C attribute to a true value will make HTTP::Tiny verify that an SSL connection has a valid SSL certificate corresponding to the host name of the connection and that the SSL certificate has been verified by a CA. Assuming you trust the CA, this will protect against a L. If you are concerned about security, you should enable this option. Certificate verification requires a file containing trusted CA certificates. If the environment variable C is present, HTTP::Tiny will try to find a CA certificate file in that location. If the L module is installed, HTTP::Tiny will use the CA file included with it as a source of trusted CA's. (This means you trust Mozilla, the author of Mozilla::CA, the CPAN mirror where you got Mozilla::CA, the toolchain used to install it, and your operating system security, right?) 
If that module is not available, then HTTP::Tiny will search several system-specific default locations for a CA certificate file: =over 4 =item * /etc/ssl/certs/ca-certificates.crt =item * /etc/pki/tls/certs/ca-bundle.crt =item * /etc/ssl/ca-bundle.pem =back An exception will be raised if C is true and no CA certificate file is available. If you desire complete control over SSL connections, the C attribute lets you provide a hash reference that will be passed through to C, overriding any options set by HTTP::Tiny. For example, to provide your own trusted CA file: SSL_options => { SSL_ca_file => $file_path, } The C attribute could also be used for such things as providing a client certificate for authentication to a server or controlling the choice of cipher used for the SSL connection. See L documentation for details. =head1 PROXY SUPPORT HTTP::Tiny can proxy both C and C requests. Only Basic proxy authorization is supported and it must be provided as part of the proxy URL: C. HTTP::Tiny supports the following proxy environment variables: =over 4 =item * http_proxy or HTTP_PROXY =item * https_proxy or HTTPS_PROXY =item * all_proxy or ALL_PROXY =back If the C environment variable is set, then this might be a CGI process and C would be set from the C header, which is a security risk. If C is set, C (the upper case variant only) is ignored. Tunnelling C over an C proxy using the CONNECT method is supported. If your proxy uses C itself, you can not tunnel C over it. Be warned that proxying an C connection opens you to the risk of a man-in-the-middle attack by the proxy server. The C environment variable is supported in the format of a comma-separated list of domain extensions proxy should not be used for. Proxy arguments passed to C will override their corresponding environment variables. 
=head1 LIMITATIONS

HTTP::Tiny is I<conditionally compliant> with the
L<HTTP/1.1 specifications|http://www.w3.org/Protocols/>:

=over 4

=item *

"Message Syntax and Routing" [RFC7230]

=item *

"Semantics and Content" [RFC7231]

=item *

"Conditional Requests" [RFC7232]

=item *

"Range Requests" [RFC7233]

=item *

"Caching" [RFC7234]

=item *

"Authentication" [RFC7235]

=back

It attempts to meet all "MUST" requirements of the specification, but does not
implement all "SHOULD" requirements.  (Note: it was developed against the
earlier RFC 2616 specification and may not yet meet the revised RFC 7230-7235
spec.)

Some particular limitations of note include:

=over

=item *

HTTP::Tiny focuses on correct transport.  Users are responsible for ensuring
that user-defined headers and content are compliant with the HTTP/1.1
specification.

=item *

Users must ensure that URLs are properly escaped for unsafe characters and that
international domain names are properly encoded to ASCII. See L<URI::Escape>,
L<URI::_punycode> and L<Net::IDN::Encode>.

=item *

Redirection is very strict against the specification.  Redirection is only
automatic for response codes 301, 302, 307 and 308 if the request method is
'GET' or 'HEAD'.  Response code 303 is always converted into a 'GET'
redirection, as mandated by the specification.  There is no automatic support
for status 305 ("Use proxy") redirections.

=item *

There is no provision for delaying a request body using an C<Expect> header.
Unexpected C<1XX> responses are silently ignored as per the specification.

=item *

Only 'chunked' C<Transfer-Encoding> is supported.

=item *

There is no support for a Request-URI of '*' for the 'OPTIONS' request.

=item *

Headers mentioned in the RFCs and some other, well-known headers are
generated with their canonical case.  Other headers are sent in the
case provided by the user.  Except for control headers (which are sent first),
headers are sent in arbitrary order.

=back

Despite the limitations listed above, HTTP::Tiny is considered
feature-complete.  New feature requests should be directed to
L<HTTP::Tiny::UA>.

=head1 SEE ALSO =over 4 =item * L - Higher level UA features for HTTP::Tiny =item * L - HTTP::Tiny wrapper with L/L compatibility =item * L - Wrap L instance in HTTP::Tiny compatible interface =item * L - Required for IPv6 support =item * L - Required for SSL support =item * L - If HTTP::Tiny isn't enough for you, this is the "standard" way to do things =item * L - Required if you want to validate SSL certificates =item * L - Required for SSL support =back =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan =head1 SUPPORT =head2 Bugs / Feature Requests Please report any bugs or feature requests through the issue tracker at L. You will be notified automatically of any progress on your issue. =head2 Source Code This is open source software. The code repository is available for public review and contribution under the terms of the license. L git clone https://github.com/chansen/p5-http-tiny.git =head1 AUTHORS =over 4 =item * Christian Hansen =item * David Golden =back =head1 CONTRIBUTORS =for stopwords Alan Gardner Alessandro Ghedini A. Sinan Unur Brad Gilbert brian m. carlson Chris Nehren Weyl Claes Jakobsson Clinton Gormley Craig Berry David Golden Dean Pearce Edward Zborowski James Raspass Jeremy Mates Jess Robinson Karen Etheridge Lukas Eklund Martin J. Evans Martin-Louis Bright Mike Doherty Nicolas Rochelemagne Olaf Alders Olivier Mengué Petr Písař SkyMarshal Sören Kornetzki Steve Grazzini Syohei YOSHIDA Tatsuhiko Miyagawa Tom Hukins Tony Cook =over 4 =item * Alan Gardner =item * Alessandro Ghedini =item * A. Sinan Unur =item * Brad Gilbert =item * brian m. carlson =item * Chris Nehren =item * Chris Weyl =item * Claes Jakobsson =item * Clinton Gormley =item * Craig A. Berry =item * David Golden =item * Dean Pearce =item * Edward Zborowski =item * James Raspass =item * Jeremy Mates =item * Jess Robinson =item * Karen Etheridge =item * Lukas Eklund =item * Martin J. 
Evans =item * Martin-Louis Bright =item * Mike Doherty =item * Nicolas Rochelemagne =item * Olaf Alders =item * Olivier Mengué =item * Petr Písař =item * SkyMarshal =item * Sören Kornetzki =item * Steve Grazzini =item * Syohei YOSHIDA =item * Tatsuhiko Miyagawa =item * Tom Hukins =item * Tony Cook =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2016 by Christian Hansen. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut HTTP_TINY $fatpacked{"IPC/System/Simple.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'IPC_SYSTEM_SIMPLE'; package IPC::System::Simple; # ABSTRACT: Run commands simply, with detailed diagnostics use 5.006; use strict; use warnings; use re 'taint'; use Carp; use List::Util qw(first); use Scalar::Util qw(tainted); use Config; use constant WINDOWS => ($^O eq 'MSWin32'); use constant VMS => ($^O eq 'VMS'); BEGIN { # It would be lovely to use the 'if' module here, but it didn't # enter core until 5.6.2, and we want to keep 5.6.0 compatibility. if (WINDOWS) { ## no critic (ProhibitStringyEval) eval q{ use Win32::Process qw(INFINITE NORMAL_PRIORITY_CLASS); use File::Spec; use Win32; # This uses the same rules as the core win32.c/get_shell() call. use constant WINDOWS_SHELL => eval { Win32::IsWinNT() } ? [ qw(cmd.exe /x/d/c) ] : [ qw(command.com /c) ]; # These are used when invoking _win32_capture use constant NO_SHELL => 0; use constant USE_SHELL => 1; }; ## use critic # Die nosily if any of the above broke. die $@ if $@; } } # Note that we don't use WIFSTOPPED because perl never uses # the WUNTRACED flag, and hence will never return early from # system() if the child processes is suspended with a SIGSTOP. 
use POSIX qw(WIFEXITED WEXITSTATUS WIFSIGNALED WTERMSIG);

# sprintf() templates for every diagnostic this module croaks with.
# Each %s / %d is filled in at the point the error is raised.
use constant FAIL_START     => q{"%s" failed to start: "%s"};
use constant FAIL_PLUMBING  => q{Error in IPC::System::Simple plumbing: "%s" - "%s"};
use constant FAIL_CMD_BLANK => q{Entirely blank command passed: "%s"};
use constant FAIL_INTERNAL  => q{Internal error in IPC::System::Simple: "%s"};
use constant FAIL_TAINT     => q{%s called with tainted argument "%s"};
use constant FAIL_TAINT_ENV => q{%s called with tainted environment $ENV{%s}};
use constant FAIL_SIGNAL    => q{"%s" died to signal "%s" (%d)%s};
use constant FAIL_BADEXIT   => q{"%s" unexpectedly returned exit value %d};

use constant FAIL_UNDEF     => q{%s called with undefined command};

# Raised when probing a POSIX wait-status macro fails with an error other
# than the "not defined / not implemented" cases matched by
# UNDEFINED_POSIX_RE below.
use constant FAIL_POSIX     => q{IPC::System::Simple does not understand the POSIX error '%s'. Please check http://search.cpan.org/perldoc?IPC::System::Simple to see if there is an updated version. If not please report this as a bug to http://rt.cpan.org/Public/Bug/Report.html?Queue=IPC-System-Simple};

# On Perls older than 5.8.x we can't assume that there'll be a
# ${^TAINT} for us to check, so we assume that our args may always
# be tainted.

use constant ASSUME_TAINTED => ($] < 5.008);

use constant EXIT_ANY_CONST => -1;                 # Used internally
use constant EXIT_ANY       => [ EXIT_ANY_CONST ]; # Exported

# Matches the error text produced when a POSIX macro is only a stub on
# this platform; used to decide whether to install a pure-Perl fallback.
use constant UNDEFINED_POSIX_RE => qr{not (?:defined|a valid) POSIX macro|not implemented on this architecture};

require Exporter;
our @ISA = qw(Exporter);

# Nothing is exported by default; callers ask for what they need.
our @EXPORT_OK = qw( capture capturex run runx system systemx $EXITVAL EXIT_ANY );

our $VERSION = '1.25'; # VERSION : From dzil

# Exit value of the most recently run command; -1 until one has been
# recorded.
our $EXITVAL = -1;

# Signal names indexed by signal number, taken from perl's build
# configuration.
my @Signal_from_number = split(' ', $Config{sig_name});

# Environment variables we don't want to see tainted.
my @Check_tainted_env = qw(PATH IFS CDPATH ENV BASH_ENV);
push @Check_tainted_env, 'PERL5SHELL' if WINDOWS;
push @Check_tainted_env, 'DCL$PATH'   if VMS;

# POSIX always exports WIFEXITED and its siblings, but on some
# platforms they are only stubs that die when called.  Probe one of
# them: if it turns out to be a stub, install bit-twiddling
# replacements; any other failure is something we don't understand.

eval { WIFEXITED(0); };

if (my $probe_error = $@) {
    croak sprintf(FAIL_POSIX, $probe_error)
        unless $probe_error =~ UNDEFINED_POSIX_RE;

    no warnings 'redefine';  ## no critic
    *WIFEXITED   = sub { not $_[0] & 0xff };
    *WEXITSTATUS = sub { $_[0] >> 8  };
    *WIFSIGNALED = sub { $_[0] & 127 };
    *WTERMSIG    = sub { $_[0] & 127 };
}

# WCOREDUMP is not part of the POSIX module on any system we know of,
# even though plenty of platforms provide it.  Ask POSIX anyway, and
# remember whether the native version is in use.

# TODO: Ideally, $NATIVE_WCOREDUMP should be a constant.

my $NATIVE_WCOREDUMP;

eval { POSIX::WCOREDUMP(1); };

if (my $probe_error = $@) {
    croak sprintf(FAIL_POSIX, $probe_error)
        unless $probe_error =~ UNDEFINED_POSIX_RE;

    # No native implementation: test the core-dump bit ourselves.
    *WCOREDUMP = sub { $_[0] & 128 };
    $NATIVE_WCOREDUMP = 0;
}
else {
    # POSIX really does provide it.
    *WCOREDUMP = \&POSIX::WCOREDUMP;
    $NATIVE_WCOREDUMP = 1;
}

# Accessor for the flag set by the WCOREDUMP probe above.
sub _native_wcoredump {
    return $NATIVE_WCOREDUMP;
}

# system/systemx are plain aliases for run/runx.

*system  = \&run;
*systemx = \&runx;

# run() executes a command with system() semantics and hands $? to
# _process_child_error() for interpretation.

sub run {
    _check_taint(@_);

    my ($valid_returns, $command, @args) = _process_args(@_);

    # Anything beyond the command itself means the shell must be
    # bypassed, which is exactly what systemx does.
    return systemx($valid_returns, $command, @args) if @args;

    # Only a command string is left.  A failure to start is reported
    # through our own exception, so perl's warning is switched off
    # for the duration of the call.
    {
        # silence 'Statement unlikely to be reached' warning
        no warnings 'exec';             ## no critic
        CORE::system($command,@args);
    }

    return _process_child_error($?,$command,$valid_returns);
}

# runx is just like system/run, but *never* invokes the shell.
# Arguments are taint-checked and then unpacked by _process_args() into the
# list of acceptable exit values, the command and its arguments.  On Windows
# the process is created with _spawn_or_die() and its exit code verified
# with _check_exit(); elsewhere the indirect-object form of system() is
# used and $? is interpreted by _process_child_error().
sub runx {
    _check_taint(@_);

    my ($valid_returns, $command, @args) = _process_args(@_);

    if (WINDOWS) {
        our $EXITVAL = -1;

        my $pid = _spawn_or_die($command, "$command @args");

        $pid->Wait(INFINITE);   # Wait for process exit.
        $pid->GetExitCode($EXITVAL);
        return _check_exit($command,$EXITVAL,$valid_returns);
    }

    # If system() fails, we throw our own exception.  We don't
    # need to have perl complain about it too.

    no warnings; ## no critic

    # The "system { PROGRAM } LIST" form never goes through the shell.
    CORE::system { $command } $command, @args;

    return _process_child_error($?, $command, $valid_returns);
}

# capture is our way of running a process with backticks/qx semantics
#
# With extra arguments the call is handed to capturex().  Otherwise the
# command string is run through qx().  In list context the output is
# returned as a list of lines, in any other context as a single string.
# $? is checked by _process_child_error() before the output is returned.

sub capture {
    _check_taint(@_);

    my ($valid_returns, $command, @args) = _process_args(@_);

    if (@args) {
        return capturex($valid_returns, $command, @args);
    }

    if (WINDOWS) {
        # USE_SHELL really means "You may use the shell if you need it."
        return _win32_capture(USE_SHELL, $valid_returns, $command, @args);
    }

    our $EXITVAL = -1;

    my $wantarray = wantarray();

    # We'll produce our own warnings on failure to execute.
    no warnings 'exec'; ## no critic

    if ($wantarray) {
        my @results = qx($command);
        _process_child_error($?,$command,$valid_returns);
        return @results;
    }

    my $results = qx($command);
    _process_child_error($?,$command,$valid_returns);
    return $results;
}

# _win32_capture implements the capture and capturex commands on Win32.
# We need to wrap the whole internals of this sub into
# an if (WINDOWS) block to avoid it being compiled on non-Win32 systems.
#
# Arguments: ($use_shell, $valid_returns, $command, @args).
# Returns the child's output: a list of lines in list context, otherwise
# one string.  Croaks on plumbing failures, and re-throws any error from
# _spawn_or_die() once STDOUT has been restored.

sub _win32_capture {
    if (not WINDOWS) {
        croak sprintf(FAIL_INTERNAL, "_win32_capture called when not under Win32");
    } else {

        my ($use_shell, $valid_returns, $command, @args) = @_;

        my $wantarray = wantarray();

        # Perl doesn't support multi-arg open under
        # Windows.  Perl also doesn't provide very good
        # feedback when normal backticks fail, either;
        # it returns exit status from the shell
        # (which is indistinguishable from the command
        # running and producing the same exit status).

        # As such, we essentially have to write our own
        # backticks.

        # We start by dup'ing STDOUT.

        open(my $saved_stdout, '>&', \*STDOUT)  ## no critic
            or croak sprintf(FAIL_PLUMBING, "Can't dup STDOUT", $!);

        # We now open up a pipe that will allow us to
        # communicate with the new process.

        pipe(my ($read_fh, $write_fh))
            or croak sprintf(FAIL_PLUMBING, "Can't create pipe", $!);

        # Allow CRLF sequences to become "\n", since
        # this is what Perl backticks do.

        binmode($read_fh, ':crlf');

        # Now we re-open our STDOUT to $write_fh...

        open(STDOUT, '>&', $write_fh)  ## no critic
            or croak sprintf(FAIL_PLUMBING, "Can't redirect STDOUT", $!);

        # If we have args, or we're told not to use the shell, then
        # we treat $command as our shell.  Otherwise we grub around
        # in our command to look for a command to run.
        #
        # Note that we don't actually *use* the shell (although in
        # a future version we might).  Being told not to use the shell
        # (capturex) means we treat our command as really being a command,
        # and not a command line.

        # When guessing, a leading double-quoted string wins; failing that,
        # the first run of non-whitespace characters is used.
        my $exe =   @args                      ? $command :
                    (! $use_shell)             ? $command :
                    $command =~ m{^"([^"]+)"}x ? $1       :
                    $command =~ m{(\S+) }x     ? $1       :
                    croak sprintf(FAIL_CMD_BLANK, $command);

        # And now we spawn our new process with inherited
        # filehandles.

        # The error is only remembered here, not thrown: STDOUT must be
        # put back first.
        my $err;
        my $pid = eval {
                _spawn_or_die($exe, "$command @args");
        }
        or do {
            $err = $@;
        };

        # Regardless of whether our command ran, we must restore STDOUT.
        # RT #48319
        open(STDOUT, '>&', $saved_stdout)  ## no critic
            or croak sprintf(FAIL_PLUMBING,"Can't restore STDOUT", $!);

        # And now, if there was an actual error, propagate it.
        die $err if defined $err;   # If there's an error from _spawn_or_die

        # Clean-up the filehandles we no longer need...

        close($write_fh)
            or croak sprintf(FAIL_PLUMBING,q{Can't close write end of pipe}, $!);
        close($saved_stdout)
            or croak sprintf(FAIL_PLUMBING,q{Can't close saved STDOUT}, $!);

        # Read the data from our child...

        my (@results, $result);

        if ($wantarray) {
            @results = <$read_fh>;
        } else {
            $result = join("",<$read_fh>);
        }

        # Tidy up our windows process and we're done!

        $pid->Wait(INFINITE);   # Wait for process exit.
        $pid->GetExitCode($EXITVAL);

        _check_exit($command,$EXITVAL,$valid_returns);

        return $wantarray ? @results : $result;

    }
}

# capturex() is just like backticks/qx, but never invokes the shell.
#
# Returns the command's output: a list of lines in list context, otherwise
# one string.  Croaks if the pipe or the fork cannot be set up, or if the
# child reports that exec() failed.  The exit status is then checked by
# _process_child_error().

sub capturex {
    _check_taint(@_);

    my ($valid_returns, $command, @args) = _process_args(@_);

    our $EXITVAL = -1;

    my $wantarray = wantarray();

    if (WINDOWS) {
        return _win32_capture(NO_SHELL, $valid_returns, $command, @args);
    }

    # We can't use a multi-arg piped open here, since 5.6.x
    # doesn't like them.  Instead we emulate what 5.8.x does,
    # which is to create a pipe(), set the close-on-exec flag
    # on the child, and then fork/exec.  If the exec fails, the
    # child writes to the pipe.  If the exec succeeds, then
    # the pipe closes without data.

    pipe(my ($read_fh, $write_fh))
        or croak sprintf(FAIL_PLUMBING, "Can't create pipe", $!);

    # This next line also does an implicit fork.
    my $pid = open(my $pipe, '-|');  ## no critic

    if (not defined $pid) {
        croak sprintf(FAIL_START, $command, $!);
    } elsif (not $pid) {
        # Child process, execs command.

        close($read_fh);

        # TODO: 'no warnings exec' doesn't get rid
        # of the 'unlikely to be reached' warnings.
        # This is a bug in perl / perldiag / perllexwarn / warnings.

        no warnings;   ## no critic

        CORE::exec { $command } $command, @args;

        # Oh no, exec fails!  Send the reason why to
        # the parent.

        # The numeric value of $! (errno) is what gets sent.
        print {$write_fh} int($!);
        exit(-1);
    }

    {
        # In parent process.

        close($write_fh);

        # Parent process, check for child error.
        my $error = <$read_fh>;

        # Tidy up our pipes.
        close($read_fh);

        # Check for error.
        if ($error) {
            # Setting $! to our child error number gives
            # us nice looking strings when printed.
            local $! = $error;
            croak sprintf(FAIL_START, $command, $!);
        }
    }

    # Parent process, we don't care about our pid, but we
    # do go and read our pipe.

    if ($wantarray) {
        my @results = <$pipe>;
        close($pipe);
        _process_child_error($?,$command,$valid_returns);
        return @results;
    }

    # NB: We don't check the return status on close(), since
    # on failure it sets $?, which we then inspect for more
    # useful information.

    my $results = join("",<$pipe>);
    close($pipe);
    _process_child_error($?,$command,$valid_returns);

    return $results;

}

# Tries really hard to spawn a process under Windows.  Returns the
# process object filled in by Win32::Process::Create() on success
# (callers invoke Wait() and GetExitCode() on it); croaks with
# FAIL_START if the process cannot be created.
#
# The executable is first tried as given (with $Config{_exe} appended when
# the name contains no dot) and then in every directory of $ENV{PATH}.

sub _spawn_or_die {

    # We need to wrap practically the entire sub in an
    # if block to ensure it doesn't get compiled under non-Win32
    # systems.  Compiling on these systems would not only be a
    # waste of time, but also results in complaints about
    # the NORMAL_PRIORITY_CLASS constant.

    if (not WINDOWS) {
        croak sprintf(FAIL_INTERNAL, "_spawn_or_die called when not under Win32");
    } else {
        my ($orig_exe, $cmdline) = @_;
        my $pid;

        my $exe = $orig_exe;

        # If our command doesn't have an extension, add one.
        $exe .= $Config{_exe} if ($exe !~ m{\.});

        Win32::Process::Create(
            $pid, $exe, $cmdline, 1, NORMAL_PRIORITY_CLASS, "."
        ) and return $pid;

        my @path = split(/;/,$ENV{PATH});

        foreach my $dir (@path) {
            my $fullpath = File::Spec->catfile($dir,$exe);

            # We're using -x here on the assumption that stat()
            # is faster than spawn, so trying to spawn a process
            # for each path element will be unacceptably
            # inefficient.

            if (-x $fullpath) {
                Win32::Process::Create(
                    $pid, $fullpath, $cmdline, 1, NORMAL_PRIORITY_CLASS, "."
                ) and return $pid;
            }
        }

        # $^E carries the extended OS error.
        croak sprintf(FAIL_START, $orig_exe, $^E);
    }
}

# Complain on tainted arguments or environment.
# ASSUME_TAINTED is true for 5.6.x, since it's missing ${^TAINT}
#
# _check_taint(@args)
#
# Croaks with FAIL_TAINT if any of @args is tainted, or with
# FAIL_TAINT_ENV if any environment variable named in
# @Check_tainted_env is tainted.  Returns immediately when neither
# ASSUME_TAINTED nor ${^TAINT} is true.  The subroutine name used in
# the message is taken from (caller(1))[3].

sub _check_taint {
    return unless (ASSUME_TAINTED or ${^TAINT});

    my $caller = (caller(1))[3];

    for my $arg (@_) {
        croak sprintf(FAIL_TAINT, $caller, $arg) if tainted $arg;
    }

    for my $env_name (@Check_tainted_env) {
        croak sprintf(FAIL_TAINT_ENV, $caller, $env_name)
            if tainted $ENV{$env_name};
    }

    return;
}

# _process_child_error($child_error, $command, $valid_returns)
#
# Interprets a $?-style status.  It is not intended to be called
# directly: it croaks on errors, and its implementation and interface
# may change in the future.
#
# $EXITVAL is reset to -1 first.  On a normal exit it is set to the
# exit status, which is then handed to _check_exit (whose result is
# returned).  A failure to start, death by signal, or a status that is
# neither an exit nor a signal all croak.

sub _process_child_error {
    my ($child_error, $command, $valid_returns) = @_;

    $EXITVAL = -1;

    my $coredump = WCOREDUMP($child_error);

    # There's a bug in perl 5.10.0 where if the system does not
    # provide a native WCOREDUMP, then $? will never contain coredump
    # information.  Fall back to the native child error in that case.
    $coredump ||= WCOREDUMP( ${^CHILD_ERROR_NATIVE} )
        if ($] >= 5.010 and not $NATIVE_WCOREDUMP);

    # -1 means the command never started.
    croak sprintf(FAIL_START, $command, $!) if $child_error == -1;

    if ( WIFEXITED($child_error) ) {
        $EXITVAL = WEXITSTATUS($child_error);
        return _check_exit($command, $EXITVAL, $valid_returns);
    }

    if ( WIFSIGNALED($child_error) ) {
        my $signo     = WTERMSIG($child_error);
        my $signame   = $Signal_from_number[$signo] || "UNKNOWN";
        my $core_note = $coredump ? " and dumped core" : "";

        croak sprintf(FAIL_SIGNAL, $command, $signame, $signo, $core_note);
    }

    croak sprintf(FAIL_INTERNAL, qq{'$command' ran without exit value or signal});
}

# _check_exit($command, $exitval, $valid_returns)
#
# Returns $exitval if it is acceptable, otherwise croaks with
# FAIL_BADEXIT.  Keeping this check in one place gives consistent
# error messages.

sub _check_exit {
    my ($command, $exitval, $valid_returns) = @_;

    # A single-value list consisting of the EXIT_ANY value means
    # we're happy with whatever exit value we're given.
    my $any_exit_ok =
        (@$valid_returns == 1 and $valid_returns->[0] == EXIT_ANY_CONST);

    return $exitval if $any_exit_ok;

    my $match = first { $_ == $exitval } @$valid_returns;

    croak sprintf(FAIL_BADEXIT, $command, $exitval) unless defined $match;

    return $exitval;
}

# _process_args(@_)
#
# Splits a caller's argument list into the list of valid return
# values (an optional leading array reference, defaulting to [0]),
# the command name, and any remaining arguments.  Croaks when there
# are no arguments, no command, or an undefined command.

sub _process_args {
    my $caller = (caller(1))[3];

    croak "$caller called with no arguments" unless @_;

    my $valid_returns = ref($_[0]) eq "ARRAY" ? shift(@_) : [ 0 ];

    croak "$caller called with no command" unless @_;

    my $command = shift(@_);

    croak sprintf(FAIL_UNDEF, $caller) unless defined $command;

    return ($valid_returns, $command, @_);
}

1;

__END__

=head1 NAME

IPC::System::Simple - Run commands simply, with detailed diagnostics

=head1 SYNOPSIS

    use IPC::System::Simple qw(system systemx capture capturex);

    system("some_command");        # Command succeeds or dies!

    system("some_command",@args);  # Succeeds or dies, avoids shell if @args

    systemx("some_command",@args); # Succeeds or dies, NEVER uses the shell

    # Capture the output of a command (just like backticks). Dies on error.
    my $output = capture("some_command");

    # Just like backticks in list context.  Dies on error.
    my @output = capture("some_command");

    # As above, but avoids the shell if @args is non-empty
    my $output = capture("some_command", @args);

    # As above, but NEVER invokes the shell.
    my $output = capturex("some_command", @args);
    my @output = capturex("some_command", @args);

=head1 DESCRIPTION

Calling Perl's in-built C<system()> function is easy,
determining if it was successful is I<hard>.  Let's face it,
C<$?> isn't the nicest variable in the world to play with, and
even if you I<do> check it, producing a well-formatted error
string takes a lot of work.

C<IPC::System::Simple> takes the hard work out of calling
external commands.
In fact, if you want to be really lazy, you can just write: use IPC::System::Simple qw(system); and all of your C commands will either succeed (run to completion and return a zero exit value), or die with rich diagnostic messages. The C module also provides a simple replacement to Perl's backticks operator. Simply write: use IPC::System::Simple qw(capture); and then use the L command just like you'd use backticks. If there's an error, it will die with a detailed description of what went wrong. Better still, you can even use C to run the equivalent of backticks, but without the shell: use IPC::System::Simple qw(capturex); my $result = capturex($command, @args); If you want more power than the basic interface, including the ability to specify which exit values are acceptable, trap errors, or process diagnostics, then read on! =head1 ADVANCED SYNOPSIS use IPC::System::Simple qw( capture capturex system systemx run runx $EXITVAL EXIT_ANY ); # Run a command, throwing exception on failure run("some_command"); runx("some_command",@args); # Run a command, avoiding the shell # Do the same thing, but with the drop-in system replacement. system("some_command"); systemx("some_command", @args); # Run a command which must return 0..5, avoid the shell, and get the # exit value (we could also look at $EXITVAL) my $exit_value = runx([0..5], "some_command", @args); # The same, but any exit value will do. my $exit_value = runx(EXIT_ANY, "some_command", @args); # Capture output into $result and throw exception on failure my $result = capture("some_command"); # Check exit value from captured command print "some_command exited with status $EXITVAL\n"; # Captures into @lines, splitting on $/ my @lines = capture("some_command"); # Run a command which must return 0..5, capture the output into # @lines, and avoid the shell. 
    my @lines = capturex([0..5], "some_command", @args);

=head1 ADVANCED USAGE

=head2 run() and system()

C<IPC::System::Simple> provides a subroutine called
C<run>, that executes a command using the same semantics as
Perl's built-in C<system>:

    use IPC::System::Simple qw(run);

    run("cat *.txt");           # Execute command via the shell
    run("cat","/etc/motd");     # Execute command without shell

The primary difference between Perl's in-built system and
the C<run> command is that C<run> will throw an exception on
failure, and allows a list of acceptable exit values to be set.
See L</Exit values> for further information.

In fact, you can even have C<IPC::System::Simple> replace the
default C<system> function for your package so it has the
same behaviour:

    use IPC::System::Simple qw(system);

    system("cat *.txt");  # system now succeeds or dies!

C<system> and C<run> are aliases to each other.

See also L</runx(), systemx() and capturex()> for variants of
C<system()> and C<run()> that never invoke the shell, even with
a single argument.

=head2 capture()

A second subroutine, named C<capture> executes a command with
the same semantics as Perl's built-in backticks (and C<qx()>):

    use IPC::System::Simple qw(capture);

    # Capture text while invoking the shell.
    my $file  = capture("cat /etc/motd");
    my @lines = capture("cat /etc/passwd");

However unlike regular backticks, which always use the shell, C<capture>
will bypass the shell when called with multiple arguments:

    # Capture text while avoiding the shell.
    my $file  = capture("cat", "/etc/motd");
    my @lines = capture("cat", "/etc/passwd");

See also L</runx(), systemx() and capturex()> for a variant of
C<capture()> that never invokes the shell, even with a single
argument.

=head2 runx(), systemx() and capturex()

The C<runx()>, C<systemx()> and C<capturex()> commands are identical
to the multi-argument forms of C<run()>, C<system()> and C<capture()>
respectively, but I<never> invoke the shell, even when called with a
single argument.  These forms are particularly useful when a command's
argument list I<might> be empty, for example:

    systemx($cmd, @args);

The use of C<systemx()> here guarantees that the shell will I<never>
be invoked, even if C<@args> is empty.
=head2 Exception handling

In the case where the command returns an unexpected status, both C<run> and
C<capture> will throw an exception, which if not caught will terminate your
program with an error.

Capturing the exception is easy:

    eval {
        run("cat *.txt");
    };

    if ($@) {
        print "Something went wrong - $@\n";
    }

See the diagnostics section below for more details.

=head3 Exception cases

C<IPC::System::Simple> considers the following to be unexpected,
and worthy of exception:

=over 4

=item *

Failing to start entirely (eg, command not found, permission denied).

=item *

Returning an exit value other than zero (but see below).

=item *

Being killed by a signal.

=item *

Being passed tainted data (in taint mode).

=back

=head2 Exit values

Traditionally, system commands return a zero status for success and a
non-zero status for failure.  C<IPC::System::Simple> will default to throwing
an exception if a non-zero exit value is returned.

You may specify a range of values which are considered acceptable exit
values by passing an I<array reference> as the first argument.  The
special constant C<EXIT_ANY> can be used to allow I<any> exit value
to be returned.

    use IPC::System::Simple qw(run system capture EXIT_ANY);

    run( [0..5], "cat *.txt");             # Exit values 0-5 are OK

    system( [0..5], "cat *.txt");          # This works the same way

    my @lines = capture( EXIT_ANY, "cat *.txt"); # Any exit is fine.

The C<run> and replacement C<system> subroutines return the exit
value of the process:

    my $exit_value = run( [0..5], "cat *.txt");

    # OR:

    my $exit_value = system( [0..5], "cat *.txt");

    print "Program exited with value $exit_value\n";

=head3 $EXITVAL

The exit value of any command executed by C<IPC::System::Simple>
can always be retrieved from the C<$IPC::System::Simple::EXITVAL>
variable:

This is particularly useful when inspecting results from C<capture>,
which returns the captured text from the command.
use IPC::System::Simple qw(capture $EXITVAL EXIT_ANY); my @enemies_defeated = capture(EXIT_ANY, "defeat_evil", "/dev/mordor"); print "Program exited with value $EXITVAL\n"; C<$EXITVAL> will be set to C<-1> if the command did not exit normally (eg, being terminated by a signal) or did not start. In this situation an exception will also be thrown. =head2 WINDOWS-SPECIFIC NOTES As of C v0.06, the C subroutine I will make available the full 32-bit exit value on Win32 systems. This is different from the previous versions of C and from Perl's in-build C function, which can only handle 8-bit return values. The C subroutine always returns the 32-bit exit value under Windows. The C subroutine also never uses the shell, even when passed a single argument. Versions of C before v0.09 would not search the C environment variable when the multi-argument form of C was called. Versions from v0.09 onwards correctly search the path provided the command is provided including the extension (eg, C rather than just C, or C rather than just C). If no extension is provided, C<.exe> is assumed. Signals are not supported on Windows systems. Sending a signal to a Windows process will usually cause it to exit with the signal number used. =head1 DIAGNOSTICS =over 4 =item "%s" failed to start: "%s" The command specified did not even start. It may not exist, or you may not have permission to use it. The reason it could not start (as determined from C<$!>) will be provided. =item "%s" unexpectedly returned exit value %d The command ran successfully, but returned an exit value we did not expect. The value returned is reported. =item "%s" died to signal "%s" (%d) %s The command was killed by a signal. The name of the signal will be reported, or C if it cannot be determined. The signal number is always reported. If we detected that the process dumped core, then the string C is appended. 
=item IPC::System::Simple::%s called with no arguments

You attempted to call C<run> or C<capture> but did not provide any
arguments at all.  At the very least you need to supply a command
to run.

=item IPC::System::Simple::%s called with no command

You called C<run> or C<capture> with a list of acceptable exit values,
but no actual command.

=item IPC::System::Simple::%s called with tainted argument "%s"

You called C<run> or C<capture> with tainted (untrusted) arguments, which is
almost certainly a bad idea.  To untaint your arguments you'll need to pass
your data through a regular expression and use the resulting match variables.
See L<perlsec/Laundering and Detecting Tainted Data> for more information.

=item IPC::System::Simple::%s called with tainted environment $ENV{%s}

You called C<run> or C<capture> but part of your environment was tainted
(untrusted).  You should either delete the named environment
variable before calling C<run>, or set it to an untainted value
(usually one set inside your program).  See
L<perlsec/Cleaning Up Your Path> for more information.

=item Error in IPC::System::Simple plumbing: "%s" - "%s"

Implementing the C<capture> command involves dark and terrible magicks
involving pipes, and one of them has sprung a leak.  This could be due to a
lack of file descriptors, although there are other possibilities.

If you are able to reproduce this error, you are encouraged
to submit a bug report according to the L</Reporting bugs> section below.

=item Internal error in IPC::System::Simple: "%s"

You've found a bug in C<IPC::System::Simple>.  Please check to
see if an updated version of C<IPC::System::Simple> is available.
If not, please file a bug report according to the L</Reporting bugs> section
below.

=item IPC::System::Simple::%s called with undefined command

You've passed the undefined value as a command to be executed.
While this is a very Zen-like action, it's not supported by
Perl's current implementation.

=back

=head1 DEPENDENCIES

This module depends upon L<Win32::Process> when used on Win32
systems.  C<Win32::Process> is bundled as a core module in ActivePerl 5.6
and above.

There are no non-core dependencies on non-Win32 systems.
=head1 COMPARISON TO OTHER APIs

Perl provides a range of in-built functions for handling external
commands, and CPAN provides even more.  The C<IPC::System::Simple>
differentiates itself from other options by providing:

=over 4

=item Extremely detailed diagnostics

The diagnostics produced by C<IPC::System::Simple> are designed
to provide as much information as possible.  Rather than requiring
the developer to inspect C<$?>, C<IPC::System::Simple> does the
hard work for you.

If an odd exit status is provided, you're informed of what it is.  If a
signal kills your process, you are informed of both its name and number.
If tainted data or environment prevents your command from running, you
are informed of exactly which data or environment variable is tainted.

=item Exceptions on failure

C<IPC::System::Simple> takes an aggressive approach to error handling.
Rather than allow commands to fail silently, exceptions are thrown
when unexpected results are seen.  This allows for easy development
using a try/catch style, and avoids the possibility of accidentally
continuing after a failed command.

=item Easy access to exit status

The C<system>, C<run> and C<capture> commands all set C<$EXITVAL>,
making it easy to determine the exit status of a command.
Additionally, the C<system> and C<run> interfaces return the exit
status.

=item Consistent interfaces

When called with multiple arguments, the C<system>, C<run> and
C<capture> interfaces I<never> invoke the shell.  This differs
from the in-built Perl C<system> command which may invoke the
shell under Windows when called with multiple arguments.  It
differs from the in-built Perl backticks operator which always
invokes the shell.

=back

=head1 BUGS

When C<system> is exported, the exotic form C<system { $cmd } @args>
is not supported.  Attempting to use the exotic form is a syntax
error.  This affects the calling package I<only>.  Use C<CORE::system>
if you need it, or consider using the L<autodie> module to replace
C<system> with lexical scope.

Core dumps are only checked for when a process dies due to a
signal.  It is not believed there are any systems where processes
can dump core without dying to a signal.

C<WIFSTOPPED> status is not checked, as perl never spawns processes
with the C<WUNTRACED> option.
Signals are not supported under Win32 systems, since they don't
work at all like Unix signals.  Win32 signals cause commands to
exit with a given exit value, which this module I<does> capture.

Only 8-bit values are returned when C<run()> or C<system()>
is called with a single value under Win32.  Multi-argument calls
to C<run()> and C<system()>, as well as the C<runx()> and
C<systemx()> always return the 32-bit Windows return values.

=head2 Reporting bugs

Before reporting a bug, please check to ensure you are using the
most recent version of C<IPC::System::Simple>.  Your problem may
have already been fixed in a new release.

You can find the C<IPC::System::Simple> bug-tracker at
L<http://rt.cpan.org/Public/Dist/Display.html?Name=IPC-System-Simple> .
Please check to see if your bug has already been reported; if
in doubt, report yours anyway.

Submitting a patch and/or failing test case will greatly expedite
the fixing of bugs.

=head1 FEEDBACK

If you find this module useful, please consider rating it on the
CPAN Ratings service at
L<http://cpanratings.perl.org/rate/?distribution=IPC-System-Simple> .

The module author loves to hear how C<IPC::System::Simple> has made
your life better (or worse).  Feedback can be sent to
E<lt>pjf@perltraining.com.auE<gt>.

=head1 SEE ALSO

L<autodie> uses C<IPC::System::Simple> to provide succeed-or-die
replacements to C<system> (and other built-ins) with lexical scope.

L<POSIX>, L<IPC::Run::Simple>, L<perlipc>, L<perlport>, L<IPC::Run>,
L<IPC::Run3>, L<Win32::Process>

=head1 AUTHOR

Paul Fenwick E<lt>pjf@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2008 by Paul Fenwick

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.6.0 or,
at your option, any later version of Perl 5 you may have available.

=for Pod::Coverage WCOREDUMP

=cut
IPC_SYSTEM_SIMPLE

$fatpacked{"JSON/MaybeXS.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'JSON_MAYBEXS';
package JSON::MaybeXS;

use strict;
use warnings FATAL => 'all';
use base qw(Exporter);

our $VERSION = '1.003009';
$VERSION = eval $VERSION;

# Returns the name of the JSON backend class to use.
#
# An XS backend that is already present in %INC wins.  Otherwise
# Cpanel::JSON::XS, JSON::XS and JSON::PP are required in that order
# and the first that loads is returned.  Dies with every collected
# load error if none of them can be loaded.
sub _choose_json_module {
    return 'Cpanel::JSON::XS' if $INC{'Cpanel/JSON/XS.pm'};
    return 'JSON::XS'         if $INC{'JSON/XS.pm'};

    # Each loader keeps a literal "require Module" statement.
    my @loaders = (
        [ 'Cpanel::JSON::XS' => sub { require Cpanel::JSON::XS; 1 } ],
        [ 'JSON::XS'         => sub { require JSON::XS;         1 } ],
        [ 'JSON::PP'         => sub { require JSON::PP;         1 } ],
    );

    my @err;
    for my $entry (@loaders) {
        my ($module, $load) = @$entry;
        return $module if eval { $load->() };
        push @err, "Error loading $module: $@";
    }

    die join( "\n", "Couldn't load a JSON module:", @err );
}

# Pick the backend at compile time and pull its encode_json and
# decode_json into this package so that they can be re-exported.
BEGIN {
    our $JSON_Class = _choose_json_module();
    $JSON_Class->import(qw(encode_json decode_json));
}

our @EXPORT    = qw(encode_json decode_json JSON);
my @EXPORT_ALL = qw(is_bool);
our @EXPORT_OK = qw(is_bool to_json from_json);
our %EXPORT_TAGS = (
    all    => [ @EXPORT, @EXPORT_ALL ],
    legacy => [ @EXPORT, @EXPORT_OK ],
);

# Constant-style accessor: the name of the selected backend class.
sub JSON () { our $JSON_Class }

# Factory: builds an object of the backend class and applies each
# key of the options (a hash, or a single hashref) as a mutator call
# on it.  The invocant is ignored.
sub new {
    my ($ignored_class, @opts) = @_;

    my %args    = @opts == 1 ? %{ $opts[0] } : @opts;
    my $backend = (our $JSON_Class)->new;

    for my $option (keys %args) {
        $backend->$option( $args{$option} );
    }

    return $backend;
}

use Scalar::Util ();

# True when the argument is a blessed boolean object of any of the
# three supported backends.  Must be called as a plain function; dies
# if a true second argument is present.
sub is_bool {
    die 'is_bool is not a method' if $_[1];

    return Scalar::Util::blessed($_[0])
        && (   $_[0]->isa('JSON::XS::Boolean')
            || $_[0]->isa('Cpanel::JSON::XS::Boolean')
            || $_[0]->isa('JSON::PP::Boolean') );
}

# (mostly) CopyPasta from JSON.pm version 2.90
use Carp ();

# from_json($text [, \%options])
#
# Decodes $text with a fresh backend object; every key of %options is
# applied to that object as a mutator call first.  Croaks when it
# looks like it was called as a method.
sub from_json ($@) {
    Carp::croak "from_json should not be called as a method."
        if ( ref($_[0]) =~ /^JSON/ or $_[0] =~ /^JSON/ );

    my $json = JSON()->new;

    if (@_ == 2 and ref $_[1] eq 'HASH') {
        my $opt = $_[1];
        $json->$_( $opt->{$_} ) for keys %$opt;
    }

    return $json->decode( $_[0] );
}

# to_json($data [, \%options])
#
# Encodes $data with a fresh backend object; every key of %options is
# applied to that object as a mutator call first.  Croaks when it
# looks like it was called as a method.
sub to_json ($@) {
    Carp::croak "to_json should not be called as a method."
        if ( ref($_[0]) =~ /^JSON/ or (@_ > 2 and $_[0] =~ /^JSON/) );

    my $json = JSON()->new;

    if (@_ == 2 and ref $_[1] eq 'HASH') {
        my $opt = $_[1];
        $json->$_( $opt->{$_} ) for keys %$opt;
    }

    return $json->encode( $_[0] );
}

1;

=head1 NAME

JSON::MaybeXS - Use L<Cpanel::JSON::XS> with a fallback to L<JSON::XS> and L<JSON::PP>

=head1 SYNOPSIS

    use JSON::MaybeXS;

    my $data_structure = decode_json($json_input);

    my $json_output = encode_json($data_structure);

    my $json = JSON->new;

    my $json_with_args = JSON::MaybeXS->new(utf8 => 1); # or { utf8 => 1 }

=head1 DESCRIPTION

This module first checks to see if either L<Cpanel::JSON::XS> or
L<JSON::XS> is already loaded, in which case it uses that module. Otherwise
it tries to load L<Cpanel::JSON::XS>, then L<JSON::XS>, then L<JSON::PP>
in order, and either uses the first module it finds or throws an error.

It then exports the C<encode_json> and C<decode_json> functions from the
loaded module, along with a C<JSON> constant that returns the class name
for calling C<new> on.

If you're writing fresh code rather than replacing L<JSON> usage, you might
want to pass options as constructor args rather than calling mutators, so
we provide our own C<new> method that supports that.

=head1 EXPORTS

C<encode_json>, C<decode_json> and C<JSON> are exported by default; C<is_bool>
is exported on request.
To import only some symbols, specify them on the C line: use JSON::MaybeXS qw(encode_json decode_json is_bool); # functions only use JSON::MaybeXS qw(JSON); # JSON constant only To import all available sensible symbols (C, C, and C), use C<:all>: use JSON::MaybeXS ':all'; To import all symbols including those needed by legacy apps that use L: use JSON::MaybeXS ':legacy'; This imports the C and C symbols as well as everything in C<:all>. NOTE: This is to support legacy code that makes extensive use of C and C which you are not yet in a position to refactor. DO NOT use this import tag in new code, in order to avoid the crawling horrors of getting UTF-8 support subtly wrong. See the documentation for L for further details. =head2 encode_json This is the C function provided by the selected implementation module, and takes a perl data structure which is serialised to JSON text. my $json_text = encode_json($data_structure); =head2 decode_json This is the C function provided by the selected implementation module, and takes a string of JSON text to deserialise to a perl data structure. my $data_structure = decode_json($json_text); =head2 to_json, from_json See L for details. These are included to support legacy code B. =head2 JSON The C constant returns the selected implementation module's name for use as a class name - so: my $json_obj = JSON->new; # returns a Cpanel::JSON::XS or JSON::PP object and that object can then be used normally: my $data_structure = $json_obj->decode($json_text); # etc. =head2 is_bool $is_boolean = is_bool($scalar) Returns true if the passed scalar represents either C or C, two constants that act like C<1> and C<0>, respectively and are used to represent JSON C and C values in Perl. Since this is a bare sub in the various backend classes, it cannot be called as a class method like the other interfaces; it must be called as a function, with no invocant. It supports the representation used in all JSON backends. 
=head1 CONSTRUCTOR

=head2 new

With L<JSON::PP>, L<JSON::XS> and L<Cpanel::JSON::XS> you are required to call
mutators to set options, such as:

    my $json = $class->new->utf8(1)->pretty(1);

Since this is a trifle irritating and noticeably un-perlish, we also offer:

    my $json = JSON::MaybeXS->new(utf8 => 1, pretty => 1);

which works equivalently to the above (and in the usual tradition will accept
a hashref instead of a hash, should you so desire).

The resulting object is blessed into the underlying backend, which offers (at
least) the methods C<encode> and C<decode>.

=head1 BOOLEANS

To include JSON-aware booleans (C<true>, C<false>) in your data, just do:

    use JSON::MaybeXS;
    my $true = JSON->true;
    my $false = JSON->false;

=head1 CONVERTING FROM JSON::Any

L<JSON::Any> used to be the favoured compatibility layer above the various
JSON backends, but over time has grown a lot of extra code to deal with legacy
backends (e.g. L<JSON::Syck>) that are no longer needed.  This is a rough guide
of translating such code:

Change code from:

    use JSON::Any;
    my $json = JSON::Any->new->objToJson($data);    # or to_json($data), or Dump($data)

to:

    use JSON::MaybeXS;
    my $json = encode_json($data);

Change code from:

    use JSON::Any;
    my $data = JSON::Any->new->jsonToObj($json);    # or from_json($json), or Load($json)

to:

    use JSON::MaybeXS;
    my $data = decode_json($json);

=head1 CAVEATS

The C<new> method in this module is technically a factory, not a
constructor, because the objects it returns will I<NOT> be blessed into the
C<JSON::MaybeXS> class.

If you are using an object returned by this module as a Moo(se) attribute,
this type constraint code:

    is 'json' => ( isa => 'JSON::MaybeXS' );

will I<NOT> do what you expect.
Instead, either rely on the C class constant described above, as so: is 'json' => ( isa => JSON::MaybeXS::JSON() ); Alternatively, you can use duck typing: use Moose::Util::TypeConstraints 'duck_type'; is 'json' => ( isa => Object , duck_type([qw/ encode decode /])); =head1 INSTALLATION At installation time, F will attempt to determine if you have a working compiler available, and therefore whether you are able to run XS code. If so, L will be added to the prerequisite list, unless L is already installed at a high enough version. L may also be upgraded to fix any incompatibility issues. Because running XS code is not mandatory and L (which is in perl core) is used as a fallback backend, this module is safe to be used in a suite of code that is fatpacked or installed into a restricted-resource environment. You can also prevent any XS dependencies from being installed by setting C in F options (or in the C environment variable), or using the C<--pp> or C<--pureperl> flags with the L. =head1 AUTHOR mst - Matt S. Trout (cpan:MSTROUT) =head1 CONTRIBUTORS =over 4 =item * Clinton Gormley =item * Karen Etheridge =item * Kieren Diment =back =head1 COPYRIGHT Copyright (c) 2013 the C L and L as listed above. =head1 LICENSE This library is free software and may be distributed under the same terms as perl itself. =cut JSON_MAYBEXS $fatpacked{"JSON/PP.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'JSON_PP'; package JSON::PP; # JSON-2.0 use 5.005; use strict; use Exporter (); BEGIN { @JSON::PP::ISA = ('Exporter') } use overload (); use JSON::PP::Boolean; use Carp (); #use Devel::Peek; $JSON::PP::VERSION = '2.97001'; @JSON::PP::EXPORT = qw(encode_json decode_json from_json to_json); # instead of hash-access, i tried index-access for speed. # but this method is not faster than what i expected. so it will be changed. 
use constant P_ASCII => 0; use constant P_LATIN1 => 1; use constant P_UTF8 => 2; use constant P_INDENT => 3; use constant P_CANONICAL => 4; use constant P_SPACE_BEFORE => 5; use constant P_SPACE_AFTER => 6; use constant P_ALLOW_NONREF => 7; use constant P_SHRINK => 8; use constant P_ALLOW_BLESSED => 9; use constant P_CONVERT_BLESSED => 10; use constant P_RELAXED => 11; use constant P_LOOSE => 12; use constant P_ALLOW_BIGNUM => 13; use constant P_ALLOW_BAREKEY => 14; use constant P_ALLOW_SINGLEQUOTE => 15; use constant P_ESCAPE_SLASH => 16; use constant P_AS_NONBLESSED => 17; use constant P_ALLOW_UNKNOWN => 18; use constant OLD_PERL => $] < 5.008 ? 1 : 0; use constant USE_B => 0; BEGIN { if (USE_B) { require B; } } BEGIN { my @xs_compati_bit_properties = qw( latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink allow_blessed convert_blessed relaxed allow_unknown ); my @pp_bit_properties = qw( allow_singlequote allow_bignum loose allow_barekey escape_slash as_nonblessed ); # Perl version check, Unicode handling is enabled? # Helper module sets @JSON::PP::_properties. if ( OLD_PERL ) { my $helper = $] >= 5.006 ? 'JSON::PP::Compat5006' : 'JSON::PP::Compat5005'; eval qq| require $helper |; if ($@) { Carp::croak $@; } } for my $name (@xs_compati_bit_properties, @pp_bit_properties) { my $property_id = 'P_' . uc($name); eval qq/ sub $name { my \$enable = defined \$_[1] ? \$_[1] : 1; if (\$enable) { \$_[0]->{PROPS}->[$property_id] = 1; } else { \$_[0]->{PROPS}->[$property_id] = 0; } \$_[0]; } sub get_$name { \$_[0]->{PROPS}->[$property_id] ? 
1 : ''; } /; } } # Functions my $JSON; # cache sub encode_json ($) { # encode ($JSON ||= __PACKAGE__->new->utf8)->encode(@_); } sub decode_json { # decode ($JSON ||= __PACKAGE__->new->utf8)->decode(@_); } # Obsoleted sub to_json($) { Carp::croak ("JSON::PP::to_json has been renamed to encode_json."); } sub from_json($) { Carp::croak ("JSON::PP::from_json has been renamed to decode_json."); } # Methods sub new { my $class = shift; my $self = { max_depth => 512, max_size => 0, indent_length => 3, }; bless $self, $class; } sub encode { return $_[0]->PP_encode_json($_[1]); } sub decode { return $_[0]->PP_decode_json($_[1], 0x00000000); } sub decode_prefix { return $_[0]->PP_decode_json($_[1], 0x00000001); } # accessor # pretty printing sub pretty { my ($self, $v) = @_; my $enable = defined $v ? $v : 1; if ($enable) { # indent_length(3) for JSON::XS compatibility $self->indent(1)->space_before(1)->space_after(1); } else { $self->indent(0)->space_before(0)->space_after(0); } $self; } # etc sub max_depth { my $max = defined $_[1] ? $_[1] : 0x80000000; $_[0]->{max_depth} = $max; $_[0]; } sub get_max_depth { $_[0]->{max_depth}; } sub max_size { my $max = defined $_[1] ? $_[1] : 0; $_[0]->{max_size} = $max; $_[0]; } sub get_max_size { $_[0]->{max_size}; } sub filter_json_object { if (defined $_[1] and ref $_[1] eq 'CODE') { $_[0]->{cb_object} = $_[1]; } else { delete $_[0]->{cb_object}; } $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0; $_[0]; } sub filter_json_single_key_object { if (@_ == 1 or @_ > 3) { Carp::croak("Usage: JSON::PP::filter_json_single_key_object(self, key, callback = undef)"); } if (defined $_[2] and ref $_[2] eq 'CODE') { $_[0]->{cb_sk_object}->{$_[1]} = $_[2]; } else { delete $_[0]->{cb_sk_object}->{$_[1]}; delete $_[0]->{cb_sk_object} unless %{$_[0]->{cb_sk_object} || {}}; } $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 
1 : 0; $_[0]; } sub indent_length { if (!defined $_[1] or $_[1] > 15 or $_[1] < 0) { Carp::carp "The acceptable range of indent_length() is 0 to 15."; } else { $_[0]->{indent_length} = $_[1]; } $_[0]; } sub get_indent_length { $_[0]->{indent_length}; } sub sort_by { $_[0]->{sort_by} = defined $_[1] ? $_[1] : 1; $_[0]; } sub allow_bigint { Carp::carp("allow_bigint() is obsoleted. use allow_bignum() instead."); $_[0]->allow_bignum; } ############################### ### ### Perl => JSON ### { # Convert my $max_depth; my $indent; my $ascii; my $latin1; my $utf8; my $space_before; my $space_after; my $canonical; my $allow_blessed; my $convert_blessed; my $indent_length; my $escape_slash; my $bignum; my $as_nonblessed; my $depth; my $indent_count; my $keysort; sub PP_encode_json { my $self = shift; my $obj = shift; $indent_count = 0; $depth = 0; my $props = $self->{PROPS}; ($ascii, $latin1, $utf8, $indent, $canonical, $space_before, $space_after, $allow_blessed, $convert_blessed, $escape_slash, $bignum, $as_nonblessed) = @{$props}[P_ASCII .. P_SPACE_AFTER, P_ALLOW_BLESSED, P_CONVERT_BLESSED, P_ESCAPE_SLASH, P_ALLOW_BIGNUM, P_AS_NONBLESSED]; ($max_depth, $indent_length) = @{$self}{qw/max_depth indent_length/}; $keysort = $canonical ? sub { $a cmp $b } : undef; if ($self->{sort_by}) { $keysort = ref($self->{sort_by}) eq 'CODE' ? $self->{sort_by} : $self->{sort_by} =~ /\D+/ ? 
$self->{sort_by} : sub { $a cmp $b }; } encode_error("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)") if(!ref $obj and !$props->[ P_ALLOW_NONREF ]); my $str = $self->object_to_json($obj); $str .= "\n" if ( $indent ); # JSON::XS 2.26 compatible unless ($ascii or $latin1 or $utf8) { utf8::upgrade($str); } if ($props->[ P_SHRINK ]) { utf8::downgrade($str, 1); } return $str; } sub object_to_json { my ($self, $obj) = @_; my $type = ref($obj); if($type eq 'HASH'){ return $self->hash_to_json($obj); } elsif($type eq 'ARRAY'){ return $self->array_to_json($obj); } elsif ($type) { # blessed object? if (blessed($obj)) { return $self->value_to_json($obj) if ( $obj->isa('JSON::PP::Boolean') ); if ( $convert_blessed and $obj->can('TO_JSON') ) { my $result = $obj->TO_JSON(); if ( defined $result and ref( $result ) ) { if ( refaddr( $obj ) eq refaddr( $result ) ) { encode_error( sprintf( "%s::TO_JSON method returned same object as was passed instead of a new one", ref $obj ) ); } } return $self->object_to_json( $result ); } return "$obj" if ( $bignum and _is_bignum($obj) ); if ($allow_blessed) { return $self->blessed_to_json($obj) if ($as_nonblessed); # will be removed. return 'null'; } encode_error( sprintf("encountered object '%s', but neither allow_blessed " . "nor convert_blessed settings are enabled", $obj) ); } else { return $self->value_to_json($obj); } } else{ return $self->value_to_json($obj); } } sub hash_to_json { my ($self, $obj) = @_; my @res; encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)") if (++$depth > $max_depth); my ($pre, $post) = $indent ? $self->_up_indent() : ('', ''); my $del = ($space_before ? ' ' : '') . ':' . ($space_after ? ' ' : ''); for my $k ( _sort( $obj ) ) { if ( OLD_PERL ) { utf8::decode($k) } # key for Perl 5.6 / be optimized push @res, $self->string_to_json( $k ) . $del . ( ref $obj->{$k} ? 
# Encode an array reference as a JSON array.
#
# Increments the shared nesting counter (an encoding error is raised once it
# exceeds max_depth), encodes every element - references via object_to_json(),
# everything else via value_to_json() - and joins the pieces with the
# separators supplied by _up_indent() when indenting is enabled.
sub array_to_json {
    my ($self, $obj) = @_;

    if ( ++$depth > $max_depth ) {
        encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)");
    }

    my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');

    my @encoded;
    foreach my $element ( @{$obj} ) {
        if ( ref($element) ) {
            push @encoded, $self->object_to_json($element);
        }
        else {
            push @encoded, $self->value_to_json($element);
        }
    }

    # Leave this nesting level again before building the result.
    --$depth;
    $self->_down_indent() if $indent;

    return @encoded
        ? '[' . $pre . join( ",$pre", @encoded ) . $post . ']'
        : '[]';
}
# Return a copy of the string in $_[0] restricted to code points 0..127.
#
# Code points up to 127 are copied as-is, code points up to 65535 become a
# single \uXXXX escape, and anything larger becomes a pair of \u escapes
# built from the values returned by _encode_surrogates().
sub _encode_ascii {
    my $escaped = '';

    for my $code_point ( unpack('U*', $_[0]) ) {
        if ( $code_point <= 127 ) {
            $escaped .= chr($code_point);
        }
        elsif ( $code_point <= 65535 ) {
            $escaped .= sprintf('\u%04x', $code_point);
        }
        else {
            $escaped .= sprintf('\u%x\u%x', _encode_surrogates($code_point));
        }
    }

    return $escaped;
}
# Split a supplementary-plane code point (above 0xFFFF) into its UTF-16
# surrogate pair (formula from perlunicode).
#
# Argument: the code point as a number.
# Returns:  a two-element list (high surrogate, low surrogate), both integers.
#
# The quotient is truncated explicitly: Perl's "/" is floating-point division,
# so without int() the high surrogate carries a fractional part (for U+10401
# it would be 55297.0009765625 rather than 0xD801) and is only correct for
# callers that happen to format it with an integer conversion such as "%x".
sub _encode_surrogates { # from perlunicode
    my $uni = $_[0] - 0x10000;
    return ( int( $uni / 0x400 ) + 0xD800, $uni % 0x400 + 0xDC00 );
}
P_ALLOW_SINGLEQUOTE]; if ( $utf8 ) { $encoding = _detect_utf_encoding($text); if ($encoding ne 'UTF-8' and $encoding ne 'unknown') { require Encode; Encode::from_to($text, $encoding, 'utf-8'); } else { utf8::downgrade( $text, 1 ) or Carp::croak("Wide character in subroutine entry"); } } else { utf8::upgrade( $text ); utf8::encode( $text ); } $len = length $text; ($max_depth, $max_size, $cb_object, $cb_sk_object, $F_HOOK) = @{$self}{qw/max_depth max_size cb_object cb_sk_object F_HOOK/}; if ($max_size > 1) { use bytes; my $bytes = length $text; decode_error( sprintf("attempted decode of JSON text of %s bytes size, but max_size is set to %s" , $bytes, $max_size), 1 ) if ($bytes > $max_size); } white(); # remove head white space decode_error("malformed JSON string, neither array, object, number, string or atom") unless defined $ch; # Is there a first character for JSON structure? my $result = value(); if ( !$props->[ P_ALLOW_NONREF ] and !ref $result ) { decode_error( 'JSON text must be an object or array (but found number, string, true, false or null,' . ' use allow_nonref to allow this)', 1); } Carp::croak('something wrong.') if $len < $at; # we won't arrive here. my $consumed = defined $ch ? 
# Parse whichever JSON value starts at the current character.
#
# Skips leading white space, then picks the sub-parser from the current
# character $ch: '{' object, '[' array, a quote character string (a single
# quote only when $singlequote is set), a digit or '-' number, anything else
# word(). Returns nothing when the input is exhausted.
sub value {
    white();

    return unless defined $ch;

    if ( $ch eq '{' ) {
        return object();
    }
    elsif ( $ch eq '[' ) {
        return array();
    }
    elsif ( $ch eq '"' or ($singlequote and $ch eq "'") ) {
        return string();
    }
    elsif ( $ch =~ /[0-9]/ or $ch eq '-' ) {
        return number();
    }

    return word();
}
# Advance the parser past white space and, in relaxed mode, past comments.
#
# Works on the shared parser state ($ch, $at, $text) and returns with $ch set
# to the first character that is neither white space nor part of a comment,
# or undef when the input is exhausted.
sub white {
    while( defined $ch ){
        # The empty string is the initial value PP_decode_json() gives $ch
        # before the first character has been read.
        if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){
            next_chr();
        }
        elsif($relaxed and $ch eq '/'){
            next_chr();
            if(defined $ch and $ch eq '/'){
                # '//' comment: consume up to the next line feed or carriage return
                1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r");
            }
            elsif(defined $ch and $ch eq '*'){
                # '/* ... */' comment: scan until the closing '*/'
                next_chr();
                while(1){
                    if(defined $ch){
                        if($ch eq '*'){
                            if(defined(next_chr()) and $ch eq '/'){
                                next_chr();
                                last;
                            }
                        }
                        else{
                            next_chr();
                        }
                    }
                    else{
                        # input ended inside the comment
                        decode_error("Unterminated comment");
                    }
                }
                next;
            }
            else{
                # a lone '/' starts nothing valid; step back so the error
                # offset points at it
                $at--;
                decode_error("malformed JSON string, neither array, object, number, string or atom");
            }
        }
        else{
            if ($relaxed and $ch eq '#') { # correctly?
                # '#' comment: let the regex engine jump past the end of the
                # line, then resynchronise $at and $ch
                pos($text) = $at;
                $text =~ /\G([^\n]*(?:\r\n|\r|\n|$))/g;
                $at = pos($text);
                next_chr;
                next;
            }
            last;
        }
    }
}
# Parse a JSON object; on entry $ch is the opening '{'.
#
# Argument: optionally a hash reference to fill instead of a fresh one.
# Returns:  the filled hash reference, or whatever _json_object_hook() makes
#           of it when $F_HOOK is set.
# Raises a decode error when the nesting limit is exceeded, when a ':' is
# missing after a key, or when neither ',' nor '}' follows a value.
#
# Fix over the previous version: the relaxed-mode check for a trailing comma
# compared $ch with '}' even when the input ended right after the comma and
# $ch was undef, which produced an "uninitialized value" warning when
# warnings are enabled. The comparison is now guarded with defined().
sub object {
    my $o = $_[0] || {}; # you can use this code to use another hash ref object.
    my $k;

    decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
        if (++$depth > $max_depth);

    next_chr();
    white();

    if(defined $ch and $ch eq '}'){
        # empty object
        --$depth;
        next_chr();
        if ($F_HOOK) {
            return _json_object_hook($o);
        }
        return $o;
    }
    else {
        while (defined $ch) {
            # key: a bare word when allow_barekey is set and no quote follows
            $k = ($allow_barekey and $ch ne '"' and $ch ne "'") ? bareKey() : string();
            white();

            if(!defined $ch or $ch ne ':'){
                $at--;
                decode_error("':' expected");
            }

            next_chr();
            $o->{$k} = value();
            white();

            last if (!defined $ch);

            if($ch eq '}'){
                --$depth;
                next_chr();
                if ($F_HOOK) {
                    return _json_object_hook($o);
                }
                return $o;
            }

            if($ch ne ','){
                last;
            }

            next_chr();
            white();

            # relaxed mode tolerates a comma directly before the closing brace
            if ($relaxed and defined $ch and $ch eq '}') {
                --$depth;
                next_chr();
                if ($F_HOOK) {
                    return _json_object_hook($o);
                }
                return $o;
            }
        }
    }

    # step back so that the reported offset points at the offending character
    $at-- if defined $ch and $ch ne '';
    decode_error(", or } expected while parsing object/hash");
}
# Parse a JSON number starting at the current character $ch.
#
# Collects the literal text in $n (sign, integer part, optional fraction,
# optional exponent) and then decides how to return it:
#   - fraction or exponent present: a Math::BigFloat object when allow_bignum
#     is set, otherwise a native number;
#   - integer literal longer than $max_intsize characters: a Math::BigInt
#     object when allow_bignum is set, otherwise the literal as a string;
#   - anything else: a native number.
# Raises a decode error for a sign, decimal point or exponent marker that is
# not followed by a digit, and for a leading zero followed by another digit.
sub number {
    my $n    = '';
    my $v;
    my $is_dec;   # set once a decimal point has been seen
    my $is_exp;   # set once an exponent marker has been seen

    if($ch eq '-'){
        $n = '-';
        next_chr;
        if (!defined $ch or $ch !~ /\d/) {
            decode_error("malformed number (no digits after initial minus)");
        }
    }

    # According to RFC4627, hex or oct digits are invalid.
    if($ch eq '0'){
        # look at the character after the zero without consuming it
        my $peek = substr($text,$at,1);
        if($peek =~ /^[0-9a-dfA-DF]/){ # e may be valid (exponential)
            decode_error("malformed number (leading zero must not be followed by another digit)");
        }
        $n .= $ch;
        next_chr;
    }

    # integer part
    while(defined $ch and $ch =~ /\d/){
        $n .= $ch;
        next_chr;
    }

    # fraction
    if(defined $ch and $ch eq '.'){
        $n .= '.';
        $is_dec = 1;

        next_chr;
        if (!defined $ch or $ch !~ /\d/) {
            decode_error("malformed number (no digits after decimal point)");
        }
        else {
            $n .= $ch;
        }

        while(defined(next_chr) and $ch =~ /\d/){
            $n .= $ch;
        }
    }

    # exponent
    if(defined $ch and ($ch eq 'e' or $ch eq 'E')){
        $n .= $ch;
        $is_exp = 1;
        next_chr;

        if(defined($ch) and ($ch eq '+' or $ch eq '-')){
            $n .= $ch;
            next_chr;
            if (!defined $ch or $ch =~ /\D/) {
                decode_error("malformed number (no digits after exp sign)");
            }
            $n .= $ch;
        }
        elsif(defined($ch) and $ch =~ /\d/){
            $n .= $ch;
        }
        else {
            decode_error("malformed number (no digits after exp sign)");
        }

        while(defined(next_chr) and $ch =~ /\d/){
            $n .= $ch;
        }

    }

    $v .= $n;

    if ($is_dec or $is_exp) {
        if ($allow_bignum) {
            require Math::BigFloat;
            return Math::BigFloat->new($v);
        }
    } else {
        # $max_intsize is computed in a BEGIN block as the digit count at
        # which an evaluated integer literal starts to show an exponent
        if (length $v > $max_intsize) {
            if ($allow_bignum) { # from Adam Sussman
                require Math::BigInt;
                return Math::BigInt->new($v);
            }
            else {
                return "$v";
            }
        }
    }

    # force numeric context so that the caller gets a number, not the literal
    return $is_dec ? $v/1.0 : 0+$v;
}
# Abort decoding with a croak.
#
# Arguments:
#   $error  - the message text
#   $no_rep - when true, croak with the bare message; otherwise append the
#             current character offset $at and an escaped excerpt of the
#             input that follows it (cut off with '...' once the excerpt has
#             reached 20 characters)
#
# Fix over the previous version: the OLD_PERL branch declared a second
# "my $type" inside its block, so the unpack template it computed was thrown
# away at the end of the block and 'U*' was always used. The branch now
# assigns to the outer $type.
sub decode_error {
    my $error  = shift;
    my $no_rep = shift;
    my $str    = defined $text ? substr($text, $at) : '';
    my $mess   = '';
    my $type   = 'U*';

    if ( OLD_PERL ) {
        $type = $] < 5.006             ? 'C*'
              : utf8::is_utf8( $str )  ? 'U*' # 5.6
              :                          'C*'
              ;
    }

    for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ?
        $mess .= $c == 0x07 ? '\a'
               : $c == 0x09 ? '\t'
               : $c == 0x0a ? '\n'
               : $c == 0x0d ? '\r'
               : $c == 0x0c ? '\f'
               : $c <  0x20 ? sprintf('\x{%x}', $c)
               : $c == 0x5c ? '\\\\'
               : $c <  0x80 ? chr($c)
               : sprintf('\x{%x}', $c)
               ;
        if ( length $mess >= 20 ) {
            $mess .= '...';
            last;
        }
    }

    # nothing left after the offset
    unless ( length $mess ) {
        $mess = '(end of string)';
    }

    Carp::croak(
        $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")"
    );
}
# Install JSON::PP::blessed, JSON::PP::reftype and JSON::PP::refaddr.
#
# When Scalar::Util can be loaded its implementations are aliased directly;
# otherwise pure-Perl replacements (taken from Scalar::Util, per the comment
# below) are installed instead.
BEGIN {
    eval 'require Scalar::Util';
    unless($@){
        *JSON::PP::blessed = \&Scalar::Util::blessed;
        *JSON::PP::reftype = \&Scalar::Util::reftype;
        *JSON::PP::refaddr = \&Scalar::Util::refaddr;
    }
    else{ # This code is from Scalar::Util.
        # warn $@;
        # A method in UNIVERSAL that reports the class of its invocant.
        eval 'sub UNIVERSAL::a_sub_not_likely_to_be_here { ref($_[0]) }';
        *JSON::PP::blessed = sub {
            # keep the probing eval from disturbing $@ or triggering handlers
            local($@, $SIG{__DIE__}, $SIG{__WARN__});
            # the method call only succeeds on a blessed reference
            ref($_[0]) ? eval { $_[0]->a_sub_not_likely_to_be_here } : undef;
        };
        require B;
        # B object class => name reported by reftype
        my %tmap = qw(
            B::NULL   SCALAR
            B::HV     HASH
            B::AV     ARRAY
            B::CV     CODE
            B::IO     IO
            B::GV     GLOB
            B::REGEXP REGEXP
        );
        *JSON::PP::reftype = sub {
            my $r = shift;

            return undef unless length(ref($r));

            my $t = ref(B::svref_2object($r));

            return
                exists $tmap{$t} ? $tmap{$t}
              : length(ref($$r)) ? 'REF'
              :                    'SCALAR';
        };
        *JSON::PP::refaddr = sub {
            return undef unless length(ref($_[0]));

            my $addr;
            if(defined(my $pkg = blessed($_[0]))) {
                # stringify while temporarily reblessed into a throw-away
                # class, then restore the original class
                # NOTE(review): presumably done to sidestep overloaded
                # stringification in the real class - confirm
                $addr .= bless $_[0], 'Scalar::Util::Fake';
                bless $_[0], $pkg;
            }
            else {
                $addr .= $_[0]
            }

            # the address is the hexadecimal part of the stringified reference
            $addr =~ /0x(\w+)/;
            local $^W;
            #no warnings 'portable';
            hex($1);
        }
    }
}
# Feed text to the incremental parser and, unless called in void context,
# try to extract complete JSON values from the buffer.
#
# Arguments:
#   $coder - the JSON::PP object; supplies get_max_size() and PP_decode_json()
#   $text  - optional chunk to append to the internal buffer
#
# Returns:
#   void context   - nothing; the chunk is only appended
#   list context   - every value that could be decoded from the buffer
#   scalar context - the first decoded value, or undef if none is complete yet
#
# Croaks when max_size is set and the scan position has moved past it.
#
# Fix over the previous version: the scalar-context return tested the decoded
# value for truth, so decoded values that are false in Perl (0, "", JSON
# false) came back as undef and looked like "nothing parsed yet". The decoded
# value is now returned as it is. An unused local was removed as well.
sub incr_parse {
    my ( $self, $coder, $text ) = @_;

    $self->{incr_text} = '' unless ( defined $self->{incr_text} );

    if ( defined $text ) {
        # bring the buffer to character semantics before appending
        # character data to it
        if ( utf8::is_utf8( $text ) and !utf8::is_utf8( $self->{incr_text} ) ) {
            utf8::upgrade( $self->{incr_text} ) ;
            utf8::decode( $self->{incr_text} ) ;
        }
        $self->{incr_text} .= $text;
    }

    return unless defined wantarray;

    my $max_size = $coder->get_max_size;
    my @ret;

    # The bare block gives the "last" statements below a loop to leave;
    # a do-while on its own does not count as one.
    {
        do {
            unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
                $self->_incr_parse( $coder );

                if ( $max_size and $self->{incr_pos} > $max_size ) {
                    Carp::croak("attempted decode of JSON text of $self->{incr_pos} bytes size, but max_size is set to $max_size");
                }

                unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
                    # as an optimisation, do not accumulate white space in the incr buffer
                    if ( $self->{incr_mode} == INCR_M_WS and $self->{incr_pos} ) {
                        $self->{incr_pos} = 0;
                        $self->{incr_text} = '';
                    }
                    last;
                }
            }

            # A true second argument makes PP_decode_json() also return the
            # length of the text it consumed.
            my ($obj, $offset) = $coder->PP_decode_json( $self->{incr_text}, 0x00000001 );
            push @ret, $obj;

            use bytes;
            $self->{incr_text} = substr( $self->{incr_text}, $offset || 0 );
            $self->{incr_pos}  = 0;
            $self->{incr_nest} = 0;
            $self->{incr_mode} = 0;

            last unless wantarray;
        } while ( wantarray );
    }

    return @ret if wantarray;

    # scalar context: $ret[0] is undef when nothing was decoded
    return $ret[0];
}
# Discard the part of the buffer that has already been scanned and reset
# the scanner: incr_text keeps only what follows incr_pos, while incr_pos,
# incr_mode and incr_nest all go back to 0.
sub incr_skip {
    my ($parser) = @_;

    my $unscanned = substr( $parser->{incr_text}, $parser->{incr_pos} );
    $parser->{incr_text} = $unscanned;

    $parser->{$_} = 0 for qw(incr_pos incr_mode);
    $parser->{incr_nest} = 0;
}
=head1 SYNOPSIS use JSON::PP; # exported functions, they croak on error # and expect/generate UTF-8 $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; # OO-interface $json = JSON::PP->new->ascii->pretty->allow_nonref; $pretty_printed_json_text = $json->encode( $perl_scalar ); $perl_scalar = $json->decode( $json_text ); # Note that JSON version 2.0 and above will automatically use # JSON::XS or JSON::PP, so you should be able to just: use JSON; =head1 VERSION 2.97001 =head1 DESCRIPTION JSON::PP is a pure perl JSON decoder/encoder (as of RFC4627, which we know is obsolete but we still stick to; see below for an option to support part of RFC7159), and (almost) compatible to much faster L<JSON::XS> written by Marc Lehmann in C. JSON::PP works as a fallback module when you use L<JSON> module without having installed JSON::XS. Because of this fallback feature of JSON.pm, JSON::PP tries not to be more JavaScript-friendly than JSON::XS (i.e. not to escape extra characters such as U+2028 and U+2029 nor support RFC7159/ECMA-404), in order for you not to lose such JavaScript-friendliness silently when you use JSON.pm and install JSON::XS for speed or by accident. If you need JavaScript-friendly RFC7159-compliant pure perl module, try L<JSON::Tiny>, which is derived from L<Mojolicious> web framework and is also smaller and faster than JSON::PP. JSON::PP has been in the Perl core since Perl 5.14, mainly for CPAN toolchain modules to parse META.json. =head1 FUNCTIONAL INTERFACE This section is taken from JSON::XS almost verbatim. C<encode_json> and C<decode_json> are exported by default. =head2 encode_json $json_text = encode_json $perl_scalar Converts the given Perl data structure to a UTF-8 encoded, binary string (that is, the string contains octets only). Croaks on error. This function call is functionally identical to: $json_text = JSON::PP->new->utf8->encode($perl_scalar) Except being faster. 
=head2 decode_json $perl_scalar = decode_json $json_text The opposite of C<encode_json>: expects an UTF-8 (binary) string and tries to parse that as an UTF-8 encoded JSON text, returning the resulting reference. Croaks on error. This function call is functionally identical to: $perl_scalar = JSON::PP->new->utf8->decode($json_text) Except being faster. =head2 JSON::PP::is_bool $is_boolean = JSON::PP::is_bool($scalar) Returns true if the passed scalar represents either JSON::PP::true or JSON::PP::false, two constants that act like C<1> and C<0> respectively and are also used to represent JSON C<true> and C<false> in Perl strings. See L<MAPPING>, below, for more information on how JSON values are mapped to Perl. =head1 OBJECT-ORIENTED INTERFACE This section is also taken from JSON::XS. The object oriented interface lets you configure your own encoding or decoding style, within the limits of supported formats. =head2 new $json = JSON::PP->new Creates a new JSON::PP object that can be used to de/encode JSON strings. All boolean flags described below are by default I<disabled>. The mutators for flags all return the JSON::PP object again and thus calls can be chained: my $json = JSON::PP->new->utf8->space_after->encode({a => [1,2]}) => {"a": [1, 2]} =head2 ascii $json = $json->ascii([$enable]) $enabled = $json->get_ascii If C<$enable> is true (or missing), then the C<encode> method will not generate characters outside the code range C<0..127> (which is ASCII). Any Unicode characters outside that range will be escaped using either a single \uXXXX (BMP characters) or a double \uHHHH\uLLLL escape sequence, as per RFC4627. The resulting encoded JSON text can be treated as a native Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string, or any other superset of ASCII. If C<$enable> is false, then the C<encode> method will not escape Unicode characters unless required by the JSON syntax or other flags. This results in a faster and more compact format. See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. 
The main use for this flag is to produce JSON texts that can be transmitted over a 7-bit channel, as the encoded JSON texts will not contain any 8 bit characters. JSON::PP->new->ascii(1)->encode([chr 0x10401]) => ["\ud801\udc01"] =head2 latin1 $json = $json->latin1([$enable]) $enabled = $json->get_latin1 If C<$enable> is true (or missing), then the C<encode> method will encode the resulting JSON text as latin1 (or iso-8859-1), escaping any characters outside the code range C<0..255>. The resulting string can be treated as a latin1-encoded JSON text or a native Unicode string. The C<decode> method will not be affected in any way by this flag, as C<decode> by default expects Unicode, which is a strict superset of latin1. If C<$enable> is false, then the C<encode> method will not escape Unicode characters unless required by the JSON syntax or other flags. See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. The main use for this flag is efficiently encoding binary data as JSON text, as most octets will not be escaped, resulting in a smaller encoded size. The disadvantage is that the resulting JSON text is encoded in latin1 (and must correctly be treated as such when storing and transferring), a rare encoding for JSON. It is therefore most useful when you want to store data structures known to contain binary data efficiently in files or databases, not when talking to other JSON encoders/decoders. JSON::PP->new->latin1->encode (["\x{89}\x{abc}"]) => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) =head2 utf8 $json = $json->utf8([$enable]) $enabled = $json->get_utf8 If C<$enable> is true (or missing), then the C<encode> method will encode the JSON result into UTF-8, as required by many protocols, while the C<decode> method expects to be handed an UTF-8-encoded string. Please note that UTF-8-encoded strings do not contain any characters outside the range C<0..255>, they are thus useful for bytewise/binary I/O. 
In future versions, enabling this option might enable autodetection of the UTF-16 and UTF-32 encoding families, as described in RFC4627. If C<$enable> is false, then the C<encode> method will return the JSON string as a (non-encoded) Unicode string, while C<decode> expects thus a Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs to be done yourself, e.g. using the Encode module. See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. Example, output UTF-16BE-encoded JSON: use Encode; $jsontext = encode "UTF-16BE", JSON::PP->new->encode ($object); Example, decode UTF-32LE-encoded JSON: use Encode; $object = JSON::PP->new->decode (decode "UTF-32LE", $jsontext); =head2 pretty $json = $json->pretty([$enable]) This enables (or disables) all of the C<indent>, C<space_before> and C<space_after> (and in the future possibly more) flags in one call to generate the most readable (or most compact) form possible. =head2 indent $json = $json->indent([$enable]) $enabled = $json->get_indent If C<$enable> is true (or missing), then the C<encode> method will use a multiline format as output, putting every array member or object/hash key-value pair into its own line, indenting them properly. If C<$enable> is false, no newlines or indenting will be produced, and the resulting JSON text is guaranteed not to contain any C<newlines>. This setting has no effect when decoding JSON texts. The default indent space length is three. You can use C<indent_length> to change the length. =head2 space_before $json = $json->space_before([$enable]) $enabled = $json->get_space_before If C<$enable> is true (or missing), then the C<encode> method will add an extra optional space before the C<:> separating keys from values in JSON objects. If C<$enable> is false, then the C<encode> method will not add any extra space at those places. This setting has no effect when decoding JSON texts. You will also most likely combine this setting with C<space_after>. 
Example, space_before enabled, space_after and indent disabled: {"key" :"value"} =head2 space_after $json = $json->space_after([$enable]) $enabled = $json->get_space_after If C<$enable> is true (or missing), then the C method will add an extra optional space after the C<:> separating keys from values in JSON objects and extra whitespace after the C<,> separating key-value pairs and array members. If C<$enable> is false, then the C method will not add any extra space at those places. This setting has no effect when decoding JSON texts. Example, space_before and indent disabled, space_after enabled: {"key": "value"} =head2 relaxed $json = $json->relaxed([$enable]) $enabled = $json->get_relaxed If C<$enable> is true (or missing), then C will accept some extensions to normal JSON syntax (see below). C will not be affected in anyway. I. I suggest only to use this option to parse application-specific files written by humans (configuration files, resource files etc.) If C<$enable> is false (the default), then C will only accept valid JSON texts. Currently accepted extensions are: =over 4 =item * list items can have an end-comma JSON I array elements and key-value pairs with commas. This can be annoying if you write JSON texts manually and want to be able to quickly append elements, so this extension accepts comma at the end of such items not just between them: [ 1, 2, <- this comma not normally allowed ] { "k1": "v1", "k2": "v2", <- this comma not normally allowed } =item * shell-style '#'-comments Whenever JSON allows whitespace, shell-style comments are additionally allowed. They are terminated by the first carriage-return or line-feed character, after which more white-space and comments are allowed. [ 1, # this comment not allowed in JSON # neither this one... ] =item * C-style multiple-line '/* */'-comments (JSON::PP only) Whenever JSON allows whitespace, C-style multiple-line comments are additionally allowed. 
Everything between C</*> and C<*/> is a comment, after which more white-space and comments are allowed. [ 1, /* this comment not allowed in JSON */ /* neither this one... */ ] =item * C++-style one-line '//'-comments (JSON::PP only) Whenever JSON allows whitespace, C++-style one-line comments are additionally allowed. They are terminated by the first carriage-return or line-feed character, after which more white-space and comments are allowed. [ 1, // this comment not allowed in JSON // neither this one... ] =back =head2 canonical $json = $json->canonical([$enable]) $enabled = $json->get_canonical If C<$enable> is true (or missing), then the C method will output JSON objects by sorting their keys. This is adding a comparatively high overhead. If C<$enable> is false, then the C method will output key-value pairs in the order Perl stores them (which will likely change between runs of the same script, and can change even within the same run from 5.18 onwards). This option is useful if you want the same data structure to be encoded as the same JSON text (given the same overall settings). If it is disabled, the same hash might be encoded differently even if it contains the same data, as key-value pairs have no inherent ordering in Perl. This setting has no effect when decoding JSON texts. This setting has currently no effect on tied hashes. =head2 allow_nonref $json = $json->allow_nonref([$enable]) $enabled = $json->get_allow_nonref If C<$enable> is true (or missing), then the C method can convert a non-reference into its corresponding string, number or null JSON value, which is an extension to RFC4627. Likewise, C will accept those JSON values instead of croaking. If C<$enable> is false, then the C method will croak if it isn't passed an arrayref or hashref, as JSON texts must either be an object or array. Likewise, C will croak if given something that is not a JSON object or array.
Example, encode a Perl scalar as JSON value with enabled C, resulting in an invalid JSON text: JSON::PP->new->allow_nonref->encode ("Hello, World!") => "Hello, World!" =head2 allow_unknown $json = $json->allow_unknown ([$enable]) $enabled = $json->get_allow_unknown If C<$enable> is true (or missing), then C will I throw an exception when it encounters values it cannot represent in JSON (for example, filehandles) but instead will encode a JSON C value. Note that blessed objects are not included here and are handled separately by c. If C<$enable> is false (the default), then C will throw an exception when it encounters anything it cannot encode as JSON. This option does not affect C in any way, and it is recommended to leave it off unless you know your communications partner. =head2 allow_blessed $json = $json->allow_blessed([$enable]) $enabled = $json->get_allow_blessed See L for details. If C<$enable> is true (or missing), then the C method will not barf when it encounters a blessed reference that it cannot convert otherwise. Instead, a JSON C value is encoded instead of the object. If C<$enable> is false (the default), then C will throw an exception when it encounters a blessed object that it cannot convert otherwise. This setting has no effect on C. =head2 convert_blessed $json = $json->convert_blessed([$enable]) $enabled = $json->get_convert_blessed See L for details. If C<$enable> is true (or missing), then C, upon encountering a blessed object, will check for the availability of the C method on the object's class. If found, it will be called in scalar context and the resulting scalar will be encoded instead of the object. The C method may safely call die if it wants. If C returns other blessed objects, those will be handled in the same way. C must take care of not causing an endless recursion cycle (== crash) in this case. 
The name of C was chosen because other methods called by the Perl core (== not by the user of the object) are usually in upper case letters and to avoid collisions with any C function or method. If C<$enable> is false (the default), then C will not consider this type of conversion. This setting has no effect on C. =head2 filter_json_object $json = $json->filter_json_object([$coderef]) When C<$coderef> is specified, it will be called from C each time it decodes a JSON object. The only argument is a reference to the newly-created hash. If the code reference returns a single scalar (which need not be a reference), this value (i.e. a copy of that scalar to avoid aliasing) is inserted into the deserialised data structure. If it returns an empty list (NOTE: I<not> C<undef>, which is a valid scalar), the original deserialised hash will be inserted. This setting can slow down decoding considerably. When C<$coderef> is omitted or undefined, any existing callback will be removed and C will not change the deserialised hash in any way. Example, convert all JSON objects into the integer 5: my $js = JSON::PP->new->filter_json_object (sub { 5 }); # returns [5] $js->decode ('[{}]'); # the given subroutine takes a hash reference. # throw an exception because allow_nonref is not enabled # so a lone 5 is not allowed. $js->decode ('{"a":1, "b":2}'); =head2 filter_json_single_key_object $json = $json->filter_json_single_key_object($key [=> $coderef]) Works remotely similar to C, but is only called for JSON objects having a single key named C<$key>. This C<$coderef> is called before the one specified via C, if any. It gets passed the single value in the JSON object. If it returns a single value, it will be inserted into the data structure. If it returns nothing (not even C<undef> but the empty list), the callback from C will be called next, as if no single-key callback were specified. If C<$coderef> is omitted or undefined, the corresponding callback will be disabled.
There can only ever be one callback for a given key. As this callback gets called less often than the C one, decoding speed will not usually suffer as much. Therefore, single-key objects make excellent targets to serialise Perl objects into, especially as single-key JSON objects are as close to the type-tagged value concept as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not support this in any way, so you need to make sure your data never looks like a serialised Perl hash. Typical names for the single object key are C<__class_whatever__>, or C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even things like C<__class_md5sum(classname)__>, to reduce the risk of clashing with real hashes. Example, decode JSON objects of the form C<< { "__widget__" => <id> } >> into the corresponding C<< $WIDGET{<id>} >> object: # return whatever is in $WIDGET{5}: JSON::PP ->new ->filter_json_single_key_object (__widget__ => sub { $WIDGET{ $_[0] } }) ->decode ('{"__widget__": 5}') # this can be used with a TO_JSON method in some "widget" class # for serialisation to json: sub WidgetBase::TO_JSON { my ($self) = @_; unless ($self->{id}) { $self->{id} = ..get..some..id..; $WIDGET{$self->{id}} = $self; } { __widget__ => $self->{id} } } =head2 shrink $json = $json->shrink([$enable]) $enabled = $json->get_shrink If C<$enable> is true (or missing), the string returned by C will be shrunk (i.e. downgraded if possible). The actual definition of what shrink does might change in future versions, but it will always try to save space at the expense of time. If C<$enable> is false, then JSON::PP does nothing. =head2 max_depth $json = $json->max_depth([$maximum_nesting_depth]) $max_depth = $json->get_max_depth Sets the maximum nesting level (default C<512>) accepted while encoding or decoding. If a higher nesting level is detected in JSON text or a Perl data structure, then the encoder and decoder will stop and croak at that point.
Nesting level is defined by number of hash- or arrayrefs that the encoder needs to traverse to reach a given point or the number of C<{> or C<[> characters without their matching closing parenthesis crossed to reach a given character in a string. Setting the maximum depth to one disallows any nesting, so that ensures that the object is only a single hash/object or array. If no argument is given, the highest possible setting will be used, which is rarely useful. See L for more info on why this is useful. =head2 max_size $json = $json->max_size([$maximum_string_size]) $max_size = $json->get_max_size Set the maximum length a JSON text may have (in bytes) where decoding is being attempted. The default is C<0>, meaning no limit. When C is called on a string that is longer than this many bytes, it will not attempt to decode the string but throw an exception. This setting has no effect on C (yet). If no argument is given, the limit check will be deactivated (same as when C<0> is specified). See L for more info on why this is useful. =head2 encode $json_text = $json->encode($perl_scalar) Converts the given Perl value or data structure to its JSON representation. Croaks on error. =head2 decode $perl_scalar = $json->decode($json_text) The opposite of C: expects a JSON text and tries to parse it, returning the resulting simple scalar or reference. Croaks on error. =head2 decode_prefix ($perl_scalar, $characters) = $json->decode_prefix($json_text) This works like the C method, but instead of raising an exception when there is trailing garbage after the first JSON object, it will silently stop parsing there and return the number of characters consumed so far. This is useful if your JSON texts are not delimited by an outer protocol and you need to know where the JSON text ends. JSON::PP->new->decode_prefix ("[1] the tail") => ([1], 3) =head1 FLAGS FOR JSON::PP ONLY The following flags and properties are for JSON::PP only.
If you use any of these, you can't make your application run faster by replacing JSON::PP with JSON::XS. If you need these and also speed boost, try L, a fork of JSON::XS by Reini Urban, which supports some of these. =head2 allow_singlequote $json = $json->allow_singlequote([$enable]) $enabled = $json->get_allow_singlequote If C<$enable> is true (or missing), then C will accept invalid JSON texts that contain strings that begin and end with single quotation marks. C will not be affected in anyway. I. I suggest only to use this option to parse application-specific files written by humans (configuration files, resource files etc.) If C<$enable> is false (the default), then C will only accept valid JSON texts. $json->allow_singlequote->decode(qq|{"foo":'bar'}|); $json->allow_singlequote->decode(qq|{'foo':"bar"}|); $json->allow_singlequote->decode(qq|{'foo':'bar'}|); =head2 allow_barekey $json = $json->allow_barekey([$enable]) $enabled = $json->get_allow_barekey If C<$enable> is true (or missing), then C will accept invalid JSON texts that contain JSON objects whose names don't begin and end with quotation marks. C will not be affected in anyway. I. I suggest only to use this option to parse application-specific files written by humans (configuration files, resource files etc.) If C<$enable> is false (the default), then C will only accept valid JSON texts. $json->allow_barekey->decode(qq|{foo:"bar"}|); =head2 allow_bignum $json = $json->allow_bignum([$enable]) $enabled = $json->get_allow_bignum If C<$enable> is true (or missing), then C will convert big integers Perl cannot handle as integer into L objects and convert floating numbers into L objects. C will convert C and C objects into JSON numbers. $json->allow_nonref->allow_bignum; $bigfloat = $json->decode('2.000000000000000000000000001'); print $json->encode($bigfloat); # => 2.000000000000000000000000001 See also L. 
=head2 loose $json = $json->loose([$enable]) $enabled = $json->get_loose If C<$enable> is true (or missing), then C will accept invalid JSON texts that contain unescaped [\x00-\x1f\x22\x5c] characters. C will not be affected in anyway. I. I suggest only to use this option to parse application-specific files written by humans (configuration files, resource files etc.) If C<$enable> is false (the default), then C will only accept valid JSON texts. $json->loose->decode(qq|["abc def"]|); =head2 escape_slash $json = $json->escape_slash([$enable]) $enabled = $json->get_escape_slash If C<$enable> is true (or missing), then C will explicitly escape I (solidus; C) characters to reduce the risk of XSS (cross site scripting) that may be caused by C<< >> in a JSON text, with the cost of bloating the size of JSON texts. This option may be useful when you embed JSON in HTML, but embedding arbitrary JSON in HTML (by some HTML template toolkit or by string interpolation) is risky in general. You must escape necessary characters in correct order, depending on the context. C will not be affected in anyway. =head2 indent_length $json = $json->indent_length($number_of_spaces) $length = $json->get_indent_length This option is only useful when you also enable C or C. JSON::XS indents with three spaces when you C (if requested by C or C), and the number cannot be changed. JSON::PP allows you to change/get the number of indent spaces with these mutator/accessor. The default number of spaces is three (the same as JSON::XS), and the acceptable range is from C<0> (no indentation; it'd be better to disable indentation by C) to C<15>. =head2 sort_by $json = $json->sort_by($code_ref) $json = $json->sort_by($subroutine_name) If you just want to sort keys (names) in JSON objects when you C, enable C option (see above) that allows you to sort object keys alphabetically. 
If you do need to sort non-alphabetically for whatever reasons, you can give a code reference (or a subroutine name) to C, then the argument will be passed to Perl's C built-in function. As the sorting is done in the JSON::PP scope, you usually need to prepend C to the subroutine name, and the special variables C<$a> and C<$b> used in the subroutine used by C function. Example: my %ORDER = (id => 1, class => 2, name => 3); $json->sort_by(sub { ($ORDER{$JSON::PP::a} // 999) <=> ($ORDER{$JSON::PP::b} // 999) or $JSON::PP::a cmp $JSON::PP::b }); print $json->encode([ {name => 'CPAN', id => 1, href => 'http://cpan.org'} ]); # [{"id":1,"name":"CPAN","href":"http://cpan.org"}] Note that C affects all the plain hashes in the data structure. If you need finer control, C<tie> necessary hashes with a module that implements ordered hash (such as L and L). C and C don't affect the key order in C<tie>d hashes. use Hash::Ordered; tie my %hash, 'Hash::Ordered', (name => 'CPAN', id => 1, href => 'http://cpan.org'); print $json->encode([\%hash]); # [{"name":"CPAN","id":1,"href":"http://cpan.org"}] # order is kept =head1 INCREMENTAL PARSING This section is also taken from JSON::XS. In some cases, there is the need for incremental parsing of JSON texts. While this module always has to keep both JSON text and resulting Perl data structure in memory at one time, it does allow you to parse a JSON stream incrementally. It does so by accumulating text until it has a full JSON object, which it then can decode. This process is similar to using C to see if a full JSON object is available, but is much more efficient (and can be implemented with a minimum of method calls). JSON::PP will only attempt to parse the JSON text once it is sure it has enough text to get a decisive result, using a very simple but truly incremental parser. This means that it sometimes won't stop as early as the full parser, for example, it doesn't detect mismatched parentheses.
The only thing it guarantees is that it starts decoding as soon as a syntactically valid JSON text has been seen. This means you need to set resource limits (e.g. C) to ensure the parser will stop parsing in the presence of syntax errors. The following methods implement this incremental parser. =head2 incr_parse $json->incr_parse( [$string] ) # void context $obj_or_undef = $json->incr_parse( [$string] ) # scalar context @obj_or_empty = $json->incr_parse( [$string] ) # list context This is the central parsing function. It can both append new text and extract objects from the stream accumulated so far (both of these functions are optional). If C<$string> is given, then this string is appended to the already existing JSON fragment stored in the C<$json> object. After that, if the function is called in void context, it will simply return without doing anything further. This can be used to add more text in as many chunks as you want. If the method is called in scalar context, then it will try to extract exactly I<one> JSON object. If that is successful, it will return this object, otherwise it will return C<undef>. If there is a parse error, this method will croak just as C would do (one can then use C to skip the erroneous part). This is the most common way of using the method. And finally, in list context, it will try to extract as many objects from the stream as it can find and return them, or the empty list otherwise. For this to work, there must be no separators (other than whitespace) between the JSON objects or arrays, instead they must be concatenated back-to-back. If an error occurs, an exception will be raised as in the scalar context case. Note that in this case, any previously-parsed JSON texts will be lost. Example: Parse some JSON arrays/objects in a given string and return them.
my @objs = JSON::PP->new->incr_parse ("[5][7][1,2]"); =head2 incr_text $lvalue_string = $json->incr_text This method returns the currently stored JSON fragment as an lvalue, that is, you can manipulate it. This I works when a preceding call to C in I successfully returned an object. Under all other circumstances you must not call this function (I mean it. although in simple tests it might actually work, it I fail under real world conditions). As a special exception, you can also call this method before having parsed anything. That means you can only use this function to look at or manipulate text before or after complete JSON objects, not while the parser is in the middle of parsing a JSON object. This function is useful in two cases: a) finding the trailing text after a JSON object or b) parsing multiple JSON objects separated by non-JSON text (such as commas). =head2 incr_skip $json->incr_skip This will reset the state of the incremental parser and will remove the parsed text from the input buffer so far. This is useful after C died, in which case the input buffer and incremental parser state is left unchanged, to skip the text parsed so far and to reset the parse state. The difference to C is that only text until the parse error occurred is removed. =head2 incr_reset $json->incr_reset This completely resets the incremental parser, that is, after this call, it will be as if the parser had never parsed anything. This is useful if you want to repeatedly parse JSON objects and want to ignore any trailing data, which means you have to reset the parser after each successful decode. =head1 MAPPING Most of this section is also taken from JSON::XS. This section describes how JSON::PP maps Perl values to JSON values and vice versa. These mappings are designed to "do the right thing" in most circumstances automatically, preserving round-tripping characteristics (what you put in comes out as something equivalent). 
For the more enlightened: note that in the following descriptions, lowercase I refers to the Perl interpreter, while uppercase I refers to the abstract Perl language itself. =head2 JSON -> PERL =over 4 =item object A JSON object becomes a reference to a hash in Perl. No ordering of object keys is preserved (JSON does not preserve object key ordering itself). =item array A JSON array becomes a reference to an array in Perl. =item string A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON are represented by the same codepoints in the Perl string, so no manual decoding is necessary. =item number A JSON number becomes either an integer, numeric (floating point) or string scalar in perl, depending on its range and any fractional parts. On the Perl level, there is no difference between those as Perl handles all the conversion details, but an integer may take slightly less memory and might represent more values exactly than floating point numbers. If the number consists of digits only, JSON::PP will try to represent it as an integer value. If that fails, it will try to represent it as a numeric (floating point) value if that is possible without loss of precision. Otherwise it will preserve the number as a string value (in which case you lose roundtripping ability, as the JSON number will be re-encoded to a JSON string). Numbers containing a fractional or exponential part will always be represented as numeric (floating point) values, possibly at a loss of precision (in which case you might lose perfect roundtripping ability, but the JSON number will still be re-encoded as a JSON number). Note that precision is not accuracy - binary floating point values cannot represent most decimal fractions exactly, and when converting from and to floating point, JSON::PP only guarantees precision up to but not including the least significant bit. 
When C is enabled, big integer values and any numeric values will be converted into L and L objects respectively, without becoming string scalars or losing precision. =item true, false These JSON atoms become C and C, respectively. They are overloaded to act almost exactly like the numbers C<1> and C<0>. You can check whether a scalar is a JSON boolean by using the C function. =item null A JSON null atom becomes C in Perl. =item shell-style comments (C<< # I >>) As a nonstandard extension to the JSON syntax that is enabled by the C setting, shell-style comments are allowed. They can start anywhere outside strings and go till the end of the line. =back =head2 PERL -> JSON The mapping from Perl to JSON is slightly more difficult, as Perl is a truly typeless language, so we can only guess which JSON type is meant by a Perl value. =over 4 =item hash references Perl hash references become JSON objects. As there is no inherent ordering in hash keys (or JSON objects), they will usually be encoded in a pseudo-random order. JSON::PP can optionally sort the hash keys (determined by the I flag and/or I property), so the same data structure will serialise to the same JSON text (given same settings and version of JSON::PP), but this incurs a runtime overhead and is only rarely useful, e.g. when you want to compare some JSON text against another for equality. =item array references Perl array references become JSON arrays. =item other references Other unblessed references are generally not allowed and will cause an exception to be thrown, except for references to the integers C<0> and C<1>, which get turned into C and C atoms in JSON. You can also use C and C to improve readability. to_json [\0, JSON::PP::true] # yields [false,true] =item JSON::PP::true, JSON::PP::false These special values become JSON true and JSON false values, respectively. You can also use C<\1> and C<\0> directly if you want. =item JSON::PP::null This special value becomes JSON null. 
=item blessed objects Blessed objects are not directly representable in JSON, but C allows various ways of handling objects. See L, below, for details. =item simple scalars Simple Perl scalars (any scalar that is not a reference) are the most difficult objects to encode: JSON::PP will encode undefined scalars as JSON C values, scalars that have last been used in a string context before encoding as JSON strings, and anything else as number value: # dump as number encode_json [2] # yields [2] encode_json [-3.0e17] # yields [-3e+17] my $value = 5; encode_json [$value] # yields [5] # used as string, so dump as string print $value; encode_json [$value] # yields ["5"] # undef becomes null encode_json [undef] # yields [null] You can force the type to be a string by stringifying it: my $x = 3.1; # some variable containing a number "$x"; # stringified $x .= ""; # another, more awkward way to stringify print $x; # perl does it for you, too, quite often # (but for older perls) You can force the type to be a number by numifying it: my $x = "3"; # some variable containing a string $x += 0; # numify it, ensuring it will be dumped as a number $x *= 1; # same thing, the choice is yours. You cannot currently force the type in other, less obscure, ways. Note that numerical precision has the same meaning as under Perl (so binary to decimal conversion follows the same rules as in Perl, which can differ to other languages). Also, your perl interpreter might expose extensions to the floating point numbers of your platform, such as infinities or NaN's - these cannot be represented in JSON, and it is an error to pass those in. JSON::PP (and JSON::XS) trusts what you pass to C method (or C function) is a clean, validated data structure with values that can be represented as valid JSON values only, because it's not from an external data source (as opposed to JSON texts you pass to C or C, which JSON::PP considers tainted and doesn't trust). 
As JSON::PP doesn't know exactly what you and consumers of your JSON texts want the unexpected values to be (you may want to convert them into null, or to stringify them with or without normalisation (string representation of infinities/NaN may vary depending on platforms), or to croak without conversion), you're advised to do what you and your consumers need before you encode, and also not to numify values that may start with values that look like a number (including infinities/NaN), without validating. =back =head2 OBJECT SERIALISATION As for Perl objects, JSON::PP only supports a pure JSON representation (without the ability to deserialise the object automatically again). =head3 SERIALISATION What happens when C encounters a Perl object depends on the C, C and C settings, which are used in this order: =over 4 =item 1. C is enabled and the object has a C method. In this case, the C method of the object is invoked in scalar context. It must return a single scalar that can be directly encoded into JSON. This scalar replaces the object in the JSON text. For example, the following C method will convert all L objects to JSON strings when serialised. The fact that these values originally were L objects is lost. sub URI::TO_JSON { my ($uri) = @_; $uri->as_string } =item 2. C is enabled and the object is a C or C. The object will be serialised as a JSON number value. =item 3. C is enabled. The object will be serialised as a JSON null value. =item 4. none of the above If none of the settings are enabled or the respective methods are missing, C throws an exception. =back =head1 ENCODING/CODESET FLAG NOTES This section is taken from JSON::XS. The interested reader might have seen a number of flags that signify encodings or codesets - C, C and C. 
There seems to be some confusion on what these do, so here is a short comparison: C controls whether the JSON text created by C (and expected by C) is UTF-8 encoded or not, while C and C only control whether C escapes character values outside their respective codeset range. Neither of these flags conflict with each other, although some combinations make less sense than others. Care has been taken to make all flags symmetrical with respect to C and C, that is, texts encoded with any combination of these flag values will be correctly decoded when the same flags are used - in general, if you use different flag settings while encoding vs. when decoding you likely have a bug somewhere. Below comes a verbose discussion of these flags. Note that a "codeset" is simply an abstract set of character-codepoint pairs, while an encoding takes those codepoint numbers and I them, in our case into octets. Unicode is (among other things) a codeset, UTF-8 is an encoding, and ISO-8859-1 (= latin 1) and ASCII are both codesets I encodings at the same time, which can be confusing. =over 4 =item C flag disabled When C is disabled (the default), then C/C generate and expect Unicode strings, that is, characters with high ordinal Unicode values (> 255) will be encoded as such characters, and likewise such characters are decoded as-is, no changes to them will be done, except "(re-)interpreting" them as Unicode codepoints or Unicode characters, respectively (to Perl, these are the same thing in strings unless you do funny/weird/dumb stuff). This is useful when you want to do the encoding yourself (e.g. when you want to have UTF-16 encoded JSON texts) or when some other layer does the encoding for you (for example, when printing to a terminal using a filehandle that transparently encodes to UTF-8 you certainly do NOT want to UTF-8 encode your data first and have Perl encode it another time). 
=item C flag enabled If the C-flag is enabled, C/C will encode all characters using the corresponding UTF-8 multi-byte sequence, and will expect your input strings to be encoded as UTF-8, that is, no "character" of the input string must have any value > 255, as UTF-8 does not allow that. The C flag therefore switches between two modes: disabled means you will get a Unicode string in Perl, enabled means you get an UTF-8 encoded octet/binary string in Perl. =item C or C flags enabled With C (or C) enabled, C will escape characters with ordinal values > 255 (> 127 with C) and encode the remaining characters as specified by the C flag. If C is disabled, then the result is also correctly encoded in those character sets (as both are proper subsets of Unicode, meaning that a Unicode string with all character values < 256 is the same thing as a ISO-8859-1 string, and a Unicode string with all character values < 128 is the same thing as an ASCII string in Perl). If C is enabled, you still get a correct UTF-8-encoded string, regardless of these flags, just some more characters will be escaped using C<\uXXXX> than before. Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8 encoding, while ASCII-encoded strings are. That is because the ISO-8859-1 encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being a subset of Unicode), while ASCII is. Surprisingly, C will ignore these flags and so treat all input values as governed by the C flag. If it is disabled, this allows you to decode ISO-8859-1- and ASCII-encoded strings, as both are strict subsets of Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings. So neither C nor C are incompatible with the C flag - they only govern when the JSON output engine escapes a character or not. The main use for C is to relatively efficiently store binary data as JSON, at the expense of breaking compatibility with most JSON decoders.
The main use for C is to force the output to not contain characters with values > 127, which means you can interpret the resulting string as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and 8-bit-encoding, and still get the same data structure back. This is useful when your channel for JSON transfer is not 8-bit clean or the encoding might be mangled in between (e.g. in mail), and works because ASCII is a proper subset of most 8-bit and multibyte encodings in use in the world. =back =head1 SEE ALSO The F command line utility for quick experiments. L, L, and L for faster alternatives. L and L for easy migration. L and L for older perl users. RFC4627 (L) =head1 AUTHOR Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2007-2016 by Makamaka Hannyaharamitu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut JSON_PP $fatpacked{"JSON/PP/Boolean.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'JSON_PP_BOOLEAN'; package JSON::PP::Boolean; use strict; use overload ( "0+" => sub { ${$_[0]} }, "++" => sub { $_[0] = ${$_[0]} + 1 }, "--" => sub { $_[0] = ${$_[0]} - 1 }, fallback => 1, ); $JSON::PP::Boolean::VERSION = '2.97001'; 1; __END__ =head1 NAME JSON::PP::Boolean - dummy module providing JSON::PP::Boolean =head1 SYNOPSIS # do not "use" yourself =head1 DESCRIPTION This module exists only to provide overload resolution for Storable and similar modules. See L for more info about this class. =head1 AUTHOR This idea is from L written by Marc Lehmann =cut JSON_PP_BOOLEAN $fatpacked{"Memoize.pm"} = '#line '.(1+__LINE__).' "'.__FILE__."\"\n".<<'MEMOIZE'; # -*- mode: perl; perl-indent-level: 2; -*- # Memoize.pm # # Transparent memoization of idempotent functions # # Copyright 1998, 1999, 2000, 2001, 2012 M. J. Dominus. # You may copy and distribute this program under the # same terms as Perl itself. 
# If in doubt,
# write to mjd-perl-memoize+@plover.com for a license.

package Memoize;
$VERSION = '1.03';

# Compile-time constants naming the two calling contexts that _memoizer
# distinguishes.
sub SCALAR () { 0 }
sub LIST () { 1 }

#
# Usage memoize(functionname/ref,
#               { NORMALIZER => coderef, INSTALL => name,
#                 LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
#

use Carp;
use Exporter;
use vars qw($DEBUG);
use Config;                     # Dammit.
@ISA = qw(Exporter);
@EXPORT = qw(memoize);
@EXPORT_OK = qw(unmemoize flush_cache);
use strict;

# %memotable:    original code ref (used as a hash key) => info record
# %revmemotable: memoized wrapper (used as a hash key)  => key of %memotable
my %memotable;
my %revmemotable;
my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;

# Raise an error if the user tries to specify one of these packages as a
# tie for LIST_CACHE
my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);

# memoize(FUNCTION, OPTIONS...)
#
# Builds a caching wrapper around FUNCTION and returns a reference to it.
#
#   FUNCTION  a function name (looked up in the caller's package unless it
#             already contains '::') or a code reference
#   OPTIONS   key/value pairs: NORMALIZER, INSTALL, SCALAR_CACHE, LIST_CACHE
#
# Unless INSTALL => undef is given, the wrapper is installed under the
# INSTALL name, or under FUNCTION's own name when FUNCTION was a name.
# Croaks on a bad FUNCTION argument, an unrecognized cache option, or a
# scalar-only tied hash being used to hold list-context results.
sub memoize {
  my $fn = shift;
  my %options = @_;
  my $options = \%options;

  unless (defined($fn) &&
          (ref $fn eq 'CODE' || ref $fn eq '')) {
    croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
  }

  my $uppack = caller;          # TCL me Elmo!
  my $cref;                     # Code reference to original function
  my $name = (ref $fn ? undef : $fn);

  # Convert function names to code references
  $cref = _make_cref($fn, $uppack);

  # Locate function prototype, if any
  my $proto = prototype $cref;
  if (defined $proto) { $proto = "($proto)" }
  else { $proto = "" }

  # I would like to get rid of the eval, but there seems not to be any
  # other way to set the prototype properly.  The switch here for
  # 'usethreads' works around a bug in threadperl having to do with
  # magic goto.  It would be better to fix the bug and use the magic
  # goto version everywhere.
  my $wrapper =
      $Config{usethreads}
        ? eval "sub $proto { &_memoizer(\$cref, \@_); }"
        : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";

  my $normalizer = $options{NORMALIZER};
  if (defined $normalizer && ! ref $normalizer) {
    $normalizer = _make_cref($normalizer, $uppack);
  }

  my $install_name;
  if (defined $options->{INSTALL}) {
    # INSTALL => name
    $install_name = $options->{INSTALL};
  } elsif (! exists $options->{INSTALL}) {
    # No INSTALL option provided; use original name if possible
    $install_name = $name;
  } else {
    # INSTALL => undef  means don't install
  }

  if (defined $install_name) {
    $install_name = $uppack . '::' . $install_name
        unless $install_name =~ /::/;
    no strict;
    local($^W) = 0;             # ``Subroutine $install_name redefined at ...''
    *{$install_name} = $wrapper; # Install memoized version
  }

  $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key

  # These will be the caches
  my %caches;
  # Short cache tag per context, with any [TAG, args...] wrapper removed.
  # The MERGE decision below must look at this, not at the raw option,
  # so that the array form ['MERGE'] is honoured as well.
  my %tag_for;
  for my $context (qw(SCALAR LIST)) {
    # suppress subsequent 'uninitialized value' warnings
    $options{"${context}_CACHE"} ||= '';

    my $cache_opt = $options{"${context}_CACHE"};
    my @cache_opt_args;
    if (ref $cache_opt) {
      @cache_opt_args = @$cache_opt;
      $cache_opt = shift @cache_opt_args;
    }
    $tag_for{$context} = defined $cache_opt ? $cache_opt : '';

    if ($cache_opt eq 'FAULT') { # no cache
      $caches{$context} = undef;
    } elsif ($cache_opt eq 'HASH') { # user-supplied hash
      my $cache = $cache_opt_args[0];
      my $package = ref(tied %$cache);
      if ($context eq 'LIST' && $scalar_only{$package}) {
        croak("You can't use $package for LIST_CACHE because it can only store scalars");
      }
      $caches{$context} = $cache;
    } elsif ($cache_opt eq '' ||  $IS_CACHE_TAG{$cache_opt}) {
      # default is that we make up an in-memory hash
      $caches{$context} = {};
      # (this might get tied later, or MERGEd away)
    } else {
      croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
    }
  }

  # Perhaps I should check here that you didn't supply *both* merge
  # options.  But if you did, it does do something reasonable: They
  # both get merged to the same in-memory hash.
  if ($tag_for{SCALAR} eq 'MERGE') {
    # Scalar context shares whatever cache list context uses.
    $options{MERGED} = 1;
    $caches{SCALAR} = $caches{LIST};
  } elsif ($tag_for{LIST} eq 'MERGE') {
    $options{MERGED} = 1;
    if (defined $caches{SCALAR}) {
      # List context shares the scalar cache.  Copying in this direction
      # keeps a hash supplied through SCALAR_CACHE => [HASH => ...] in use
      # instead of replacing it with a fresh in-memory hash.
      my $package = ref(tied %{$caches{SCALAR}});
      if ($scalar_only{$package}) {
        croak("You can't use $package for LIST_CACHE because it can only store scalars");
      }
      $caches{LIST} = $caches{SCALAR};
    } else {
      # SCALAR_CACHE => 'FAULT' left no scalar cache to share; as before,
      # both contexts then use the in-memory hash made for list context.
      $caches{SCALAR} = $caches{LIST};
    }
  }

  # Now deal with the TIE options
  for my $context (qw(SCALAR LIST)) {
    # If the relevant option wasn't `TIE', this call does nothing.
    _my_tie($context, $caches{$context}, $options); # Croaks on failure
  }

  # We should put some more stuff in here eventually.
  # We've been saying that for several versions now.
  # And you know what?  More stuff keeps going in!
  $memotable{$cref} =
  {
    O => $options,  # Short keys here for things we need to access frequently
    N => $normalizer,
    U => $cref,
    MEMOIZED => $wrapper,
    PACKAGE => $uppack,
    NAME => $install_name,
    S => $caches{SCALAR},
    L => $caches{LIST},
  };

  $wrapper                      # Return just memoized version
}

# This function tries to load a tied hash class and tie the hash to it.
#
#   $context  'SCALAR' or 'LIST'
#   $hash     reference to the hash that serves as the cache for $context
#   $options  reference to memoize's option hash
#
# Does nothing (returns the empty list) unless the context's cache option
# is TIE; otherwise returns 1, or croaks if the module cannot be loaded,
# the tie fails, or a scalar-only module is requested for LIST.
sub _my_tie {
  my ($context, $hash, $options) = @_;
  my $fullopt = $options->{"${context}_CACHE"};

  # We already checked to make sure that this works.
  my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;

  return unless defined $shortopt && $shortopt eq 'TIE';
  carp("TIE option to memoize() is deprecated; use HASH instead")
      if $^W;

  # Option layout is ['TIE', MODULE, ARGS...]
  my @args = ref $fullopt ? @$fullopt : ();
  shift @args;
  my $module = shift @args;
  if ($context eq 'LIST' && $scalar_only{$module}) {
    croak("You can't use $module for LIST_CACHE because it can only store scalars");
  }
  my $modulefile = $module . '.pm';
  $modulefile =~ s{::}{/}g;
  eval { require $modulefile };
  if ($@) {
    croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
  }
  my $rc = (tie %$hash => $module, @args);
  unless ($rc) {
    croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
  }
  1;
}

# flush_cache(FUNCTION)
#
# Empties the scalar and list caches of a memoized function, given by
# name or by reference.  Croaks if FUNCTION is not memoized, or if a cache
# is tied to a class that has no CLEAR method.
sub flush_cache {
  my $func = _make_cref($_[0], scalar caller);

  # Look the wrapper up first, so that a function that was never memoized
  # does not index %memotable with an undefined key.
  my $key = $revmemotable{$func};
  my $info = defined $key ? $memotable{$key} : undef;
  croak "$func not memoized" unless defined $info;

  for my $slot (qw(S L)) {
    my $cache = $info->{$slot};
    next unless $cache;         # FAULT contexts have no cache to flush
    if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
      my $funcname = defined($info->{NAME}) ?
          "function $info->{NAME}" : "anonymous function $func";
      my $context = {S => 'scalar', L => 'list'}->{$slot};
      croak "Tied cache hash for $context-context $funcname does not support flushing";
    } else {
      %$cache = ();
    }
  }
}

# This is the function that manages the memo tables.
#
# The wrappers built by memoize() pass the original code ref as the first
# argument, followed by the caller's arguments.  The cache for the current
# calling context is consulted under the normalized argument string; on a
# miss the original function is called and its result is stored.
sub _memoizer {
  my $orig = shift;             # ref to original func; stringifies to the table key
  my $info = $memotable{$orig};
  my $normalizer = $info->{N};

  my $argstr;
  my $context = (wantarray() ? LIST : SCALAR);

  if (defined $normalizer) {
    no strict;
    # The normalizer runs in the caller's context; in list context only
    # its first return value is used as the key.
    if ($context == SCALAR) {
      $argstr = $normalizer->(@_);
    } elsif ($context == LIST) {
      ($argstr) = $normalizer->(@_);
    } else {
      croak "Internal error \#41; context was neither LIST nor SCALAR\n";
    }
  } else {                      # Default normalizer
    local $^W = 0;
    $argstr = join chr(28),@_;
  }

  if ($context == SCALAR) {
    my $cache = $info->{S};
    _crap_out($info->{NAME}, 'scalar') unless $cache;
    if (exists $cache->{$argstr}) {
      # A merged cache stores every value as an array ref
      return $info->{O}{MERGED}
        ? $cache->{$argstr}[0] : $cache->{$argstr};
    }
    my $val = $info->{U}->(@_);
    # Scalars are considered to be lists; store appropriately
    $cache->{$argstr} = $info->{O}{MERGED} ? [$val] : $val;
    return $val;
  } elsif ($context == LIST) {
    my $cache = $info->{L};
    _crap_out($info->{NAME}, 'list') unless $cache;
    if (exists $cache->{$argstr}) {
      return @{$cache->{$argstr}};
    }
    my @q = $info->{U}->(@_);
    $cache->{$argstr} = \@q;
    return @q;
  } else {
    croak "Internal error \#42; context was neither LIST nor SCALAR\n";
  }
}

# unmemoize(FUNCTION)
#
# Undoes memoize(): reinstalls the original function under the name the
# wrapper was installed as (if any), forgets the bookkeeping for it, and
# returns a reference to the original function.  Croaks if FUNCTION is not
# currently memoized.
sub unmemoize {
  my $f = shift;
  my $uppack = caller;
  my $cref = _make_cref($f, $uppack);

  unless (exists $revmemotable{$cref}) {
    croak "Could not unmemoize function `$f', because it was not memoized to begin with";
  }

  my $tabent = $memotable{$revmemotable{$cref}};
  unless (defined $tabent) {
    croak "Could not figure out how to unmemoize function `$f'";
  }
  my $name = $tabent->{NAME};
  if (defined $name) {
    no strict;
    local($^W) = 0;             # ``Subroutine $install_name redefined at ...''
    *{$name} = $tabent->{U};    # Replace with original function
  }

  # Remove both table entries outright.  Merely undef-ing the values would
  # leave the keys behind, so the tables would grow without bound and a
  # repeated unmemoize would be reported with the wrong error above.
  # This removes the last reference to the (possibly tied) memo tables.
  delete $memotable{$revmemotable{$cref}};
  delete $revmemotable{$cref};

  $tabent->{U};
}

# _make_cref(FUNCTION, PACKAGE)
#
# Returns a code reference for FUNCTION, which is either already a code
# reference or a function name; an unqualified name is looked up in
# PACKAGE.  Croaks if the named function does not exist or if FUNCTION is
# some other kind of reference.
sub _make_cref {
  my $fn = shift;
  my $uppack = shift;
  my $cref;
  my $name;

  if (ref $fn eq 'CODE') {
    $cref = $fn;
  } elsif (! ref $fn) {
    if ($fn =~ /::/) {
      $name = $fn;
    } else {
      $name = $uppack . '::' . $fn;
    }
    no strict;
    if (defined $name and !defined(&$name)) {
      croak "Cannot operate on nonexistent function `$fn'";
    }
#    $cref = \&$name;
    $cref = *{$name}{CODE};
  } else {
    my $parent = (caller(1))[3]; # Function that called _make_cref
    croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
  }
  if ($DEBUG) {
    # $name is undefined when we were handed a code reference
    my $label = defined $name ? $name : '';
    warn "${label}($fn) => $cref in _make_cref\n";
  }
  $cref;
}

# Croaks to report a call made in a context whose cache option is FAULT.
sub _crap_out {
  my ($funcname, $context) = @_;
  if (defined $funcname) {
    croak "Function `$funcname' called in forbidden $context context; faulting";
  } else {
    croak "Anonymous function called in forbidden $context context; faulting";
  }
}

1;





=head1 NAME

Memoize - Make functions faster by trading space for time

=head1 SYNOPSIS

        # This is the documentation for Memoize 1.03
        use Memoize;
        memoize('slow_function');
        slow_function(arguments);    # Is faster than it was before


This is normally all you need to know.  However, many options are available:

        memoize(function, options...);

Options include:

        NORMALIZER => function
        INSTALL => new_name

        SCALAR_CACHE => 'MEMORY'
        SCALAR_CACHE => ['HASH', \%cache_hash ]
        SCALAR_CACHE => 'FAULT'
        SCALAR_CACHE => 'MERGE'

        LIST_CACHE => 'MEMORY'
        LIST_CACHE => ['HASH', \%cache_hash ]
        LIST_CACHE => 'FAULT'
        LIST_CACHE => 'MERGE'

=head1 DESCRIPTION

`Memoizing' a function makes it faster by trading space for time.  It
does this by caching the return values of the function in a table.
If you call the function again with the same arguments, C<memoize>
jumps in and gives you the value out of the table, instead of letting
the function compute the value all over again.

Here is an extreme example.  Consider the Fibonacci sequence, defined
by the following function:

        # Compute Fibonacci numbers
        sub fib {
          my $n = shift;
          return $n if $n < 2;
          fib($n-1) + fib($n-2);
        }

This function is very slow.  Why?  To compute fib(14), it first wants
to compute fib(13) and fib(12), and add the results.
But to compute fib(13), it first has to compute fib(12) and fib(11), and then it comes back and computes fib(12) all over again even though the answer is the same. And both of the times that it wants to compute fib(12), it has to compute fib(11) from scratch, and then it has to do it again each time it wants to compute fib(13). This function does so much recomputing of old results that it takes a really long time to run---fib(14) makes 1,200 extra recursive calls to itself, to compute and recompute things that it already computed. This function is a good candidate for memoization. If you memoize the `fib' function above, it will compute fib(14) exactly once, the first time it needs to, and then save the result in a table. Then if you ask for fib(14) again, it gives you the result out of the table. While computing fib(14), instead of computing fib(12) twice, it does it once; the second time it needs the value it gets it from the table. It doesn't compute fib(11) four times; it computes it once, getting it from the table the next three times. Instead of making 1,200 recursive calls to `fib', it makes 15. This makes the function about 150 times faster. You could do the memoization yourself, by rewriting the function, like this: # Compute Fibonacci numbers, memoized version { my @fib; sub fib { my $n = shift; return $fib[$n] if defined $fib[$n]; return $fib[$n] = $n if $n < 2; $fib[$n] = fib($n-1) + fib($n-2); } } Or you could use this module, like this: use Memoize; memoize('fib'); # Rest of the fib function just like the original version. This makes it easy to turn memoizing on and off. 
Here's an even simpler example: I wrote a simple ray tracer; the program would look in a certain direction, figure out what it was looking at, and then convert the `color' value (typically a string like `red') of that object to a red, green, and blue pixel value, like this: for ($direction = 0; $direction < 300; $direction++) { # Figure out which object is in direction $direction $color = $object->{color}; ($r, $g, $b) = @{&ColorToRGB($color)}; ... } Since there are relatively few objects in a picture, there are only a few colors, which get looked up over and over again. Memoizing C sped up the program by several percent. =head1 DETAILS This module exports exactly one function, C. The rest of the functions in this package are None of Your Business. You should say memoize(function) where C is the name of the function you want to memoize, or a reference to it. C returns a reference to the new, memoized version of the function, or C on a non-fatal error. At present, there are no non-fatal errors, but there might be some in the future. If C was the name of a function, then C hides the old version and installs the new memoized version under the old name, so that C<&function(...)> actually invokes the memoized version. =head1 OPTIONS There are some optional options you can pass to C to change the way it behaves a little. To supply options, invoke C like this: memoize(function, NORMALIZER => function, INSTALL => newname, SCALAR_CACHE => option, LIST_CACHE => option ); Each of these options is optional; you can include some, all, or none of them. =head2 INSTALL If you supply a function name with C, memoize will install the new, memoized version of the function under the name you give. For example, memoize('fib', INSTALL => 'fastfib') installs the memoized version of C as C; without the C option it would have replaced the old C with the memoized version. To prevent C from installing the memoized version anywhere, use C undef>. 
=head2 NORMALIZER Suppose your function looks like this: # Typical call: f('aha!', A => 11, B => 12); sub f { my $a = shift; my %hash = @_; $hash{B} ||= 2; # B defaults to 2 $hash{C} ||= 7; # C defaults to 7 # Do something with $a, %hash } Now, the following calls to your function are all completely equivalent: f(OUCH); f(OUCH, B => 2); f(OUCH, C => 7); f(OUCH, B => 2, C => 7); f(OUCH, C => 7, B => 2); (etc.) However, unless you tell C<Memoize> that these calls are equivalent, it will not know that, and it will compute the values for these invocations of your function separately, and store them separately. To prevent this, supply a C<NORMALIZER> function that turns the program arguments into a string in a way that equivalent arguments turn into the same string. A C<NORMALIZER> function for C<f> above might look like this: sub normalize_f { my $a = shift; my %hash = @_; $hash{B} ||= 2; $hash{C} ||= 7; join(',', $a, map { ($_ => $hash{$_}) } sort keys %hash); } Each of the argument lists above comes out of the C<normalize_f> function looking exactly the same, like this: OUCH,B,2,C,7 You would tell C<Memoize> to use this normalizer this way: memoize('f', NORMALIZER => 'normalize_f'); C<memoize> knows that if the normalized version of the arguments is the same for two argument lists, then it can safely look up the value that it computed for one argument list and return it as the result of calling the function with the other argument list, even if the argument lists look different. The default normalizer just concatenates the arguments with character 28 in between. (In ASCII, this is called FS or control-\.) This always works correctly for functions with only one string argument, and also when the arguments never contain character 28. However, it can confuse certain argument lists: normalizer("a\034", "b") normalizer("a", "\034b") normalizer("a\034\034b") for example. Since hash keys are strings, the default normalizer will not distinguish between C<undef> and the empty string. It also won't work when the function's arguments are references.
For example, consider a function C<g> which gets two arguments: A number, and a reference to an array of numbers: g(13, [1,2,3,4,5,6,7]); The default normalizer will turn this into something like C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a subsequent array of numbers might be stored at a different location even though it contains the same data. If this happens, C<Memoize> will think that the arguments are different, even though they are equivalent. In this case, a normalizer like this is appropriate: sub normalize { join ' ', $_[0], @{$_[1]} } For the example above, this produces the key "13 1 2 3 4 5 6 7". Another use for normalizers is when the function depends on data other than those in its arguments. Suppose you have a function which returns a value which depends on the current hour of the day: sub on_duty { my ($problem_type) = @_; my $hour = (localtime)[2]; open my $fh, "$DIR/$problem_type" or die...; my $line; while ($hour-- > 0) { $line = <$fh>; } return $line; } At 10:23, this function generates the 10th line of a data file; at 3:45 PM it generates the 15th line instead. By default, C<Memoize> will only see the $problem_type argument. To fix this, include the current hour in the normalizer: sub normalize { join ' ', (localtime)[2], @_ } The calling context of the function (scalar or list context) is propagated to the normalizer. This means that if the memoized function will treat its arguments differently in list context than it would in scalar context, you can have the normalizer function select its behavior based on the results of C<wantarray>. Even if called in a list context, a normalizer should still return a single string. =head2 C<SCALAR_CACHE>, C<LIST_CACHE> Normally, C<Memoize> caches your function's return values into an ordinary Perl hash variable. However, you might like to have the values cached on the disk, so that they persist from one run of your program to the next, or you might like to associate some other interesting semantics with the cached values.
There's a slight complication under the hood of C: There are actually I caches, one for scalar values and one for list values. When your function is called in scalar context, its return value is cached in one hash, and when your function is called in list context, its value is cached in the other hash. You can control the caching behavior of both contexts independently with these options. The argument to C or C must either be one of the following four strings: MEMORY FAULT MERGE HASH or else it must be a reference to an array whose first element is one of these four strings, such as C<[HASH, arguments...]>. =over 4 =item C C means that return values from the function will be cached in an ordinary Perl hash variable. The hash variable will not persist after the program exits. This is the default. =item C C allows you to specify that a particular hash that you supply will be used as the cache. You can tie this hash beforehand to give it any behavior you want. A tied hash can have any semantics at all. It is typically tied to an on-disk database, so that cached values are stored in the database and retrieved from it again when needed, and the disk file typically persists after your program has exited. See C for more complete details about C. A typical example is: use DB_File; tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666; memoize 'function', SCALAR_CACHE => [HASH => \%cache]; This has the effect of storing the cache in a C database whose name is in C<$filename>. The cache will persist after the program has exited. Next time the program runs, it will find the cache already populated from the previous run of the program. Or you can forcibly populate the cache by constructing a batch program that runs in the background and populates the cache file. Then when you come to run your real program the memoized function will be fast because all its results have been precomputed. Another reason to use C is to provide your own hash variable. 
You can then inspect or modify the contents of the hash to gain finer control over the cache management. =item C<TIE> This option is no longer supported. It is still documented only to aid in the debugging of old programs that use it. Old programs should be converted to use the C<HASH> option instead. memoize ... ['TIE', PACKAGE, ARGS...] is merely a shortcut for require PACKAGE; { tie my %cache, PACKAGE, ARGS...; memoize ... [HASH => \%cache]; } =item C<FAULT> C<FAULT> means that you never expect to call the function in scalar (or list) context, and that if C<Memoize> detects such a call, it should abort the program. The error message is one of Function `foo' called in forbidden list context; faulting at line ... Function `foo' called in forbidden scalar context; faulting at line ... =item C<MERGE> C<MERGE> normally means that the memoized function does not distinguish between list and scalar context, and that return values in both contexts should be stored together. Both C<LIST_CACHE =E<gt> MERGE> and C<SCALAR_CACHE =E<gt> MERGE> mean the same thing. Consider this function: sub complicated { # ... time-consuming calculation of $result return $result; } The C<complicated> function will return the same numeric C<$result> regardless of whether it is called in list or in scalar context. Normally, the following code will result in two calls to C<complicated>, even if C<complicated> is memoized: $x = complicated(142); ($y) = complicated(142); $z = complicated(142); The first call will cache the result, say 37, in the scalar cache; the second will cache the list C<(37)> in the list cache. The third call doesn't call the real C<complicated> function; it gets the value 37 from the scalar cache. Obviously, the second call to C<complicated> is a waste of time, and storing its return value is a waste of space. Specifying C<LIST_CACHE =E<gt> MERGE> will make C<memoize> use the same cache for scalar and list context return values, so that the second call uses the scalar cache that was populated by the first call. C<complicated> ends up being called only once, and both subsequent calls return C<37> from the cache, regardless of the calling context.
=head3 List values in scalar context Consider this function: sub iota { return reverse (1..$_[0]) } This function normally returns a list. Suppose you memoize it and merge the caches: memoize 'iota', SCALAR_CACHE => 'MERGE'; @i7 = iota(7); $i7 = iota(7); Here the first call caches the list (7,6,5,4,3,2,1). The second call does not really make sense. C<Memoize> cannot guess what behavior C<iota> should have in scalar context without actually calling it in scalar context. Normally C<Memoize> I<would> call C<iota> in scalar context and cache the result, but the C<SCALAR_CACHE =E<gt> 'MERGE'> option says not to do that, but to use the cached list-context value instead. But it cannot return a list of seven elements in a scalar context. In this case C<$i7> will receive the B<first element> of the cached list value, namely 7. =head3 Merged disk caches Another use for C<MERGE> is when you want both kinds of return values stored in the same disk file; this saves you from having to deal with two disk files instead of one. You can use a normalizer function to keep the two sets of return values separate. For example: tie my %cache => 'MLDBM', 'DB_File', $filename, ...; memoize 'myfunc', NORMALIZER => 'n', SCALAR_CACHE => [HASH => \%cache], LIST_CACHE => 'MERGE', ; sub n { my $context = wantarray() ? 'L' : 'S'; # ... now compute the hash key from the arguments ... $hashkey = "$context:$hashkey"; } This normalizer function will store scalar context return values in the disk file under keys that begin with C<S:>, and list context return values under keys that begin with C<L:>. =back =head1 OTHER FACILITIES =head2 C<unmemoize> There's an C<unmemoize> function that you can import if you want to. Why would you want to? Here's an example: Suppose you have your cache tied to a DBM file, and you want to make sure that the cache is written out to disk if someone interrupts the program. If the program exits normally, this will happen anyway, but if someone types control-C or something then the program will terminate immediately without synchronizing the database.
So what you can do instead is $SIG{INT} = sub { unmemoize 'function' }; C accepts a reference to, or the name of a previously memoized function, and undoes whatever it did to provide the memoized version in the first place, including making the name refer to the unmemoized version if appropriate. It returns a reference to the unmemoized version of the function. If you ask it to unmemoize a function that was never memoized, it croaks. =head2 C C will flush out the caches, discarding I the cached data. The argument may be a function name or a reference to a function. For finer control over when data is discarded or expired, see the documentation for C, included in this package. Note that if the cache is a tied hash, C will attempt to invoke the C method on the hash. If there is no C method, this will cause a run-time error. An alternative approach to cache flushing is to use the C option (see above) to request that C use a particular hash variable as its cache. Then you can examine or modify the hash at any time in any way you desire. You may flush the cache by using C<%hash = ()>. =head1 CAVEATS Memoization is not a cure-all: =over 4 =item * Do not memoize a function whose behavior depends on program state other than its own arguments, such as global variables, the time of day, or file input. These functions will not produce correct results when memoized. For a particularly easy example: sub f { time; } This function takes no arguments, and as far as C is concerned, it always returns the same result. C is wrong, of course, and the memoized version of this function will call C