#!/usr/bin/env perl # PODNAME: sets # ABSTRACT: set operations in Perl # __MOBUNDLE_INCLUSION__ BEGIN { my %file_for = ( # __MOBUNDLE_FILES__ # __MOBUNDLE_FILE__ 'App/Sets.pm' => <<'END_OF_FILE', package App::Sets; $App::Sets::VERSION = '0.974'; use strict; use warnings; # ABSTRACT: set operations in Perl use English qw( -no_match_vars ); use 5.010; use Getopt::Long qw< GetOptionsFromArray :config pass_through no_ignore_case bundling >; use Pod::Usage qw< pod2usage >; use Log::Log4perl::Tiny qw< :easy :dead_if_first LOGLEVEL >; use App::Sets::Parser; use App::Sets::Iterator; use App::Sets::Operations; use App::Sets::Sort qw< sort_filehandle >; my %config = ( binmode => ':raw:encoding(UTF-8)', loglevel => 'INFO', parsedebug => 0, ); sub populate_config { my (@args) = @_; $config{sorted} = 1 if $ENV{SETS_SORTED}; $config{trim} = 1 if $ENV{SETS_TRIM}; $config{cache} = $ENV{SETS_CACHE} if exists $ENV{SETS_CACHE}; $config{loglevel} = $ENV{SETS_LOGLEVEL} if exists $ENV{SETS_LOGLEVEL}; $config{parsedebug} = $ENV{SETS_PARSEDEBUG} if exists $ENV{SETS_PARSEDEBUG}; $config{internal_sort} = $ENV{SETS_INTERNAL_SORT} if exists $ENV{SETS_INTERNAL_SORT}; $config{binmode} = $ENV{SETS_BINMODE} if $ENV{SETS_BINMODE}; GetOptionsFromArray( \@args, \%config, qw< man help usage version binmode|b=s cache|cache-sorted|S=s internal_sort|internal-sort|I! loglevel|l=s sorted|s! trim|t! > ) or pod2usage( -verbose => 99, -sections => 'USAGE', ); $App::Sets::VERSION //= '0.972' unless defined $App::Sets::VERSION; pod2usage(message => "$0 $App::Sets::VERSION", -verbose => 99, -sections => ' ') if $config{version}; pod2usage( -verbose => 99, -sections => 'USAGE' ) if $config{usage}; pod2usage( -verbose => 99, -sections => 'USAGE|EXAMPLES|OPTIONS' ) if $config{help}; pod2usage(-verbose => 2) if $config{man}; LOGLEVEL $config{loglevel}; $config{cache} = '.sorted' if exists $config{cache} && !(defined($config{cache}) && length($config{cache})); $config{sorted} = 1 if exists $config{cache}; if (exists $config{cache}) { INFO "using sort cache or generating it when not available"; } elsif ($config{sorted}) { INFO "assuming input files are sorted"; } INFO "trimming away leading/trailing whitespaces" if $config{trim}; pod2usage( -verbose => 99, -sections => 'USAGE', ) unless @args; return @args; } ## end sub populate_config sub run { my $package = shift; my @args = populate_config(@_); my $input; if (@args > 1) { shift @args if $args[0] eq '--'; LOGDIE "only file op file [op file...] " . "with multiple parameters (@args)...\n" unless @args % 2; my @chunks; while (@args) { push @chunks, escape(shift @args); push @chunks, shift @args if @args; } $input = join ' ', @chunks; } ## end if (@args > 1) else { $input = shift @args; } LOGLEVEL('DEBUG') if $config{parsedebug}; DEBUG "parsing >$input<"; my $expression = App::Sets::Parser::parse($input, 0); LOGLEVEL($config{loglevel}); binmode STDOUT, $config{binmode}; my $it = expression($expression); while (defined(my $item = $it->drop())) { print {*STDOUT} $item; print {*STDOUT} "\n" if $config{trim}; } return; } ## end sub run sub escape { my ($text) = @_; $text =~ s{(\W)}{\\$1}gmxs; return $text; } sub expression { my ($expression) = @_; if (ref $expression) { # operation my ($op, $l, $r) = @$expression; my $sub = App::Sets::Operations->can($op); return $sub->(expression($l), expression($r)); } else { # plain file return file($expression); } } ## end sub expression sub file { my ($filename) = @_; LOGDIE "invalid file '$filename'\n" unless -r $filename && !-d $filename; if ($config{cache}) { my $cache_filename = $filename . $config{cache}; if (!-e $cache_filename) { # generate cache file WARN "generating cached sorted file " . "'$cache_filename', might wait a bit..."; my $ifh = sort_filehandle($filename, \%config); open my $ofh, '>', $cache_filename or LOGDIE "open('$cache_filename') for output: $OS_ERROR"; while (<$ifh>) { print {$ofh} $_; } close $ofh or LOGDIE "close('$cache_filename'): $OS_ERROR"; } ## end if (!-e $cache_filename) INFO "using '$cache_filename' (assumed to be sorted) " . "instead of '$filename'"; $filename = $cache_filename; } ## end if ($config{cache}) my $fh; if ($config{sorted}) { INFO "opening '$filename', assuming it is already sorted" unless $config{cache}; open $fh, '<', $filename or LOGDIE "open('$filename'): $OS_ERROR"; } ## end if ($config{sorted}) else { INFO "opening '$filename' and sorting on the fly"; $fh = sort_filehandle($filename, \%config); } return App::Sets::Iterator->new( sub { my $retval = <$fh>; return unless defined $retval; $retval =~ s{\A\s+|\s+\z}{}gmxs if $config{trim}; return $retval; } ); } ## end sub file 1; __END__ =pod =head1 NAME App::Sets - set operations in Perl =head1 VERSION version 0.974 =head1 AUTHOR Flavio Poletti =head1 COPYRIGHT AND LICENSE Copyright (C) 2011-2016 by Flavio Poletti polettix@cpan.org. This module is free software. You can redistribute it and/or modify it under the terms of the Artistic License 2.0. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =cut END_OF_FILE # __MOBUNDLE_FILE__ 'App/Sets/Iterator.pm' => <<'END_OF_FILE', package App::Sets::Iterator; $App::Sets::Iterator::VERSION = '0.974'; use strict; use warnings; # ABSTRACT: convenience iterator sub new { my ($package, $it) = @_; return bless {it => $it}, $package; } sub head { my ($self) = @_; return exists $self->{head} ? $self->{head} : $self->next(); } sub next { my ($self) = @_; return $self->{head} = $self->{it}->(); } sub drop { my ($self) = @_; my $retval = $self->head(); $self->next(); return $retval; } ## end sub drop 1; __END__ =pod =head1 NAME App::Sets::Iterator - convenience iterator =head1 VERSION version 0.974 =head1 AUTHOR Flavio Poletti =head1 COPYRIGHT AND LICENSE Copyright (C) 2011-2016 by Flavio Poletti polettix@cpan.org. This module is free software. You can redistribute it and/or modify it under the terms of the Artistic License 2.0. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =cut END_OF_FILE # __MOBUNDLE_FILE__ 'App/Sets/Parser.pm' => <<'END_OF_FILE', package App::Sets::Parser; $App::Sets::Parser::VERSION = '0.974'; use strict; use warnings; use Carp; use Log::Log4perl::Tiny qw< :easy :dead_if_first >; # ABSTRACT: parse input expressions of operations on sets sub parse { my ($string) = @_; my $retval = first($string, 0); my ($expression, $pos) = $retval ? @$retval : (undef, 0); return $expression if $pos == length $string; my $offending = substr $string, $pos; my ($spaces) = $offending =~ s{\A(\s+)}{}mxs; $pos += length $spaces; my $nchars = 23; $offending = substr($offending, 0, $nchars - 3) . '...' if length($offending) > $nchars; LOGDIE "parse error at char $pos --> $offending\n",; } ## end sub parse sub lrx_head { my $sequence = _sequence(@_); return sub { my $retval = $sequence->(@_) or return; my ($struct, $pos) = @$retval; my ($second, $first_tail) = @{$struct}[1, 3]; if (defined $first_tail->[0]) { my ($root, $parent) = @{$first_tail->[0]}; $parent->[1] = $second->[0]; $struct = $root; } else { $struct = $second->[0]; } return [$struct, $pos]; } } ## end sub lrx_head sub lrx_tail { my $sequence = _sequence('optws', _alternation(_sequence(@_), 'empty')); return sub { my $retval = $sequence->(@_) or return; my ($struct, $pos) = @$retval; $retval = $struct->[1]; if (!defined $retval->[0]) { $retval = undef; } else { # not empty my ($op, $second, $tail) = @{$retval->[0]}[0, 2, 4]; my $node = [$op->[0], undef, $second->[0]]; if (defined $tail->[0]) { my ($root, $parent) = @{$tail->[0]}; $parent->[1] = $node; # link leaf to parent node $retval = [$root, $node]; } else { $retval = [$node, $node]; } } ## end else [ if (!defined $retval->... return [$retval, $pos]; } } ## end sub lrx_tail sub first { return lrx_head(qw< optws second optws first_tail optws >)->(@_); } sub first_tail { return lrx_tail(qw< op_subtract optws second optws first_tail optws >) ->(@_); } sub second { return lrx_head(qw< optws third optws second_tail optws >)->(@_); } sub second_tail { return lrx_tail(qw< op_union optws third optws second_tail optws >) ->(@_); } sub third { return lrx_head(qw< optws fourth optws third_tail optws >)->(@_); } sub third_tail { return lrx_tail(qw< op_intersect optws fourth optws third_tail optws >) ->(@_); } sub fourth { my $retval = _sequence( 'optws', _alternation( _sequence(_string('('), qw< optws first optws >, _string(')')), 'filename', ), 'optws' )->(@_) or return; my ($struct, $pos) = @$retval; my $meat = $struct->[1]; if (ref($meat->[0])) { $retval = $meat->[0][2][0]; } else { $retval = $meat->[0]; } return [$retval, $pos]; } ## end sub fourth sub _op { my ($regex, $retval, $string, $pos) = @_; pos($string) = $pos; return unless $string =~ m{\G($regex)}cgmxs; return [$retval, pos($string)]; } ## end sub _op sub op_intersect { return _op(qr{(?:intersect|[iI&^])}, 'intersect', @_); } sub op_union { return _op(qr{(?:union|[uUvV|+])}, 'union', @_); } sub op_subtract { return _op(qr{(?:minus|less|[\\-])}, 'minus', @_); } sub filename { my ($string, $pos) = @_; DEBUG "filename() >$string< $pos"; pos($string) = $pos; my $retval; if (($retval) = $string =~ m{\G ' ( [^']+ ) '}cgmxs) { return [$retval, pos($string)]; } elsif (($retval) = $string =~ m{\G " ( (?: \\. | [^"])+ ) "}cgmxs) { $retval =~ s{\\(.)}{$1}gmxs; return [$retval, pos($string)]; } elsif (($retval) = $string =~ m{\G ( (?: \\. | [\w.-/])+ )}cgmxs) { $retval =~ s{\\(.)}{$1}gmxs; return [$retval, pos($string)]; } return; } ## end sub filename sub empty { my ($string, $pos) = @_; return [undef, $pos]; } sub is_empty { my ($struct) = @_; return @{$struct->[0]} > 0; } sub ws { my ($string, $pos) = @_; pos($string) = $pos; my ($retval) = $string =~ m{\G (\s+)}cgmxs or return; return [$retval, pos($string)]; } ## end sub ws sub optws { my ($string, $pos) = @_; pos($string) = $pos; my ($retval) = $string =~ m{\G (\s*)}cgmxs; $retval = [$retval || '', pos($string)]; return $retval; } ## end sub optws sub _string { my ($target) = @_; my $len = length $target; return sub { my ($string, $pos) = @_; return unless substr($string, $pos, $len) eq $target; return [$target, $pos + $len]; } } ## end sub _string sub _alternation { my @subs = _resolve(@_); return sub { my ($string, $pos) = @_; for my $sub (@subs) { my $retval = $sub->($string, $pos) || next; return $retval; } return; }; } ## end sub _alternation sub _sequence { my @subs = _resolve(@_); return sub { my ($string, $pos) = @_; my @chunks; for my $sub (@subs) { my $chunk = $sub->($string, $pos) or return; push @chunks, $chunk; $pos = $chunk->[1]; } ## end for my $sub (@subs) return [\@chunks, $pos]; }; } ## end sub _sequence sub _resolve { return map { ref $_ ? $_ : __PACKAGE__->can($_) || LOGDIE "unknown $_" } @_; } 1; __END__ =pod =head1 NAME App::Sets::Parser - parse input expressions of operations on sets =head1 VERSION version 0.974 =begin grammar parse: first first: first op_difference second | second second: second op_union third | third third: third op_intersect fourth | fourth fourth: '(' first ')' | filename filename: double_quoted_filename | single_quoted_filename | unquoted_filename ... Left recursion elimination first: second first_tail first_tail: | op_intersect second first_tail second: third second_tail second_tail: | op_union third second_tail third: fourth third_tail third_tail: | op_difference fourth third_tail =end grammar =cut =head1 AUTHOR Flavio Poletti =head1 COPYRIGHT AND LICENSE Copyright (C) 2011-2016 by Flavio Poletti polettix@cpan.org. This module is free software. You can redistribute it and/or modify it under the terms of the Artistic License 2.0. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =cut END_OF_FILE # __MOBUNDLE_FILE__ 'App/Sets/Operations.pm' => <<'END_OF_FILE', package App::Sets::Operations; use strict; use warnings; # ABSTRACT: set operations in Perl use English qw( -no_match_vars ); use 5.010; use App::Sets::Iterator; sub intersect { my ($l, $r) = @_; my ($lh, $rh); return App::Sets::Iterator->new( sub { while ('necessary') { $lh //= $l->drop() // last; $rh //= $r->drop() // last; if ($lh eq $rh) { my $retval = $lh; $lh = $rh = undef; return $retval; } elsif ($lh gt $rh) { $rh = undef; } else { $lh = undef; } } ## end while ('necessary') return undef; } ); } ## end sub intersect sub union { my ($l, $r) = @_; my ($lh, $rh); return App::Sets::Iterator->new( sub { while (defined($lh = $l->head()) && defined($rh = $r->head())) { if ($lh eq $rh) { $r->drop(); return $l->drop(); } elsif ($lh lt $rh) { return $l->drop(); } else { return $r->drop(); } } ## end while (defined($lh = $l->head... while (defined($lh = $l->drop())) { return $lh; } while (defined($rh = $r->drop())) { return $rh; } return undef; } ); } ## end sub union sub minus { my ($l, $r) = @_; my ($lh, $rh); return App::Sets::Iterator->new( sub { while (defined($lh = $l->head()) && defined($rh = $r->head())) { if ($lh eq $rh) { # shared, drop both $r->drop(); $l->drop(); } elsif ($lh lt $rh) { # only in left, OK! return $l->drop(); } else { # only in right, go on $r->drop(); } } ## end while (defined($lh = $l->head... return $l->drop(); } ); } ## end sub minus 1; __END__ END_OF_FILE # __MOBUNDLE_FILE__ 'App/Sets/Sort.pm' => <<'END_OF_FILE', package App::Sets::Sort; use strict; use warnings; # ABSTRACT: sort handling use English qw( -no_match_vars ); use 5.010; use File::Temp qw< tempfile >; use Fcntl qw< :seek >; use Log::Log4perl::Tiny qw< :easy :dead_if_first >; use base 'Exporter'; our @EXPORT_OK = qw< sort_filehandle internal_sort_filehandle >; our @EXPORT = qw< sort_filehandle >; our %EXPORT_TAGS = ( default => [ @EXPORT ], all => [ @EXPORT_OK ], ); sub _test_external_sort { my $filename; eval { (my $fh, $filename) = tempfile(); # might croak binmode $fh, ':raw'; print {$fh} "one\ntwo\nthree\nfour\n" or die 'whatever'; close $fh or die 'whatever'; } or return; open my $fh, '-|', 'sort', '-u', $filename or return; return unless $fh; my @lines = <$fh>; return unless scalar(@lines) == 4; return unless defined $lines[3]; $lines[3] =~ s{\s+}{}gmxs; return unless $lines[3] eq 'two'; return 1; } sub sort_filehandle { my ($filename, $config) = @_; $config ||= {}; state $has_sort = (!$config->{internal_sort}) && _test_external_sort(); if ($has_sort) { my $fh; eval { open $fh, '-|', 'sort', '-u', $filename } and return $fh; WARN 'cannot use system sort, falling back to internal implementation'; $has_sort = 0; # from now on, use internal sort } return internal_sort_filehandle($filename); } sub internal_sort_filehandle { my ($filename) = @_; # Open input stream open my $ifh, '<', $filename or LOGDIE "open('$filename'): $OS_ERROR"; # Maximum values hints taken from Perl Power Tools' sort my $max_records = $ENV{SETS_MAX_RECORDS} || 200_000; my $max_files = $ENV{SETS_MAX_FILES} || 40; my (@records, @fhs); while (<$ifh>) { chomp; push @records, $_; if (@records >= $max_records) { push @fhs, _flush_to_temp(\@records); _compact(\@fhs) if @fhs >= $max_files - 1; } } push @fhs, _flush_to_temp(\@records) if @records; _compact(\@fhs); return $fhs[0] if @fhs; # seems like the file was empty... so it's sorted seek $ifh, 0, SEEK_SET; return $ifh; } sub _flush_to_temp { my ($records) = @_; my $tfh = tempfile(UNLINK => 1); my $previous; for my $item (sort @$records) { next if defined($previous) && $previous eq $item; print {$tfh} $item, $INPUT_RECORD_SEPARATOR; } @$records = (); seek $tfh, 0, SEEK_SET; return $tfh; } sub _compact { my ($fhs) = @_; return if @$fhs == 1; # where the output will end up my $ofh = tempfile(UNLINK => 1); # convenience hash for tracking all contributors my %its = map { my $fh = $fhs->[$_]; my $head = <$fh>; if (defined $head) { chomp($head); $_ => [ $fh, $head ]; } else { () } } 0 .. $#$fhs; # iterate until all contributors are exhausted while (scalar keys %its) { # select the best (i.e. "lower"), cleanup on the way my ($fk, @keys) = keys %its; my $best = $its{$fk}[1]; for my $key (@keys) { my $head = $its{$key}[1]; $best = $head if $best gt $head; } print {$ofh} $best, $INPUT_RECORD_SEPARATOR; # get rid of the best in all iterators, cleanup on the way KEY: for my $key ($fk, @keys) { my $head = $its{$key}[1]; while ($head eq $best) { $head = readline $its{$key}[0]; if (defined $head) { chomp($its{$key}[1] = $head); } else { delete $its{$key}; next KEY; } } } } # rewind, finalize compacting, return seek $ofh, 0, SEEK_SET; @$fhs = ($ofh); return; } 1; __END__ END_OF_FILE # __MOBUNDLE_FILE__ 'Log/Log4perl/Tiny.pm' => <<'END_OF_FILE', package Log::Log4perl::Tiny; $Log::Log4perl::Tiny::VERSION = '1.2.9'; # ABSTRACT: mimic Log::Log4perl in one single module use warnings; use strict; use Carp; our ($TRACE, $DEBUG, $INFO, $WARN, $ERROR, $FATAL, $OFF, $DEAD); my ($_instance, %name_of, %format_for, %id_for); my $LOGDIE_MESSAGE_ON_STDERR = 1; sub import { my ($exporter, @list) = @_; my ($caller, $file, $line) = caller(); no strict 'refs'; if (grep { $_ eq ':full_or_fake' } @list) { @list = grep { $_ ne ':full_or_fake' } @list; my $sue = 'use Log::Log4perl (@list)'; eval " package $caller; $sue; 1; " and return; unshift @list, ':fake'; } ## end if (grep { $_ eq ':full_or_fake'... my (%done, $level_set); ITEM: for my $item (@list) { next ITEM if $done{$item}; $done{$item} = 1; if ($item =~ /^[a-zA-Z]/mxs) { *{$caller . '::' . $item} = \&{$exporter . '::' . $item}; } elsif ($item eq ':levels') { for my $level (qw( TRACE DEBUG INFO WARN ERROR FATAL OFF DEAD )) { *{$caller . '::' . $level} = \${$exporter . '::' . $level}; } } elsif ($item eq ':subs') { push @list, qw( ALWAYS TRACE DEBUG INFO WARN ERROR FATAL LOGWARN LOGDIE LOGEXIT LOGCARP LOGCLUCK LOGCROAK LOGCONFESS get_logger ); } ## end elsif ($item eq ':subs') elsif ($item =~ /\A : (mimic | mask | fake) \z/mxs) { # module name as a string below to trick Module::ScanDeps if (!'Log::Log4perl'->can('easy_init')) { $INC{'Log/Log4perl.pm'} = __FILE__; *Log::Log4perl::import = sub { }; *Log::Log4perl::easy_init = sub { my ($pack, $conf) = @_; if (ref $conf) { $_instance = __PACKAGE__->new($conf); $_instance->level($conf->{level}) if exists $conf->{level}; $_instance->format($conf->{format}) if exists $conf->{format}; $_instance->format($conf->{layout}) if exists $conf->{layout}; } ## end if (ref $conf) elsif (defined $conf) { $_instance->level($conf); } }; } ## end if (!'Log::Log4perl'->can... } ## end elsif ($item =~ /\A : (mimic | mask | fake) \z/mxs) elsif ($item eq ':easy') { push @list, qw( :levels :subs :fake ); } elsif (lc($item) eq ':dead_if_first') { get_logger()->_set_level_if_first($DEAD); $level_set = 1; } elsif (lc($item) eq ':no_extra_logdie_message') { $LOGDIE_MESSAGE_ON_STDERR = 0; } } ## end for my $item (@list) if (!$level_set) { my $logger = get_logger(); $logger->_set_level_if_first($INFO); $logger->level($logger->level()); } return; } ## end sub import sub new { my $package = shift; my %args = ref($_[0]) ? %{$_[0]} : @_; $args{format} = $args{layout} if exists $args{layout}; my $channels_input = [ fh => \*STDERR ]; if (exists $args{channels}) { $channels_input = $args{channels}; } else { for my $key (qw< file_append file_create file_insecure file fh >) { next unless exists $args{$key}; $channels_input = [ $key => $args{$key} ]; last; } } my $channels = build_channels($channels_input); $channels = $channels->[0] if @$channels == 1; # remove outer shell my $self = bless { fh => $channels, level => $INFO, }, $package; for my $accessor (qw( level fh format )) { next unless defined $args{$accessor}; $self->$accessor($args{$accessor}); } $self->format('[%d] [%5p] %m%n') unless exists $self->{format}; return $self; } ## end sub new sub build_channels { my @pairs = (@_ && ref($_[0])) ? @{$_[0]} : @_; my @channels; while (@pairs) { my ($key, $value) = splice @pairs, 0, 2; # some initial validation croak "build_channels(): undefined key in list" unless defined $key; croak "build_channels(): undefined value for key $key" unless defined $value; # analyze the key-value pair and set the channel accordingly my ($channel, $set_autoflush); if ($key =~ m{\A(?: fh | sub | code | channel )\z}mxs) { $channel = $value; } elsif ($key eq 'file_append') { open $channel, '>>', $value or croak "open('$value') for appending: $!"; $set_autoflush = 1; } elsif ($key eq 'file_create') { open $channel, '>', $value or croak "open('$value') for creating: $!"; $set_autoflush = 1; } elsif ($key =~ m{\A file (?: _insecure )? \z}mxs) { open $channel, $value or croak "open('$value'): $!"; $set_autoflush = 1; } else { croak "unsupported channel key '$key'"; } # autoflush new filehandle if applicable if ($set_autoflush) { my $previous = select($channel); $|++; select($previous); } ## end if (exists $args{file}) # record the channel, on to the next push @channels, $channel; } return \@channels; } sub get_logger { return $_instance ||= __PACKAGE__->new(); } sub LOGLEVEL { return get_logger()->level(@_); } sub LEVELID_FOR { my $level = shift; return $id_for{$level} if exists $id_for{$level}; return; } sub LEVELNAME_FOR { my $id = shift; return $name_of{$id} if exists $name_of{$id}; return $id if exists $id_for{$id}; return; } sub format { my $self = shift; if (@_) { $self->{format} = shift; $self->{args} = \my @args; my $replace = sub { my ($num, $op) = @_; return '%%' if $op eq '%'; return "%%$op" unless defined $format_for{$op}; push @args, $op; return "%$num$format_for{$op}[0]"; }; # transform into real format my $format_chars = join '', keys %format_for; $self->{format} =~ s{ % # format marker ( -? \d* (?:\.\d+)? ) # number ([$format_chars]) # specifier } { $replace->($1, $2); }gsmex; } ## end if (@_) return $self->{format}; } ## end sub format *layout = \&format; sub emit_log { my ($self, $message) = @_; my $fh = $self->{fh}; for my $channel ((ref($fh) eq 'ARRAY') ? (@$fh) : ($fh)) { (ref($channel) eq 'CODE') ? $channel->($message, $self) : print {$channel} $message; } return; } sub log { my $self = shift; return if $self->{level} == $DEAD; my $level = shift; return if $level > $self->{level}; my %data_for = ( level => $level, message => \@_, ); my $message = sprintf $self->{format}, map { $format_for{$_}[1]->(\%data_for); } @{$self->{args}}; return $self->emit_log($message); } ## end sub log sub ALWAYS { return $_instance->log($OFF, @_); } sub _exit { my $self = shift || $_instance; exit $self->{logexit_code} if defined $self->{logexit_code}; exit $Log::Log4perl::LOGEXIT_CODE if defined $Log::Log4perl::LOGEXIT_CODE; exit 1; } ## end sub _exit sub logwarn { my $self = shift; $self->warn(@_); # default warning when nothing is passed to warn push @_, "Warning: something's wrong" unless @_; # add 'at line ' unless argument ends in "\n"; my (undef, $file, $line) = caller(1); push @_, sprintf " at %s line %d.\n", $file, $line if substr($_[-1], -1, 1) ne "\n"; # go for it! CORE::warn(@_) if $LOGDIE_MESSAGE_ON_STDERR; } ## end sub logwarn sub logdie { my $self = shift; $self->fatal(@_); # default die message when nothing is passed to die push @_, "Died" unless @_; # add 'at line ' unless argument ends in "\n"; my (undef, $file, $line) = caller(1); push @_, sprintf " at %s line %d.\n", $file, $line if substr($_[-1], -1, 1) ne "\n"; # go for it! CORE::die(@_) if $LOGDIE_MESSAGE_ON_STDERR; $self->_exit(); } ## end sub logdie sub logexit { my $self = shift; $self->fatal(@_); $self->_exit(); } sub logcarp { my $self = shift; require Carp; $Carp::Internal{$_} = 1 for __PACKAGE__; if ($self->is_warn()) { # avoid unless we're allowed to emit my $message = Carp::shortmess(@_); $self->warn($_) for split m{\n}mxs, $message; } if ($LOGDIE_MESSAGE_ON_STDERR) { local $Carp::CarpLevel = $Carp::CarpLevel + 1; Carp::carp(@_); } return; } ## end sub logcarp sub logcluck { my $self = shift; require Carp; $Carp::Internal{$_} = 1 for __PACKAGE__; if ($self->is_warn()) { # avoid unless we're allowed to emit my $message = Carp::longmess(@_); $self->warn($_) for split m{\n}mxs, $message; } if ($LOGDIE_MESSAGE_ON_STDERR) { local $Carp::CarpLevel = $Carp::CarpLevel + 1; Carp::cluck(@_); } return; } ## end sub logcluck sub logcroak { my $self = shift; require Carp; $Carp::Internal{$_} = 1 for __PACKAGE__; if ($self->is_fatal()) { # avoid unless we're allowed to emit my $message = Carp::shortmess(@_); $self->fatal($_) for split m{\n}mxs, $message; } if ($LOGDIE_MESSAGE_ON_STDERR) { local $Carp::CarpLevel = $Carp::CarpLevel + 1; Carp::croak(@_); } $self->_exit(); } ## end sub logcroak sub logconfess { my $self = shift; require Carp; $Carp::Internal{$_} = 1 for __PACKAGE__; if ($self->is_fatal()) { # avoid unless we're allowed to emit my $message = Carp::longmess(@_); $self->fatal($_) for split m{\n}mxs, $message; } if ($LOGDIE_MESSAGE_ON_STDERR) { local $Carp::CarpLevel = $Carp::CarpLevel + 1; Carp::confess(@_); } $self->_exit(); } ## end sub logconfess sub level { my $self = shift; $self = $_instance unless ref $self; if (@_) { my $level = shift; return unless exists $id_for{$level}; $self->{level} = $id_for{$level}; $self->{_count}++; } ## end if (@_) return $self->{level}; } ## end sub level sub _set_level_if_first { my ($self, $level) = @_; if (!$self->{_count}) { $self->level($level); delete $self->{_count}; } return; } ## end sub _set_level_if_first BEGIN { # Time tracking's start time. Used to be tied to $^T but Log::Log4perl # does differently and uses Time::HiRes if available my $start_time = time(); # default, according to Log::Log4perl my $has_time_hires; eval { require Time::HiRes; $has_time_hires = 1; $start_time = [ Time::HiRes::gettimeofday() ]; }; # For supporting %R my $last_log = $start_time; # %format_for idea from Log::Tiny by J. M. Adler %format_for = ( # specifiers according to Log::Log4perl c => [s => sub { 'main' }], C => [ s => sub { my ($internal_package) = caller 0; for my $i (1 .. 4) { my ($package) = caller $i; last unless defined $package; return $package if $package ne $internal_package; } return '*undef*'; } ], d => [ s => sub { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(); sprintf '%04d/%02d/%02d %02d:%02d:%02d', $year + 1900, $mon + 1, $mday, $hour, $min, $sec; } ], F => [ s => sub { my ($internal_package) = caller 0; for my $i (1 .. 4) { my ($package, $file) = caller $i; last unless defined $package; return $file if $package ne $internal_package; } return '*undef*'; } ], H => [ s => sub { eval { require Sys::Hostname; Sys::Hostname::hostname() } || ''; } ], l => [ s => sub { my (undef, undef, undef, $subroutine) = caller(4); my (undef, $filename, $line) = caller(3); sprintf '%s %s (%d)', $subroutine, $filename, $line; } ], L => [ d => sub { my ($internal_package) = caller 0; for my $i (1 .. 4) { my ($package, undef, $line) = caller $i; last unless defined $package; return $line if $package ne $internal_package; } return -1; } ], m => [ s => sub { join( (defined $, ? $, : ''), map { ref($_) eq 'CODE' ? $_->() : $_; } @{shift->{message}} ); }, ], M => [ s => sub { my ($internal_package) = caller 0; for my $i (1 .. 4) { my ($package) = caller $i; last unless defined $package; return (caller($i + 1))[3] if $package ne $internal_package; } return '*undef*'; } ], n => [s => sub { "\n" },], p => [s => sub { $name_of{shift->{level}} },], P => [d => sub { $$ },], r => [d => ( $has_time_hires # install sub depending on Time::HiRes ? sub { my ($s, $m) = Time::HiRes::gettimeofday(); $s -= $start_time->[0]; $m = int(($m - $start_time->[1]) / 1000); ($s, $m) = ($s - 1, $m + 1000) if $m < 0; return $m + 1000 * $s; } : sub { return 1000 * (time() - $start_time); } ) ], R => [d => ( $has_time_hires # install sub depending on Time::HiRes ? sub { my ($sx, $mx) = Time::HiRes::gettimeofday(); my $s = $sx - $last_log->[0]; my $m = int(($mx - $last_log->[1]) / 1000); ($s, $m) = ($s - 1, $m + 1000) if $m < 0; $last_log = [ $sx, $mx ]; return $m + 1000 * $s; } : sub { my $l = $last_log; return 1000 * (($last_log = time()) - $l); } ) ], T => [ s => sub { my $level = 4; my @chunks; while (my @caller = caller($level++)) { push @chunks, "$caller[3]() called at $caller[1] line $caller[2]"; } join ', ', @chunks; }, ], ); # From now on we're going to play with GLOBs... no strict 'refs'; for my $name (qw( FATAL ERROR WARN INFO DEBUG TRACE )) { # create the ->level methods *{__PACKAGE__ . '::' . lc($name)} = sub { my $self = shift; return $self->log($$name, @_); }; # create ->is_level and ->isLevelEnabled methods as well *{__PACKAGE__ . '::is' . ucfirst(lc($name)) . 'Enabled'} = *{__PACKAGE__ . '::is_' . lc($name)} = sub { return 0 if $_[0]->{level} == $DEAD || $$name > $_[0]->{level}; return 1; }; } ## end for my $name (qw( FATAL ERROR WARN INFO DEBUG TRACE )) for my $name ( qw( FATAL ERROR WARN INFO DEBUG TRACE LOGWARN LOGDIE LOGEXIT LOGCARP LOGCLUCK LOGCROAK LOGCONFESS ) ) { *{__PACKAGE__ . '::' . $name} = sub { $_instance->can(lc $name)->($_instance, @_); }; } ## end for my $name (qw( FATAL ERROR WARN INFO DEBUG TRACE... for my $accessor (qw( fh logexit_code )) { *{__PACKAGE__ . '::' . $accessor} = sub { my $self = shift; $self = $_instance unless ref $self; $self->{$accessor} = shift if @_; return $self->{$accessor}; }; } ## end for my $accessor (qw( fh logexit_code )) my $index = -1; for my $name (qw( DEAD OFF FATAL ERROR WARN INFO DEBUG TRACE )) { $name_of{$$name = $index} = $name; $id_for{$name} = $index; $id_for{$index} = $index; ++$index; } ## end for my $name (qw( DEAD OFF FATAL ERROR WARN INFO DEBUG TRACE )) get_logger(); # initialises $_instance; } ## end BEGIN 1; # Magic true value required at end of module __END__ =pod =encoding UTF-8 =head1 NAME Log::Log4perl::Tiny - mimic Log::Log4perl in one single module =head1 VERSION version 1.2.9 =head1 DESCRIPTION Yes... yet another logging module. Nothing particularly fancy nor original, too, but a single-module implementation of the features I use most from L for quick things, namely: =over =item * easy mode and stealth loggers (aka log functions C, C, etc.); =item * debug message filtering by log level; =item * line formatting customisation; =item * quick sending of messages to a log file. =back There are many, many things that are not included; probably the most notable one is the ability to provide a configuration file. =head2 Why? I have really nothing against L, to the point that one of the import options is to check whether L is installed and use it if possible. I just needed to crunch the plethora of modules down to a single-file module, so that I can embed it easily in scripts I use in machines where I want to reduce my impact as much as possible. =head2 Log Levels L implements all I L's log levels, without the possibility to change them. The correspondent values are available in the following variables (in order of increasing severity or I): =over =item C<< $TRACE >> =item C<< $DEBUG >> =item C<< $INFO >> =item C<< $WARN >> =item C<< $ERROR >> =item C<< $FATAL >> =back The default log level is C<$INFO>. In addition to the above, the following levels are defined as well: =over =item C<< $OFF >> also in L, useful to turn off all logging except for C =item C<< $DEAD >> not in L, when the threshold log level is set to this value every log is blocked (even when called from the C stealth logger). =back You can import these variables using the C<:levels> import facility, or you can use the directly from the L namespace. They are imported automatically if the C<:easy> import option is specified. =head3 Default Log Level As of version 1.1.0 the default logging level is still C<$INFO> like any previous version, but it is possible to modify this value to C<$DEAD> through the C<:dead_if_first> import key. This import key is useful to load Log::Log4perl in modules that you want to publish but where you don't want to force the end user to actually use it. In other terms, if you do this: package My::Module; use Log::Log4perl::Tiny qw( :easy :dead_if_first ); you will import all the functionalities associated to C<:easy> but will silence the logger off I somewhere else the module is loaded (and imported) without this option. In this way: =over =item * if the user of your module does I import L, all log messages will be dropped (thanks to the log level set to C<$DEAD>) =item * otherwise, if the user imports L without the option, the log level will be set to the default value (unless it has already been explicitly set somewhere else). =back =head2 Easy Mode Overview I love L's easy mode because it lets you set up a sophisticated logging infrastructure with just a few keystrokes: use Log::Log4perl qw( :easy ); Log::Log4perl->easy_init({ file => '>>/var/log/something.log', layout => '[%d] [%-5P:%-5p] %m%n', level => $INFO, }); INFO 'program started, yay!'; use Data::Dumper; DEBUG 'Some stuff in main package', sub { Dumper(\%main::) }; If you want, you can replicate it with just a change in the first line: use Log::Log4perl::Tiny qw( :easy ); Log::Log4perl->easy_init({ file => '>>/var/log/something.log', layout => '[%d] [%-5P:%-5p] %m%n', level => $INFO, }); INFO 'program started, yay!'; use Data::Dumper; DEBUG 'Some stuff in main package', sub { Dumper(\%main::) }; Well... yes, I'm invading the L namespace in order to reduce the needed changes as mush as possible. This is useful when I begin using L and then realise I want to make a single script with all modules embedded. There is also another reason why I put C in L namespace: use Log::Log4perl::Tiny qw( :full_or_fake :easy ); Log::Log4perl->easy_init({ file => '>>/var/log/something.log', layout => '[%d] [%-5P:%-5p] %m%n', level => $INFO, }); INFO 'program started, yay!'; use Data::Dumper; DEBUG 'Some stuff in main package', sub { Dumper(\%main::) }; With import option C, in fact, the module first tries to load L in the caller's namespace with the provided options (except C, of course), returning immediately if it is successful; otherwise, it tries to "fake" L and installs its own logging functions. In this way, if L is available it will be used, but you don't have to change anything if it isn't. Easy mode tries to mimic what L does, or at least the things that (from a purely subjective point of view) are most useful: C and I. =head2 C L only supports three options from the big brother: =over =item C<< level >> the log level threshold. Logs sent at a higher or equal priority (i.e. at a more I level, or equal) will be printed out, the others will be ignored. The default value is C<$INFO>; =item C<< file >> a file name where to send the log lines. For compatibility with L, a 2-arguments C will be performed, which means you can easily set the opening mode, e.g. C<<< >>filename >>>. Note that the 2-arguments C is intrinsically insecure and will trigger the following error when running setuid: Insecure dependency in open while running setuid so be sure to use either C or C instead if you're running setuid. These are extensions added by Log::Log4perl::Tiny to cope with this specific case (and also to allow you avoid the 2-args C anyway). Another Log::Log4perl::Tiny extension added as of version 1.3.0 is the key C where you can pass an array reference with channels descriptions (see L for details). The default is to send logging messages to C; =item C<< layout >> the log line layout (it can also be spelled C, they are synonims). The default value is the following: [%d] [%5p] %m%n which means I. =back If you call C with a single unblessed scalar, it is considered to be the C and it will be set accordingly. Otherwise, you have to pass a hash ref with the keys above. In addition to the above keys, the C method installed by Log::Log4perl::Tiny also accepts all keys defined for L, e.g. C (an alias for C) and the different alternatives to C (C, C and C). =head2 Stealth Loggers Stealth loggers are functions that emit a log message at a given severity; they are installed when C<:easy> mode is turned on (see L). They are named after the corresponding level: =over =item C<< TRACE >> =item C<< DEBUG >> =item C<< INFO >> =item C<< WARN >> =item C<< ERROR >> =item C<< FATAL >> =back Additionally, you get the following logger functions (again, these are in line with L): =over =item C<< ALWAYS >> emit log whatever the configured logging level, apart from C<$DEAD> that disables all logging; =item C<< LOGWARN >> emit log at C level and then C it; =item C<< LOGDIE >> emit log at C level, C and then exit (if C didn't already exit); =item C<< LOGEXIT >> emit log at C level and then exit; =item C<< LOGCARP >> emit log at C level and then call C; =item C<< LOGCLUCK >> emit log at C level and then call C; =item C<< LOGCROAK >> emit log at C level and then call C; =item C<< LOGCONFESS >> emit log at C level and then call C; =back If you want to set the exit code for C above (and C as well, in case C does not exit by itself), you can go "the L way" and set C<$Log::Log4perl::LOGEXIT_CODE>, or set a code with C - but you have to wait to read something about the object-oriented interface before doing this! There is also one additional stealth function that L misses but that I think is of the outmoste importance: C, to set the log level threshold for printing. If you want to be 100% compatible with Log::Log4perl, anyway, you should rather do the following: get_logger()->level(...); # instead of LOGLEVEL(...) This function does not get imported when you specify C<:easy>, anyway, so you have to import it explicitly. This will help you remembering that you are deviating from L. =head2 Emitting Logs To emit a log, you can call any of the stealth logger functions or any of the corresponding log methods. All the parameters that you pass are sent to the output stream as they are, except code references that are first evaluated. This lets you embed costly evaluations (e.g. generate heavy dumps of variabls) inside subroutines, and avoid the cost of evaluation in case the log is filtered out: use Data::Dumper; LOGLEVEL($INFO); # cut DEBUG and TRACE out TRACE 'costly evaluation: ', sub { Dumper($heavy_object) }; # Dumper() is not actually called because DEBUG level is # filtered out If you use the C method, the first parameter is the log level, then the others are interpreted as described above. =head2 Log Line Layout The log line layout sets the contents of a log line. The layout is configured as a C-like string, with placeholder identifiers that are modeled (with simplifications) after L's ones: %c Category of the logging event. %C Fully qualified package (or class) name of the caller %d Current date in yyyy/MM/dd hh:mm:ss format %F File where the logging event occurred %H Hostname %l Fully qualified name of the calling method followed by the callers source the file name and line number between parentheses. %L Line number within the file where the log statement was issued %m The message to be logged %M Method or function where the logging request was issued %n Newline (OS-independent) %p Priority of the logging event %P pid of the current process %r Number of milliseconds elapsed from program start to logging event %R Number of milliseconds elapsed from last logging event including a %R to current logging event %% A literal percent (%) sign Notably, both C<%x> (NDC) and C<%X> (MDC) are missing. Moreover, the extended specifier feature with additional info in braces (like C<%d{HH:mm}>) is missing, i.e. the structure of each specifier above is fixed. (Thanks to C for the cool trick of how to handle the C-like string, which is probably mutuated from C itself according to the comments). =head1 SYNOPSYS use Log::Log4perl::Tiny qw( :easy ); Log::Log4perl->easy_init({ file => '/var/log/something.log', layout => '[%d] [%-5P:%-5p] %m%n', level => $INFO, }); WARN 'something weird happened'; INFO 'just doing it'; DEBUG 'this does not get printed at $INFO level'; # LOGLEVEL isn't in Log::Log4perl, but might come handy LOGLEVEL($DEBUG); # enable debugging for small section # otherwise, "get_logger()->level($DEBUG)", see below DEBUG 'now this gets printed'; LOGLEVEL($INFO); # disable debugging again DEBUG 'skipped, again'; DEBUG 'complex evaluation value:', sub { # evaluation skipped if log level filters DEBUG out }; # Object-oriented interface is available as well my $logger = get_logger(); $logger->level($DEBUG); # enable debugging for small section $logger->debug('whatever you want'); $logger->level($INFO); # disable debugging again # All stealth loggers are available LOGCONFESS 'I cannot accept this, for a whole stack of reasons!'; # Want to change layout? $logger->layout('[%d %p] %m%n'); # or, equivalently $logger->format('[%d %p] %m%n'); # Want to send the output somewhere else? use IO::Handle; open my $fh, '>>', '/path/to/new.log'; $fh->autoflush(); $logger->fh($fh); # Want to multiplex output to different channels? $logger->fh( build_channels( fh => \*STDERR, file_create => '/var/log/lastrun.log', file_append => '/var/log/overall.log', ) ); # Want to handle the output message by yourself? my @queue; # e.g. all log messages will be put here $logger->fh(sub { push @queue, $_[0] }); =head1 INTERFACE You have two interfaces at your disposal, the functional one (with all the stealth logger functions) and the object-oriented one (with explicit actions upon a logger object). Choose your preferred option. =head2 Functional Interface The functional interface sports the following functions (imported automatically when C<:easy> is passed as import option except for C, C and C): =over =item C<< TRACE >> =item C<< DEBUG >> =item C<< INFO >> =item C<< WARN >> =item C<< ERROR >> =item C<< FATAL >> stealth logger functions, each emits a log at the corresponding level; =item C<< ALWAYS >> emit log whatever the configured logging level (except C<$DEAD>); =item C<< LEVELID_FOR >> returns the identifier related to a certain level. The input level can be either a name or an identifier itself. Returns C if it is neither. It can be used e.g. if you want to use L but you only have the level name, not its identifier; =item C<< LEVELNAME_FOR >> returns the name related to a certain level. The input level can be either a name or an identifier itself. Returns C if it is neither. =item C<< LOGWARN >> emit log at C level and then C it; =item C<< LOGDIE >> emit log at C level, C and then exit (if C didn't already exit); =item C<< LOGEXIT >> emit log at C level and then exit; =item C<< LOGCARP >> emit log at C level and then call C; =item C<< LOGCLUCK >> emit log at C level and then call C; =item C<< LOGCROAK >> emit log at C level and then call C; =item C<< LOGCONFESS >> emit log at C level and then call C; =item C<< LOGLEVEL >> (Not in L) (Not imported with C<:easy>) set the minimum log level for sending a log message to the output; =item C<< build_channels >> (Not in L) (Not imported with C<:easy>) build multiple channels for emitting logs. my $channels = build_channels(@key_value_pairs); # OR my $channels = build_channels(\@key_value_pairs); The input is a sequence of key-value pairs, provided either as a list or through a reference to an array containing them. They are not forced into a hash because the same key can appear multiple times to initialize multiple channels. The key specifies the type of the channel, while the value is specific to the key: =over =item B<< fh >> value is a filehandle (or anything that can be passed to the C function) =item B<< sub >> =item B<< code >> value is a reference to a subroutine. This will be called with two positional parameters: the message (already properly formatted) and a reference to the logger message =item B whatever can be passed to keys C or to C/C above =item B<< file >> =item B<< file_insecure >> =item B<< file_create >> =item B<< file_append >> value is the file where log data should be sent. The first one is kept for compliance with Log::Log4perl::easy_init's way of accepting a file. It eventually results in a two-arguments C call, so that you can quickly set how you want to open the file: file => '>>/path/to/appended', # append mode file => '>/path/to/new-file', # create mode You should avoid doing this, because it is intrinsically insecure and will yield an error message when running setuid: Insecure dependency in open while running setuid C is an alias to C, so that you can explicitly signal to the maintainer that you know what you're doing. C and C will use the three-arguments C call and thus they don't trigger the error above when running setuid. As the respective names suggest the former creates the file from scratch (possibly deleting any previous file with the same path) while the latter opens the file in append mode. =back =back =head2 Object-Oriented Interface The functional interface is actually based upon actions on a pre-defined fixed instance of a C object, so you can do the same with a logger object as well: =over =item C<< get_logger >> this function gives you the pre-defined logger instance (i.e. the same used by the stealth logger functions described above). =item C<< new >> if for obscure reasons the default logger isn't what you want, you can get a brand new object! The constructor accepts either a list of key-values or a reference to a hash, supporting the following keys: =over =item B<< channels >> set a list (through an array reference) of channels. See L for additional information. =item B<< file >> =item B<< file_insecure >> =item B<< file_create >> =item B<< file_append >> set the file where the log data will be sent. The first one is kept for compliance with Log::Log4perl::easy_init's way of accepting a file. It eventually results in a two-arguments C, so you might want to take care when running in taint mode. See also L for additional information. This option takes precedence over C described below. =item B<< format >> =item B<< layout >> =item B<< level >> see L<< C >> and the methods below with the same name =item B<< fh >> see method C below =back =back The methods you can call upon the object mimic the functional interface, but with lowercase method names: =over =item C<< trace >> =item C<< debug >> =item C<< info >> =item C<< warn >> =item C<< error >> =item C<< fatal >> logging functions, each emits a log at the corresponding level; =item C<< is_trace >> =item C<< is_debug >> =item C<< is_info >> =item C<< is_warn >> =item C<< is_error >> =item C<< is_fatal >> =item C<< isTraceEnabled >> =item C<< isDebugEnabled >> =item C<< isInfoEnabled >> =item C<< isWarnEnabled >> =item C<< isErrorEnabled >> =item C<< isFatalEnabled >> log level test functions, each returns the status of the corresponding level; =item C<< always >> emit log whatever the configured logging level; =item C<< logwarn >> emit log at C level (if allowed) and C (always); =item C<< logdie >> emit log at C level, C and then exit (if C didn't already exit); =item C<< logexit >> emit log at C level and then exit; =item C<< logcarp >> emit log at C level and then call C; =item C<< logcluck >> emit log at C level and then call C; =item C<< logcroak >> emit log at C level and then call C; =item C<< logconfess >> emit log at C level and then call C; =back The main logging function is actually the following: =over =item C<< log >> the first parameter is the log level, the rest is the message to log apart from references to subroutines that are first evaluated =item C<< emit_log >> emit the message in the first positional parameter to all logging channels =back Additionally, you have the following accessors: =over =item C<< level >> get/set the minimum level for sending messages to the output stream. By default the level is set to C<$INFO>. =item C<< fh >> get/set the output channel. As an extention over L, you can also pass a reference to a subroutine or to an array. If you set a reference to a sub, it will be called with two parameters: the message that would be print and a reference to the logger object that is calling the sub. For example, if you simply want to collect the log messages without actually outputting them anywhere, you can do this: my @messages; get_logger()->fh(sub { my ($message, $logger) = @_; push @messages, $message; return; }); If you set a reference to an array, each item inside will be used for log output; its elements can be either filehandles or sub references, used as described above. This is a handy way to set multiple output channels (it might be implemented externally through a proper subroutine reference of course). By default this parameter is set to be equal to C. =item C<< format >> =item C<< layout >> get/set the line formatting; =item C<< logexit_code >> get/set the exit code to be used with C (and C as well if C doesn't exit). =back =head1 DEPENDENCIES None. =head1 BUGS AND LIMITATIONS No bugs have been reported. Please report any bugs or feature requests through http://rt.cpan.org/ =head1 SEE ALSO L is one of the most useful modules I ever used, go check it! =head1 AUTHOR Flavio Poletti =head1 COPYRIGHT AND LICENSE Copyright (C) 2010-2016 by Flavio Poletti . This module is free software. You can redistribute it and/or modify it under the terms of the Artistic License 2.0. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =cut END_OF_FILE # __MOBUNDLE_FILE__ ); unshift @INC, sub { my ($me, $packfile) = @_; return unless exists $file_for{$packfile}; (my $text = $file_for{$packfile}) =~ s/^\ //gmxs; chop($text); # added \n at the end open my $fh, '<', \$text or die "open(): $!\n"; return $fh; }; } ## end BEGIN # __MOBUNDLE_INCLUSION__ use Log::Log4perl::Tiny qw< :easy >; Log::Log4perl->easy_init( { layout => '[%d] [%-5p] %m%n', level => $INFO, } ); use App::Sets; App::Sets->run(@ARGV); __END__ =pod =head1 NAME sets - set operations in Perl =head1 VERSION version 0.974 =head1 USAGE sets [--usage] [--help] [--man] [--version] sets [--binmode|-b ] [--cache-sorted|-S ] [--internal-sort|-I] [--loglevel|-l ] [--sorted|-s] [--trim|-t] expression... =head1 EXAMPLES # intersect two files sets file1 ^ file2 # things are speedier when files are sorted sets -s sorted-file1 ^ sorted-file2 # you can use a bit caching in case, generating sorted files # automatically for possible multiple or later reuse. For example, # the following is the symmetric difference where the sorting of # the input files will be performed two times only sets -S .sorted '(file1 - file2) + (file2 - file1)' # In the example above, note that expressions with grouping need to be # specified in a single string. # sometimes leading and trailing whitespaces only lead to trouble, so # you can trim data on-the-fly sets -t file1-unix - file2-dos =head1 DESCRIPTION This program lets you perform set operations working on input files. The set operations that can be performed are the following: =over =item B<< intersection >> the binary operation that selects all the elements that are in both the left and the right hand operand. This operation can be specified with any of the following operators: =over =item B<< intersect >> =item B<< i >> =item B<< I >> =item B<< & >> =item B<< ^ >> =back =item B<< union >> the binary operation that selects all the elements that are in either the left or the right hand operand. This operation can be specified with any of the following operators: =over =item B<< union >> =item B<< u >> =item B<< U >> =item B<< v >> =item B<< V >> =item B<< | >> =item B<< + >> =back =item B<< difference >> the binary operation that selects all the elements that are in the left but not in the right hand operand. This operation can be specified with any of the following operators: =over =item B<< minus >> =item B<< less >> =item B<< \ >> =item B<< - >> =back =back Expressions can be grouped with parentheses, so that you can set the precedence of the operations and create complex aggregations. For example, the following expression computes the symmetric difference between the two sets: (set1 - set2) + (set2 - set1) Expressions should be normally entered as a single string that is then parsed. In case of I operations (e.g. one operation on two sets) you can also provide multiple arguments. In other terms, the following invocations should be equivalent: sets 'set1 - set2' sets set1 - set2 Options can be specified only as the first parameters. If your first set begins with a dash, use a double dash to explicitly terminate the list of options, e.g.: sets -- -first-set ^ -second-set In general, anyway, the first non-option argument terminates the list of options as well, so the example above would work also without the C<-->. In the pathological case that your file is named C<-s>, anyway, you would need the explicit termination of options with C<-->. You get the idea. Files with spaces and other weird stuff can be specified by means of quotes or escapes. The following are all valid methods of subtracting C from C: sets "'input file' - 'to remove'" sets '"input file" - "to remove"' sets 'input\ file - to\ remove' sets "input\\ file - to\\ remove" sets input\ file - to\ remove The first two examples use single and double quoting. The third example uses a backslash to escape the spaces, as well as the fourth example in which the escape character is repeated due to the interpolation rules of the shell. The last example leverages upon the shell rules for escaping AND the fact that simple expressions like that can be specified as multiple arguments instead of a single string. =head1 OPTIONS =over =item --binmode | -b I set a string for calling C on STDOUT. By default, C<:raw:encoding(UTF-8)> is set, to normalize newlines handling and expect UTF-8 data in. =item --cache-sorted | -S I input files are sorted and saved into a file with the same name and the I appended, so that if this file exists it is used instead of the input file. In this way it is possible to generate sorted files on the fly and reuse them if available. For example, suppose that you want to remove the items in C from files C and C; in the following invocations: sets file1 - removeme > file1.filtered sets file2 - removeme > file2.filtered we have that file C would be sorted in both calls, while in the following ones: sets -S .sorted file1 - removeme > file1.filtered sets -S .sorted file2 - removeme > file2.filtered it would be sorted only in the first call, that generates C that is then reused by the second call. Of course you're trading disk space for speed here, but most of the times it is exactly what you want to do when you have disk space but little time to wait. This means that most of the times you'll e wanting to use this option, I you're willing to wait more or you already know that input files are sorted (in which case you would use C<< --sorted | -s >> instead). =item --help print a somewhat more verbose help, showing usage, this description of the options and some examples from the synopsis. =item --internal-sort | -I force using the internal sorting facility even if external C is available. Some rough benchmark showed that this is slower about 7% with respect to using the external C, so avoid this if you can. =item --loglevel | -l I set the verbosity of the logging subsystem. Allowed values (in increasing verbosity): C, C, C, C, C and C. =item --man print out the full documentation for the script. =item --sorted | -s in normal mode, input files are sorted on the fly before being used. If you know that I your input files are already sorted, you can spare the extra sorting operation by using this option: sets -s file1.sorted ^ file2.sorted =item --trim | -t if you happen to have leading and/or trailing white spaces (including tabs, carriage returns, etc.) that you want to get rid of, you can turn this option on. This is particularly useful if some files come from the UNIX world and other ones from the DOS world, becaue they have different ideas about terminating a line. =item --usage print a concise usage line and exit. =item --version print the version of the script. =back =head1 ENVIRONMENT Some options can be set from the environment: =over =item C the same as specifying C<< --cache-sorted | -S I >> on the command line. The contents of C is used as the I. =item C the same as specifying C<< --internal-sort | -I >> on the command line. =item C maximum number of (temporary) files to keep when using the internal sorting facility. =item C maximum number of input records to keep in memory when using the internal sorting facility. =item C the same as specifying C<--sorted | -s> on the command line =item C the same as specifying C<--trim | -t> on the command line =back =cut =head1 AUTHOR Flavio Poletti =head1 COPYRIGHT AND LICENSE Copyright (C) 2011-2016 by Flavio Poletti polettix@cpan.org. This module is free software. You can redistribute it and/or modify it under the terms of the Artistic License 2.0. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =cut