#!perl use 5.008; $^W = 1; # use strict; $SIG{INT} = sub { exit(2) }; ${^WIN32_SLOPPY_STAT} = 1; my @Warnings; $SIG{__WARN__} = sub { push @Warnings, @_ }; our $VERSION = '2.24'; our ($Bin_dir, %Col, $Col_Reset, %Env, @FileFind_opts, $File, $Filepath, $HOME_dir, $Msg_rs, $Newline, @P, %Peg_longopt, %Peg_S, $Verbose, $Z); my $Usage = <<"EOT"; Usage: peg [OPTION]... PERLEXPR [FILE]... Try `peg --help' for more information. EOT my (@Before, @Cmdline_dirs, @Cmdline_files, @F, %F, %Globbed, @Ini_files, @Is_ascii_text, @Matched_files, %Opts, @Peg_options_ARGV, @Perlexpr, @Perlexpr_k, @S, %S); my ($Beep, $Binary_file, $Buffer_contents, $Buffer_fh, $Bytes_read, $C, $Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line, $Console_width, $Context_line, $Context_line2, $Context_lineno, $Context_lineno2, $Context_matcher, $Context_matcher2, $Count, $CRLF_to_newline, $CRs, $Do_globbing, $Err, $First, $Found, $Guess_encoding, $Implicit_C, $Input_encoding, $Input_record_separator, $Inside_archive, $JJ_gap, $Last_matches_file, $Line_matched, $Match_failed, $Matched, $Matched_before, $Matches, $Max_matches, $MTime, $MTime_new, $MTime_old, $Needs_crlf_layer, $Newline_literal, $Offset, $Opt_d, $Opt_m, $Opt_oo, $Opt_p_expr, $Opt_pp_code, $Opt_pp_expr, $Opt_r_cmd, $Opt_r_fork, $Opt_s, $Opt_ss, $Opt_y, $Opt_yy, $Output_BOM, $Output_encoding, $P, $Perlexpr, $Print_context_matcher, $Printed_Context_line, $Printed_Context_line2, $S_depth, $S_F, $S_FILE, $S_handler_re, $S_nonarchive_re, $Search, $Search_STDIN, $Simple_Perlexpr, $Size, $Slurp, $Slurp_maxsize, $Start_time, $STDIN_is_terminal, $STDOUT_is_terminal, $Wide_chars); my ($Total_bytes, $Total_files, $Total_lines) = (0, 0, 0); my ($DQ, $SQ, $BT, $EM, $GT, $LT, $PC) = qw( " ' ` ! > < % ); my $R_file = 'peg_' . time . ($< || '') . abs($$) . '.txt'; my ($Worker_count, $Worker_work) = (2, 24); my $Is_Win32 = $^O eq 'MSWin32'; my ($After, $Before) = (2, 2); my $Called = caller(); END { close STDOUT or die_("can't close STDOUT: $!") }; load_ini_files(); process_ARGV(); process_options1(); build_Perlexpr(); process_options2(); build_search(); show_debug() if $Opts{D}; run(); $Called ? return : exit(@Matched_files ? 0 : 1); sub eval_ { eval $_[0] } sub chomp_ { $_[0] =~ s/\015?[\012\015]\z// } sub autoflush { select((select(shift), $| = 1)[0]) } sub warn_ { my $msg = join '', @_; chomp_ $msg; print STDERR "peg: $msg\n"; } # warn_ sub die_ { warn_ @_; exit(2); } # die_ sub cwd { require Cwd; local $_ = Cwd::cwd(); s|\\|/|g if $Is_Win32; $_ .= '/' unless m|/\z|; $_ = ucfirst if m|^\w:/|; return $_; } # cwd sub ee { $@ =~ s/^(.*) at .* line \d+.*\z/$1/s; chomp_ $@; $@ .= "\n"; return $@; } # ee sub load_ini_files { $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Bin_dir = ($_ = $ENV{PEG_BINDIR}) ? $_ : ($0 =~ /^(.*)[\\\/]/) ? $1 : (( require FindBin), $FindBin::RealBin); $HOME_dir = $ENV{HOME} || $ENV{USERPROFILE} || '/'; for ($Bin_dir, $HOME_dir) { s|\\|/|g if $Is_Win32; $_ .= '/' unless m|/\z|; $_ = ucfirst if m|^\w:/|; } @FileFind_opts = ('preprocess' => sub { sort { lc($a) cmp lc($b) } @_ }); $Last_matches_file = "${HOME_dir}.peg_matches"; $Msg_rs = "\003\003\003\000"; my %orig_ENV = %ENV; unless (@ARGV and $ARGV[0] =~ /^-YY/) { my @f = ($_ = $ENV{PEG_INI}) ? ($_) : ("${Bin_dir}peg_ini.pl", "${HOME_dir}.peg_ini.pl", ".peg_ini.pl"); foreach my $f (@f) { next unless -f $f; eval { require $f }; $@ and die_ "bad ini file $f:\n", @Warnings, ⅇ push @Ini_files, $INC{$f}; } } foreach my $k (keys %ENV) { (exists $Env{$k} and (!exists $orig_ENV{$k} or $ENV{$k} ne $orig_ENV{$k})) and warn_ "ini files set $k in both %ENV and %Env"; $Env{$k} = $ENV{$k}; } $Beep = exists $Env{PEG_BEEP} ? "\a" : ""; $Console_width = $Env{PEG_CONSOLE_WIDTH} || 70; $Slurp_maxsize = $Env{PEG_SLURP_MAXSIZE} || 67_108_864; # 2**26 $Do_globbing = (exists $Env{PEG_GLOB} ? $Env{PEG_GLOB} : $Is_Win32); $Peg_longopt{help} = sub { help($_[0]->[0]) }; } # load_ini_files sub process_ARGV { my (@argv, $init_peg_options, %peg_options); my $options = 1; my $context = 'C'; my $pe_type = ''; if (@ARGV == 1 and $ARGV[0] eq '-V') { die_ "v$VERSION Perl $] $^X\n"; } $Opts{$_} = 0 for ('a'..'z', 'A'..'Z', '#', qw(% = + _ / \ { })); if ($_ = $Env{PEG_OPTIONS}) { if (/[\s\"]/) { # handle quoted arguments while (s/^\s+//, length) { if (/^\"/) { # eg. "a double ""quoted"" string" s/^"((?>[^"]*)(?>(?:""(?>[^"])*)*))"(?>\s|$)// or die_ "bad double quoted string in PEG_OPTIONS: $_"; (my $arg = $1) =~ s/""/"/g; push @Peg_options_ARGV, $arg; } else { s/^(\S+)//; push @Peg_options_ARGV, $1; } } } else { $_ = "-$_" unless /^-/; @Peg_options_ARGV = ($_); } @argv = @Peg_options_ARGV; } push @argv, @ARGV; while (defined ($_ = shift @argv)) { # Keep a copy of %Opts at the end of PEG_OPTIONS. %peg_options = %Opts if (@argv == $#ARGV and !$init_peg_options++); # Firstly, some OPTIONs take an argument. if ($Opts{e}) { if ($Opts{e} == 1) { push @Perlexpr, $_ } else { push @Cmdline_files, $_ } $Opts{e} = 0; } elsif ($Opts{f}) { open(my $fin, "<", $_) or die_ "can't open -f file $_: $!"; my %seen; while (<$fin>) { chomp_ $_; next if $_ eq '' or $seen{$_}++; if ($Opts{f} == 1) { push @Perlexpr, $_ } elsif ($Opts{f} == 2) { push @Cmdline_files, $_ } else { ++$F{$_} } } $Opts{f} = 0; } elsif ($Opts{'m'}) { /^[1-9][0-9]*$/ or die_ "-m expected integer argument: $_"; $Max_matches = $_; $Opt_m = $Opts{'m'}; $Opts{'m'} = 0; } elsif ($Opts{M}) { my $time = $_; my ($num, $fix, $interval); # in days if ($time =~ s/\#(\d+(?:\.\d*)?)([smhdw])?$//) { # INTERVAL ($interval, my $units) = ($1, $2 || 'd'); if ($units eq 's') { $interval /= 24*60*60 } elsif ($units eq 'm') { $interval /= 24*60 } elsif ($units eq 'h') { $interval /= 24 } elsif ($units eq 'w') { $interval *= 7 } } if ($time =~ /^(\d+):(\d*)(:(\d*))?(?:-(\d+))?$/) { # EXACT my $sec_specified = defined $3; my ($hrs, $min, $sec, $days) = ($1, $2 || 0, $4 || 0, $5 || 0); die_ "bad -M time: $time" if ($hrs >= 24 or $min >= 60 or $sec >= 60); my @lt = localtime(); my $now = $lt[0] + 60*$lt[1] + 60*60*$lt[2]; my $given = $sec + 60*$min + 60*60*$hrs; $num = $days + (($now - $given) / (24*60*60)); $num < 0 and warn_ "future -M time: $time$Beep"; $fix = 1 / ($sec_specified ? 24*60*60 : 24*60); } elsif ($time =~ m|^(\d+)/(\d+)(?:/(\d+))?$| or $time =~ m|^()(\d+)-(\d+)-(\d+)$|) { # DATE my ($day, $mon, $yr) = length($1) ? ($1, $2, $3) : ($4, $3, $2); die_ "bad -M date: $time" if ($day > 31 or $mon > 12); $yr = (localtime())[5] unless defined $yr; require Time::Local; my $t = Time::Local::timelocal_nocheck(0,0,0,$day,$mon-1,$yr); $num = (time() - $t) / (24*60*60); $num < 0 and warn_ "future -M date: $time$Beep"; $fix = 1; } elsif ($time =~ /^(\d+(?:\.\d*)?)([smhdtw])?$/) { # OFFSET ($num, my $units) = ($1, $2 || 'd'); $fix = 0; if ($units eq 's') { $num /= 24*60*60 } elsif ($units eq 'm') { $num /= 24*60 } elsif ($units eq 'h') { $num /= 24 } elsif ($units eq 't') { $num -= 1-((1.0+(localtime)[2])/24) } elsif ($units eq 'w') { $num *= 7 } } elsif ($time =~ /^(.+)\@$/) { # FILE my $file = $1; $num = -M $file; die_ "-M no such file: $file" unless defined $num; $fix = 1/(24*60*60); } else { die_ "bad -M argument: $time"; } if (defined $interval) { $MTime_old = [$num + $interval, $fix]; $MTime_new = [$num - $interval, $fix]; } elsif ($Opts{M} > 1) { $MTime_old = [$num, $fix]; } else { $MTime_new = [$num, $fix]; } $Opts{M} = 0; } elsif ($Opts{p}) { my $negated = s/^[!\#](?=[\w\.\,\-]+$)//; my $expr; $expr = $Env{"PEG_P_" . uc($_)} if /^\w+$/; # ALIAS $expr ||= '/' . quotemeta($_) . '$/i' if /^[\w\.\,\-]+$/; # EXTENSION $expr ||= $_; # EXPRESSSION $expr =~ s/^-s\s* "\xEF\xBB\xBF", 'utf16be' => "\xFE\xFF", 'utf16le' => "\xFF\xFE", 'utf32be' => "\x00\x00\xFE\xFF", 'utf32le' => "\xFF\xFE\x00\x00", }->{$Output_encoding} || die_ "BOM unknown for -} encoding: $Output_encoding"; $Opts{'}'} = 0; } # Named long options. elsif ($options and /^--?([a-zA-Z-]{3,})$/ and exists $Peg_longopt{$1}) { my $opt = $1; eval { $Peg_longopt{$opt}->(\@argv, \@Cmdline_files) }; $@ and die_ "--$opt: ", ⅇ } # Now check for an OPTION argument. elsif ($options && s/^-(?=.)//) { while (s/^(.)//) { my $opt = $1; if ($opt =~ /^[abcdefhiklmnopqrstvwxyzABCDEFGHIJKLMNOPRSTUVWXZ_=\+\#\/\{\}\\]$/) { # Available: gjuQ # Options set in PEG_OPTIONS do not count towards overloading. if ($peg_options{$opt}) { delete $peg_options{$opt}; $Opts{$opt} = 1; } else { ++$Opts{$opt}; } $context = $opt if ($opt =~ /^[ABC]$/); $pe_type = $opt if ($opt =~ /^[koO]$/); } elsif ($opt =~ /^\d$/) { while (s/^(\d)//) { $opt = (10 * $opt) + $1 } if ($Opts{'m'}) { $Max_matches = $opt; $Opt_m = $Opts{'m'}; $Opts{'m'} = 0; } else { $After = $opt if ($context ne 'B'); $Before = $opt if ($context ne 'A'); $Implicit_C = 1; } } elsif ($opt eq '-') { $options = undef } elsif ($opt eq 'Y') { if (s/^,(.*)$//) { foreach my $o (split //, $1) { $Opts{$o} = 0; $o eq 'C' and $Implicit_C = undef; $o eq 'm' and $Opt_m = undef; $o eq 'M' and $MTime_new = $MTime_old = undef; $o eq 'p' and $Opt_p_expr = $Opt_pp_expr = undef; $o eq 'z' and $Context_matcher = $Context_matcher2 = undef; $o eq '/' and $Input_record_separator = undef; $o eq '{' and $Input_encoding = undef; $o eq '}' and $Output_encoding = undef; $o eq 'P' and $Code_before_close = $Code_before_open = $Code_after_open = $Code_at_end = $Code_per_line = undef; } } else { $Opts{$_} = 0 for keys %Opts; $Code_after_open = $Code_at_end = $Code_per_line = undef; $Code_before_close = $Code_before_open = undef; $Context_matcher = $Context_matcher2 = undef; $Input_encoding = $Output_encoding = undef; $Implicit_C = $Input_record_separator = undef; $Opt_m = $Opt_p_expr = $Opt_pp_expr = undef; $MTime_new = $MTime_old = undef; %F = (); # Leave @Perlexpr, @Cmdline_files } } elsif ($opt eq '%') { require Time::HiRes; $Start_time ||= Time::HiRes::time(); ++$Opts{'%'}; } else { die_ "unknown option -- $opt\n$Usage"; } } } # Typically, first non OPTION argument is the PERLEXPR. elsif (!(@Perlexpr or @Perlexpr_k) or ($options and $pe_type ne '')) { if ($pe_type eq 'k') { push @Perlexpr_k, $_ } else { push @Perlexpr, $_ } } # Arguments which are neither OPTION nor PERLEXPR are FILEs. else { push @Cmdline_files, $_; } } if ($Opts{X} > 1) { while () { chomp_ $_; next if $_ eq ''; if ($pe_type eq 'k') { push @Perlexpr_k, $_ } else { push @Perlexpr, $_ } } $Opts{X} %= 2; } foreach my $opt (qw(e f m M p P z / { })) { die_ "option requires an argument -- $opt" if $Opts{$opt}; } die $Usage unless (@Perlexpr or @Perlexpr_k or $Opts{'='}); } # process_ARGV sub last_matches { my $return_fullpaths = shift; open(my $fin, "<", $Last_matches_file) or die_ "can't open $Last_matches_file: $!"; my $cwd = cwd(); my $drive = ($cwd =~ m|^(\w:)/| ? uc($1) : ''); my (@matches, %seen); while (<$fin>) { chomp_ $_; s/^\Q$cwd//o or ($drive and s/^\Q$drive//o) unless $return_fullpaths; push @matches, $_ unless $seen{$_}++; } return @matches; } # last_matches sub save_matches { return if ($Opt_yy or !@Matched_files or $Search_STDIN); open(my $fout, ">", $Last_matches_file) or (warn_ "can't write to $Last_matches_file: $!"), return; my $cwd = cwd(); my $drive = ($cwd =~ m|^(\w:)/| ? uc($1) : ''); foreach my $f (@Matched_files) { $f =~ s|\\|/|g if $Is_Win32; if ($Is_Win32 and $f =~ m|^\w:/|) { $f = ucfirst($f) } elsif ($f =~ m|^//|) {} # UNC elsif ($f =~ m|^/|) { $f = "$drive$f" } else { $f = "$cwd$f" } print $fout $f, "\n"; } close $fout or warn_ "can't close $Last_matches_file: $!"; } # save_matches sub process_options1 { $Opt_s = ($Opts{'s'} == 1); $Opt_ss = $Opts{'s'}; $Verbose = $Opts{V}; $STDIN_is_terminal = -t STDIN; $STDOUT_is_terminal = -t STDOUT; if (!$STDOUT_is_terminal or $Opts{R}) { $Opts{'#'} = 0 unless $Opts{'#'} > 1; } if ($Output_encoding and $Opts{'#'}) { warn_ "-} prevents colored output" unless $Opt_s; $Opts{'#'} = 0; } if ($Is_Win32 and ($STDOUT_is_terminal or $Opts{'#'}) and !$Output_encoding) { # This is needed to properly handle >127 chars in the correct codepage. eval { require Win32::Console::ANSI; }; if ($@) { $Opts{'#'} and die_ "can't color output:\n", ⅇ unless (exists $Env{PEG_NO_WIN32_CONSOLE_ANSI}) { warn_ "failed to load Win32::Console::ANSI" unless $Opt_s; } } } my %types = qw(f filename c colon l lineno b offset n nonmatch m match z z_context y z_context2); if ($Opts{'#'}) { require Term::ANSIColor; # Default coloring mimics GNU grep 2.5.3's --color. my $peg_color = $Env{PEG_COLOR} || 'b=g,c=c,f=m,l=g,m=dr,z=c'; $Col_Reset = Term::ANSIColor::color('reset'); $Col{$_} = $Col_Reset for values %types; $peg_color =~ s/\s+//g; foreach my $specifier (split /,/, lc $peg_color) { eval { $specifier =~ /^(\w)=(.+)$/ or die; my ($t, $col_def) = ($1, $2); my $type = $types{$t} or die; $Col{$type} = get_col($col_def); }; $@ and die_ "bad specifier '$specifier' in PEG_COLOR: $peg_color"; } } else { $Col{$_} = '' for values %types; $Col_Reset = ''; } if ($Opts{'='}) { my @files = last_matches($Opts{H}); warn_ scalar(@files), " files matched" unless $Opt_s; my $sort = $Opts{t} + ($Opts{l} ? $Opts{l} - 1 : 0); # -=ll := -=lt my $long = $Opts{l} && !$Opts{h}; # -=llh := -=t my ($filtered, $index, @matches, $mtime, $size); foreach my $file (@files) { ++$index; if ($Opt_p_expr) { $_ = $File = $file; unless (eval_ $Opt_p_expr) { $@ and warn_ "-p error: $file: ", ⅇ ++$filtered; next; } } $file =~ s|/|\\|g if $Opts{"\\"}; if ($long or $sort) { my @s = stat($file) or ($Opt_s || warn_ "can't stat $file: $!"), next; ($size, $mtime) = ($s[7], $s[9]); } push @matches, [$mtime, $size, $file, $index]; } $filtered and warn_ "$filtered files filtered by -p" unless $Opt_s; $sort and @matches = sort {( $sort == 1 ? $a->[0] <=> $b->[0] : $sort == 2 ? $b->[0] <=> $a->[0] : $sort == 3 ? $a->[1] <=> $b->[1] : $sort == 4 ? $b->[1] <=> $a->[1] : 0) || $a->[2] cmp $b->[2]; } @matches; foreach my $m (@matches) { if ($long) { my @t = localtime $m->[0]; my $day = qw(Sun Mon Tue Wed Thu Fri Sat)[$t[6]]; my $mon = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)[$t[4]]; printf "%04d %s %2d %s %2d:%02d:%02d %9d ", 1900+$t[5], $mon, $t[3], $day, $t[2], $t[1], $t[0], $m->[1]; } printf "%s%-3d%s ", $Col{lineno}, $m->[3], $Col_Reset if $Opts{n}; print $Col{filename}, $m->[2], $Col_Reset, "\n"; last if (defined $Max_matches and --$Max_matches <= 0); } exit; } # -E overrides -F & -G, and -F overrides -G. if ($Opts{E}) { $Opts{F} = $Opts{G} = 0; } elsif ($Opts{F}) { # Use -FF in PEG_OPTIONS to make it be overrideable by -G on the command line. if ($Opts{F} == 1) { $Opts{G} = 0; } elsif ($Opts{G}) { $Opts{F} = 0; } } if ($Context_matcher2 and !$Context_matcher) { $Context_matcher = $Context_matcher2; $Context_matcher2 = undef; } $Opts{k} = 0 if !@Perlexpr_k; $Print_context_matcher = $Context_matcher; # In order of precedence. if ($Opts{k}) { $Print_context_matcher = $Implicit_C = $Opt_m = undef; $Opts{$_} = 0 for qw(b c h l o A B C J L N O W Z); } if ($Opts{O}) { $Print_context_matcher = $Implicit_C = $Opt_m = undef; $Opts{$_} = 0 for qw(b c h l o A B C J L N W Z); } if ($Opts{Z}) { $Print_context_matcher = $Implicit_C = $Opt_m = undef; $Opts{$_} = 0 for qw(b c h l o A B C L W); } if ($Opts{L}) { $Print_context_matcher = $Implicit_C = $Opt_m = undef; $Opts{$_} = 0 for qw(b c h l o A B C J W); } # GNU grep has -l override -c; peg works the other way around. if ($Opts{c}) { $Print_context_matcher = $Implicit_C = undef; $Opts{$_} = 0 for qw(b l h o A B C J W); } if ($Opts{l}) { $Print_context_matcher = $Implicit_C = $Opt_m = undef; $Opts{$_} = 0 for qw(b h o A B C J W); } if ($Opts{o} > 1) { $Opt_oo = 1; } $Opt_y = ($Opts{'y'} % 2); $Opt_yy = ($Opts{'y'} > 1); $Opts{w} = 0 if $Opts{x}; $Opt_pp_expr = undef unless $Opts{S}; } # process_options1 sub get_col { my $col_def = shift; my %col = qw(r red g green y yellow b blue m magenta c cyan w white k black); $col_def =~ /^(d?)(\w)(o(d?)(\w))?$/ or die; my ($d, $c, $od, $oc) = ($1, $2, $4, $5); die unless (exists $col{$c} and (!$oc or exists $col{$oc})); return '' unless $Opts{'#'}; my $col = $col{$c}; $col = "bold $col" if $d; $col = "$col on_$col{$oc}" if $oc; $col = "underline $col" if $od; return Term::ANSIColor::color("reset $col"); } # get_col sub build_Perlexpr { my ($simple, $warned); my $iwx = ($Opts{i} || $Opts{w} || $Opts{x}); # if $simple then -F else -G # If the PERLEXPR is simple enough, then it is faster to read # the file in one go and perform the match on a single line. $Slurp = 1 unless ($Opts{v} or $Opts{x} or $Opts{E} or $Opts{S} or $Opt_oo or $Code_per_line); # Can we guarantee that after a true PERLEXPR, that ("$`$&$'" eq $_)? $Simple_Perlexpr = !($Opts{v} || $Opts{E} || $Opts{W}); for (@Perlexpr, @Perlexpr_k) { next if $Opts{E}; $simple = /^[\w\s\-\.\,\'\:\;\#]*$/; # Non simple implies -E unless one of -[FGiwx]. if ($simple or $Opts{F}) {} elsif ($iwx or $Opts{G}) { # Beware slurping causing false matches across newlines cf. peg -lG "foo[^x]+bar" $Slurp = undef if /\[\^/; } else { $Slurp = $Simple_Perlexpr = undef; next; # implicit -E } # Beware accidental pattern option eg. peg -i /foo/ bar if (!$simple and $iwx and !($Opts{G} or $Opts{F}) and !$warned++) { warn_ "interpreting as pattern: $_$Beep" unless $Opt_s; } if ($Opts{F} or ($simple and !$Opts{G})) { $_ = quotemeta($_); } else { s|/|\\/|g; # cf. "peg -G '^/' f" vs "peg -F '^/' f" # Must not slurp if PERLEXPR matches real line ends: $Slurp = undef if (/(?:^|[^\\\[])\^/ or /[^\\]\$(?:\W|$)/ or /\\[azZ]/); } $_ = '\b(?:' . $_ . ')\b' if $Opts{w}; # cf. peg -w "a|b" $_ = '^(?:' . $_ . ')$' if $Opts{x}; $_ = '/' . $_ . '/'; $_ .= 'i' if ($Opts{i} and ($Opts{i} == 1 or ($simple and $_ eq lc $_))); } if ($Opts{k}) { $Perlexpr = join ",\n\t", map({"((" . $Perlexpr_k[$_] . ")\t && (\$Match_failed = 1, last))"} (0..$#Perlexpr_k)), map({"(\$Match$_ ||= (" . $Perlexpr[$_] . "))"} (0..$#Perlexpr)); } elsif ($Opts{O}) { $Perlexpr = join ",\n\t", map({"(\$Match$_ ||= (" . $Perlexpr[$_] . "))"} (0..$#Perlexpr)), ('(' . join(' && ', map {"\$Match$_"} (0 .. $#Perlexpr)) . ')'); } elsif ($Opt_oo) { $Perlexpr = join ",\n\t", map({"((" . $Perlexpr_k[$_] . ")\t and \$Match_failed = 1, last)"} (0..$#Perlexpr_k)), map({"((" . $Perlexpr[$_] . ")\t and \$Line_matched = \$Match$_ = 1)"} (0..$#Perlexpr)), "\$Line_matched"; } elsif (@Perlexpr > 1) { $Perlexpr = join "\n\t|| ", map {"($_)"} @Perlexpr; } else { $Perlexpr = $Perlexpr[0]; } $Perlexpr = 'not (' . $Perlexpr . ')' if $Opts{v}; # Check the PERLEXPR is valid Perl code. eval_ "if (0 and ($Perlexpr)) {}"; if ($@) { my $ee = join '', @Warnings, ⅇ if ((@Perlexpr + @Perlexpr_k) > 1) { # Determine first bad expression. foreach my $pe (@Perlexpr, @Perlexpr_k) { @Warnings = (); eval_ "if (0 and ($pe)) {}"; $@ and die_ "error in the Perl expression: $pe\n", @Warnings, ⅇ } } die_ "error in Perl expression: $Perlexpr\n$ee"; } } # build_Perlexpr sub process_options2 { $Opt_m ||= 0; # undef -> 0 # Do we need to convert CRLFs to newlines? $Opts{N} = 1 if defined $Input_record_separator or $Opts{a} > 1; if ($Is_Win32) { if ($Opts{N} or $Input_encoding) { $CRLF_to_newline = 1; } else { foreach my $code ($Perlexpr, $Code_per_line) { next unless defined $code; # Does the PERLEXPR appear to refer to *newlines*? ie. not \z. if ($code =~ /\$\/|\\n|\bchomp\b/ and $code !~ /\# PEG_NEWLINE_NEUTRAL/) { $CRLF_to_newline = 1; last; } } } # Do we need a ":crlf" layer on the output? if ($Opts{Z} or ($CRLF_to_newline and !($Opts{k} or $Opts{l} or $Opts{L} or $Opts{O}))) { $Needs_crlf_layer = 1; } } if ($Is_Win32 and !$Needs_crlf_layer) { $Newline = "\015\012"; $Newline_literal = "\\015\\012"; } else { $Newline = "\n"; $Newline_literal = "\\n"; } foreach my $m ($Context_matcher, $Context_matcher2) { next unless defined $m; eval_ "if (0 and ($m)) {}"; $@ and die_ "bad -z context matcher: $m\n", ⅇ } foreach my $code ($Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line) { next unless defined $code; $code =~ s/\bRETURN\b/ "; print \"$Col{filename}\$File$Col_Reset$Newline_literal\"; push \@Matched_files, \$File; return;" /eg; eval_ "if (0) { $code }"; $@ and die_ "bad -P code: $code\n", ⅇ } $Opts{K} = 0 if $Input_encoding; if ($Opts{K} and $Guess_encoding = $Env{PEG_GUESS_ENCODING}) { require Encode::Guess; eval { Encode::Guess->set_suspects(split /\s+/, $Guess_encoding) }; $@ and die_ "bad PEG_GUESS_ENCODING: $Guess_encoding:\n", ⅇ $Encode::Guess::NoUTFAutoGuess = 1; } $Opts{C} = 1 if ($Implicit_C and !($Opts{A} or $Opts{B})); $Opts{A} = $Opts{B} = 1 if $Opts{C}; # If '-+' specified, then ignore the peg result files unless they are # explicitly named on the command line eg. "*/*/peg*" if ($Opts{'+'}) { foreach my $file (@Cmdline_files) { if ($file =~ /\bpeg/) { $Opts{'+'} = 0; last if $Do_globbing; } elsif (!$Do_globbing) { $Opts{'+'} = 1; last; } } $Opts{'+'} = 0 if ($_ = $Opt_p_expr and /peg/); } if (@Cmdline_files) { # The single filename "-" indicates to read STDIN. if (@Cmdline_files == 1 and $Cmdline_files[0] eq '-') { @Cmdline_files = (); } } elsif ($STDIN_is_terminal and !($Opts{r} or $Opts{X} or $Opt_y)) { $Opts{r} = 1; } my ($glob_failed, $found_globbed_file); if ($Do_globbing) { my $dosglob = $Is_Win32 ? !$Env{PEG_USE_BSDGLOB} : $Env{PEG_USE_DOSGLOB}; my ($glob, @f, @glob_results); foreach my $f (@Cmdline_files) { if ($f =~ /\*/ or ($f =~ /\?/ and !($Is_Win32 and $f =~ /^[\\\/]{2}\?[^\?]+\z/)) or (!$dosglob and $f =~ /^~|\[.*\]|\{.*\}/ and not -e $f)) { my $f_orig = $f; $f =~ s|(\*\*+)| join '/', split //, $1 |eg; # **c -> */*c $glob ||= do { if ($dosglob) { require File::DosGlob; sub { my $pat = $_[0]; if ($pat =~ /\s/) { $pat =~ s|\\|\\\\|g; $pat =~ s|([\s'])|\\$1|g; } return File::DosGlob::glob($pat); }; } else { require File::Glob; sub { return File::Glob::bsd_glob($_[0]); }; } }; if (@glob_results = $glob->($f)) { if ($Is_Win32 and $f =~ /^\w:[^\\\/]/) { # Fix drive relative pathnames. # The pattern "D:*c" produces "D:./foo.c". Convert to "D:foo.c". for (@glob_results) { s|^(\w:)\./|$1| } } if ($f =~ /^(?:.*[\\\/])?\*\z/) { # A non specific glob eg. "src/*". foreach my $gf (@glob_results) { $Globbed{$gf} = 1 } } push @f, @glob_results; $found_globbed_file = 1; } else { warn_ "glob failed to match any files for: $f_orig" unless $Opt_s; $glob_failed = 1; } } else { push @f, $f; } } @Cmdline_files = @f; } if ($Opts{d}) { my @files; foreach my $f (@Cmdline_files) { if (-d $f) { push @Cmdline_dirs, $f; } else { push @files, $f; } } if ($Opts{d} > 1 and @files) { @Cmdline_dirs = (); } elsif (@Cmdline_dirs) { @Cmdline_files = @files; $Opt_d = 1; } } if ($Opt_y) { push @Cmdline_files, last_matches(); } unless (@Cmdline_files or $Opts{r} or $Opt_d or $Opts{X} or $Opt_y) { die_ "no files found" if $glob_failed; $Search_STDIN = 1; $Opts{a} ||= 1; $Opts{I} = 0; $Opts{J} = 0; $Opts{_} = 0; $Opt_p_expr = undef; $Slurp = undef; if ($Opts{K}) { warn_ "-K does not work on STDIN" unless $Opt_s; $Opts{K} = 0; } } $Slurp = undef if $Input_record_separator; $Opts{_} = 0 unless ($Opts{R} and $STDOUT_is_terminal and !$Opt_s); $Opts{_} = 0 if (@Cmdline_files == 1 and !($Opts{r} or $Opt_d or $Opt_y or $Opts{S} or $Opts{X})); $Opts{_} = 0 if ($Opts{X} and $STDIN_is_terminal); # cf. "find . | peg -XR_ foo" vs "peg -XR_ foo" if ($Opts{J} >= 2) { my $JJ_mode = 'sss'; # s=separated (gap); c=compact (no gap); h=header; d=disabled. if ($Opts{J} > 2) { $JJ_mode = 'sds'; if ($_ = $Env{PEG_JJ_MODE}) { /^[cdhs]{3}$/ or die_ "bad PEG_JJ_MODE: $_"; $JJ_mode = $_; } } $JJ_mode =~ /^(.)(.)(.)$/; # 1=terminal, 2=!terminal, 3=-R. $JJ_mode = $Opts{R} ? $3 : $STDOUT_is_terminal ? $1 : $2; if ($JJ_mode eq 'd') { $Opts{J} = 0; } elsif ($JJ_mode eq 's') { $JJ_gap = 1; } elsif ($JJ_mode eq 'h') { $Opts{J} = 1; } } if ($Opts{H}) { $Opts{h} = 0; $Opts{J} = 0 if $Opts{J} >= 2; } elsif (@Cmdline_files <= 1 and !($Opt_d or $Opts{r} or $Opts{S} or $Opt_y or $Opts{X} or $found_globbed_file)) { $Opts{h} = 1; $Opts{J} = 0 if $Opts{J} >= 2; } elsif ($Opts{J} >= 2 and $Opts{Z}) { $Opts{J} = 0; } elsif ($Opts{J}) { $Opts{h} = 1; } if ($Opts{I} == 3) { # -III := -I, but -a overrides it. $Opts{I} = $Opts{a} ? 0 : 1; } elsif ($Opts{a} and $Opts{I} == 1) { warn_ "possible conflict between -a and -I$Beep" unless $Opt_s; } if ($Opts{S}) { die_ "-S needs a %Peg_S" unless %Peg_S; my (@archive_exts, @non_archive_exts); while (my ($ext, $code) = each %Peg_S) { die_ "uppercase extension: $ext" if ($ext ne lc $ext); die_ "\$Peg_S{'$ext'} is not a valid CODE ref" unless ref $Peg_S{$ext} eq 'CODE' and defined &{$Peg_S{$ext}}; next if $ext eq '*'; if ($ext =~ s/^\*//) { push @archive_exts, $ext; $Peg_S{$ext} = $code; delete $Peg_S{"*$ext"}; } else { push @non_archive_exts, $ext; } } my $gen_re = sub { return unless @_; return "\\.(?i)(" . (join '|', map quotemeta, sort { length($b) <=> length($a) || $a cmp $b } @_) . ")\\z"; }; if ($Opt_pp_expr) { $Opt_pp_code = "sub pp {\n return 1 unless \@_;\n local \$_ = shift;\n"; $Opt_pp_code .= ' warn_ "V: in pp($_)";' . "\n" if $Verbose; $Opt_pp_code .= " return (($Opt_pp_expr)"; if (my $archive_re = $gen_re->(@archive_exts)) { $Opt_pp_code .= " or /$archive_re/" if $Opts{S} == 1; # -SS := no recurse } $Opt_pp_code .= ");\n}"; } else { $Opt_pp_code = 'sub pp { @_ ? 1 : 0 }'; } eval_ $Opt_pp_code; $@ and die_ "bad -pp code: $Opt_pp_code\n", ⅇ $S_handler_re = $gen_re->(@archive_exts, @non_archive_exts); $S_nonarchive_re = $gen_re->(@non_archive_exts); warn_ "-S cannot guess input encoding" if $Opts{K}; } if ($Opts{t}) { if ($Opts{r} or $Opt_d) { require File::Find; my @dirs = (($Opts{r} ? '.' : ()), @Cmdline_dirs); eval { File::Find::find({ @FileFind_opts, 'wanted' => sub { push @Cmdline_files, $File::Find::name if -f; }}, @dirs); }; $@ and die_ "File::Find::find failed: ", ⅇ $Opts{r} = $Opt_d = 0; } # Sort command line files according to their last modification time. # Always do non existent files last. my $new_first = ($Opts{t} % 2); my $mt; @Cmdline_files = map {$_->[0]} sort { ($new_first ? ($a->[1] <=> $b->[1]) : ($b->[1] <=> $a->[1])) || ($a->[0] cmp $b->[0]) } map { [$_, defined ($mt = -M $_) ? $mt : ($new_first ? 9e9 : -9e9)] } @Cmdline_files; } # XXX On Win32, Open3 does not like a redirected STDOUT. if ($Opts{r} and $STDOUT_is_terminal and $Opt_r_cmd = $Env{PEG_R_CMD}) { # XXX On Win32, use of fork() from a do'd script crashes on exit. if ($_ = $Env{PEG_R_FORK} and $Opts{r} == 1 and !($Called or $Opt_m == 2 or ($Is_Win32 and ($Opts{i} or $Perlexpr =~ m|/.*/i|)) # XXX or $Opts{c} == 2 or (($Opts{l} or $Opts{O}) and $Opts{n}) or $Opts{q} or $Opts{R} or $Opts{S} or $Opts{Z} or (grep { defined and /\# PEG_NO_FORK/ } $Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line, $Output_encoding))) { /^(\d),(\d{1,2})$/ and ($Worker_count, $Worker_work) = ($1, $2); $Slurp_maxsize = int($Slurp_maxsize / $Worker_count); $Opt_r_fork = 1; } $Opts{r} = 0; } if ($Opts{K}) { # An ASCII text lookup table: 9=tab, 10=LF, 13=CR, 32-126=isprint @Is_ascii_text = (undef) x 256; $Is_ascii_text[$_] = 1 for (9, 10, 13, 32..126); } if ($Opt_oo) { open($Buffer_fh, "+>", \$Buffer_contents) or die_ "can't open: $!"; binmode($Buffer_fh, $Input_encoding ? ":utf8" : ":raw") or die_ "binmode failed: $!"; } } # process_options2 sub help { my $opt = shift; if (defined $opt) { $opt =~ s/^-?(.).*/$1/; $opt = 'A' if $opt =~ /^[BC\d]$/; my @out; while () { if (/^=item\s+B<-\Q$opt/) { push @out, help_line($_, 0); last; } } die_ "no such option '$opt'" unless @out; my $over = 0; while () { if (/^=over/) { ++$over } elsif (/^=back/) { last unless $over-- > 0 } else { last if (/^=/ and !$over); push @out, help_line($_, $over); } } # Strip consecutive blank lines. print "\n"; my ($is_empty, $last_empty); while (defined ($_ = shift @out)) { $is_empty = /^\s*$/; print unless ($is_empty and ($last_empty or !grep /\S/, @out)); $last_empty = $is_empty; } print "\n"; } elsif (-t STDOUT) { system qq(perldoc "$0"); } else { print qx(perldoc "$0"); } exit; } # help sub help_line { my ($line, $over) = @_; return '' if ($line =~ /^=(over|back)/); my $title = ($line =~ s/^=item\s+//) ? 1 : 0; if ($line =~ /^\S/) { # NB. indented POD is verbatim $line =~ s/\bB<(.+?)>/"$1"/g; # bold $line =~ s/\bI<(.+?)>/*$1*/g; # italic $line =~ s/\bC<([\$\%\@]\S*?)>/$1/g; # code1 $line =~ s/\bC<(\S+?)>/"$1"/g; # code2 $line =~ s/\bC<< (.+?) >>/``$1''/g; # code3a $line =~ s/\bC<(.+?)>/``$1''/g; # code3b $line =~ s/\bL<(.+?)\/(.+)>/$2 in the $1 manpage/g; # link $line =~ s/\b\w<(.+?)>/$1/g; # other } my $indent = ' ' x (2 - $title + 2*$over); my @lines = ("$indent$line"); if ($title and $line !~ /^\*$/) { push @lines, ($indent . ("=" x (length($line) - 1)) . "\n"); } return @lines; } # help_line sub show_debug { my $verbose = $Opts{D} > 1; my $i; if ($verbose) { print "# peg v$VERSION $0\n\n"; print "# Perl version $] $^X\n\n"; print "# cwd => @{[ cwd() ]}\n\n"; } if (@Ini_files) { print "# Ini files =>\n"; foreach my $ini_file (@Ini_files) { print "\t$ini_file\n"; next unless $verbose; open(my $fin, "<", $ini_file) or (print "open failed: $!\n"), next; print " $.:\t$_" while (<$fin>); print "\n"; } print "\n"; } else { print "# No ini files\n\n"; } if ($verbose) { my @env; if (@env = grep { /^PEG_/ and !exists $ENV{$_} } keys %Env) { print "# Env =>\n"; printf "\t%-12s = %s\n", $_, $Env{$_} for sort @env; print "\n"; } if (@env = grep /^PEG_/, keys %ENV) { print "# ENV =>\n"; printf "\t%-12s = %s\n", $_, $ENV{$_} for sort @env; print "\n"; } print "# HOME directory => $HOME_dir\n\n"; print "# Bin directory => $Bin_dir\n\n"; print "# STDIN is not a terminal\n\n" unless $STDIN_is_terminal; print "# STDOUT is not a terminal\n\n" unless $STDOUT_is_terminal; if ($Opts{'#'}) { print "# Colors =>\n"; printf "\t%-12s %s<#>$Col_Reset\n", $_, $Col{$_} for sort keys %Col; print "\n"; } print "# \%INC =>\n"; printf "\t%-24s = %s\n", $_, $INC{$_} for sort keys %INC; print "\n"; print "# keys %Peg_S =>\n", map({"\t$_\n"} sort keys %Peg_S), "\n" if keys %Peg_S; my @longopts = sort grep !/^help$/, keys %Peg_longopt; print "# keys %Peg_longopt =>\n", map({"\t$_\n"} @longopts), "\n" if @longopts; } if (@Peg_options_ARGV) { $i = 0; print "# PEG_OPTIONS =>\n"; print "\t", ++$i, ": ", $_, "\n" for @Peg_options_ARGV; print "\n"; } $i = 0; print "# ARGV =>\n", map({("\t", ++$i, ": ", $_, "\n")} @ARGV), "\n"; print "# Enabled options => "; print join '', map {$_ x $Opts{$_}} sort grep $Opts{$_}, keys %Opts; print "\n\n"; print "# Reading from STDIN\n\n" if $Search_STDIN; print "# PEG_R_CMD => $Opt_r_cmd\n\n" if $Opt_r_cmd; print "# PEG_R_FORK => $Worker_count x $Worker_work\n\n" if $Opt_r_fork; print "# -pp code =>\n\n$Opt_pp_code\n\n" if $Opt_pp_code; if (@Cmdline_files) { print "# Command line files (@{[ scalar @Cmdline_files ]}) =>\n"; print map {"\t$_\n"} @Cmdline_files if (@Cmdline_files < 10 or $verbose); print "\n"; } if (@Cmdline_dirs) { print "# Command line directories (@{[ scalar @Cmdline_dirs ]}) =>\n"; print map {"\t$_\n"} @Cmdline_dirs if (@Cmdline_dirs < 10 or $verbose); print "\n"; } foreach my $v (qw(MTime_new MTime_old)) { my $t = eval "\$$v" or next; printf "# %s => %-24s %s\n\n", $v, $t, (localtime($^T - 24*60*60*$t) || '?'); } print "# Internal Perl code =>\n$Search\n"; print "# Warnings =>\n", @Warnings, $Beep, "\n" if @Warnings; exit; } # show_debug sub near { @_ == 1 or @_ == 2 or die "usage: near(PATTERN|SUB ?,RANGE?)\n"; my $arg = shift; my $arg_is_sub = (ref $arg eq 'CODE'); my $N = 10; my $start = 0; if (@_) { my $range = shift; $range =~ /^(!+)?(\d+|\*)$/ or die "bad RANGE argument to near(): $range\n"; $start = 1 if $1; $N = ($2 eq '*') ? @P : $2; } $N = @P if $N > @P; my ($line, $matched); eval { for (my $i = $start; $i <= $N; ++$i) { $line = ($i == 0) ? $_ : $P[-$i]; # NB. $_ is the current line if ($arg_is_sub) { local $_ = $line; $matched = $arg->(); } else { $matched = ($line =~ /$arg/); } last if $matched; } }; $@ and die "error in near():\n", ⅇ return $matched ? 1 : 0; } # near sub nearq { my $str = shift; die "nearq: expected string: $str\n" if ref $str or !length $str; return near(quotemeta($str), @_); } # nearq sub nearx { my $str = shift; my $range = @_ ? "!$_[0]" : "!10"; die "nearx: expected string: $str\n" if ref $str or !length $str; return near(quotemeta($str), $range); } # nearx { my %regexps; sub nearby { my $N = 10; if (@_ and ref($_[0]) eq 'SCALAR') { $N = ${ +shift } } @_ >= 2 or die "usage: nearby(?\\N,? PAT1, PAT2 ...)\n"; my ($i, $j, $regexp, @regexps); foreach my $pat (@_) { eval { push @regexps, ($regexps{$pat} ||= qr/$pat/) }; $@ and die "error in nearby pattern: $pat\n", ⅇ } my $match_idx = -1; for ($i = 0; $i < @regexps; ++$i) { $regexp = $regexps[$i]; if ($_ =~ /$regexp/) { # NB. $_ is the current line return 1 if ($match_idx != -1); $match_idx = $i; } } return 0 if $match_idx == -1; $N = @P if $N > @P; for ($i = 0; $i < @regexps; ++$i) { next if $i == $match_idx; $regexp = $regexps[$i]; for ($j = 1; $j <= $N; ++$j) { return 1 if ($P[-$j] =~ /$regexp/); } } return 0; } # nearby } sub colorall { my $pattern = shift; die "usage: colorall(PATTERN ?,COLOR_DEFINITION?)" unless length $pattern; my $match_col = $Col{match}; if (@_) { my $col_def = shift; unless (exists $Col{$col_def}) { eval { $Col{$col_def} = get_col($col_def) }; $@ and die "colorall: bad color '$col_def'\n"; } $match_col = $Col{$col_def}; } my $matches = 0 + eval { s/($pattern)/ $match_col . $1 . $Col{nonmatch} /eg; }; $@ and die "error in colorall:\n", ⅇ return $matches; } # colorall sub Z_display { my $file = shift; unless (defined $Z) { warn_ +(defined $file ? "$file: " : ()), "\$Z is not defined" unless $Opt_s; return; } my $file_colon = ''; if (defined $file) { if ($Opts{J}) { print header($file); } else { $file_colon = $Col{filename} . $file . $Col{colon} . ':' . $Col_Reset; } } if ($Opts{Z} >= 3 and $Opts{Z} <= 4) { require Data::Dumper; print $file_colon, "\n" if $file_colon; print Data::Dumper->Dump([$Z], ['Z']), "\n"; } elsif (ref($Z) eq 'HASH') { my $numeric_cmp = 1; foreach my $v (values %$Z) { unless (defined $v and $v =~ /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/) { $numeric_cmp = 0; last; } } # -ZZZZZ := -Z but do not numerically sort the keys. my @keys = ($numeric_cmp and $Opts{Z} <= 2) ? (sort { $Z->{$b} <=> $Z->{$a} || $a cmp $b } keys %$Z) : (sort keys %$Z); my $sep = ($Opts{T} ? "\t=> " : " => "); print $file_colon, "\n" if $file_colon; foreach my $key (@keys) { my $v = $Z->{$key}; chomp_ $key; chomp_ $v; print $key, (defined $v ? "$sep$v" : ()), "\n"; } } elsif (ref($Z) eq 'ARRAY') { print $file_colon, "\n" if $file_colon; foreach my $v (@$Z) { chomp_ $v; print $v, "\n"; } } else { chomp_ $Z; print $file_colon, $Z, "\n"; } } # Z_display sub build_search { my ($gap, $nonmatch_print, $output, $print); my $context = ($Opts{A} || $Opts{B}); if ($Opts{c} or $Opts{k} or $Opts{L} or $Opts{Z}) { $output = undef; } elsif ($Opts{l} or $Opts{O}) { $output = '"'; $output .= "$Col{lineno}\@{[scalar \@Matched_files]}$Col_Reset\\t" if $Opts{n}; $output .= '$File' . ($Opts{l} > 1 ? '\0' : $Newline_literal) . '"'; } else { $output = ''; $output = "\$Offset:" if $Opts{b}; $output = "\$.:$output" if $Opts{n}; $output = "\$File:$output" if !$Opts{h}; $output = "\"$output\$_\"" if $output; } if (defined $output) { $print = 'print' . ($output ? " $output" : '') . ';'; $print .= ' last;' if ($Opts{l} or $Opts{O}); } if ($context) { $output ||= '$_'; $gap = ($Opts{A} ? $After : 0) + ($Opts{B} ? $Before : 0); ($nonmatch_print = $print) =~ s/:/-/g; $output =~ s/:/-/g; $Perlexpr = "(\$Matches < $Max_matches) && ($Perlexpr)" if $Opt_m; } if ($Opts{T}) { my $sep = $Col_Reset . "\\t"; for ($output, $print, $nonmatch_print) { defined and s/^(.*[:\-])/$1$sep/; } } # Are any of the AUTOVARS used? my ($needs_reset, $uses_C, $uses_F, $uses_Filepath, $uses_P, $uses_S); foreach my $code ($Perlexpr, $Code_per_line) { next unless defined $code; $uses_C ||= ($code =~ /\$C\b/); $uses_F ||= ($code =~ /\$F\[|\@F\b/); $uses_P ||= ($code =~ /\bnear(?:by|q|x)?\(|\$P\[|\@P\b/); } foreach my $code ($Perlexpr, $Code_per_line, $Code_before_close) { next unless defined $code; $uses_S ||= ($code =~ /\$S[\[\{]|[\@%]S\b/); } foreach my $code ($Perlexpr, $Code_after_open, $Code_before_close, $Code_before_open, $Code_per_line, $Opt_p_expr) { next unless defined $code; $uses_Filepath ||= ($code =~ /\$Filepath\b/); $needs_reset ||= ($code =~ /[\$\@\%][a-z]/); } if ($uses_C and !$Context_matcher) { warn_ "\$C requires -z option$Beep" unless $Opt_s; $uses_C = undef; } elsif (!($uses_C or $Print_context_matcher)) { $Context_matcher = undef; } if ($Opts{'#'}) { for ($output, $print, $nonmatch_print) { next unless defined; s|\$File\\([n0])|$Col{filename}\$File$Col_Reset\\$1| or s|\$File|$Col{filename}\$File|; s|\$\.|$Col{lineno}\$.|; s|\$Offset|$Col{offset}\$Offset|; s|([:\-])|$Col{colon}$1|g; } for ($output) { last unless defined; if (!length $_) { $_ = '$_' } s|^\$_$|"$Col{nonmatch}\$_$Col_Reset"| or s|\$_|$Col{nonmatch}\$_$Col_Reset|; } for ($print) { last unless defined; last if ($Opts{k} or $Opts{l} or $Opts{L} or $Opts{O}); s|;$||g; my $orig_print = $_; my $ensure_newline = !$Opts{N} ? '' : ($Perlexpr =~ /\bcolorall\b/) ? q[, (/\n(?:\Q] . $Col_Reset . q[\E)?\z/ ? () : "\n")] : q[, (/\n\z/ ? () : "\n")]; my $cl = "$Col{nonmatch}\$`$Col{match}\$&$Col{nonmatch}\$'$Col_Reset"; s|\$_|$cl| or s|^print$|print "$cl"|; if ($Simple_Perlexpr) { $_ .= $ensure_newline . ';'; } else { # Fix the case where $`$&$' is not the same as $_ eg. peg -# "/(...)/ and $_=$1" $orig_print =~ s|^print$|print "$Col{nonmatch}\$_$Col_Reset"| or $orig_print =~ s|\$_|$Col{nonmatch}\$_$Col_Reset|; $_ = q{($_ eq "$`$&$'")} . "\n\t? ($_$ensure_newline)\n\t: ($orig_print$ensure_newline);"; } } for ($nonmatch_print) { last unless defined; my $ncl = "$Col{nonmatch}\$_$Col_Reset"; s|\$_|$ncl| or s|print(;?)$|print "$ncl"$1|; } # Remove redundant color resets: for ($output, $print, $nonmatch_print) { next unless defined; while (s|(\Q$Col_Reset\E[\$\.\:\-\w\s]*)\Q$Col_Reset\E|$1|) {} s/\"\Q$Col_Reset\E/\"/; } } $Count = $Matches = 0; my $Opt_B = ($Opts{B} and $Before > 0); my $print_header = ($Opts{J} and !$Opts{Z}); my $use_First = ($print_header or $context or !($Opts{L} or $Opts{k})); my $Opt_b_bytes = ($Opts{b} and $Opts{b} < 3); my $Opt_b_column = ($Opts{b} and $Opts{b} >= 3); my $Opt_b_unixoffset = ($Opts{b} == 2 and !($Opts{K} or $Input_encoding)); my $assign_Offset = '$Offset = ' . ($Env{PEG_B_HEX} ? 'sprintf "%#x", ' : '') . 'tell(F)' . ($Opt_b_unixoffset ? ' - $CRs' : '') . ';'; my $fix_newline = 's/\015?[\012\015]\z//; $_ .= "\n";'; my $Col_File = $Col{filename} . '$File' . $Col_Reset; # The context matching code's circular buffer method does not work when # multiple lines are treated as one eg. if $Code_per_line merges backslashed # lines into one, then the sequence of $.'s muddles $Before[$. % $Before] # So provide a mechanism to force the use of v1's safe (but slow) push/shift stack method. my $safe_before_context = ($Opt_B and $_ = $Code_per_line and /\# PEG_SAFE_BEFORE_CONTEXT/); # "peg -l +1 ..." is a special case. It skips reading the file. my $plus_one = ($Opts{l} and $Perlexpr eq '+1'); my $needs_Binary_file = (($Opts{I} || !($Opts{a} || $Opts{c} || $Opts{l} || $Opts{L} || $Opts{Z})) && !$plus_one); my $skip_open = ($_ = $Code_before_open and /goto process_file/); my ($sysread_slurp, $irs_slurp, $quick_no_match_test); if ($Slurp and !($plus_one or $skip_open)) { if ($Opts{k} or $Opts{l} or $Opts{L} or $Opts{O}) { if ($Opts{K} or $Input_encoding) { $irs_slurp = 1; } else { $sysread_slurp = 1; } } elsif (!($Opts{K} or $Input_encoding or $Opts{a} > 1)) { $quick_no_match_test = 1; } } my $qfind_only = ($_ = $Opt_r_cmd and /qfind/ and !(@Cmdline_files or $Opt_d or $Opts{X})); my $save_context = ($_ = $Code_per_line and /\$Printed_Context_line/); $Opt_r_fork = undef if $plus_one; $Search = "sub search {\n"; $Search .= " warn_ \"V: in search()\";\n" if $Verbose; $Search .= " local \$/ = $Input_record_separator;\n" if $Input_record_separator; if ($Opts{S}) { $Search .= " if (defined \$S_FILE) {\n"; $Search .= " \$File = \$S_FILE;\n \$S_FILE = undef;\n"; $Search .= " warn_ \"V: called via S() File=\$File\";\n" if $Verbose; $Search .= " \$Filepath = \$File;\n" if $uses_Filepath; $Search .= " \$_ = \$File; return unless ($Opt_pp_expr);\n" if $Opt_pp_expr; $Search .= " *F = \$S_F;\n"; if ($Input_encoding) { my $layer = ":encoding($Input_encoding)"; $layer = ":raw:perlio$layer" if ($Is_Win32 and $Opt_b_bytes); # XXX workaround :crlf & tell() bug $Search .= " binmode(F, '$layer')\n or " . ($Opt_s ? '' : "(warn_ \"binmode '$layer' failed: \$!\"), ") . "return;\n"; } $Search .= " show_progress(\$File);\n" if $Opts{_}; $Search .= " goto process_file;\n"; $Search .= " }\n"; } ####$Search .= q{ print "DBG: _=$_\tF:F:n=$File::Find::name", (0 ? "\tF:F:d=$File::Find::dir\n" : ()) ,"\n";} . "\n"; ####$Search .= q{ print "DBG: cwd=", cwd(), "\n";} . "\n"; if ($Opts{'+'}) { $Search .= " return if /\\bpeg_\\d+\\.txt\\z/;\n"; } elsif ($Opts{R}) { $Search .= " return if /\\b@{[ quotemeta $R_file ]}\\z/;\n"; } $Search .= " \$File = \$File::Find::name;\n"; $Search .= " warn_ \"V: File=\$File\";\n" if $Verbose; unless ($qfind_only) { $Search .= ' $File =~ s|^\.' . ($Is_Win32 ? "[/\\\\]" : '/') . "||;\n"; $Search .= ' $_ =~ s|^\.' . ($Is_Win32 ? "[/\\\\]" : '/') . "||;\n"; # Needed on Win32 for almost "too long" filenames. } $Search .= " \$File =~ s|/|\\\\|g;\n" if $Opts{"\\"} == 1; $Search .= " \$File =~ s|\\\\|/|g;\n" if $Opts{"\\"} == 2; $Search .= " \$Filepath = \$_;\n" if $uses_Filepath; $Search .= " show_progress(\$File);\n" if $Opts{_}; # cf. "peg -S -p .zip foobar". The -p should only apply to files on the # filesystem and not to files within the zip archives. $Search .= " warn_ \"V: applying -p test\";\n" if ($Verbose and ($Opt_p_expr or $Opt_pp_expr)); if ($Opt_p_expr) { if ($Opts{S}) { if ($Opt_pp_expr) { $Search .= " return unless (\$Inside_archive ? ($Opt_pp_expr) : ($Opt_p_expr));\n"; } else { $Search .= " return unless (\$Inside_archive or ($Opt_p_expr));\n"; } } else { $Search .= " return unless ($Opt_p_expr);\n"; } } elsif ($Opt_pp_expr) { $Search .= " return unless (!\$Inside_archive or ($Opt_pp_expr));\n"; } if ($MTime_new or $MTime_old) { # Allow non existant files to trigger "can't open" error. $Search .= " warn_ \"V: applying -M test\";\n" if $Verbose; $Search .= " \$MTime = -M \$_;\n return unless ("; $Search .= '$Inside_archive or ' if $Opts{S}; $Search .= $plus_one ? 'defined $MTime and ' : '!defined $MTime or '; if (!$MTime_old) { $MTime_new = $MTime_new->[0]; $Search .= "\$MTime <= \$MTime_new);\n"; } elsif (!$MTime_new) { $MTime_old = $MTime_old->[0] - $MTime_old->[1]; $Search .= "\$MTime >= \$MTime_old);\n"; } else { if ($MTime_new->[0] > $MTime_old->[0]) { # Ensure range makes sense. ($MTime_new, $MTime_old) = ($MTime_old, $MTime_new); } $MTime_new = $MTime_new->[0] - $MTime_new->[1]; $MTime_old = $MTime_old->[0]; $Search .= "(\$MTime >= \$MTime_new and \$MTime <= \$MTime_old));\n"; } } $Search .= " ++\$Total_files;\n" if $Opts{'%'}; if ($Opts{S}) { my $star_handler = exists $Peg_S{'*'}; $Search .= " if (-f \$_ and \$File =~ /$S_handler_re/) {\n" unless $star_handler; $Search .= ' my $ext = ' . ($star_handler ? "'*'" : 'lc $1') . ";\n"; $Search .= " warn_ \"V: calling '\$ext' -S handler\";\n" if $Verbose; $Search .= ' my $ok = eval { $Peg_S{$ext}->($_, $File) };' . "\n"; $Search .= ' $@ and die_ "-S handler error: $File\n$@";' . "\n"; $Search .= " return if \$ok;\n"; $Search .= " warn_ \"V: -S handler returned false - continuing search\";\n" if $Verbose; $Search .= " }\n" unless $star_handler; $Search .= " return unless ($Opt_pp_expr);\n" if $Opt_pp_expr; } if ($plus_one) { $Search .= " return unless -f \$_;\n" unless ($qfind_only or $Opts{S}); # "peg -l +1 *" should not show directory names $Search .= " $Code_before_open;\n" if $Code_before_open; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " print \"" . ($Opts{n} ? "$Col{lineno}\@{[scalar \@Matched_files]}$Col_Reset\\t" : '') . $Col_File . ($Opts{l} > 1 ? '\0' : $Newline_literal) . "\";\n"; $Search .= " return;\n\n"; } $Search .= " $Code_before_open;\n" if ($Code_before_open and !$plus_one); $Search .= " warn_ \"V: open'ing file\";\n" if $Verbose; if ($Search_STDIN) { $Search .= ' open(F, "<-") or die_ "cannot open STDIN: $!";' . "\n"; if ($Input_encoding) { # NB. doesn't handle BOMs $Search .= " binmode(F, ':encoding($Input_encoding)')\n"; $Search .= " or die_ \"binmode failed on STDIN with $Input_encoding: \$!\";\n"; } elsif ($Is_Win32) { $Search .= " binmode F;\n"; } } else { # Do not check for a directory and then skip the open as we must still # allow for "peg x a_directory" to at least try to read it (sometimes can). # On systems that perform filename globbing there is the dilemma of whether # to warn about not being able to open a directory. There are two distinct # use cases: "peg x *" (no warning is preferable) and "peg x afile adir" # (a warning is preferable). The compromise solution is to warn unless -ss. my $warn_on_failed_open_code = ''; if (%Globbed) { $warn_on_failed_open_code = '(exists $Globbed{$_} and -d $_) or '; # cf. "peg main *" vs "peg main *c" } elsif ($qfind_only) { # qfind does not output directories. } elsif ($Opt_r_cmd or $Opts{r} or $Opt_d or $Opts{X} or (!$Do_globbing and $Opt_ss)) { $warn_on_failed_open_code = '-d $_ or '; # cf. "find . | peg -X foo" } if ($Opts{K}) { $Search .= " \$Wide_chars = 0;\n" if $Opt_oo; $Search .= ' *F = magic_open($_, $File)'; } else { my $layer = ''; if ($Input_encoding) { $layer = ":encoding($Input_encoding)"; $layer = ":raw:perlio$layer" if ($Is_Win32 and $Opt_b_bytes); # XXX workaround :crlf & tell() bug } $Search .= ' open(F, "<' . $layer . '", $_)'; } $Search .= $Opt_s ? " || return;\n" : "\n || ((${warn_on_failed_open_code}print STDERR \"peg: can't open \$File: \$" . ($Opts{K} ? 'Err' : '!') . "\\n\"), return);\n"; if ($Is_Win32 and !($Opts{K} or $Input_encoding or ($_ = $Input_record_separator and /\\n/))) { $Search .= " binmode F;\n"; } } # Stop if the output channel goes eg. if running thro' a pager which quits: $Search .= " print '' or goto done;\n" unless ($STDOUT_is_terminal or $Opts{R}); # NB. need to local-ise $/ if there are any outer readline()s ie. or -X's . $Search .= ' ' . (($Opt_r_cmd or $Opts{X}) ? 'local ' : '') . '$/ = (-s F < ' . $Slurp_maxsize . ') ? undef : "\n";' . "\n" if $irs_slurp; $Search .= " \$Size = -s F;\n \$Slurp = (\$Size < $Slurp_maxsize);\n" if $sysread_slurp; $Search .= "process_file:\n" if ($Opts{S} or $skip_open); if ($Opts{S}) { $Search .= " \$File =~ s|/|\\\\|g;\n" if $Opts{"\\"} == 1; $Search .= " \$File =~ s|\\\\|/|g;\n" if $Opts{"\\"} == 2; } if ($needs_Binary_file) { $Search .= " eval { \$Binary_file = -B F };\n"; $Search .= ' $@ && (' . ($Opt_s ? '' : '(print STDERR "peg: error reading $File: ", &ee), ') . "close(F), return);\n"; $Search .= ' warn_ "V: file is ", ($Binary_file ? "" : "not "), "binary";' . "\n" if $Verbose; $Search .= ' $Binary_file ' . ($Opts{I} == 1 ? '&&' : '||') . " (close(F), return);\n" if $Opts{I}; } $Search .= " reset 'a-z';\n" if $needs_reset; $Search .= " \$After = $After;\n" if $Opts{A}; $Search .= " \@Before = ();\n" if $Opt_B; $Search .= " \$C = undef;\n" if $uses_C; $Search .= " \$Context_line = undef;\n" if $Context_matcher; $Search .= " \$Context_line2 = undef;\n" if $Context_matcher2; $Search .= " \$Count = 0;\n" if ($Opts{c} == 1 or $Opts{c} >= 3); $Search .= " \$Matches = 0;\n" if $Opt_m == 1; $Search .= " \$CRs = 0;\n" if $Opt_b_unixoffset; $Search .= " \$First = 1;\n" if $use_First; $Search .= " \$Found = 0;\n" if $Opts{L}; $Search .= ' ' . join("\n\t= ", map {"\$Match$_"} (0..$#Perlexpr)) . " = 0;\n" if (($Opts{k} and @Perlexpr) or $Opts{O} or $Opt_oo); $Search .= " \$Match_failed = 0;\n" if @Perlexpr_k; $Search .= " \@P = ();\n" if $uses_P; $Search .= " \$Printed_Context_line = '';\n \$Printed_Context_line2 = '';\n" if $save_context; $Search .= " \%S = ();\n" if $uses_S; $Search .= " undef \$Z;\n" if ($Opts{Z} % 2); if ($Opt_oo) { $Search .= ' seek($Buffer_fh, 0, 0) or die_ "seek failed: $!";' . "\n"; $Search .= " \$Buffer_contents = '';\n"; $Search .= ' binmode($Buffer_fh, $Wide_chars ? ":utf8" : ":raw") or die_ "binmode failed: $!";' . "\n" if $Opts{K}; $Search .= " my \$Orig_fh = select;\n select \$Buffer_fh;\n"; } $Search .= " $Code_after_open;\n" if $Code_after_open; ####$Search .= ' print "DBG: $File: ", (join ", ", PerlIO::get_layers(\*F, details => 1)), " pos=", tell(F), "\n";' . "\n"; $Search .= " eval {\n"; if ($quick_no_match_test) { # This is an optimisation based on the assumption that most files do not match. $Search .= " warn_ \"V: doing quick match test\";\n" if $Verbose; $Search .= " \$Size = -s F;\n"; $Search .= " if (\$Size < $Slurp_maxsize) {\n"; # NB. return's below jump to end of enclosing eval block. $Search .= ' sysseek(F, 0, 0) or ' . ($Opt_s ? '' : q{(print STDERR "peg: sysseek failed $File: $!\n"), }) . "return;\n" if $needs_Binary_file; $Search .= ' $Bytes_read = sysread(F, $_, $Size);' . "\n"; $Search .= ' defined $Bytes_read or ' . ($Opt_s ? '' : q{(print STDERR "peg: sysread failed $File: $!\n"), }) . "return;\n"; $Search .= ' $Bytes_read == $Size or ' . ($Opt_s ? '' : q{(print STDERR "peg: slurp failed $File\n"), }) . "return;\n"; $Search .= " return unless ($Perlexpr);\n"; $Search .= ' seek(F, 0, 0) or ' . ($Opt_s ? '' : q{(print STDERR "peg: seek failed $File: $!\n"), }) . "return;\n"; $Search .= " }\n"; } $Search .= " $assign_Offset\n" if $Opt_b_bytes; $Search .= " warn_ \"V: reading file\";\n" if $Verbose; # Reading a file using a single sysread is quicker than using "$/=undef". if ($sysread_slurp) { $Search .= " while (1) {\n"; $Search .= " if (\$Slurp) {\n"; $Search .= " last if \$Slurp == -1; \$Slurp = -1;\n"; $Search .= ' sysseek(F, 0, 0) or ' . ($Opt_s ? '' : q{(print STDERR "peg: sysseek failed $File: $!\n"), }) . "last;\n" if $needs_Binary_file; $Search .= " \$Bytes_read = sysread(F, \$_, \$Size);\n"; $Search .= ' defined $Bytes_read or ' . ($Opt_s ? '' : q{(print STDERR "peg: sysread failed $File: $!\n"), }) . "last;\n"; $Search .= ' $Bytes_read == $Size or ' . ($Opt_s ? '' : q{(print STDERR "peg: slurp failed $File\n"), }) . "last;\n"; $Search .= " } else {\n"; $Search .= " \$_ = readline(*F);\n"; $Search .= " last unless defined;\n"; $Search .= " }\n"; } else { $Search .= " while () {\n"; } $Search .= " \$Line_matched = 0;\n" if $Opt_oo; $Search .= " \$CRs += tr/\\015/\\015/;\n" if $Opt_b_unixoffset; $Search .= " s/\\015?[\\012\\015]\\z/\\n/;\n" if ($CRLF_to_newline and !$Input_record_separator); # Handling "-/ INPUT_RECORD_SEPARATOR" needs special newline handling: # 1. peg -/ "qq(\r)" "/\w$/" macfile.txt -- the $ matches \n's NOT $/'s! # 2. peg -/ "'<'" /=/ foo.xml -- the 'lines' may contain multiple internal newline. Need to fix all of them not just a trailing one. $Search .= " s/(?:\\015\\012|\\012|\\015)/\\n/g; $fix_newline\n" if $Input_record_separator; ####$Search .= q< print "DBG: ", join ' ', unpack("C*", $_), "\n"; next;> . "\n"; $Search .= " ++\$Total_lines;\n" if $Opts{'%'}; if ($Opts{a} > 1) { # 1. fix utf16 strings of ASCII chars # 2. remove unprintable characters $Search .= ' s/((?:\0[[:print:]\t\n\r]){2,})/ my $Str = $1; $Str =~ s|\0+||g; $Str /eg;' . "\n"; $Search .= ' s/[^[:print:]\t\n\r]+/ /g;' . "\n"; } $Search .= " \$P = \$_;\n" if ($uses_P or $Opts{W}); if ($Context_matcher) { my $needs_local; # if the context matching code modifies $_. # Provide buyout from expensive "local $_ = $_" if context matcher: # a) modifies $_ in order to change the context line, AND # b) does not care that this will be used by PERLEXPR. unless ($Context_matcher =~ /\# PEG_FAST_Z_CONTEXT/) { foreach my $code ($Context_matcher, $Context_matcher2) { next unless defined $code; if ($code =~ /\$_\s*\.?=[^~]/) { $needs_local = 1; last; } # Allow "$var =~ s/foo/bar/", but not a $_ modifiying "s/foo/bar/". while ($code =~ /(.*?)\bs\//g) { unless ($1 =~ /\$(?:_\w+|[a-zA-Z]\w*)\s*=~\s*\z/) { $needs_local = 1; last; } } } } $Search .= " {local \$_ = \$_;\n" if $needs_local; $Search .= " if ($Context_matcher) {\n"; $Search .= " \$C = \$_;\n" if $uses_C; $Search .= " \$Context_line = \$_;\n"; $Search .= " \$Context_lineno = \$.;\n"; $Search .= " }\n"; if ($Context_matcher2) { $Search .= " if ($Context_matcher2) {\n"; $Search .= " \$Context_line = undef;\n" unless $Env{PEG_Z_INDEPENDENT}; $Search .= " \$Context_line2 = \$_;\n"; $Search .= " \$Context_lineno2 = \$.;\n"; $Search .= " }\n"; } $Search .= " }\n" if $needs_local; } $Search .= " \@F = split;\n" if $uses_F; $Search .= ' @S = (); while (/(\w+)/g) { push @S, $1; ++$S{$1} };' . "\n" if $uses_S; $Search .= " $Code_per_line;\n" if $Code_per_line; $Search .= " shift \@Before if (\@Before > $Before);\n" if $safe_before_context; $Search .= " \$Offset = 1;\n" if ($Opt_b_column and $context); # Need to clear $& to avoid possible false coloring of a matched line where $& is due to the context match and not PERLEXPR. $Search .= " 'X' =~ /X/;\n" if (($Opts{'#'} and ($Context_matcher or $Code_per_line) and !$Simple_Perlexpr) or $Opt_b_column); $Search .= " study;\n" if ((@Perlexpr + @Perlexpr_k) >= 20); if ($Opts{W}) { $Search .= " \$Matched = ($Perlexpr) ? 1 : 0;\n"; $Search .= " \$_ = \$P;\n"; $Search .= " if (\$Matched) {\n"; } else { $Search .= " if ($Perlexpr) {\n"; } $Search .= " $::Code_on_match\n" if $::Code_on_match; # undocumented hook NB. code should localise $& etc. $Search .= " next;\n" if $Opts{k}; unless ($Opts{L}) { $Search .= " exit;\n" if $Opts{q}; $Search .= ' $First && push @Matched_files, $File;' . "\n" unless $Opt_oo; } $Search .= " \$Binary_file && ((print \"Binary file $Col_File matches$Newline_literal\"), last);\n" unless ($Opts{a} or $Opts{c} or $Opts{I} == 1 or $Opts{l} or $Opts{L} or $Opts{Z}); $Search .= " \$Offset = \$-[0] + 1;\n" if $Opt_b_column; $Search .= " $fix_newline\n" if ($Opts{N} and !$Opts{'#'}); $Search .= " ++\$Count;\n" if $Opts{c}; $Search .= " ++\$Matches;\n" if $Opt_m; $Search .= " \$Found = 1;\n last;\n" if $Opts{L}; if ($print_header) { $Search .= " print "; if ($Opts{J} == 1) { $Search .= "header(\$File)"; } else { # NB. can't always rely on @Matched_files. $Search .= ($Opt_r_fork or $Opt_oo) ? "\"$Newline_literal\", " : "+(\@Matched_files > 1 ? \"$Newline_literal\" : ''), " if $JJ_gap; $Search .= "\"$Col_File$Newline_literal\""; } $Search .= " if \$First;\n"; } if ($context) { # Insert "--" separator when appropriate.. # NB. can't rely on $Matched_before if fork'ing. $Search .= " print \"--$Newline_literal\" if ("; $Search .= $Opts{J} ? '(!$First && ' : (($Opt_r_fork ? '' : '$Matched_before++ && ') . '($First || '); $Search .= "(\$After > $gap)));\n"; } if ($Print_context_matcher) { if ($Context_matcher2) { my $fmt = $Env{PEG_CONTEXT_FORMAT2} || '++++ ($.) $_'; $fmt =~ s|\$_\b|\$Context_line2|; $fmt =~ s|\$\.|\$Context_lineno2|; $Search .= " if (defined \$Context_line2) {\n"; $Search .= " \$Printed_Context_line2 = \$Context_line2;\n" if $save_context; $Search .= " \$Context_line2 =~ s/\\015?[\\012\\015]\\z//;\n"; # inline chomp_ $Search .= " print \"$Col{z_context2}$fmt$Col_Reset$Newline_literal\";\n"; $Search .= " \$Context_line2 = undef;\n"; $Search .= " }\n"; } my $fmt = $Env{PEG_CONTEXT_FORMAT} || '**** ($.) $_'; $fmt =~ s|\$_\b|\$Context_line|; $fmt =~ s|\$\.|\$Context_lineno|; $Search .= " if (defined \$Context_line) {\n"; $Search .= " \$Printed_Context_line = \$Context_line;\n" if $save_context; $Search .= " \$Context_line =~ s/\\015?[\\012\\015]\\z//;\n"; # inline chomp_ $Search .= " print \"$Col{z_context}$fmt$Col_Reset$Newline_literal\";\n"; $Search .= " \$Context_line = undef;\n"; $Search .= " }\n"; } $Search .= $safe_before_context ? " print \@Before;\n" : " print grep defined, \@Before[(\$. % $Before)..@{[$Before-1]}, 0..((\$. % $Before)-1)];\n" if $Opt_B; $Search .= " $print\n" if $print; $Search .= " " . ($Opt_m == 1 ? 'last' : 'goto done') . " if \$Matches >= $Max_matches;\n" if ($Opt_m and !$context); $Search .= " \$After = 0;\n" if $context; $Search .= " \@Before = ();\n" if $Opt_B; $Search .= " \$First = 0;\n" if $use_First; $Search .= " }\n"; $Search .= " elsif (++\$After <= $After) {\n" if $Opts{A}; $Search .= " $fix_newline\n" if ($Opts{A} and $Opts{N}); $Search .= " $nonmatch_print\n }\n" if $Opts{A}; $Search .= " else {\n" if ($Opts{B} or ($context and $Opt_m)); $Search .= " " . ($Opt_m > 1 ? 'goto done' : 'last') . " if (\$Matches >= $Max_matches);\n" if ($Opt_m and $context); $Search .= " ++\$After;\n" if (!$Opts{A} and $Opts{B}); $Search .= " $fix_newline\n" if ($Opt_B and $Opts{N}); $Search .= $safe_before_context ? " push \@Before, $output;\n" : " \$Before[\$. % $Before] = $output;\n" if $Opt_B; $Search .= " }\n" if ($Opts{B} or ($context and $Opt_m)); $Search .= " $assign_Offset\n" if $Opt_b_bytes; $Search .= " push \@P, \$P;\n" if $uses_P; $Search .= " }\n"; # NB. in the event of an exception, we can't print $_ as it's contents may trigger another exception in the output IO! $Search .= ' }' . ($Opt_s ? '' : '; $@ and (print STDERR "\npeg: error at line $. of $File:\n", &ee)' . ($Opt_r_fork ? '' : ', exit(2)')) . ";\n"; $Search .= ' $Total_bytes += ' . (($sysread_slurp or $quick_no_match_test) ? '$Size' : 'tell(F)') . ";\n" if $Opts{'%'}; $Search .= " $Code_before_close;\n" if $Code_before_close; $Search .= " close(F);\n" unless $Search_STDIN; if ($Opt_oo) { $Search .= " select \$Orig_fh;\n if ("; $Search .= "!\$Match_failed\n\t&& " if @Perlexpr_k; $Search .= join "\n\t&& ", map "\$Match$_", (0 .. $#Perlexpr); $Search .= ") {\n"; $Search .= " if (\$Wide_chars) {\n" if $Opts{K}; if ($Opts{K} or $Input_encoding) { # NB. $Buffer_contents is a *byte* string. $Search .= ' seek($Buffer_fh, 0, 0) or die_ "seek failed: $!";' . "\n"; $Search .= " my \$buf;\n"; $Search .= " print \$buf while read(\$Buffer_fh, \$buf, 2048) > 0;\n"; } $Search .= " } else {\n" if $Opts{K}; $Search .= " print \$Buffer_contents;\n" unless $Input_encoding; $Search .= " }\n" if $Opts{K}; $Search .= " push \@Matched_files, \$File;\n }\n"; } $Search .= " goto done if (\$Matches >= $Max_matches);\n" if ($context and $Opt_m > 1); if ($Opts{k}) { $Search .= " if (!\$Match_failed" . join("", map({"\n\t&& \$Match$_"} (0..$#Perlexpr))) . ") {\n"; $Search .= " exit;\n" if $Opts{q}; $Search .= " print \"$Col_File$Newline_literal\";\n"; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " }\n"; } if ($Opts{c} == 1 or $Opts{c} >= 3) { $Search .= ' print "' . ($Opts{h} ? '' : "$Col_File$Col{colon}:$Col_Reset") . "\$Count$Newline_literal\""; $Search .= " if \$Count" if ($Opts{c} >= 3); $Search .= ";\n"; } $Search .= " Z_display(" . ((!$Opts{h} or $Opts{J}) ? '$File' : '') . ");\n" if ($Opts{Z} % 2); if ($Opts{L}) { $Search .= " unless (\$Found) {\n"; $Search .= " exit;\n" if $Opts{q}; $Search .= " print \"$Col_File$Newline_literal\";\n"; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " }\n"; } $Search .= ' warn_ "V: done search()\n\n";' . "\n" if $Verbose; $Search .= "}\n"; $Search =~ s/^(\s*warn_ \"V:)/$1 \$\$/gm if ($Verbose and $Opt_r_fork); eval_ $Search; $@ and die_ "error while eval'ing:\n\n$Search\n", @Warnings, ⅇ } # build_search sub header { my $file = shift; my $border = $Newline . $Col{colon} . (":" x (6 + length($file))) . $Col_Reset . $Newline; my $cc = $Col{colon} . "::" . $Col_Reset; return $border . $cc . " " . $Col{filename} . $file . $Col_Reset . " " . $cc . $border . $Newline; } # header # magic_open() - attempt to open a file using the 'correct' encoding. # # Ensure there is no :crlf layer on the filehandle. # 1. We want the CRs. # 2. The :crlf layer interferes badly with encodings. For example, # tell()'s result on "<:encoding(utf16le):crlf" filehandles # are not aligned to the original file's bytes. # sub magic_open { my ($file, $fullpath) = @_; # Open the file for reading *binary*, but ensure no :crlf layer. open(my $fh, "<:raw:perlio", $file) or $Err = $!, return; my $len = read($fh, my $data, 8); defined $len or $Err = "read failed: $!", return; return $fh if $len < 2; my @res; # (encoding, start_offset) my @b = unpack("C*", $data); # Look for a BOM. if ($b[0]==0xEF and $len >= 3 and $b[1]==0xBB and $b[2]==0xBF) { @res = ('utf8', 3); } elsif ($b[0]==0xFF and $b[1]==0xFE) { if (!($len % 4) and !$b[2] and !$b[3]) { @res = ('utf32le', 4) } elsif (!($len % 2)) { @res = ('utf16le', 2) } } elsif ($b[0]==0xFE and $b[1]==0xFF and !($len % 2)) { @res = ('utf16be', 2); } elsif (!$b[0] and !$b[1] and $b[2]==0xFE and $b[3]==0xFF and !($len % 4)) { @res = ('utf32be', 4); } # OK - cannot find a BOM, perhaps it's ASCII text encoded in UTF(16|32). elsif ((!$b[0] or !$b[1]) and !($len % 2)) { if ( (!$b[1] and $Is_ascii_text[$b[0]] and ($len==2 or (!$b[3] and $Is_ascii_text[$b[2]] and ($len==4 or (!$b[5] and $Is_ascii_text[$b[4]] and ($len==6 or (!$b[7] and $Is_ascii_text[$b[6]])))))))) { @res = ('utf16le', 0); } elsif (!$b[0] and $Is_ascii_text[$b[1]] and ($len==2 or (!$b[2] and $Is_ascii_text[$b[3]] and ($len==4 or (!$b[4] and $Is_ascii_text[$b[5]] and ($len==6 or (!$b[6] and $Is_ascii_text[$b[7]]))))))) { @res = ('utf16be', 0); } elsif ($len==4 or $len==8) { if ( (!$b[1] and !$b[2] and !$b[3] and $Is_ascii_text[$b[0]]) and ($len==4 or (!$b[5] and !$b[6] and !$b[7] and $Is_ascii_text[$b[4]]))) { @res = ('utf32le', 0) } elsif ( (!$b[0] and !$b[1] and !$b[2] and $Is_ascii_text[$b[3]]) and ($len==4 or (!$b[4] and !$b[5] and !$b[6] and $Is_ascii_text[$b[7]]))) { @res = ('utf32be', 0) } } } my ($encoding, $start_offset); if (@res) { ($encoding, $start_offset) = @res; warn_ "assuming $encoding: $fullpath" unless $Opt_ss; } else { $start_offset = 0; if ($Guess_encoding) { seek($fh, 0, 0) or $Err = "seek failed: $!", return; $len = read($fh, $data, 4096); defined $len or $Err = "read failed: $!", return; my $enc_obj = Encode::Guess::guess_encoding($data); if (ref $enc_obj and $enc_obj->name ne 'ascii') { $encoding = $enc_obj->name; warn_ "guessing $encoding: $fullpath" unless $Opt_ss; } } } seek($fh, $start_offset, 0) or $Err = "seek failed: $!", return; if ($encoding) { my $layer = ":encoding($encoding)"; eval { binmode($fh, $layer) or die "binmode failed: $!"; $Wide_chars = 1; }; $@ and die_ "$encoding encoding error $fullpath:\n", ⅇ } ####print "DBG: $fullpath: ", (join ', ', PerlIO::get_layers($fh, details => 1)), "\n"; return $fh; } # magic_open { my $last_file; sub show_progress { my $file = shift; my $N = $Console_width - 10; # Ensure $file fits the terminal width. # Try: a/b/c/def -> a/b/~/def -> a/~/def -> ~/def -> ~/d~f $file =~ s|\\|/|g if $Is_Win32; if (length($file) > $N) { my $fits; if ($file =~ m|^(.+)/([^/]+)\z|) { my ($root, $tail) = ($1, $2); while ($root =~ s|/+[^/]+\z||) { if (length($root) + length($tail) < $N) { $file = "$root/~/$tail"; $fits = 1; last; } } if (!$fits and length($tail) < $N) { $file = "~/$tail"; $fits = 1; } } unless ($fits) { my $N_2 = int($N / 2); $file =~ s|^.+/|~/|; $file = substr($file, 0, $N_2) . "~" . substr($file, length($file) - $N_2); } } if (defined $last_file) { # When consecutive filenames truncate the same, show progress has been made. $file .= '*' if $file eq $last_file; # Don't reprint their common prefix to prevent flickering. my $lower = 0; # lower <= common < upper my $upper = 1 + ((length($file) < length($last_file)) ? length($file) : length($last_file)); while ($upper - $lower > 1) { my $try = int(($lower + $upper) / 2); if (substr($file, 0, $try) eq substr($last_file, 0, $try)) { $lower = $try } else { $upper = $try } } my $common = $lower; my $out = ''; my $overhang = length($last_file) - length($file); if ($overhang > 0) { # erase overhanging characters of last_file $out .= "\b" x $overhang; $out .= " " x $overhang; } $out .= "\b" x (length($last_file) - $common); $out .= substr $file, $common; print STDOUT $out; } else { print STDOUT "peg: $file"; } $last_file = $file; } # show_progress } sub S { my ($fh, $filename, $within_archive) = @_; if ($within_archive and ($Opts{S} == 1 ? $filename =~ /$S_handler_re/o : ($S_nonarchive_re and $filename =~ /$S_nonarchive_re/o))) { my $ext = lc $1; require File::Temp; my ($fout, $tempfile) = File::Temp::tempfile("peg-S-XXXXX", SUFFIX => ".$ext", UNLINK => 1); binmode $fout; my ($len, $buf); while ($len = sysread($fh, $buf, 65_536)) { syswrite($fout, $buf, $len) or die "error writing to tempfile: $!\n"; } close $fout or die "can't close tempfile: $!\n"; close $fh; warn_ "V: S() calling $ext handler with ('$tempfile', '$filename')\n" if $Verbose; ++$Inside_archive; $Peg_S{$ext}->($tempfile, $filename); --$Inside_archive; unlink $tempfile; } else { ($S_F, $S_FILE) = ($fh, $filename); warn_ "V: S() calling search('$filename')\n" if $Verbose; search(); } } # S sub search_files { foreach my $file (@{$_[0]}) { $_ = $File::Find::name = $file; search(); } } # search_files sub run { $/ = "\n"; my $cwd = cwd(); my $STDERR_contents; my $Profile = $Env{PEG_PROFILE} || 0; # A simple profiling mechanism warn_ "PROFILE = $Profile$Beep" if $Profile > 0; if ($Opts{R}) { require Fcntl; my $mode = (&Fcntl::O_WRONLY | &Fcntl::O_EXCL | &Fcntl::O_CREAT); my $R_dir = $cwd; unless (sysopen(OUT, $R_file, $mode, 0600)) { warn_ "can't create -R file in current directory: $!" unless $Opt_s; $R_dir = $HOME_dir; sysopen(OUT, "$R_dir$R_file", $mode, 0600) or die_ "can't create -R file: $!"; } warn_ "-R: $R_dir$R_file" unless $Opt_s; select OUT; if ($Opts{_}) { autoflush(\*STDOUT); # Save STDERR output till the end to avoid clobbering the progress output. open(OLDERR, ">&", \*STDERR) or die_ "can't save STDERR: $!"; close STDERR; open(STDERR, ">", \$STDERR_contents) or (print STDOUT "peg: can't redirect STDERR: $!\n"), exit(2); $SIG{__DIE__} = sub { return if $_[0] =~ m|Encode/ConfigLocal|; show_progress("!error!"); print STDOUT "\n"; close STDERR; open(STDERR, ">&OLDERR") or (print STDOUT "peg: can't restore STDERR: $!\n"), exit(2); my $err = join '', "\n", $STDERR_contents, "\n", @_; $err =~ s/[\012\015]+/\n/gs; print STDERR $err; exit(2); }; show_progress("*start*"); } } # Flush output unless we know it's going to a file. $| = ($Opts{R} or $Opts{U}) ? 0 : 1; my $layer; if ($Output_encoding) { if ($Output_BOM) { binmode select(); print $Output_BOM; } $layer = ":encoding($Output_encoding)"; if ($Is_Win32) { if ($CRLF_to_newline) { if ($Output_encoding eq 'utf8') { # Leave implicit :crlf layer on output. } else { $layer = ':pop' . $layer . ':crlf'; # Reposition :crlf layer. } } else { $layer = ':pop' . $layer; # Remove redundant :crlf layer. } } } elsif ($Needs_crlf_layer) { $layer = ':crlf'; } else { $layer = ':raw'; # NB. needed on Win32 to prevent CRLF -> CRCRLF! } eval { binmode(select(), $layer) or die "binmode failed: $!"; }; $@ and die_ "failed to binmode output using '$layer':\n", ⅇ ####print "DBG: output '$layer' => ", (join ', ', PerlIO::get_layers(select(), details => 1)), "\n"; if ($Opts{R} > 1) { my $header = <<"EOT"; ## # TIME : @{[ scalar localtime ]} # CWD : @{[ $cwd ]} # ARGV : @{[ join "\n# ", @ARGV ]} ## EOT $header =~ s/\n/$Newline/g; print $header; } # Ensure we don't leave console incorrectly colored if interrupted. # SIGQUIT (Ctrl-Pause on Win32) saves the files matched so far. $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { # XXX Win32 fix for unread STDIN contents leaking to shell when run via bat file. if ($Is_Win32 and !$STDIN_is_terminal) { while () {} } print $Col_Reset; chdir($cwd) && save_matches() if $_[0] eq 'QUIT'; exit(2); }; $SIG{__WARN__} = sub {}; # Ignore warnings from here on in. start:; search_files(\@Cmdline_files); if ($Opt_r_cmd) { require IPC::Open3; require IO::File; *R_CMD_ERR = IO::File->new_tmpfile; # Ensure that the R_CMD process does not become a zombie if an # interrupt occurs before it has finished and triggers peg to exit. # In particular, it is possible for an interrupt to occur after # open3() has created the R_CMD process but before it has returned. my $interrupt = 0; my $pid = do { local $SIG{INT} = sub { $interrupt = 1 }; eval { IPC::Open3::open3(undef, \*R_CMD_OUT, ">&R_CMD_ERR", $Opt_r_cmd); }; }; $@ and die_ "failed to run $Opt_r_cmd\n", ⅇ my $finished; eval "END { kill('KILL', $pid) unless \$finished }"; $SIG{INT}->('INT') if $interrupt; warn_ "created process $pid for $Opt_r_cmd" unless $Opt_ss; if ($Opt_r_fork) { fork_workers(); } else { while () { s/\015?[\012\015]\z//; # inline chomp_ $File::Find::name = $_; search(); } } seek(R_CMD_ERR, 0, 0) or die_ "seek failed: $!"; while () { warn_($_) unless $Opt_ss; } close R_CMD_ERR; close R_CMD_OUT; while (wait() != -1) {} $finished = 1; } if ($Opts{r} or $Opt_d) { require File::Find; local $SIG{__WARN__} = sub { my $err = $_[0]; $err =~ s/^(.*) at .* line \d+.*\z/$1/s; $err =~ s/^Can't opendir\((?:\.\/)?(.+)\): /can't opendir $1: /; $err = lcfirst $err if $err =~ /^C\w+\'t /; # If search() is in the call stack then File::Find::name is valid. for (my $i = 0; my @cs = caller($i); ++$i) { if ($cs[3] eq 'main::search') { return if $i == 2; # Ignore warnings from PERLEXPR $err = $File::Find::name . ": " . $err; $err =~ s|^\./||; last; } } warn_ $err; } unless $Opt_ss; my @dirs = (($Opts{r} ? '.' : ()), @Cmdline_dirs); eval { File::Find::find({ @FileFind_opts, 'wanted' => \&search }, @dirs); }; $@ and warn_ "File::Find::find failed: ", ⅇ chdir($cwd) or die_ "can't chdir back to $cwd: $!"; } if ($Opts{X}) { # Avoid interleaving the file list and output on the terminal. if ($STDOUT_is_terminal and $STDIN_is_terminal and !$Opts{R}) { warn_ "buffering up -X file list" unless $Opt_s; my @files; while () { chomp_ $_; next if $_ eq ''; push @files, $_; } search_files(\@files); } else { while () { chomp_ $_; next if $_ eq ''; $File::Find::name = $_; search(); } } } if ($Search_STDIN) { search_files(['-']); } # Finished searching specified files. if (--$Profile > 0) { goto start; } done:; if ($Code_at_end) { eval_ $Code_at_end; $@ and warn_ "-PPPP code gave an error:\n", ⅇ } if ($Opts{c} == 2) { print $Count, $Newline; } elsif ($Opts{Z} and !($Opts{Z} % 2)) { Z_display(); } print $Col_Reset; if ($Opts{R}) { if ($Opts{_}) { show_progress("*done*"); print STDOUT "\n"; open(STDERR, ">&OLDERR") or (print STDOUT "peg: can't restore STDERR: $!\n"), exit(2); print STDERR $STDERR_contents; } select STDOUT; close OUT or warn_ "failed to close -R file: $!"; } chdir($cwd) or die_ "can't chdir back to $cwd: $!"; save_matches(); if ($Opts{'%'}) { my $total_time = sprintf "%.2f", (0.1 + Time::HiRes::time() - $Start_time); my $rate = int($Total_bytes / $total_time); if ($rate >= 1048576) { $rate = int($rate / 1048576) . 'M' } elsif ($rate >= 1024) { $rate = int($rate / 1024) . 'K' } for ($Total_files, $Total_lines) { 1 while s/^(\d+)(\d{3})/$1,$2/; # commify } warn_ "took $total_time seconds: $Total_lines lines in $Total_files files \@ $rate bytes/sec"; } } # run sub fork_workers { warn_ "forking $Worker_count by $Worker_work" unless $Opt_ss; require PerlIO::scalar; pipe(my $from_workers, my $to_boss) or die_ "pipe failed: $!"; autoflush($to_boss); my %workers; for (1 .. $Worker_count) { pipe(my $from_boss, my $to_worker) and pipe(my $from_worker2, my $to_boss2) or die_ "pipe failed: $!"; autoflush($to_worker); autoflush($to_boss2); my $pid = fork; die_ "fork failed: $!" unless defined $pid; if ($pid) { $workers{$pid} = [$to_worker, $from_worker2]; close $from_boss; close $to_boss2; } else { close $from_workers; close $from_worker2; close $to_worker; close R_CMD_OUT; close R_CMD_ERR; $SIG{PIPE} = sub { exit }; send_msg($to_boss, $$); worker($from_boss, $to_boss, $to_boss2); exit; } } close $to_boss; my $INT_handler = $SIG{INT}; local @SIG{qw(INT PIPE)}; $SIG{INT} = $SIG{PIPE} = sub { foreach my $pid (keys %workers) { my ($to_worker, $from_worker2) = @{$workers{$pid}}; close $to_worker; close $from_worker2; } close $from_workers; while (wait() != -1) {} $INT_handler->('INT'); }; my (@files, $r_cmd_done); my $get_files = sub { return if $r_cmd_done; for (1 .. $Worker_work) { my $file = ; defined $file or $r_cmd_done = 1, return; $file =~ s/\015?[\012\015]\z//; # inline chomp_ push @files, $file; } }; $get_files->(); while (my $pid = receive_msg($from_workers)) { my $msg_waiting = ($pid =~ s/^!//); my ($to_worker, $from_worker2) = @{$workers{$pid}}; my $msg = $msg_waiting ? receive_msg($from_worker2) : ''; if (@files) { send_msg($to_worker, join "\000", @files); @files = (); } else { close $to_worker; close $from_worker2; delete $workers{$pid}; } if (length $msg) { $msg =~ /^(.*?)\001/sg; print STDERR $1 if length $1; $msg =~ /(.*?)\001/sg; push @Matched_files, split /\000/, $1; $msg =~ /(.*)/sg; print $1 if length $1; } $get_files->(); } } # fork_workers sub worker { my ($from_boss, $to_boss, $to_boss2) = @_; open(my $outfh, ">", \my $out) or die_ "can't open: $!"; select $outfh; close STDERR; open(STDERR, ">", \my $err) or die_ "can't redirect STDERR: $!"; while (my $files = receive_msg($from_boss)) { seek(STDERR, 0, 0) and seek($outfh, 0, 0) or (print STDOUT "peg: can't seek: $!\n"), exit; $err = $out = ''; @Matched_files = (); foreach my $file (split /\000/, $files) { $File::Find::name = $_ = $file; search(); } if (length $out or length $err or @Matched_files) { my $msg = $err . "\001" . join("\000", @Matched_files) . "\001" . $out; send_msg($to_boss, "!$$"); send_msg($to_boss2, $msg); } else { send_msg($to_boss, $$); } } } # worker sub receive_msg { my $fh = shift; local $/ = $Msg_rs; my $msg = <$fh>; chomp $msg if defined $msg; return $msg; } # receive_msg sub send_msg { my ($fh, $msg) = @_; print $fh $msg, $Msg_rs; } # send_msg # Avoid "used only once" warnings. 1 or ($Data::Dumper::Indent, $Data::Dumper::Sortkeys, $Encode::Guess::NoUTFAutoGuess, $FindBin::RealBin, $::OLDERR); __END__ =head1 NAME peg - Perl expression grep =head1 SYNOPSIS peg [OPTION]... PERLEXPR [FILE]... =head1 DESCRIPTION B is a file search tool similar to the UNIX program B. It uses a Perl expression to match lines from a list of input files. Internally, B works in a manner similar to the following pseudo-Perl: foreach $File ( FILEs ) { if (open F, "<", $File) { while () { if ( PERLEXPR ) { print; } } } } Thus, each input line is available as the Perl variable C<$_>, and this will be printed if PERLEXPR is true. In particular, to match lines according to a Perl regular expression pattern, it is necessary to place it within the pattern matching operator, which defaults to searching C<$_>. For example, C. To simplify specifying the PERLEXPR in the common case of searching for plain text, the following rules are used to determine how it is treated if none of B<-E>, B<-F> or B<-G> are specified: =over 4 =item 1. If the PERLEXPR is a I string (which is defined as matching C), then the PERLEXPR is taken as a literal string to search for ie. B<-F> is assumed. =item 2. If the PERLEXPR is not simple, and any of B<-i>, B<-w> or B<-x> are specified, then the PERLEXPR is taken as a regular expression pattern to match against ie. B<-G> is assumed. =item 3. Otherwise, the PERLEXPR is assumed to be a Perl expression ie. B<-E> is assumed. =back The above rules allow for C to be run as the more natural C. To assist in quoting battles against less enlightened shells, B provides the following variables: ($DQ, $SQ, $BT, $EM, $GT, $LT, $PC) = qw( " ' ` ! > < % ) B will C any Perl variables beginning with a lowercase letter prior to searching each file. If no files are specified then if STDIN is attached to the terminal then B<-r> is assumed else B reads from standard input. =head1 OPTIONS The options include equivalents to all of standard B, and most of the B extensions. Note that some are subtly different. They can be grouped I in the argument list (except after B<-->, or after options that take an argument). For example, C, C and C are all equivalent. Some options are overloaded to have different behaviour if they are specified more than once. Options can also be set via the environment variable B. If less than two files are specified, then B<-h> is assumed. =head2 Selection and interpretation of PERLEXPR =over 4 =item B<-E> Assume PERLEXPR is a Perl expression. =item B<-F> Assume PERLEXPR is a fixed literal string. Thus, C is equivalent to C. =item B<-G> Assume PERLEXPR is a Perl regular expression I. Thus, C is equivalent to C. =item B<-e> I =over 4 =item B<-e PERLEXPR> Specify a PERLEXPR to match. If used more than once, then it is equivalent to using B<-o>. For example, C, C, and C are all equivalent. =item B<-ee FILE> Specify a file to search. For example, C, will search for the string B<-text> in the file B<-filename>. =back =item B<-f> I =over 4 =item B<-f FILE> FILE is a file containing further PERLEXPRs. Lines will be adjudged to match if they match any of the PERLEXPRs. =item B<-ff FILE> FILE is a file containing files to search. =item B<-fff FILE> FILE is a file whose lines are added as keys to the Perl associative array variable C<%F>. =back =item B<-i> I =over 4 =item B<-i> Ignore case distinctions. Enables B<-G>. =item B<-ii> I matching. Ignore case distinctions if PERLEXPR is entirely lowercase. =back =item B<-o> I =over 4 =item B<-o> Non option arguments following the B<-o> option up until B<--> are interpreted as further PERLEXPRs. Lines will be adjudged to match if they match any of the PERLEXPRs. For example, C is equivalent to C. =item B<-oo> Similar to B<-o>, but only prints the results from files that contain a match to all the PERLEXPRs. =back =item B<-O> Similar to B<-ol>, except each PERLEXPR must match at least once. =item B<-k> Similar to B<-O>, except each PERLEXPR must not match anywhere within the file. It can be thought of as being to B<-O> what B<-L> is to B<-l>. For example, C will print the names of the files that contain all of aa, bb and cc, but none of AA, BB or CC. =item B<-v> Negates the sense of PERLEXPR. =item B<-w> Force PERLEXPR to match only whole I. Enables B<-G>. =item B<-x> Force PERLEXPR to match only whole I. Enables B<-G>. =back =head2 File selection =over 4 =item B<-d> I =over 4 =item B<-d> Any directories in the file list will be searched recursively for files to process. =item B<-dd> The same as B<-d> except it only applies if all the files specified are directories. =back =item B<-I> I =over 4 =item B<-I> Do not process binary files. =item B<-II> Process I binary files. =back =item B<-K> Attempt to I detect each file's encoding. It does this by looking at the first few bytes of the file for: =over 4 =item * A byte order mark (BOM) at the start of the file. Supports utf16le, utf16be, utf32le, utf32be and utf8. =item * No byte order mark, but first few bytes look like the file is ASCII encoded in UTF-16 or UTF-32. =back If this does not succeed, then an encoding can still be determined if the environment variable B is set. This should be a whitespace delimited list of encodings, for example C. These are then tried by B to see if the encoding can be determined. =item B<-M> I This takes a B