| #!/usr/bin/env perl |
| |
| # Detect bare 'grep' used as a test assertion where 'test_grep' |
| # should be used, and '! test_grep' where 'test_grep !' should |
| # be used. |
| # |
| # The shared shell parser tokenizes test bodies so that 'grep' |
| # inside heredocs, command substitutions like $(grep ...), and |
| # quoted strings is collapsed into a single token and never seen |
| # by our check. A line-oriented approach would need to track |
| # heredoc delimiters, nested $() depth, and cross-line pipe |
| # state to avoid false positives on patterns like: |
| # |
| # write_script foo.sh <<-\EOF |
| # grep pattern file # data, not an assertion |
| # EOF |
| # |
| # The Lexer already handles these. |
| |
use warnings;
use strict;
use File::Basename;
use File::Spec;

# Load the shared shell parser that lives next to this script.
#
# The path is made absolute before it is handed to do(): do() only
# loads the exact file for absolute paths and for paths starting with
# './' or '../'. Any other relative path (e.g. 't/lib-shell-parser.pl'
# when invoked as 'perl t/greplint.pl') is searched for in @INC, which
# has not contained '.' since Perl 5.26, so the load would fail.
my $shell_parser_lib = File::Spec->catfile(
    File::Spec->rel2abs(dirname($0)), 'lib-shell-parser.pl');
do($shell_parser_lib)
    or die "$0: failed to load lib-shell-parser.pl: $@$!\n";

# Process exit status: set to 1 as soon as any lint finding is printed.
my $exit_code = 0;
| |
# GrepLintParser is a ScriptParser subclass. The base class locates
# the test_expect_success/failure blocks and invokes check_test() on
# each body; this package overrides check_test() to scan the body's
# token stream for bare grep assertions.
package GrepLintParser;

our @ISA = ('ScriptParser');

# Tokens after which the next token sits at command position.
# Example: in 'echo foo && grep bar file' the 'grep' following
# '&&' is a command word and is a candidate for flagging.
my %cmd_start = (
    '&&'   => 1,
    '||'   => 1,
    ';'    => 1,
    ';;'   => 1,
    'do'   => 1,
    'then' => 1,
    'else' => 1,
    'elif' => 1,
    "\n"   => 1,
    '{'    => 1,
    '('    => 1,
);

# Tokens that show grep is part of a pipe or has a redirection.
my %filter_op = (
    '|'  => 1,
    '>'  => 1,
    '>>' => 1,
    '<'  => 1,
);
| |
# Decide whether the token at index $pos is in "command word"
# position, i.e. the shell would treat it as the program to run
# rather than as an argument. Only a 'grep' in that position is an
# assertion worth flagging; 'grep' used as an argument
# ('test_must_fail grep') or as a value ('for cmd in grep sed') is
# not.
#
#   $tokens - array ref of tokens; element [0] of each is its text
#   $pos    - index of the token being classified
#
# Returns 1 for a command word, 0 otherwise.
sub is_command_word {
    my ($tokens, $pos) = @_;

    # '!' is only a prefix and does not use up the command position,
    # so step back over any run of '!' directly before the token.
    my $prev = $pos - 1;
    $prev-- while $prev >= 0 && $tokens->[$prev]->[0] eq '!';

    # Nothing (other than '!') precedes it: it starts the script.
    return 1 if $prev < 0;

    # A separator or a pipe opens a new command. Everything else --
    # including '}' and ')', which are followed by separators or
    # redirects on the compound command, and any ordinary word --
    # means the command word has already been consumed.
    my $before = $tokens->[$prev]->[0];
    return ($cmd_start{$before} || $before eq '|') ? 1 : 0;
}
| |
# Some bare greps are intentional (e.g. file may not exist, data
# filter). A '# lint-ok' annotation on the source line suppresses
# the warning.
#
#   $raw_lines - array ref of the file's lines
#   $ln        - 1-based line number to inspect
#
# Returns 1 if the line carries the annotation, 0 otherwise. An
# out-of-range $ln produces a warning and is treated as "not
# annotated".
sub lint_ok {
    my ($raw_lines, $ln) = @_;
    if ($ln < 1 || $ln > @$raw_lines) {
        warn "lint_ok: line number $ln out of range (1.." .
            scalar(@$raw_lines) . ")\n";
        return 0;
    }
    # Require the marker to follow a '#': matching a bare 'lint-ok'
    # would let a command such as 'grep lint-ok file' silence its own
    # warning. The explicit 1/0 keeps the result a single value in
    # list context too (a failed match alone yields an empty list).
    return $raw_lines->[$ln - 1] =~ /#.*lint-ok/ ? 1 : 0;
}
| |
# Grep is a filter (not an assertion) if it receives piped input or
# sends its output to a pipe or redirect. Check both directions from
# grep's position in the token stream.
#
#   $tokens - array ref of tokens; element [0] of each is its text
#   $pos    - index of the 'grep' token
#
# Returns 1 if grep is used as a filter, 0 otherwise.
sub is_filter {
    my ($tokens, $pos) = @_;

    # Backward: is grep receiving piped input?
    # Newlines directly after a pipe do not end the pipeline
    # ('cmd |\n grep' is one pipeline), so skip past them. But once
    # a newline has been crossed, an ordinary word means the previous
    # line was a complete command: stop there, otherwise a pipe
    # inside that earlier command ('cmd | cat\n grep foo file') would
    # be mistaken for grep's own input.
    my $crossed_newline = 0;
    for (my $j = $pos - 1; $j >= 0; $j--) {
        my $t = $tokens->[$j]->[0];
        return 1 if $t eq '|';
        if ($t eq "\n") {
            $crossed_newline = 1;
            next;
        }
        last if $cmd_start{$t} || $t eq '}' || $t eq ')';
        last if $crossed_newline;
    }

    # Forward: is grep piping or redirecting output?
    # Unlike the backward scan, we do not skip newlines here: a bare
    # newline is a command boundary, and redirects or pipes must
    # appear on the same line as grep (or after a line continuation,
    # which the Lexer consumes).
    for (my $j = $pos + 1; $j < @$tokens; $j++) {
        my $t = $tokens->[$j]->[0];
        return 0 if $cmd_start{$t};
        return 1 if $filter_op{$t};
    }
    return 0;
}
| |
# Translate a line number relative to a test body into a line number
# in the source file.
#
# For a double-quoted body the Lexer swallows backslash-newline
# splices, so the body as seen by the inner parser has fewer lines
# than the file region it came from. The source lines are walked
# from the body's first line and every splice met on the way adds
# one to the result.
#
#   $body_lineno - 1-based line number inside the body
#   $body_token  - body token: [0] is its raw text, [4] its last
#                  source line
#   $raw_lines   - array ref of the file's lines
#   $body_start  - 1-based source line on which the body starts
#
# Returns the file line number. If the inputs are inconsistent a
# warning is issued and the unadjusted number is returned.
sub body_to_file_line {
    my ($body_lineno, $body_token, $raw_lines, $body_start) = @_;
    my ($raw_body, $last_line) = @{$body_token}[0, 4];

    if (!($body_start && $body_start >= 1)) {
        warn "body_start is not a positive integer\n";
        return $body_lineno;
    }
    my $unadjusted = $body_lineno + $body_start - 1;

    # Splices only occur in double-quoted bodies.
    if ($raw_body !~ /^"/) {
        return $unadjusted;
    }

    if (!($last_line && $last_line >= $body_start)) {
        warn "body_end_line is not set for double-quoted body\n";
        return $unadjusted;
    }
    my $total_lines = scalar(@$raw_lines);
    if ($last_line > $total_lines) {
        warn "body_end_line ($last_line) exceeds file length (" .
            $total_lines . ")\n";
        return $unadjusted;
    }

    my $splices = 0;
    my $counted = 0;
    for (my $cur = $body_start;
         $cur <= $last_line && $counted < $body_lineno;
         $cur++) {
        # An odd run of trailing backslashes is a splice (\<nl>);
        # an even run is just escaped backslashes (\\).
        my ($run) = $raw_lines->[$cur - 1] =~ /(\\*)$/;
        if (length($run) % 2 == 1) {
            $splices++;
        }
        else {
            $counted++;
        }
    }
    warn "body_lineno ($body_lineno) not found within body range " .
        "($body_start..$last_line)\n"
        if $counted < $body_lineno;

    return $unadjusted + $splices;
}
| |
# ScriptParser calls this for each test body found in the script.
#
# Arguments (after $self): the title token, the body token and, when
# the body is given as '-', a hash ref describing the heredoc that
# holds the real body ({content} and {start_line}).
#
# Prints one "file:line: error: ..." line per finding on stdout and
# sets $exit_code to 1 when anything is reported. A '# lint-ok'
# annotation on the reported line suppresses the finding.
sub check_test {
    my $self = shift @_;
    # The title is unwrapped but not used by this check.
    my $title = ScriptParser::unwrap(shift @_);
    my $body_token = shift @_;
    my $body_start = $body_token->[3];
    my $body = ScriptParser::unwrap($body_token);
    # Handle heredoc-style test bodies:
    #   test_expect_success 'title' - <<\EOF
    #   grep pattern file
    #   EOF
    # The '-' signals that the body follows as a heredoc.
    if ($body eq '-') {
        my $herebody = shift @_;
        if ($herebody) {
            $body = $herebody->{content};
            $body_start = $herebody->{start_line};
        }
    }
    return unless $body;

    my $raw_lines = $self->{raw_lines};
    my $file = $self->{file};

    # The outer parser gives us the body as an opaque string.
    # Parse it to get individual tokens with command boundaries.
    my $parser = ShellParser->new(\$body);
    my @tokens = $parser->parse();

    for (my $i = 0; $i < @tokens; $i++) {
        my $text = $tokens[$i]->[0];

        # Only '!' and 'grep' can lead to a finding. Filter on the
        # token text first so that the command-position scan, the
        # line-number mapping (which walks the source lines of
        # double-quoted bodies) and their diagnostics are not run
        # for every other command in the body.
        next unless $text eq '!' || $text eq 'grep';
        next unless is_command_word(\@tokens, $i);

        my $token_lineno = $tokens[$i]->[3];
        unless (defined($token_lineno) && $token_lineno >= 1) {
            warn "token has no line number\n";
            next;
        }
        my $file_lineno = body_to_file_line(
            $token_lineno,
            $body_token, $raw_lines, $body_start);

        # '!' negates the exit code without consuming command
        # position. '! test_grep' is an anti-pattern because
        # test_grep only prints diagnostics on grep failure,
        # and '!' inverts after that decision is already made.
        if ($text eq '!') {
            if ($i + 1 < @tokens &&
                $tokens[$i + 1]->[0] eq 'test_grep' &&
                !lint_ok($raw_lines, $file_lineno)) {
                print "$file:$file_lineno: error: ",
                    'use "test_grep !" instead of ',
                    '"! test_grep"', "\n";
                $exit_code = 1;
            }
            next;
        }

        # Bare grep as a command (not a filter) is a test
        # assertion that should use test_grep for better
        # failure diagnostics.
        if (!is_filter(\@tokens, $i) &&
            !lint_ok($raw_lines, $file_lineno)) {
            print "$file:$file_lineno: error: ",
                "bare grep outside pipeline ",
                "(use test_grep)\n";
            $exit_code = 1;
        }
    }
}
| |
package main;

# Lint each script named on the command line. The file is read once
# into a list of lines; the parser gets the joined text, and the
# line list and file name are attached to the parser object for
# check_test() to use. The process exits with the accumulated status.
foreach my $path (@ARGV) {
    open(my $in, '<:unix:crlf', $path) or die "$0: $path: $!\n";
    my @raw_lines = <$in>;
    close $in;

    my $script_text = join('', @raw_lines);
    my $linter = GrepLintParser->new(\$script_text);
    @{$linter}{qw(file raw_lines)} = ($path, \@raw_lines);
    $linter->parse();
}
exit $exit_code;