t/greplint.pl - git - Git at Google

 #!/usr/bin/env perl

 # Detect bare 'grep' used as a test assertion where 'test_grep'
 # should be used, and '! test_grep' where 'test_grep !' should
 # be used.
 #
 # The shared shell parser tokenizes test bodies so that 'grep'
 # inside heredocs, command substitutions like $(grep ...), and
 # quoted strings is collapsed into a single token and never seen
 # by our check.  A line-oriented approach would need to track
 # heredoc delimiters, nested $() depth, and cross-line pipe
 # state to avoid false positives on patterns like:
 #
 #   write_script foo.sh <<-\EOF
 #   grep pattern file    # data, not an assertion
 #   EOF
 #
 # The Lexer already handles these.

 use warnings;
 use strict;
 use File::Basename;
 do(dirname($0) . "/lib-shell-parser.pl")
 	or die "$0: failed to load lib-shell-parser.pl: $@$!\n";

 my $exit_code = 0;

 # GrepLintParser inherits ScriptParser's ability to find
 # test_expect_success/failure blocks and call check_test()
 # on each body.  We override check_test() to walk the token
 # stream looking for bare grep assertions.
 package GrepLintParser;

 our @ISA = ('ScriptParser');

 # Tokens that terminate one command and leave the following token
 # in command position.  In 'echo foo && grep bar file', the 'grep'
 # that follows '&&' is a command word and is a candidate for the
 # lint.
 my %cmd_start;
 $cmd_start{$_} = 1
 	for ('&&', '||', ';', ';;', 'do', 'then', 'else', 'elif', "\n", '{', '(');

 # Tokens which, when they follow grep inside the same command, show
 # that grep is feeding a pipe or has a redirection attached ('<'
 # included).
 my %filter_op;
 $filter_op{$_} = 1 for ('|', '>', '>>', '<');

 # Report whether the token at index $pos of @$tokens sits where the
 # shell expects a program name rather than an argument.  A 'grep'
 # in that slot is an assertion worth flagging; a 'grep' that is an
 # argument ('test_must_fail grep') or a plain value ('for cmd in
 # grep sed') is not.
 #
 # Returns 1 for command position, 0 otherwise.
 sub is_command_word {
 	my ($tokens, $pos) = @_;
 	# Look at the tokens to the left, nearest first.  Running off
 	# the front of the stream means nothing precedes the token, so
 	# it opens the very first command.
 	my $idx = $pos;
 	while (--$idx >= 0) {
 		my $prev = $tokens->[$idx]->[0];
 		# '!' only negates the exit status; it does not occupy
 		# the command slot, so look further left.
 		next if $prev eq '!';
 		# A pipe or a command separator opens a new command.
 		return 1 if $prev eq '|' || $cmd_start{$prev};
 		# Everything else -- an ordinary word, or a closing '}'
 		# or ')' (which is followed by a separator or by a
 		# redirect on the compound command) -- means the slot
 		# has already been taken.
 		return 0;
 	}
 	return 1;
 }

 # Tell whether source line $ln (1-based) of @$raw_lines carries the
 # 'lint-ok' marker.  Some bare greps are deliberate (the file may
 # not exist, grep is used as a data filter, ...), and marking the
 # line with '# lint-ok' silences the diagnostic.
 #
 # Returns the result of the pattern match; an out-of-range line
 # number produces a warning and 0.
 sub lint_ok {
 	my ($raw_lines, $ln) = @_;
 	my $total = scalar(@$raw_lines);
 	unless ($ln >= 1 && $ln <= $total) {
 		warn "lint_ok: line number $ln out of range (1.." .
 		    $total . ")\n";
 		return 0;
 	}
 	my $source_line = $raw_lines->[$ln - 1];
 	return $source_line =~ /lint-ok/;
 }

 # Grep is a filter (not an assertion) if it receives piped input,
 # or if a pipe or redirect token follows it within the same
 # command.  Both directions are checked from grep's position
 # ($pos) in the token stream (@$tokens).
 #
 # Returns 1 when grep is a filter, 0 otherwise.
 sub is_filter {
 	my ($tokens, $pos) = @_;
 	# Backward: is grep receiving piped input?
 	# Newlines don't break pipes ('cmd |\n grep' is one
 	# pipeline), so skip past them, and past any '!' prefix.
 	# Any other token means the pipe, if there is one further
 	# back, belongs to an earlier command:
 	#
 	#   git log | sort >actual
 	#   grep pattern actual      # assertion, not a filter
 	#
 	# so the scan must stop there instead of walking through
 	# ordinary words in search of a '|'.
 	for (my $j = $pos - 1; $j >= 0; $j--) {
 		my $t = $tokens->[$j]->[0];
 		return 1 if $t eq '|';
 		next if $t eq "\n" || $t eq '!';
 		last;
 	}
 	# Forward: is a pipe or redirect attached to grep?
 	# Unlike the backward scan, we do not skip newlines here:
 	# a bare newline is a command boundary, and redirects or
 	# pipes must appear on the same line as grep (or after a
 	# line continuation, which the Lexer consumes).
 	for (my $j = $pos + 1; $j < @$tokens; $j++) {
 		my $t = $tokens->[$j]->[0];
 		return 0 if $cmd_start{$t};
 		return 1 if $filter_op{$t};
 	}
 	return 0;
 }

 # Translate a line number counted inside a test body into a line
 # number of the file.
 #
 #   $body_lineno  1-based line within the body
 #   $body_token   body token; slot 0 is its text, slot 4 the line
 #                 on which the body ends
 #   $raw_lines    reference to the file's lines
 #   $body_start   file line on which the body begins
 #
 # In a double-quoted body every backslash-newline pair is spliced
 # out by the Lexer, so the body has fewer lines than the stretch of
 # file it came from.  For such bodies the source lines are walked
 # from $body_start and one is added to the result per continuation
 # line met before the requested body line is reached.
 #
 # When the inputs fail a sanity check a warning is issued and the
 # best available number is returned: $body_lineno itself if
 # $body_start is unusable, else the unadjusted file line.
 sub body_to_file_line {
 	my ($body_lineno, $body_token, $raw_lines, $body_start) = @_;
 	my $body_text = $body_token->[0];
 	my $body_end_line = $body_token->[4];
 	if (!$body_start || $body_start < 1) {
 		warn "body_start is not a positive integer\n";
 		return $body_lineno;
 	}
 	my $file_lineno = $body_start + $body_lineno - 1;
 	# Line splices only happen inside double quotes.
 	return $file_lineno if $body_text !~ /^"/;
 	if (!$body_end_line || $body_end_line < $body_start) {
 		warn "body_end_line is not set for double-quoted body\n";
 		return $file_lineno;
 	}
 	my $end = $body_end_line;
 	my $file_length = scalar(@$raw_lines);
 	if ($end > $file_length) {
 		warn "body_end_line ($end) exceeds file length (" .
 		    $file_length . ")\n";
 		return $file_lineno;
 	}
 	my ($splices, $logical) = (0, 0);
 	for my $src_ln ($body_start .. $end) {
 		last if $logical >= $body_lineno;
 		# An odd number of trailing backslashes is a
 		# continuation (\<nl>); an even number is a run of
 		# escaped backslashes (\\) and ends the line normally.
 		my ($slashes) = $raw_lines->[$src_ln - 1] =~ /(\\*)$/;
 		if (defined($slashes) && length($slashes) % 2 == 1) {
 			$splices++;
 		} else {
 			$logical++;
 		}
 	}
 	if ($logical < $body_lineno) {
 		warn "body_lineno ($body_lineno) not found within body range " .
 		    "($body_start..$end)\n";
 	}
 	return $file_lineno + $splices;
 }

 # ScriptParser calls this for each test body found in the script.
 # After $self, the arguments are the title token, the body token
 # and, when the body token unwraps to '-', a hash describing the
 # heredoc that holds the real body ({content}, {start_line}).
 #
 # The body is tokenized and every '!' or 'grep' found at command
 # position is examined.  Each finding is printed as
 # "file:line: error: ..." and sets $exit_code to 1.
 sub check_test {
 	my $self = shift @_;
 	# The title plays no part in the lint; it is taken off @_ so
 	# that the body token comes next.
 	my $title = ScriptParser::unwrap(shift @_);
 	my $body_token = shift @_;
 	my $body_start = $body_token->[3];
 	my $body = ScriptParser::unwrap($body_token);
 	# Handle heredoc-style test bodies:
 	#   test_expect_success 'title' - <<\EOF
 	#   grep pattern file
 	#   EOF
 	# The '-' signals that the body follows as a heredoc.
 	if ($body eq '-') {
 		my $herebody = shift @_;
 		if ($herebody) {
 			$body = $herebody->{content};
 			$body_start = $herebody->{start_line};
 		}
 	}
 	return unless $body;

 	my $raw_lines = $self->{raw_lines};

 	# The outer parser gives us the body as an opaque string.
 	# Parse it to get individual tokens with command boundaries.
 	my $parser = ShellParser->new(\$body);
 	my @tokens = $parser->parse();

 	my $file = $self->{file};

 	for (my $i = 0; $i < @tokens; $i++) {
 		my $text = $tokens[$i]->[0];
 		# Only '!' and 'grep' can lead to a diagnostic.  Test
 		# the text first: it is a cheap comparison, whereas the
 		# command-position scan and the line mapping below walk
 		# the token stream and the source lines.
 		next unless defined($text) &&
 		    ($text eq '!' || $text eq 'grep');
 		next unless is_command_word(\@tokens, $i);

 		my $token_lineno = $tokens[$i]->[3];
 		unless (defined($token_lineno) && $token_lineno >= 1) {
 			warn "token has no line number\n";
 			next;
 		}
 		my $file_lineno = body_to_file_line(
 			$token_lineno,
 			$body_token, $raw_lines, $body_start);

 		# '!' negates the exit code without consuming command
 		# position.  '! test_grep' is an anti-pattern because
 		# test_grep only prints diagnostics on grep failure,
 		# and '!' inverts after that decision is already made.
 		if ($text eq '!') {
 			if ($i + 1 < @tokens &&
 			    $tokens[$i + 1]->[0] eq 'test_grep' &&
 			    !lint_ok($raw_lines, $file_lineno)) {
 				print "$file:$file_lineno: error: ",
 				    'use "test_grep !" instead of ',
 				    '"! test_grep"', "\n";
 				$exit_code = 1;
 			}
 			next;
 		}

 		# What remains is 'grep' at command position.  Unless
 		# it is a filter or the line is marked lint-ok, it is
 		# a test assertion that should use test_grep for
 		# better failure diagnostics.
 		if (!is_filter(\@tokens, $i) &&
 		    !lint_ok($raw_lines, $file_lineno)) {
 			print "$file:$file_lineno: error: ",
 			    "bare grep outside pipeline ",
 			    "(use test_grep)\n";
 			$exit_code = 1;
 		}
 	}
 }

 package main;

 # Lint every script named on the command line.  Each file is read
 # in full (CRLF line ends are translated by the I/O layer), handed
 # to a GrepLintParser both as one string and as a list of lines,
 # and parsed; check_test() reports findings as it goes.  The exit
 # status is whatever $exit_code ended up as.
 foreach my $file (@ARGV) {
 	open(my $in, '<:unix:crlf', $file) or die "$0: $file: $!\n";
 	my @raw_lines = <$in>;
 	close $in;
 	my $source = join('', @raw_lines);
 	my $linter = GrepLintParser->new(\$source);
 	@{$linter}{qw(file raw_lines)} = ($file, \@raw_lines);
 	$linter->parse();
 }
 exit $exit_code;
	#!/usr/bin/env perl

	# Detect bare 'grep' used as a test assertion where 'test_grep'
	# should be used, and '! test_grep' where 'test_grep !' should
	# be used.
	#
	# The shared shell parser tokenizes test bodies so that 'grep'
	# inside heredocs, command substitutions like $(grep ...), and
	# quoted strings is collapsed into a single token and never seen
	# by our check. A line-oriented approach would need to track
	# heredoc delimiters, nested $() depth, and cross-line pipe
	# state to avoid false positives on patterns like:
	#
	# write_script foo.sh <<-\EOF
	# grep pattern file # data, not an assertion
	# EOF
	#
	# The Lexer already handles these.

	use warnings;
	use strict;
	use File::Basename;
	do(dirname($0) . "/lib-shell-parser.pl")
	or die "$0: failed to load lib-shell-parser.pl: $@$!\n";

	my $exit_code = 0;

	# GrepLintParser inherits ScriptParser's ability to find
	# test_expect_success/failure blocks and call check_test()
	# on each body. We override check_test() to walk the token
	# stream looking for bare grep assertions.
	package GrepLintParser;

	our @ISA = ('ScriptParser');

	# Tokens that terminate one command and leave the following token
	# in command position.  In 'echo foo && grep bar file', the 'grep'
	# that follows '&&' is a command word and is a candidate for the
	# lint.
	#
	# The operators are spelled as plain strings: inside qw() a
	# backslash before '|' is kept, which would produce keys that
	# never match a shell token.
	my %cmd_start;
	$cmd_start{$_} = 1
		for ('&&', '||', ';', ';;', 'do', 'then', 'else', 'elif', "\n", '{', '(');

	# Tokens which, when they follow grep inside the same command, show
	# that grep is feeding a pipe or has a redirection attached ('<'
	# included).
	my %filter_op;
	$filter_op{$_} = 1 for ('|', '>', '>>', '<');

	# Report whether the token at index $pos of @$tokens sits where the
	# shell expects a program name rather than an argument.  A 'grep'
	# in that slot is an assertion worth flagging; a 'grep' that is an
	# argument ('test_must_fail grep') or a plain value ('for cmd in
	# grep sed') is not.
	#
	# Returns 1 for command position, 0 otherwise.
	sub is_command_word {
		my ($tokens, $pos) = @_;
		return 1 if $pos == 0;
		for (my $j = $pos - 1; $j >= 0; $j--) {
			my $t = $tokens->[$j]->[0];
			# After a separator or pipe, a new command starts.
			return 1 if $cmd_start{$t} || $t eq '|';
			# After '}' or ')', what follows is a separator or
			# redirect on the compound command, not a new command.
			return 0 if $t eq '}' || $t eq ')';
			# '!' is a prefix that does not consume command
			# position; keep scanning to find what precedes it.
			next if $t eq '!';
			# Any other word means we are past the command word.
			return 0;
		}
		return 1;
	}

	# Tell whether source line $ln (1-based) of @$raw_lines carries the
	# 'lint-ok' marker.  Some bare greps are deliberate (the file may
	# not exist, grep is used as a data filter, ...), and marking the
	# line with '# lint-ok' silences the diagnostic.
	#
	# Returns the result of the pattern match; an out-of-range line
	# number produces a warning and 0.
	sub lint_ok {
		my ($raw_lines, $ln) = @_;
		if ($ln < 1 || $ln > @$raw_lines) {
			warn "lint_ok: line number $ln out of range (1.." .
			    scalar(@$raw_lines) . ")\n";
			return 0;
		}
		return $raw_lines->[$ln - 1] =~ /lint-ok/;
	}

	# Grep is a filter (not an assertion) if it receives piped input,
	# or if a pipe or redirect token follows it within the same
	# command.  Both directions are checked from grep's position
	# ($pos) in the token stream (@$tokens).
	#
	# Returns 1 when grep is a filter, 0 otherwise.
	sub is_filter {
		my ($tokens, $pos) = @_;
		# Backward: is grep receiving piped input?
		# Newlines don't break pipes ('cmd |\n grep' is one
		# pipeline), so skip past them, and past any '!' prefix.
		# Any other token means the pipe, if there is one further
		# back, belongs to an earlier command:
		#
		#   git log | sort >actual
		#   grep pattern actual      # assertion, not a filter
		#
		# so the scan must stop there instead of walking through
		# ordinary words in search of a '|'.
		for (my $j = $pos - 1; $j >= 0; $j--) {
			my $t = $tokens->[$j]->[0];
			return 1 if $t eq '|';
			next if $t eq "\n" || $t eq '!';
			last;
		}
		# Forward: is a pipe or redirect attached to grep?
		# Unlike the backward scan, we do not skip newlines here:
		# a bare newline is a command boundary, and redirects or
		# pipes must appear on the same line as grep (or after a
		# line continuation, which the Lexer consumes).
		for (my $j = $pos + 1; $j < @$tokens; $j++) {
			my $t = $tokens->[$j]->[0];
			return 0 if $cmd_start{$t};
			return 1 if $filter_op{$t};
		}
		return 0;
	}

	# Translate a line number counted inside a test body into a line
	# number of the file.
	#
	#   $body_lineno  1-based line within the body
	#   $body_token   body token; slot 0 is its text, slot 4 the line
	#                 on which the body ends
	#   $raw_lines    reference to the file's lines
	#   $body_start   file line on which the body begins
	#
	# In a double-quoted body every backslash-newline pair is spliced
	# out by the Lexer, so the body has fewer lines than the stretch of
	# file it came from.  For such bodies the source lines are walked
	# from $body_start and one is added to the result per continuation
	# line met before the requested body line is reached.
	#
	# When the inputs fail a sanity check a warning is issued and the
	# best available number is returned: $body_lineno itself if
	# $body_start is unusable, else the unadjusted file line.
	sub body_to_file_line {
		my ($body_lineno, $body_token, $raw_lines, $body_start) = @_;
		my $body_text = $body_token->[0];
		my $body_end_line = $body_token->[4];
		if (!$body_start || $body_start < 1) {
			warn "body_start is not a positive integer\n";
			return $body_lineno;
		}
		my $file_lineno = $body_start + $body_lineno - 1;
		# Line splices only happen inside double quotes.
		return $file_lineno if $body_text !~ /^"/;
		if (!$body_end_line || $body_end_line < $body_start) {
			warn "body_end_line is not set for double-quoted body\n";
			return $file_lineno;
		}
		my $end = $body_end_line;
		my $file_length = scalar(@$raw_lines);
		if ($end > $file_length) {
			warn "body_end_line ($end) exceeds file length (" .
			    $file_length . ")\n";
			return $file_lineno;
		}
		my ($splices, $logical) = (0, 0);
		for my $src_ln ($body_start .. $end) {
			last if $logical >= $body_lineno;
			# An odd number of trailing backslashes is a
			# continuation (\<nl>); an even number is a run of
			# escaped backslashes (\\) and ends the line normally.
			my ($slashes) = $raw_lines->[$src_ln - 1] =~ /(\\*)$/;
			if (defined($slashes) && length($slashes) % 2 == 1) {
				$splices++;
			} else {
				$logical++;
			}
		}
		if ($logical < $body_lineno) {
			warn "body_lineno ($body_lineno) not found within body range " .
			    "($body_start..$end)\n";
		}
		return $file_lineno + $splices;
	}

	# ScriptParser calls this for each test body found in the script.
	# After $self, the arguments are the title token, the body token
	# and, when the body token unwraps to '-', a hash describing the
	# heredoc that holds the real body ({content}, {start_line}).
	#
	# The body is tokenized and every '!' or 'grep' found at command
	# position is examined.  Each finding is printed as
	# "file:line: error: ..." and sets $exit_code to 1.
	sub check_test {
		my $self = shift @_;
		# The title plays no part in the lint; it is taken off @_ so
		# that the body token comes next.
		my $title = ScriptParser::unwrap(shift @_);
		my $body_token = shift @_;
		my $body_start = $body_token->[3];
		my $body = ScriptParser::unwrap($body_token);
		# Handle heredoc-style test bodies:
		#   test_expect_success 'title' - <<\EOF
		#   grep pattern file
		#   EOF
		# The '-' signals that the body follows as a heredoc.
		if ($body eq '-') {
			my $herebody = shift @_;
			if ($herebody) {
				$body = $herebody->{content};
				$body_start = $herebody->{start_line};
			}
		}
		return unless $body;

		my $raw_lines = $self->{raw_lines};

		# The outer parser gives us the body as an opaque string.
		# Parse it to get individual tokens with command boundaries.
		my $parser = ShellParser->new(\$body);
		my @tokens = $parser->parse();

		my $file = $self->{file};

		for (my $i = 0; $i < @tokens; $i++) {
			my $text = $tokens[$i]->[0];
			# Only '!' and 'grep' can lead to a diagnostic.  Test
			# the text first: it is a cheap comparison, whereas the
			# command-position scan and the line mapping below walk
			# the token stream and the source lines.
			next unless defined($text) &&
			    ($text eq '!' || $text eq 'grep');
			next unless is_command_word(\@tokens, $i);

			my $token_lineno = $tokens[$i]->[3];
			unless (defined($token_lineno) && $token_lineno >= 1) {
				warn "token has no line number\n";
				next;
			}
			my $file_lineno = body_to_file_line(
				$token_lineno,
				$body_token, $raw_lines, $body_start);

			# '!' negates the exit code without consuming command
			# position.  '! test_grep' is an anti-pattern because
			# test_grep only prints diagnostics on grep failure,
			# and '!' inverts after that decision is already made.
			if ($text eq '!') {
				if ($i + 1 < @tokens &&
				    $tokens[$i + 1]->[0] eq 'test_grep' &&
				    !lint_ok($raw_lines, $file_lineno)) {
					print "$file:$file_lineno: error: ",
					    'use "test_grep !" instead of ',
					    '"! test_grep"', "\n";
					$exit_code = 1;
				}
				next;
			}

			# What remains is 'grep' at command position.  Unless
			# it is a filter or the line is marked lint-ok, it is
			# a test assertion that should use test_grep for
			# better failure diagnostics.
			if (!is_filter(\@tokens, $i) &&
			    !lint_ok($raw_lines, $file_lineno)) {
				print "$file:$file_lineno: error: ",
				    "bare grep outside pipeline ",
				    "(use test_grep)\n";
				$exit_code = 1;
			}
		}
	}

	package main;

	# Lint every script named on the command line.  Each file is read
	# in full (CRLF line ends are translated by the I/O layer), handed
	# to a GrepLintParser both as one string and as a list of lines,
	# and parsed; check_test() reports findings as it goes.  The exit
	# status is whatever $exit_code ended up as.
	foreach my $file (@ARGV) {
		open(my $in, '<:unix:crlf', $file) or die "$0: $file: $!\n";
		my @raw_lines = <$in>;
		close $in;
		my $source = join('', @raw_lines);
		my $linter = GrepLintParser->new(\$source);
		@{$linter}{qw(file raw_lines)} = ($file, \@raw_lines);
		$linter->parse();
	}
	exit $exit_code;