|  | #!/usr/bin/env perl | 
|  | # | 
|  | # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com> | 
|  | # | 
|  | # This tool scans shell scripts for test definitions and checks those tests for | 
|  | # problems, such as broken &&-chains, which might hide bugs in the tests | 
|  | # themselves or in behaviors being exercised by the tests. | 
|  | # | 
|  | # Input arguments are pathnames of shell scripts containing test definitions, | 
|  | # or globs referencing a collection of scripts. For each problem discovered, | 
|  | # the pathname of the script containing the test is printed along with the test | 
|  | # name and the test body with a `?!FOO?!` annotation at the location of each | 
|  | # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken | 
|  | # &&-chain. Returns zero if no problems are discovered, otherwise non-zero. | 
|  |  | 
|  | use warnings; | 
|  | use strict; | 
|  | use Config; | 
|  | use File::Glob; | 
|  | use Getopt::Long; | 
|  |  | 
|  | my $jobs = -1; | 
|  | my $show_stats; | 
|  | my $emit_all; | 
|  |  | 
|  | # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 | 
|  | # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although | 
|  | # similar to lexical analyzers for other languages, this one differs in a few | 
|  | # substantial ways due to quirks of the shell command language. | 
|  | # | 
|  | # For instance, in many languages, newline is just whitespace like space or | 
|  | # TAB, but in shell a newline is a command separator, thus a distinct lexical | 
|  | # token. A newline is significant and returned as a distinct token even at the | 
|  | # end of a shell comment. | 
|  | # | 
|  | # In other languages, `1+2` would typically be scanned as three tokens | 
|  | # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar | 
|  | # `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well. | 
|  | # In shell, several characters with special meaning lose that meaning when not | 
|  | # surrounded by whitespace. For instance, the negation operator `!` is special | 
|  | # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is | 
|  | # just a plain character in the longer token "foo!uucp". In many other | 
|  | # languages, `"string"/foo:'string'` might be scanned as five tokens ("string", | 
|  | # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. | 
|  | # | 
|  | # The lexical analyzer for the shell command language is also somewhat unusual | 
|  | # in that it recursively invokes the parser to handle the body of `$(...)` | 
|  | # expressions which can contain arbitrary shell code. Such expressions may be | 
|  | # encountered both inside and outside of double-quoted strings. | 
|  | # | 
|  | # The lexical analyzer is responsible for consuming shell here-doc bodies which | 
|  | # extend from the line following a `<<TAG` operator until a line consisting | 
|  | # solely of `TAG`. Here-doc consumption begins when a newline is encountered. | 
|  | # It is legal for multiple here-doc `<<TAG` operators to be present on a single | 
|  | # line, in which case their bodies must be present one following the next, and | 
|  | # are consumed in the (left-to-right) order the `<<TAG` operators appear on the | 
|  | # line. A special complication is that the bodies of all here-docs must be | 
|  | # consumed when the newline is encountered even if the parse context depth has | 
|  | # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs | 
|  | # "A" and "B" must be consumed even though "A" was introduced outside the | 
|  | # recursive parse context in which "B" was introduced and in which the newline | 
|  | # is encountered. | 
|  | package Lexer; | 
|  |  | 
|  | sub new { | 
|  | my ($class, $parser, $s) = @_; | 
|  | bless { | 
|  | parser => $parser, | 
|  | buff => $s, | 
|  | lineno => 1, | 
|  | heretags => [] | 
|  | } => $class; | 
|  | } | 
|  |  | 
|  | sub scan_heredoc_tag { | 
|  | my $self = shift @_; | 
|  | ${$self->{buff}} =~ /\G(-?)/gc; | 
|  | my $indented = $1; | 
|  | my $token = $self->scan_token(); | 
|  | return "<<$indented" unless $token; | 
|  | my $tag = $token->[0]; | 
|  | $tag =~ s/['"\\]//g; | 
|  | push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag"); | 
|  | return "<<$indented$tag"; | 
|  | } | 
|  |  | 
|  | sub scan_op { | 
|  | my ($self, $c) = @_; | 
|  | my $b = $self->{buff}; | 
|  | return $c unless $$b =~ /\G(.)/sgc; | 
|  | my $cc = $c . $1; | 
|  | return scan_heredoc_tag($self) if $cc eq '<<'; | 
|  | return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; | 
|  | pos($$b)--; | 
|  | return $c; | 
|  | } | 
|  |  | 
|  | sub scan_sqstring { | 
|  | my $self = shift @_; | 
|  | ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; | 
|  | my $s = $1; | 
|  | $self->{lineno} += () = $s =~ /\n/sg; | 
|  | return "'" . $s; | 
|  | } | 
|  |  | 
|  | sub scan_dqstring { | 
|  | my $self = shift @_; | 
|  | my $b = $self->{buff}; | 
|  | my $s = '"'; | 
|  | while (1) { | 
|  | # slurp up non-special characters | 
|  | $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; | 
|  | # handle special characters | 
|  | last unless $$b =~ /\G(.)/sgc; | 
|  | my $c = $1; | 
|  | $s .= '"', last if $c eq '"'; | 
|  | $s .= '$' . $self->scan_dollar(), next if $c eq '$'; | 
|  | if ($c eq '\\') { | 
|  | $s .= '\\', last unless $$b =~ /\G(.)/sgc; | 
|  | $c = $1; | 
|  | $self->{lineno}++, next if $c eq "\n"; # line splice | 
|  | # backslash escapes only $, `, ", \ in dq-string | 
|  | $s .= '\\' unless $c =~ /^[\$`"\\]$/; | 
|  | $s .= $c; | 
|  | next; | 
|  | } | 
|  | die("internal error scanning dq-string '$c'\n"); | 
|  | } | 
|  | $self->{lineno} += () = $s =~ /\n/sg; | 
|  | return $s; | 
|  | } | 
|  |  | 
|  | sub scan_balanced { | 
|  | my ($self, $c1, $c2) = @_; | 
|  | my $b = $self->{buff}; | 
|  | my $depth = 1; | 
|  | my $s = $c1; | 
|  | while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { | 
|  | $s .= $1; | 
|  | $depth++, next if $s =~ /\Q$c1\E$/; | 
|  | $depth--; | 
|  | last if $depth == 0; | 
|  | } | 
|  | $self->{lineno} += () = $s =~ /\n/sg; | 
|  | return $s; | 
|  | } | 
|  |  | 
|  | sub scan_subst { | 
|  | my $self = shift @_; | 
|  | my @tokens = $self->{parser}->parse(qr/^\)$/); | 
|  | $self->{parser}->next_token(); # closing ")" | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub scan_dollar { | 
|  | my $self = shift @_; | 
|  | my $b = $self->{buff}; | 
|  | return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) | 
|  | return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) | 
|  | return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} | 
|  | return $1 if $$b =~ /\G(\w+)/gc; # $var | 
|  | return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. | 
|  | return ''; | 
|  | } | 
|  |  | 
|  | sub swallow_heredocs { | 
|  | my $self = shift @_; | 
|  | my $b = $self->{buff}; | 
|  | my $tags = $self->{heretags}; | 
|  | while (my $tag = shift @$tags) { | 
|  | my $start = pos($$b); | 
|  | my $indent = $tag =~ s/^\t// ? '\\s*' : ''; | 
|  | $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc; | 
|  | my $body = substr($$b, $start, pos($$b) - $start); | 
|  | $self->{lineno} += () = $body =~ /\n/sg; | 
|  | } | 
|  | } | 
|  |  | 
|  | sub scan_token { | 
|  | my $self = shift @_; | 
|  | my $b = $self->{buff}; | 
|  | my $token = ''; | 
|  | my ($start, $startln); | 
|  | RESTART: | 
|  | $startln = $self->{lineno}; | 
|  | $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) | 
|  | $start = pos($$b) || 0; | 
|  | $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment | 
|  | while (1) { | 
|  | # slurp up non-special characters | 
|  | $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; | 
|  | # handle special characters | 
|  | last unless $$b =~ /\G(.)/sgc; | 
|  | my $c = $1; | 
|  | pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token | 
|  | pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; | 
|  | $token .= $self->scan_sqstring(), next if $c eq "'"; | 
|  | $token .= $self->scan_dqstring(), next if $c eq '"'; | 
|  | $token .= $c . $self->scan_dollar(), next if $c eq '$'; | 
|  | $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; | 
|  | $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; | 
|  | $token = $c, last if $c =~ /^[(){}]$/; | 
|  | if ($c eq '\\') { | 
|  | $token .= '\\', last unless $$b =~ /\G(.)/sgc; | 
|  | $c = $1; | 
|  | $self->{lineno}++, next if $c eq "\n" && length($token); # line splice | 
|  | $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice | 
|  | $token .= '\\' . $c; | 
|  | next; | 
|  | } | 
|  | die("internal error scanning character '$c'\n"); | 
|  | } | 
|  | return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef; | 
|  | } | 
|  |  | 
|  | # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It | 
|  | # is a recursive descent parser very roughly modeled after section 2.10 "Shell | 
|  | # Grammar" of POSIX chapter 2 "Shell Command Language". | 
|  | package ShellParser; | 
|  |  | 
|  | sub new { | 
|  | my ($class, $s) = @_; | 
|  | my $self = bless { | 
|  | buff => [], | 
|  | stop => [], | 
|  | output => [] | 
|  | } => $class; | 
|  | $self->{lexer} = Lexer->new($self, $s); | 
|  | return $self; | 
|  | } | 
|  |  | 
|  | sub next_token { | 
|  | my $self = shift @_; | 
|  | return pop(@{$self->{buff}}) if @{$self->{buff}}; | 
|  | return $self->{lexer}->scan_token(); | 
|  | } | 
|  |  | 
|  | sub untoken { | 
|  | my $self = shift @_; | 
|  | push(@{$self->{buff}}, @_); | 
|  | } | 
|  |  | 
|  | sub peek { | 
|  | my $self = shift @_; | 
|  | my $token = $self->next_token(); | 
|  | return undef unless defined($token); | 
|  | $self->untoken($token); | 
|  | return $token; | 
|  | } | 
|  |  | 
|  | sub stop_at { | 
|  | my ($self, $token) = @_; | 
|  | return 1 unless defined($token); | 
|  | my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; | 
|  | return defined($stop) && $token->[0] =~ $stop; | 
|  | } | 
|  |  | 
|  | sub expect { | 
|  | my ($self, $expect) = @_; | 
|  | my $token = $self->next_token(); | 
|  | return $token if defined($token) && $token->[0] eq $expect; | 
|  | push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n"); | 
|  | $self->untoken($token) if defined($token); | 
|  | return (); | 
|  | } | 
|  |  | 
|  | sub optional_newlines { | 
|  | my $self = shift @_; | 
|  | my @tokens; | 
|  | while (my $token = $self->peek()) { | 
|  | last unless $token->[0] eq "\n"; | 
|  | push(@tokens, $self->next_token()); | 
|  | } | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub parse_group { | 
|  | my $self = shift @_; | 
|  | return ($self->parse(qr/^}$/), | 
|  | $self->expect('}')); | 
|  | } | 
|  |  | 
|  | sub parse_subshell { | 
|  | my $self = shift @_; | 
|  | return ($self->parse(qr/^\)$/), | 
|  | $self->expect(')')); | 
|  | } | 
|  |  | 
|  | sub parse_case_pattern { | 
|  | my $self = shift @_; | 
|  | my @tokens; | 
|  | while (defined(my $token = $self->next_token())) { | 
|  | push(@tokens, $token); | 
|  | last if $token->[0] eq ')'; | 
|  | } | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub parse_case { | 
|  | my $self = shift @_; | 
|  | my @tokens; | 
|  | push(@tokens, | 
|  | $self->next_token(), # subject | 
|  | $self->optional_newlines(), | 
|  | $self->expect('in'), | 
|  | $self->optional_newlines()); | 
|  | while (1) { | 
|  | my $token = $self->peek(); | 
|  | last unless defined($token) && $token->[0] ne 'esac'; | 
|  | push(@tokens, | 
|  | $self->parse_case_pattern(), | 
|  | $self->optional_newlines(), | 
|  | $self->parse(qr/^(?:;;|esac)$/)); # item body | 
|  | $token = $self->peek(); | 
|  | last unless defined($token) && $token->[0] ne 'esac'; | 
|  | push(@tokens, | 
|  | $self->expect(';;'), | 
|  | $self->optional_newlines()); | 
|  | } | 
|  | push(@tokens, $self->expect('esac')); | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub parse_for { | 
|  | my $self = shift @_; | 
|  | my @tokens; | 
|  | push(@tokens, | 
|  | $self->next_token(), # variable | 
|  | $self->optional_newlines()); | 
|  | my $token = $self->peek(); | 
|  | if (defined($token) && $token->[0] eq 'in') { | 
|  | push(@tokens, | 
|  | $self->expect('in'), | 
|  | $self->optional_newlines()); | 
|  | } | 
|  | push(@tokens, | 
|  | $self->parse(qr/^do$/), # items | 
|  | $self->expect('do'), | 
|  | $self->optional_newlines(), | 
|  | $self->parse_loop_body(), | 
|  | $self->expect('done')); | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub parse_if { | 
|  | my $self = shift @_; | 
|  | my @tokens; | 
|  | while (1) { | 
|  | push(@tokens, | 
|  | $self->parse(qr/^then$/), # if/elif condition | 
|  | $self->expect('then'), | 
|  | $self->optional_newlines(), | 
|  | $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body | 
|  | my $token = $self->peek(); | 
|  | last unless defined($token) && $token->[0] eq 'elif'; | 
|  | push(@tokens, $self->expect('elif')); | 
|  | } | 
|  | my $token = $self->peek(); | 
|  | if (defined($token) && $token->[0] eq 'else') { | 
|  | push(@tokens, | 
|  | $self->expect('else'), | 
|  | $self->optional_newlines(), | 
|  | $self->parse(qr/^fi$/)); # else body | 
|  | } | 
|  | push(@tokens, $self->expect('fi')); | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub parse_loop_body { | 
|  | my $self = shift @_; | 
|  | return $self->parse(qr/^done$/); | 
|  | } | 
|  |  | 
|  | sub parse_loop { | 
|  | my $self = shift @_; | 
|  | return ($self->parse(qr/^do$/), # condition | 
|  | $self->expect('do'), | 
|  | $self->optional_newlines(), | 
|  | $self->parse_loop_body(), | 
|  | $self->expect('done')); | 
|  | } | 
|  |  | 
|  | sub parse_func { | 
|  | my $self = shift @_; | 
|  | return ($self->expect('('), | 
|  | $self->expect(')'), | 
|  | $self->optional_newlines(), | 
|  | $self->parse_cmd()); # body | 
|  | } | 
|  |  | 
|  | sub parse_bash_array_assignment { | 
|  | my $self = shift @_; | 
|  | my @tokens = $self->expect('('); | 
|  | while (defined(my $token = $self->next_token())) { | 
|  | push(@tokens, $token); | 
|  | last if $token->[0] eq ')'; | 
|  | } | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | my %compound = ( | 
|  | '{' => \&parse_group, | 
|  | '(' => \&parse_subshell, | 
|  | 'case' => \&parse_case, | 
|  | 'for' => \&parse_for, | 
|  | 'if' => \&parse_if, | 
|  | 'until' => \&parse_loop, | 
|  | 'while' => \&parse_loop); | 
|  |  | 
|  | sub parse_cmd { | 
|  | my $self = shift @_; | 
|  | my $cmd = $self->next_token(); | 
|  | return () unless defined($cmd); | 
|  | return $cmd if $cmd->[0] eq "\n"; | 
|  |  | 
|  | my $token; | 
|  | my @tokens = $cmd; | 
|  | if ($cmd->[0] eq '!') { | 
|  | push(@tokens, $self->parse_cmd()); | 
|  | return @tokens; | 
|  | } elsif (my $f = $compound{$cmd->[0]}) { | 
|  | push(@tokens, $self->$f()); | 
|  | } elsif (defined($token = $self->peek()) && $token->[0] eq '(') { | 
|  | if ($cmd->[0] !~ /\w=$/) { | 
|  | push(@tokens, $self->parse_func()); | 
|  | return @tokens; | 
|  | } | 
|  | my @array = $self->parse_bash_array_assignment(); | 
|  | $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array); | 
|  | $tokens[-1]->[2] = $array[$#array][2] if @array; | 
|  | } | 
|  |  | 
|  | while (defined(my $token = $self->next_token())) { | 
|  | $self->untoken($token), last if $self->stop_at($token); | 
|  | push(@tokens, $token); | 
|  | last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; | 
|  | } | 
|  | push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n"; | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | sub accumulate { | 
|  | my ($self, $tokens, $cmd) = @_; | 
|  | push(@$tokens, @$cmd); | 
|  | } | 
|  |  | 
|  | sub parse { | 
|  | my ($self, $stop) = @_; | 
|  | push(@{$self->{stop}}, $stop); | 
|  | goto DONE if $self->stop_at($self->peek()); | 
|  | my @tokens; | 
|  | while (my @cmd = $self->parse_cmd()) { | 
|  | $self->accumulate(\@tokens, \@cmd); | 
|  | last if $self->stop_at($self->peek()); | 
|  | } | 
|  | DONE: | 
|  | pop(@{$self->{stop}}); | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | # TestParser is a subclass of ShellParser which, beyond parsing shell script | 
|  | # code, is also imbued with semantic knowledge of test construction, and checks | 
|  | # tests for common problems (such as broken &&-chains) which might hide bugs in | 
|  | # the tests themselves or in behaviors being exercised by the tests. As such, | 
|  | # TestParser is only called upon to parse test bodies, not the top-level | 
|  | # scripts in which the tests are defined. | 
|  | package TestParser; | 
|  |  | 
|  | use base 'ShellParser'; | 
|  |  | 
|  | sub new { | 
|  | my $class = shift @_; | 
|  | my $self = $class->SUPER::new(@_); | 
|  | $self->{problems} = []; | 
|  | return $self; | 
|  | } | 
|  |  | 
|  | sub find_non_nl { | 
|  | my $tokens = shift @_; | 
|  | my $n = shift @_; | 
|  | $n = $#$tokens if !defined($n); | 
|  | $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n"; | 
|  | return $n; | 
|  | } | 
|  |  | 
|  | sub ends_with { | 
|  | my ($tokens, $needles) = @_; | 
|  | my $n = find_non_nl($tokens); | 
|  | for my $needle (reverse(@$needles)) { | 
|  | return undef if $n < 0; | 
|  | $n = find_non_nl($tokens, $n), next if $needle eq "\n"; | 
|  | return undef if $$tokens[$n]->[0] !~ $needle; | 
|  | $n--; | 
|  | } | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | sub match_ending { | 
|  | my ($tokens, $endings) = @_; | 
|  | for my $needles (@$endings) { | 
|  | next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); | 
|  | return 1 if ends_with($tokens, $needles); | 
|  | } | 
|  | return undef; | 
|  | } | 
|  |  | 
|  | sub parse_loop_body { | 
|  | my $self = shift @_; | 
|  | my @tokens = $self->SUPER::parse_loop_body(@_); | 
|  | # did loop signal failure via "|| return" or "|| exit"? | 
|  | return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens; | 
|  | # did loop upstream of a pipe signal failure via "|| echo 'impossible | 
|  | # text'" as the final command in the loop body? | 
|  | return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); | 
|  | # flag missing "return/exit" handling explicit failure in loop body | 
|  | my $n = find_non_nl(\@tokens); | 
|  | push(@{$self->{problems}}, ['LOOP', $tokens[$n]]); | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | my @safe_endings = ( | 
|  | [qr/^(?:&&|\|\||\||&)$/], | 
|  | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], | 
|  | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], | 
|  | [qr/^(?:exit|return|continue)$/], | 
|  | [qr/^(?:exit|return|continue)$/, qr/^;$/]); | 
|  |  | 
|  | sub accumulate { | 
|  | my ($self, $tokens, $cmd) = @_; | 
|  | my $problems = $self->{problems}; | 
|  |  | 
|  | # no previous command to check for missing "&&" | 
|  | goto DONE unless @$tokens; | 
|  |  | 
|  | # new command is empty line; can't yet check if previous is missing "&&" | 
|  | goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n"; | 
|  |  | 
|  | # did previous command end with "&&", "|", "|| return" or similar? | 
|  | goto DONE if match_ending($tokens, \@safe_endings); | 
|  |  | 
|  | # if this command handles "$?" specially, then okay for previous | 
|  | # command to be missing "&&" | 
|  | for my $token (@$cmd) { | 
|  | goto DONE if $token->[0] =~ /\$\?/; | 
|  | } | 
|  |  | 
|  | # if this command is "false", "return 1", or "exit 1" (which signal | 
|  | # failure explicitly), then okay for all preceding commands to be | 
|  | # missing "&&" | 
|  | if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) { | 
|  | @$problems = grep {$_->[0] ne 'AMP'} @$problems; | 
|  | goto DONE; | 
|  | } | 
|  |  | 
|  | # flag missing "&&" at end of previous command | 
|  | my $n = find_non_nl($tokens); | 
|  | push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0; | 
|  |  | 
|  | DONE: | 
|  | $self->SUPER::accumulate($tokens, $cmd); | 
|  | } | 
|  |  | 
|  | # ScriptParser is a subclass of ShellParser which identifies individual test | 
|  | # definitions within test scripts, and passes each test body through TestParser | 
|  | # to identify possible problems. ShellParser detects test definitions not only | 
|  | # at the top-level of test scripts but also within compound commands such as | 
|  | # loops and function definitions. | 
|  | package ScriptParser; | 
|  |  | 
|  | use base 'ShellParser'; | 
|  |  | 
|  | sub new { | 
|  | my $class = shift @_; | 
|  | my $self = $class->SUPER::new(@_); | 
|  | $self->{ntests} = 0; | 
|  | return $self; | 
|  | } | 
|  |  | 
|  | # extract the raw content of a token, which may be a single string or a | 
|  | # composition of multiple strings and non-string character runs; for instance, | 
|  | # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` | 
|  | sub unwrap { | 
|  | my $token = (@_ ? shift @_ : $_)->[0]; | 
|  | # simple case: 'sqstring' or "dqstring" | 
|  | return $token if $token =~ s/^'([^']*)'$/$1/; | 
|  | return $token if $token =~ s/^"([^"]*)"$/$1/; | 
|  |  | 
|  | # composite case | 
|  | my ($s, $q, $escaped); | 
|  | while (1) { | 
|  | # slurp up non-special characters | 
|  | $s .= $1 if $token =~ /\G([^\\'"]*)/gc; | 
|  | # handle special characters | 
|  | last unless $token =~ /\G(.)/sgc; | 
|  | my $c = $1; | 
|  | $q = undef, next if defined($q) && $c eq $q; | 
|  | $q = $c, next if !defined($q) && $c =~ /^['"]$/; | 
|  | if ($c eq '\\') { | 
|  | last unless $token =~ /\G(.)/sgc; | 
|  | $c = $1; | 
|  | $s .= '\\' if $c eq "\n"; # preserve line splice | 
|  | } | 
|  | $s .= $c; | 
|  | } | 
|  | return $s | 
|  | } | 
|  |  | 
|  | sub check_test { | 
|  | my $self = shift @_; | 
|  | my ($title, $body) = map(unwrap, @_); | 
|  | $self->{ntests}++; | 
|  | my $parser = TestParser->new(\$body); | 
|  | my @tokens = $parser->parse(); | 
|  | my $problems = $parser->{problems}; | 
|  | return unless $emit_all || @$problems; | 
|  | my $c = main::fd_colors(1); | 
|  | my $lineno = $_[1]->[3]; | 
|  | my $start = 0; | 
|  | my $checked = ''; | 
|  | for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) { | 
|  | my ($label, $token) = @$_; | 
|  | my $pos = $token->[2]; | 
|  | $checked .= substr($body, $start, $pos - $start) . " ?!$label?! "; | 
|  | $start = $pos; | 
|  | } | 
|  | $checked .= substr($body, $start); | 
|  | $checked =~ s/^/$lineno++ . ' '/mge; | 
|  | $checked =~ s/^\d+ \n//; | 
|  | $checked =~ s/(\s) \?!/$1?!/mg; | 
|  | $checked =~ s/\?! (\s)/?!$1/mg; | 
|  | $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg; | 
|  | $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg; | 
|  | $checked .= "\n" unless $checked =~ /\n$/; | 
|  | push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); | 
|  | } | 
|  |  | 
|  | sub parse_cmd { | 
|  | my $self = shift @_; | 
|  | my @tokens = $self->SUPER::parse_cmd(); | 
|  | return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/; | 
|  | my $n = $#tokens; | 
|  | $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; | 
|  | $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body | 
|  | $self->check_test($tokens[2], $tokens[3]) if $n > 2;  # prereq title body | 
|  | return @tokens; | 
|  | } | 
|  |  | 
|  | # main contains high-level functionality for processing command-line switches, | 
|  | # feeding input test scripts to ScriptParser, and reporting results. | 
|  | package main; | 
|  |  | 
|  | my $getnow = sub { return time(); }; | 
|  | my $interval = sub { return time() - shift; }; | 
|  | if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { | 
|  | $getnow = sub { return [Time::HiRes::gettimeofday()]; }; | 
|  | $interval = sub { return Time::HiRes::tv_interval(shift); }; | 
|  | } | 
|  |  | 
|  | # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this | 
|  | # outside of get_colors() since under 'ithreads' all threads use %ENV of main | 
|  | # thread and ignore %ENV changes in subthreads. | 
|  | $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; | 
|  |  | 
|  | my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => ''); | 
|  | my %COLORS = (); | 
|  | sub get_colors { | 
|  | return \%COLORS if %COLORS; | 
|  | if (exists($ENV{NO_COLOR})) { | 
|  | %COLORS = @NOCOLORS; | 
|  | return \%COLORS; | 
|  | } | 
|  | if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) { | 
|  | %COLORS = (bold  => "\e[1m", | 
|  | rev   => "\e[7m", | 
|  | dim   => "\e[2m", | 
|  | reset => "\e[0m", | 
|  | blue  => "\e[34m", | 
|  | green => "\e[32m", | 
|  | red   => "\e[31m"); | 
|  | return \%COLORS; | 
|  | } | 
|  | if (system("tput sgr0 >/dev/null 2>&1") == 0 && | 
|  | system("tput bold >/dev/null 2>&1") == 0 && | 
|  | system("tput rev  >/dev/null 2>&1") == 0 && | 
|  | system("tput dim  >/dev/null 2>&1") == 0 && | 
|  | system("tput setaf 1 >/dev/null 2>&1") == 0) { | 
|  | %COLORS = (bold  => `tput bold`, | 
|  | rev   => `tput rev`, | 
|  | dim   => `tput dim`, | 
|  | reset => `tput sgr0`, | 
|  | blue  => `tput setaf 4`, | 
|  | green => `tput setaf 2`, | 
|  | red   => `tput setaf 1`); | 
|  | return \%COLORS; | 
|  | } | 
|  | %COLORS = @NOCOLORS; | 
|  | return \%COLORS; | 
|  | } | 
|  |  | 
|  | my %FD_COLORS = (); | 
|  | sub fd_colors { | 
|  | my $fd = shift; | 
|  | return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); | 
|  | $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS}; | 
|  | return $FD_COLORS{$fd}; | 
|  | } | 
|  |  | 
|  | sub ncores { | 
|  | # Windows | 
|  | return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); | 
|  | # Linux / MSYS2 / Cygwin / WSL | 
|  | do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor[\s\d]*:/, <>)); } if -r '/proc/cpuinfo'; | 
|  | # macOS & BSD | 
|  | return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | sub show_stats { | 
|  | my ($start_time, $stats) = @_; | 
|  | my $walltime = $interval->($start_time); | 
|  | my ($usertime) = times(); | 
|  | my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); | 
|  | my $c = fd_colors(2); | 
|  | print(STDERR $c->{green}); | 
|  | for (@$stats) { | 
|  | my ($worker, $nscripts, $ntests, $nerrs) = @$_; | 
|  | print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); | 
|  | $total_workers++; | 
|  | $total_scripts += $nscripts; | 
|  | $total_tests += $ntests; | 
|  | $total_errs += $nerrs; | 
|  | } | 
|  | printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); | 
|  | } | 
|  |  | 
|  | sub check_script { | 
|  | my ($id, $next_script, $emit) = @_; | 
|  | my ($nscripts, $ntests, $nerrs) = (0, 0, 0); | 
|  | while (my $path = $next_script->()) { | 
|  | $nscripts++; | 
|  | my $fh; | 
|  | unless (open($fh, "<", $path)) { | 
|  | $emit->("?!ERR?! $path: $!\n"); | 
|  | next; | 
|  | } | 
|  | my $s = do { local $/; <$fh> }; | 
|  | close($fh); | 
|  | my $parser = ScriptParser->new(\$s); | 
|  | 1 while $parser->parse_cmd(); | 
|  | if (@{$parser->{output}}) { | 
|  | my $c = fd_colors(1); | 
|  | my $s = join('', @{$parser->{output}}); | 
|  | $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); | 
|  | $nerrs += () = $s =~ /\?![^?]+\?!/g; | 
|  | } | 
|  | $ntests += $parser->{ntests}; | 
|  | } | 
|  | return [$id, $nscripts, $ntests, $nerrs]; | 
|  | } | 
|  |  | 
|  | sub exit_code { | 
|  | my $stats = shift @_; | 
|  | for (@$stats) { | 
|  | my ($worker, $nscripts, $ntests, $nerrs) = @$_; | 
|  | return 1 if $nerrs; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | Getopt::Long::Configure(qw{bundling}); | 
|  | GetOptions( | 
|  | "emit-all!" => \$emit_all, | 
|  | "jobs|j=i" => \$jobs, | 
|  | "stats|show-stats!" => \$show_stats) or die("option error\n"); | 
|  | $jobs = ncores() if $jobs < 1; | 
|  |  | 
|  | my $start_time = $getnow->(); | 
|  | my @stats; | 
|  |  | 
|  | my @scripts; | 
|  | push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); | 
|  | unless (@scripts) { | 
|  | show_stats($start_time, \@stats) if $show_stats; | 
|  | exit; | 
|  | } | 
|  |  | 
|  | unless ($Config{useithreads} && eval { | 
|  | require threads; threads->import(); | 
|  | require Thread::Queue; Thread::Queue->import(); | 
|  | 1; | 
|  | }) { | 
|  | push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); | 
|  | show_stats($start_time, \@stats) if $show_stats; | 
|  | exit(exit_code(\@stats)); | 
|  | } | 
|  |  | 
|  | my $script_queue = Thread::Queue->new(); | 
|  | my $output_queue = Thread::Queue->new(); | 
|  |  | 
|  | sub next_script { return $script_queue->dequeue(); } | 
|  | sub emit { $output_queue->enqueue(@_); } | 
|  |  | 
|  | sub monitor { | 
|  | while (my $s = $output_queue->dequeue()) { | 
|  | print($s); | 
|  | } | 
|  | } | 
|  |  | 
|  | my $mon = threads->create({'context' => 'void'}, \&monitor); | 
|  | threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; | 
|  |  | 
|  | $script_queue->enqueue(@scripts); | 
|  | $script_queue->end(); | 
|  |  | 
|  | for (threads->list()) { | 
|  | push(@stats, $_->join()) unless $_ == $mon; | 
|  | } | 
|  |  | 
|  | $output_queue->end(); | 
|  | $mon->join(); | 
|  |  | 
|  | show_stats($start_time, \@stats) if $show_stats; | 
|  | exit(exit_code(\@stats)); |