From f6b2baa3e883c174e3e0fa79bc9e739d2b0971dd Mon Sep 17 00:00:00 2001 From: Nico Huber Date: Fri, 2 Apr 2021 21:23:32 +0200 Subject: [PATCH] util/kconfig_lint: Turn handle_expressions() into a parser I wished there was a way to do this in smaller steps, but with every line fixed an error somewhere else became visible. Here is a (probably incomplete) list of the issues: * Only one set of parentheses was supported. This is a hard to solve problem without a real parser (one solution is to use an recursive RE, see below). * The precedence order was wrong. Might have been adapted just to give a positive result for the arbitrary state of the tree. * Numbered match variables (e.g. $1, $2, etc.) are not local. Calling handle_expressions() recursively once with $1, then with $2, resulted in using the final $2 after the first recursive call (garbage, practically). Also, symbol and expression parsing was mixed, making things harder to follow. To remedy the issues: * Split handle_symbol() out. It is called with whitespace stripped, to keep the uglier REs in handle_expressions(). * Match balanced parentheses and quotes when splitting expressions. In this recursive RE /(\((?:[^\(\)]++|(?-1))*\))/ the `(?-1)` references the outer-most group, thus the whole expression itself. So it matches a pair of parentheses with a mix of non-parentheses and the recursive rule itself inside. This allows us to: * Order the expression matches according to their precedence rules. Now we can match ` '||' ` first as we should and everything else falls into its place. * Remove the bail-out that silenced the undefined behavior. Change-Id: Ibc1be79adc07792f0721f0dc08b50422b6da88a9 Signed-off-by: Nico Huber Reviewed-on: https://review.coreboot.org/c/coreboot/+/52067 Tested-by: build bot (Jenkins) Reviewed-by: Angel Pons --- util/lint/kconfig_lint | 93 +++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/util/lint/kconfig_lint b/util/lint/kconfig_lint index 43a3ca684e..7f042f6aff 100755 --- a/util/lint/kconfig_lint +++ b/util/lint/kconfig_lint @@ -947,57 +947,74 @@ sub handle_if_line { } #------------------------------------------------------------------------------- -# handle_expressions - log which symbols are being used +# handle_symbol - log which symbols are being used +#------------------------------------------------------------------------------- +sub handle_symbol { + my ( $symbol, $filename, $line_no ) = @_; + + #filter constant symbols first + if ( $symbol =~ /^[yn]$/ ) { # constant y/n + return; + } + if ( $symbol =~ /^-?(?:0x)?\p{XDigit}+$/ ) { # int/hex values + return; + } + if ( $symbol =~ /^"[^"]*"$/ ) { # string values + return; + } + + if ( $symbol =~ /^([A-Za-z0-9_]+)$/ ) { # actual symbol + add_referenced_symbol( $1, $filename, $line_no, 'expression' ); + } + else { + show_error("Unrecognized expression: expected symbol, " + . "found '$symbol' in $filename line $line_no."); + } +} + +#------------------------------------------------------------------------------- +# handle_expressions - find symbols in expressions #------------------------------------------------------------------------------- sub handle_expressions { my ( $exprline, $inside_config, $filename, $line_no ) = @_; - return unless ($exprline); + my $strip = qr/\s*(.*[^\s]+)\s*/; - #filter constant symbols first - if ( $exprline =~ /^\s*"?([yn])"?\s*$/ ) { # constant y/n - return; - } - elsif ( $exprline =~ /^\s*"?((?:-)\d+)"?\s*$/ ) { # int values - return; - } - elsif ( $exprline =~ /^\s*"?((?:-)?(?:0x)?\p{XDigit})+"?\s*$/ ) { # hex values - return; - } - elsif ( $exprline =~ /^\s*("[^"]*")\s*$/ ) { # String values - return; - } - elsif ( $exprline =~ /^\s*([A-Za-z0-9_]+)\s*$/ ) { # (1) - add_referenced_symbol( $1, $filename, $line_no, 'expression' ); - } - elsif ( $exprline =~ /^\s*!(.+)$/ ) { # '!' (5) + my $parens = qr/(\((?:[^\(\)]++|(?-1))*\))/; + my $quotes = qr/"[^"]*"/; + my $balanced = qr/((?:$parens|$quotes|[^\(\)"])+)/; + if ( $exprline =~ /^\s*$balanced\s*(?:\|\||&&)\s*(.+)$/ ) { + # '||' , '&&' (7)(6) + my ( $lhs, $rhs ) = ( $1, $3 ); + handle_expressions( $lhs, $inside_config, $filename, $line_no ); + handle_expressions( $rhs, $inside_config, $filename, $line_no ); + } + elsif ( $exprline =~ /^\s*!(.+)$/ ) { + # '!' (5) handle_expressions( $1, $inside_config, $filename, $line_no ); } - elsif ( $exprline =~ /^\s*\(([^)]+)\)\s*$/ ) { # '(' ')' (4) + elsif ( $exprline =~ /^\s*$parens\s*$/ ) { + # '(' ')' (4) + $exprline =~ /^\s*\((.*)\)\s*$/; handle_expressions( $1, $inside_config, $filename, $line_no ); } - elsif ( $exprline =~ /^\s*(.+)\s*!=\s*(.+)\s*$/ ) { # '!=' (3) - handle_expressions( $1, $inside_config, $filename, $line_no ); - handle_expressions( $2, $inside_config, $filename, $line_no ); + elsif ( $exprline =~ /^\s*($quotes|[^"\s]+)\s*!=$strip$/ ) { + # '!=' (3) + my ( $lhs, $rhs ) = ( $1, $2 ); + handle_symbol( $lhs, $filename, $line_no ); + handle_symbol( $rhs, $filename, $line_no ); } - elsif ( $exprline =~ /^\s*(.+)\s*=\s*(.+)\s*$/ ) { # '=' (2) - handle_expressions( $1, $inside_config, $filename, $line_no ); - handle_expressions( $2, $inside_config, $filename, $line_no ); + elsif ( $exprline =~ /^\s*($quotes|[^"\s]+)\s*=$strip$/ ) { + # '=' (2) + my ( $lhs, $rhs ) = ( $1, $2 ); + handle_symbol( $lhs, $filename, $line_no ); + handle_symbol( $rhs, $filename, $line_no ); } - elsif ( $exprline =~ /^\s*([^(]+|\(.+\))\s*&&\s*(.+)\s*$/ ) { # '&&' (6) - handle_expressions( $1, $inside_config, $filename, $line_no ); - handle_expressions( $2, $inside_config, $filename, $line_no ); + elsif ( $exprline =~ /^$strip$/ ) { + # (1) + handle_symbol( $1, $filename, $line_no ); } - elsif ( $exprline =~ /^\s*([^(]+|\(.+\))\s*\|\|\s*(.+)\s*$/ ) { # '||' (7) - handle_expressions( $1, $inside_config, $filename, $line_no ); - handle_expressions( $2, $inside_config, $filename, $line_no ); - } - else { - show_error("Unrecognized expression '$exprline' in $filename line $line_no."); - } - - return; } #-------------------------------------------------------------------------------