b-site/vendor/github.com/bazelbuild/buildtools/build/parse.y

// BUILD file parser.

// This is a yacc grammar. Its lexer is in lex.go.
//
// For a good introduction to writing yacc grammars, see
// Kernighan and Pike's book The Unix Programming Environment.
//
// The definitive yacc manual is
// Stephen C. Johnson and Ravi Sethi, "Yacc: A Parser Generator",
// online at http://plan9.bell-labs.com/sys/doc/yacc.pdf.

%{
package build
%}

// The generated parser puts these fields in a struct named yySymType.
// (The name %union is historical, but it is inaccurate for Go.)
%union {
	// input tokens
	tok       string     // raw input syntax
	str       string     // decoding of quoted string
	pos       Position   // position of token
	triple    bool       // was string triple quoted?

	// partial syntax trees
	expr      Expr
	exprs     []Expr
	forc      *ForClause
	ifs       []*IfClause
	forifs    *ForClauseWithIfClausesOpt
	forsifs   []*ForClauseWithIfClausesOpt
	string    *StringExpr
	strings   []*StringExpr
	block     CodeBlock

	// supporting information
	comma     Position   // position of trailing comma in list, if present
	lastRule  Expr  // most recent rule, to attach line comments to
}

// These declarations set the type for a $ reference ($$, $1, $2, ...)
// based on the kind of symbol it refers to. Other fields can be referred
// to explicitly, as in $<tok>1.
//
// %token is for input tokens generated by the lexer.
// %type is for higher-level grammar rules defined here.
//
// It is possible to put multiple tokens per line, but it is easier to
// keep ordered using a sparser one-per-line list.

%token	<pos>	'%'
%token	<pos>	'('
%token	<pos>	')'
%token	<pos>	'*'
%token	<pos>	'+'
%token	<pos>	','
%token	<pos>	'-'
%token	<pos>	'.'
%token	<pos>	'/'
%token	<pos>	':'
%token	<pos>	'<'
%token	<pos>	'='
%token	<pos>	'>'
%token	<pos>	'['
%token	<pos>	']'
%token	<pos>	'{'
%token	<pos>	'}'

// By convention, yacc token names are all caps.
// However, we do not want to export them from the Go package
// we are creating, so prefix them all with underscores.

%token	<pos>	_AUGM    // augmented assignment
%token	<pos>	_AND     // keyword and
%token	<pos>	_COMMENT // top-level # comment
%token	<pos>	_EOF     // end of file
%token	<pos>	_EQ      // operator ==
%token	<pos>	_FOR     // keyword for
%token	<pos>	_GE      // operator >=
%token	<pos>	_IDENT   // non-keyword identifier or number
%token	<pos>	_IF      // keyword if
%token	<pos>	_ELSE    // keyword else
%token	<pos>	_ELIF    // keyword elif
%token	<pos>	_IN      // keyword in
%token	<pos>	_IS      // keyword is
%token	<pos>	_LAMBDA  // keyword lambda
%token	<pos>	_LOAD    // keyword load
%token	<pos>	_LE      // operator <=
%token	<pos>	_NE      // operator !=
%token	<pos>	_NOT     // keyword not
%token	<pos>	_OR      // keyword or
%token	<pos>	_PYTHON  // uninterpreted Python block
%token	<pos>	_STRING  // quoted string
%token	<pos>	_DEF     // keyword def
%token	<pos>	_RETURN  // keyword return
%token	<pos>	_INDENT  // indentation
%token	<pos>	_UNINDENT // unindentation

%type	<pos>		comma_opt
%type	<expr>		expr
%type	<expr>		expr_opt
%type	<expr>		primary_expr
%type	<exprs>		exprs
%type	<exprs>		exprs_opt
%type	<exprs>		primary_exprs
%type	<forc>		for_clause
%type	<forifs>	for_clause_with_if_clauses_opt
%type	<forsifs>	for_clauses_with_if_clauses_opt
%type	<expr>		ident
%type	<ifs>		if_clauses_opt
%type	<exprs>		stmts
%type	<exprs>		stmt          // a simple_stmt or a for/if/def block
%type	<expr>		block_stmt    // a single for/if/def statement
%type	<expr>		if_else_block // a single if-else statement
%type	<exprs>		simple_stmt   // One or many small_stmts on one line, e.g. 'a = f(x); return str(a)'
%type	<expr>		small_stmt    // A single statement, e.g. 'a = f(x)'
%type <exprs>		small_stmts_continuation  // A sequence of `';' small_stmt`
%type	<expr>		keyvalue
%type	<exprs>		keyvalues
%type	<exprs>		keyvalues_no_comma
%type	<string>	string
%type	<strings>	strings
%type	<block>		suite

// Operator precedence.
// Operators listed lower in the table bind tighter.

// We tag rules with this fake, low precedence to indicate
// that when the rule is involved in a shift/reduce
// conflict, we prefer that the parser shift (try for a longer parse).
// Shifting is the default resolution anyway, but stating it explicitly
// silences yacc's warning for that specific case.
%left	ShiftInstead

%left	'\n'
%left	_ASSERT
// '=' and augmented assignments have the lowest precedence
// e.g. "x = a if c > 0 else 'bar'"
// followed by
// 'if' and 'else' which have lower precedence than all other operators.
// e.g. "a, b if c > 0 else 'foo'" is either a tuple of (a,b) or 'foo'
// and not a tuple of "(a, (b if ... ))"
%left  '=' _AUGM
%left  _IF _ELSE _ELIF
%left  ','
%left  ':'
%left  _IN _NOT _IS
%left  _OR
%left  _AND
%left  '<' '>' _EQ _NE _LE _GE
%left  '+' '-'
%left  '*' '/' '%'
%left  '.' '[' '('
%right _UNARY
%left  _STRING

%%

// Grammar rules.
//
// A note on names: if foo is a rule, then foos is a sequence of foos
// (with interleaved commas or other syntax as appropriate)
// and foo_opt is an optional foo.

file:
	stmts _EOF
	{
		yylex.(*input).file = &File{Stmt: $1}
		return 0
	}

suite:
	'\n' _INDENT stmts _UNINDENT
	{
		$$ = CodeBlock{
			Start: $2,
			Statements: $3,
			End: End{Pos: $4},
		}
	}
| simple_stmt
	{
		// simple_stmt is never empty
		start, _ := $1[0].Span()
		_, end := $1[len($1)-1].Span()
		$$ = CodeBlock{
			Start: start,
			Statements: $1,
			End: End{Pos: end},
		}
	}

stmts:
	{
		$$ = nil
		$<lastRule>$ = nil
	}
|	stmts stmt
	{
		// If this statement follows a comment block,
		// attach the comments to the statement.
		if cb, ok := $<lastRule>1.(*CommentBlock); ok {
			$$ = append($1[:len($1)-1], $2...)
			$2[0].Comment().Before = cb.After
			$<lastRule>$ = $2[len($2)-1]
			break
		}

		// Otherwise add to list.
		$$ = append($1, $2...)
		$<lastRule>$ = $2[len($2)-1]

		// Consider this input:
		//
		//	foo()
		//	# bar
		//	baz()
		//
		// If we've just parsed baz(), the # bar is attached to
		// foo() as an After comment. Make it a Before comment
		// for baz() instead.
		if x := $<lastRule>1; x != nil {
			com := x.Comment()
			// stmt is never empty
			$2[0].Comment().Before = com.After
			com.After = nil
		}
	}
|	stmts '\n'
	{
		// Blank line; sever last rule from future comments.
		$$ = $1
		$<lastRule>$ = nil
	}
|	stmts _COMMENT '\n'
	{
		$$ = $1
		$<lastRule>$ = $<lastRule>1
		if $<lastRule>$ == nil {
			cb := &CommentBlock{Start: $2}
			$$ = append($$, cb)
			$<lastRule>$ = cb
		}
		com := $<lastRule>$.Comment()
		com.After = append(com.After, Comment{Start: $2, Token: $<tok>2})
	}

stmt:
	simple_stmt
	{
		$$ = $1
	}
|	block_stmt
	{
		$$ = []Expr{$1}
	}

block_stmt:
	_DEF _IDENT '(' exprs_opt ')' ':' suite
	{
		$$ = &FuncDef{
			Start: $1,
			Name: $<tok>2,
			ListStart: $3,
			Args: $4,
			Body: $7,
			End: $7.End,
			ForceCompact: forceCompact($3, $4, $5),
			ForceMultiLine: forceMultiLine($3, $4, $5),
		}
	}
|	_FOR primary_exprs _IN expr ':' suite
	{
		$$ = &ForLoop{
			Start: $1,
			LoopVars: $2,
			Iterable: $4,
			Body: $6,
			End: $6.End,
		}
	}
| if_else_block
	{
		$$ = $1
	}

if_else_block:
	_IF expr ':' suite
	{
		$$ = &IfElse{
			Start: $1,
			Conditions: []Condition{
				Condition{
					If: $2,
					Then: $4,
				},
			},
			End: $4.End,
		}
	}
| if_else_block elif expr ':' suite
	{
		block := $1.(*IfElse)
		block.Conditions = append(block.Conditions, Condition{
			If: $3,
			Then: $5,
		})
		block.End = $5.End
		$$ = block
	}
| if_else_block _ELSE ':' suite
	{
		block := $1.(*IfElse)
		block.Conditions = append(block.Conditions, Condition{
			Then: $4,
		})
		block.End = $4.End
		$$ = block
	}

elif:
	_ELSE _IF
|	_ELIF

simple_stmt:
	small_stmt small_stmts_continuation semi_opt '\n'
	{
		$$ = append([]Expr{$1}, $2...)
		$<lastRule>$ = $$[len($$)-1]
	}

small_stmts_continuation:
	{
		$$ = []Expr{}
	}
| small_stmts_continuation ';' small_stmt
	{
		$$ = append($1, $3)
	}

small_stmt:
	expr %prec ShiftInstead
|	_RETURN expr
	{
		_, end := $2.Span()
		$$ = &ReturnExpr{
			X: $2,
			End: end,
		}
	}
|	_RETURN
	{
		$$ = &ReturnExpr{End: $1}
	}
|	_PYTHON
	{
		$$ = &PythonBlock{Start: $1, Token: $<tok>1}
	}

semi_opt:
|	';'

primary_expr:
	ident
|	primary_expr '.' _IDENT
	{
		$$ = &DotExpr{
			X: $1,
			Dot: $2,
			NamePos: $3,
			Name: $<tok>3,
		}
	}
|	_LOAD '(' exprs_opt ')'
	{
		$$ = &CallExpr{
                        X: &LiteralExpr{Start: $1, Token: "load"},
			ListStart: $2,
			List: $3,
			End: End{Pos: $4},
			ForceCompact: forceCompact($2, $3, $4),
			ForceMultiLine: forceMultiLine($2, $3, $4),
		}
	}
|	primary_expr '(' exprs_opt ')'
	{
		$$ = &CallExpr{
			X: $1,
			ListStart: $2,
			List: $3,
			End: End{Pos: $4},
			ForceCompact: forceCompact($2, $3, $4),
			ForceMultiLine: forceMultiLine($2, $3, $4),
		}
	}
|	primary_expr '[' expr ']'
	{
		$$ = &IndexExpr{
			X: $1,
			IndexStart: $2,
			Y: $3,
			End: $4,
		}
	}
|	primary_expr '[' expr_opt ':' expr_opt ']'
	{
		$$ = &SliceExpr{
			X: $1,
			SliceStart: $2,
			From: $3,
			FirstColon: $4,
			To: $5,
			End: $6,
		}
	}
|	primary_expr '[' expr_opt ':' expr_opt ':' expr_opt ']'
	{
		$$ = &SliceExpr{
			X: $1,
			SliceStart: $2,
			From: $3,
			FirstColon: $4,
			To: $5,
			SecondColon: $6,
			Step: $7,
			End: $8,
		}
	}
|	primary_expr '(' expr for_clauses_with_if_clauses_opt ')'
	{
		$$ = &CallExpr{
			X: $1,
			ListStart: $2,
			List: []Expr{
				&ListForExpr{
					Brack: "",
					Start: $2,
					X: $3,
					For: $4,
					End: End{Pos: $5},
				},
			},
			End: End{Pos: $5},
		}
	}
|	strings %prec ShiftInstead
	{
		if len($1) == 1 {
			$$ = $1[0]
			break
		}
		$$ = $1[0]
		for _, x := range $1[1:] {
			_, end := $$.Span()
			$$ = binary($$, end, "+", x)
		}
	}
|	'[' exprs_opt ']'
	{
		$$ = &ListExpr{
			Start: $1,
			List: $2,
			Comma: $<comma>2,
			End: End{Pos: $3},
			ForceMultiLine: forceMultiLine($1, $2, $3),
		}
	}
|	'[' expr for_clauses_with_if_clauses_opt ']'
	{
		exprStart, _ := $2.Span()
		$$ = &ListForExpr{
			Brack: "[]",
			Start: $1,
			X: $2,
			For: $3,
			End: End{Pos: $4},
			ForceMultiLine: $1.Line != exprStart.Line,
		}
	}
|	'(' expr for_clauses_with_if_clauses_opt ')'
	{
		exprStart, _ := $2.Span()
		$$ = &ListForExpr{
			Brack: "()",
			Start: $1,
			X: $2,
			For: $3,
			End: End{Pos: $4},
			ForceMultiLine: $1.Line != exprStart.Line,
		}
	}
|	'{' keyvalue for_clauses_with_if_clauses_opt '}'
	{
		exprStart, _ := $2.Span()
		$$ = &ListForExpr{
			Brack: "{}",
			Start: $1,
			X: $2,
			For: $3,
			End: End{Pos: $4},
			ForceMultiLine: $1.Line != exprStart.Line,
		}
	}
|	'{' keyvalues '}'
	{
		$$ = &DictExpr{
			Start: $1,
			List: $2,
			Comma: $<comma>2,
			End: End{Pos: $3},
			ForceMultiLine: forceMultiLine($1, $2, $3),
		}
	}
|	'{' exprs_opt '}'
	{
		$$ = &SetExpr{
			Start: $1,
			List: $2,
			Comma: $<comma>2,
			End: End{Pos: $3},
			ForceMultiLine: forceMultiLine($1, $2, $3),
		}
	}
|	'(' exprs_opt ')'
	{
		if len($2) == 1 && $<comma>2.Line == 0 {
			// Just a parenthesized expression, not a tuple.
			$$ = &ParenExpr{
				Start: $1,
				X: $2[0],
				End: End{Pos: $3},
				ForceMultiLine: forceMultiLine($1, $2, $3),
			}
		} else {
			$$ = &TupleExpr{
				Start: $1,
				List: $2,
				Comma: $<comma>2,
				End: End{Pos: $3},
				ForceCompact: forceCompact($1, $2, $3),
				ForceMultiLine: forceMultiLine($1, $2, $3),
			}
		}
	}
|	'-' primary_expr  %prec _UNARY { $$ = unary($1, $<tok>1, $2) }

expr:
	primary_expr
|	_LAMBDA exprs ':' expr
	{
		$$ = &LambdaExpr{
			Lambda: $1,
			Var: $2,
			Colon: $3,
			Expr: $4,
		}
	}
|	_NOT expr %prec _UNARY { $$ = unary($1, $<tok>1, $2) }
|	'*' expr  %prec _UNARY { $$ = unary($1, $<tok>1, $2) }
|	expr '*' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '%' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '/' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '+' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '-' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '<' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '>' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _EQ expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _LE expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _NE expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _GE expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr '=' expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _AUGM expr    { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _IN expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _NOT _IN expr { $$ = binary($1, $2, "not in", $4) }
|	expr _OR expr      { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _AND expr     { $$ = binary($1, $2, $<tok>2, $3) }
|	expr _IS expr
	{
		if b, ok := $3.(*UnaryExpr); ok && b.Op == "not" {
			$$ = binary($1, $2, "is not", b.X)
		} else {
			$$ = binary($1, $2, $<tok>2, $3)
		}
	}
| expr _IF expr _ELSE expr
	{
		$$ = &ConditionalExpr{
			Then: $1,
			IfStart: $2,
			Test: $3,
			ElseStart: $4,
			Else: $5,
		}
	}

expr_opt:
	{
		$$ = nil
	}
|	expr

// comma_opt is an optional comma. If the comma is present,
// the rule's value is the position of the comma. Otherwise
// the rule's value is the zero position. Tracking this
// lets us distinguish (x) and (x,).
comma_opt:
	{
		$$ = Position{}
	}
|	','

keyvalue:
	expr ':' expr  {
		$$ = &KeyValueExpr{
			Key: $1,
			Colon: $2,
			Value: $3,
		}
	}

keyvalues_no_comma:
	keyvalue
	{
		$$ = []Expr{$1}
	}
|	keyvalues_no_comma ',' keyvalue
	{
		$$ = append($1, $3)
	}

keyvalues:
	keyvalues_no_comma
	{
		$$ = $1
	}
|	keyvalues_no_comma ','
	{
		$$ = $1
	}

exprs:
	expr
	{
		$$ = []Expr{$1}
	}
|	exprs ',' expr
	{
		$$ = append($1, $3)
	}

exprs_opt:
	{
		$$, $<comma>$ = nil, Position{}
	}
|	exprs comma_opt
	{
		$$, $<comma>$ = $1, $2
	}

primary_exprs:
	primary_expr
	{
		$$ = []Expr{$1}
	}
|	primary_exprs ',' primary_expr
	{
		$$ = append($1, $3)
	}

string:
	_STRING
	{
		$$ = &StringExpr{
			Start: $1,
			Value: $<str>1,
			TripleQuote: $<triple>1,
			End: $1.add($<tok>1),
			Token: $<tok>1,
		}
	}

strings:
	string
	{
		$$ = []*StringExpr{$1}
	}
|	strings string
	{
		$$ = append($1, $2)
	}

ident:
	_IDENT
	{
		$$ = &LiteralExpr{Start: $1, Token: $<tok>1}
	}

for_clause:
	_FOR primary_exprs _IN expr
	{
		$$ = &ForClause{
			For: $1,
			Var: $2,
			In: $3,
			Expr: $4,
		}
	}

for_clause_with_if_clauses_opt:
	for_clause if_clauses_opt {
		$$ = &ForClauseWithIfClausesOpt{
			For: $1,
			Ifs: $2,
		}
	}

for_clauses_with_if_clauses_opt:
	for_clause_with_if_clauses_opt
	{
		$$ = []*ForClauseWithIfClausesOpt{$1}
	}
| for_clauses_with_if_clauses_opt for_clause_with_if_clauses_opt {
		$$ = append($1, $2)
	}

if_clauses_opt:
	{
		$$ = nil
	}
|	if_clauses_opt _IF expr
	{
		$$ = append($1, &IfClause{
			If: $2,
			Cond: $3,
		})
	}

%%

// Go helper code.

// unary returns a unary expression with the given
// position, operator, and subexpression.
func unary(pos Position, op string, x Expr) Expr {
	return &UnaryExpr{
		OpStart: pos,
		Op:      op,
		X:       x,
	}
}

// binary returns a binary expression with the given
// operands, position, and operator.
func binary(x Expr, pos Position, op string, y Expr) Expr {
	_, xend := x.Span()
	ystart, _ := y.Span()
	return &BinaryExpr{
		X:       x,
		OpStart: pos,
		Op:      op,
		LineBreak: xend.Line < ystart.Line,
		Y:       y,
	}
}

// isSimpleExpression returns whether an expression is simple and allowed to exist in
// compact forms of sequences.
// The formal criteria are the following: an expression is considered simple if it's
// a literal (variable, string or a number), a literal with a unary operator or an empty sequence.
func isSimpleExpression(expr *Expr) bool {
	switch x := (*expr).(type) {
	case *LiteralExpr, *StringExpr:
		return true
	case *UnaryExpr:
		_, ok := x.X.(*LiteralExpr)
		return ok
	case *ListExpr:
		return len(x.List) == 0
	case *TupleExpr:
		return len(x.List) == 0
	case *DictExpr:
		return len(x.List) == 0
	case *SetExpr:
		return len(x.List) == 0
	default:
		return false
	}
}

// forceCompact returns the setting for the ForceCompact field for a call or tuple.
//
// NOTE 1: The field is called ForceCompact, not ForceSingleLine,
// because it only affects the formatting associated with the call or tuple syntax,
// not the formatting of the arguments. For example:
//
//	call([
//		1,
//		2,
//		3,
//	])
//
// is still a compact call even though it runs on multiple lines.
//
// In contrast the multiline form puts a linebreak after the (.
//
//	call(
//		[
//			1,
//			2,
//			3,
//		],
//	)
//
// NOTE 2: Because of NOTE 1, we cannot use start and end on the
// same line as a signal for compact mode: the formatting of an
// embedded list might move the end to a different line, which would
// then look different on rereading and cause buildifier not to be
// idempotent. Instead, we have to look at properties guaranteed
// to be preserved by the reformatting, namely that the opening
// paren and the first expression are on the same line and that
// each subsequent expression begins on the same line as the last
// one ended (no line breaks after comma).
func forceCompact(start Position, list []Expr, end Position) bool {
	if len(list) <= 1 {
		// The call or tuple will probably be compact anyway; don't force it.
		return false
	}

	// If there are any named arguments or non-string, non-literal
	// arguments, cannot force compact mode.
	line := start.Line
	for _, x := range list {
		start, end := x.Span()
		if start.Line != line {
			return false
		}
		line = end.Line
		if !isSimpleExpression(&x) {
			return false
		}
	}
	return end.Line == line
}

// forceMultiLine returns the setting for the ForceMultiLine field.
func forceMultiLine(start Position, list []Expr, end Position) bool {
	if len(list) > 1 {
		// The call will be multiline anyway, because it has multiple elements. Don't force it.
		return false
	}

	if len(list) == 0 {
		// Empty list: use position of brackets.
		return start.Line != end.Line
	}

	// Single-element list.
	// Check whether opening bracket is on different line than beginning of
	// element, or closing bracket is on different line than end of element.
	elemStart, elemEnd := list[0].Span()
	return start.Line != elemStart.Line || end.Line != elemEnd.Line
}