# Tcl prototype of a next-generation concise expression evaluator '='
#
# Bare words are treated as variable references.
# Only numeric and boolean values and operations are supported.

proc deb1 s {}
proc deb2 s {}
# uncomment these for debug output
#proc deb1 s {puts $s}
#proc deb2 s {puts $s}

proc = args {
    set exp [join $args]
    set tokens [tokenise $exp]
    deb1 "TOKENS = '$tokens'"
    set code [compile $tokens]
    deb1 "GENERATED CODE:\n$code\n" 
    uplevel [list ::tcl::unsupported::assemble $code]
}

proc tokenise input {
    set op_re {\*\*|%|/|\*|-|\+|>>|<<|>=|<=|>|<|!=|==|&&|&|\|\||\||\^|::|:|\?|,|!|~|\(|\)}
    set pos 0
    set output {}
    foreach ind [regexp -indices -all -inline -- $op_re $input] {
        lassign $ind start end
        set prev [string trim [string range $input $pos $start-1]]
        if {$prev ne {}} {lappend output $prev}
        lappend output [string range $input $start $end]
        set pos [expr {$end + 1}]
    }
    set rest [string trim [string range $input $pos end]]
    if {$rest ne {}} {lappend output $rest}
    return $output
}

proc compile toks {

    if {[llength $toks] == 0} {error "Calc: nothing to calculate"}
    set ::tokens $toks
    set ::tokpos 0
    set ::depth 0
    return [parse 0] 
}

# Pratt Parser loosely based on https://www.rosettacode.org/wiki/Arithmetic_evaluation#Nim

# Define infix operators, their precedences and bytecodes
foreach {op prec code} {
    )  0 -
    ,  0 -
    ?  1 -
    :  1 -
    || 2 lor
    && 3 land
    |  4 bitor
    ^  5 bitxor
    &  6 bitand
    == 7 eq
    != 7 neq
    <  8 lt
    >  8 gt
    <= 8 le
    >= 8 ge
    << 9 lshift
    >> 9 rshift
    +  10 add
    -  10 sub
    *  11 mult
    /  11 div
    %  11 mod
    ** 12 expon
} {
    set inprec($op) $prec
    set incode($op) $code
}

# Define prefix operators and their bytecodes
foreach {op code} {
    + uplus
    - uminus
    ! not
    ~ bitnot
} {
    set precode($op) $code
}
# Prefix ops all have the same precedence
set preprec 13

# Parse expression until we hit an operator with precedence lower than min_prec.
# The expression is supplied as a list of tokens in the global var tokens.
# The current position in the input is in global var tokpos.
# Returns the TAL bytecode to evaluate the expression.
proc parse min_prec {

    set token [lindex $::tokens $::tokpos]
    set dep [incr ::depth]
    deb2 "[string repeat {  } $dep]PARSE min_prec=$min_prec tokpos=$::tokpos token='$token'"
    incr ::tokpos
    set opcodes [parsePrefix $token]
    set ::depth $dep

    while {$::tokpos < [llength $::tokens]} {
        
        set token [lindex $::tokens $::tokpos]
        if {[info exists ::inprec($token)]} {
            set tok_prec $::inprec($token)
        } else {
            error "Calc: expected operator but found '$token'"
        }
        deb2 "[string repeat {  } $dep]PARSE token=$token tok_prec=$tok_prec"
        if {$tok_prec < $min_prec} {
            break
        }
	# Binary ops are left-associative except for **
        if {$tok_prec == $min_prec && $token ne "**"} {
            break
        }
        # if-then-else needs special handling
        incr ::tokpos
        if {$token eq "?"} {
            append opcodes [parseTernary]
            continue
	}
        # Infix operator
        append opcodes [parse $tok_prec] "$::incode($token); "
    }
    deb2 "[string repeat {  } $dep]PARSE opcodes='$opcodes'"
    set ::depth [expr {$dep - 1}]
    return $opcodes
}

# Parse expression up to the first operator at the same level of parentheses.
# Returns the bytecode to evaluate the subexpression.
proc parsePrefix token {
    set dep [incr ::depth]
    deb2 "[string repeat {  } $dep]PARSEPREFIX token=`$token` tokpos=$::tokpos"

    # Is it a number? In C might use Tcl_GetNumberFromObj() here
    if {[string is entier $token] || [string is double $token]} {
        return "push $token; "
    }
    # Is it boolean? In C might use Tcl_GetBoolean() here
    if {[string is boolean $token]} {
        return "push $token; "
    }
    # Parenthesised subexpression?
    if {$token eq "("} {
        set opcodes [parse 0]
        set token [lindex $::tokens $::tokpos]
        if {$token eq ")"} {
            incr ::tokpos
            return $opcodes
	}
        error "Calc: expected ')' but found '$token'"
    }
    # Unary operator?
    if {$token in {+ - ! ~}} {
        return "[parse $::preprec]$::precode($token); "
    }
    # Function call?
    set nexttok [lindex $::tokens $::tokpos]
    if {$nexttok eq "(" && [string is alpha $token]} {
        set fun [namespace which tcl::mathfunc::$token]
        if {$fun ne {}} {
            incr ::tokpos
            set opcodes "push $fun; "
            append opcodes [parseFuncArgs]
            return $opcodes
        }
    }
    # Variable reference?
    set name {}
    while {$token eq {::} || [regexp {^[[:alpha:]][[:alnum:]_]*$} $token]} {
        append name $token
        set token [lindex $::tokens $::tokpos]
        incr ::tokpos
    }
    if {$name ne {}} {
        incr ::tokpos -1
        set opcodes "push $name; "
        append opcodes "loadStk; "
        return $opcodes
    }
    error "Calc: expected start of expression but found '$token'"
}

# Parse zero or more arguments to a math function. The arguments are
# expressions separated by commas and terminated by a closing parenthesis.
# Returns the bytecode to evaluate the arguments and call the function.
proc parseFuncArgs {} {
    set dep [incr ::depth]
    deb2 "[string repeat {  } $dep]PARSEFUNCARGS tokpos=$::tokpos"

    set token [lindex $::tokens $::tokpos]
    set arg_num 1
    while 1 {
        if {$token eq ")"} {
            incr ::tokpos
            append opcodes "invokeStk $arg_num; "
            return $opcodes
        }
        append opcodes [parse 0]
        incr arg_num

        set token [lindex $::tokens $::tokpos]
        switch $token {
            , { incr ::tokpos }
            ) {}
            default {
                error "Calc: expected ')' or ',' but found '$token'"
            }
        }
    }
}

# We have just seen the '?' of an if-then-else, so parse the rest of that.
# Returns the bytecode to check the previous condition, then evaluate the
# appropriate branch.
proc parseTernary {} {
    set dep [incr ::depth]
    deb2 "[string repeat {  } $dep]PARSETERNARY tokpos=$::tokpos"

    set else else[incr ::labelcount]
    set end end$::labelcount
    append opcodes "jumpFalse $else; [parse $::inprec(:)]"

    set token [lindex $::tokens $::tokpos]
    if {$token ne ":"} {
        error "Calc: expected ':' but found '$token'"
    }
    incr ::tokpos

    append opcodes "jump $end; label $else; [parse $::inprec(:)]"
    append opcodes "label $end; nop; "
    return $opcodes
}