(* CS:3820, Fall 2016 *)
(* Cesare Tinelli *)
(* The University of Iowa *)

(* F# examples seen in class

   Adapted from "Programming Language Concepts" by P. Sestoft, 
   Springer 2012
*)

(* Evaluation, checking, and compilation of object language expressions *)
(* Stack machines for expression evaluation                             *) 

(* Object language expressions with variables and let binders *)

type expr = 
  | CstI of int
  | Var of string
  | Prim of string * expr * expr
  | Let of string * expr * expr   // new


(* Some expressions: *)

//       let z = 17 in z + z
let e1 = Let("z", CstI 17, Prim("+", Var "z", Var "z"))

//       let z = 17 in (let z = 22 in 100 * z) + z
let e2 = Let("z", CstI 17, 
             Prim("+", Let("z", CstI 22, Prim("*", CstI 100, Var "z")),
                       Var "z"))

//       let z = 5 - 4 in 100 * z
let e3 = Let("z", Prim("-", CstI 5, CstI 4), 
             Prim("*", CstI 100, Var "z"))

//       (20 + (let z = 17 in z + 2)) + 30
let e4 = Prim("+", Prim("+", CstI 20, Let("z", CstI 17, 
                                          Prim("+", Var "z", CstI 2))),
                   CstI 30)

//       2 * (let x = 3 in x + 4)
let e5 = Prim("*", CstI 2, Let("x", CstI 3, Prim("+", Var "x", CstI 4)))


(* ---------------------------------------------------------------------- *)
 
(* Compilation to target expressions with numerical indexes instead of
   symbolic variable names.  *)

(* Why compilation?

* Better correctness and safety. The compiler can:
  – check that all names are defined: classes, methods, fields, variables, types, functions, ...
  – check that the names have the correct type
  – check that it is legal to refer to them (not private etc)
  – improve the code, e.g. inline calls to private methods

* Better performance
  – The compiler checks are performed once, but the machine code gets executed again and again

* Why not compilation?
  – Compilation reduces flexibility by imposing static type
    checks and static name binding
  – Web programming often requires more flexibility
  – ... hence PHP, Python, Ruby, JavaScript, VB.NET, ...
*)


(* target expressions *)
type texpr =
  | TCstI of int
  | TVar of int                      // index into runtime environment
  | TLet of texpr * texpr            // rhs and body                
  | TPrim of string * texpr * texpr


// replacing variable names with indexes

// (let x = 5 in x + (let y = 3 in x + y))


// (let x = 5 in x   + (let y = 3 in x   + y  ))

// (let     5 in v_0 + (let 3     in v_1 + v_0))


// (let x = 5 in 
//    x   + (let y = 3 in x   + y  ) + (let z = 4 in x   + z  ))
//
// (let     5 in 
//    v_0 + (let 3     in v_1 + v_0) + (let     4 in v_1 + v_0))


(* Map variable name to variable index at compile-time *)

let rec getIndex vars x = 
  match vars with 
  | []                -> failwith "Variable not found"
  | y :: _ when x = y -> 0
  | y :: vars'        -> 1 + (getIndex vars' x)


(* Compiling from expr to texpr *)

let rec tcomp (e : expr) (cenv : string list) : texpr =
  match e with
  | CstI i           -> TCstI i
  | Var x            -> TVar (getIndex cenv x)
  | Prim(op, e1, e2) -> TPrim (op, tcomp e1 cenv, tcomp e2 cenv)
  | Let(x, e1, e2)   -> TLet (tcomp e1 cenv, tcomp e2 (x :: cenv))


// let z = 17 in z + z
e1
tcomp e1 []

//       let z = 5 in z * (let y = 3 in z + y)
let e6 = Let ("x", CstI 5, Prim ("*", Var "x", Let ("y", CstI 3, Prim ("+", Var "x", Var "y"))))

tcomp e6 []


//       let x = 5 in 
//         x + ((let y = 3 in x + y) - (let z = 4 in x + z))
let e7 = Let ("x", CstI 5, 
           Prim ("+", Var "x",
                      Prim ("-", Let ("y", CstI 3, Prim ("+", Var "x", Var "y")),
                                 Let ("z", CstI 4, Prim ("+", Var "x", Var "z")))))

tcomp e7 []


(* Evaluation of target expressions with variable indexes.  *)

// The run-time environment renv is a list of variable values (ints)

let rec teval (e : texpr) (renv : int list) : int =
  match e with
  | TCstI i -> i
  | TVar n  -> List.item n renv
  | TPrim ("+", e1, e2) -> (teval e1 renv) + (teval e2 renv)
  | TPrim ("*", e1, e2) -> (teval e1 renv) * (teval e2 renv)
  | TPrim ("-", e1, e2) -> (teval e1 renv) - (teval e2 renv)
  | TPrim _            -> failwith "unknown primitive"
  | TLet (e1, e2) -> 
    let v = teval e1 renv
    let renv' = v :: renv 
    teval e2 renv'


(* Correctness:  for all e,  (eval e []) = (teval (tcomp e []) [])   *)


(* ---------------------------------------------------------------------- *)

(* Stack machines *)

(*
   Consider expression: (2 * 3) + (~4 * 5)   (with ~ unary minus)


   Write it in postfix: 2 3 * 4 ~ 5 * +

   This is sequential code for a stack machine:


   Instructions       Stack content
   --------------------------------               
   2 3 * 4 ~ 5 * +    (empty)
   3 * 4 ~ 5 * +      2
   * 4 ~ 5 * +        2 3
   4 ~ 5 * +          6
   ~ 5 * +            6 4
   5 * +              6 -4
   * +                6 -4 5
   +                  6 -20
   (empty)            -14
*)


(*
Exercise:

  What is the postfix of 

  2 * 3 + 4
  2 + 3 * 4
  2 * (3 + 4) 
  2 - 3 - 4 - 5
  2 - (3 - (4 - 5))
  2 + 3 * 4 / 5

  Evaluate the postfix versions using a stack
*)


(*
Instruction  Stack before  Stack after    Effect
RCSTI n      s             s, n           Push const
RADD         s, n1, n2     s, n1+n2       Add
RSUB         s, n1, n2     s, n1-n2       Subtract
RMUL         s, n1, n2     s, n1*n2       Multiply
RNEG         s, n          s, -n          Negate
RDUP         s, n          s, n, n        Duplicate top elem
RSWAP        s, n1, n2     s, n2, n1      Swap

*)

(* Stack machine instructions  *)

type rinstr =
  | RCstI of int
  | RAdd 
  | RSub
  | RMul 
  | RNeg
  | RDup
  | RSwap


(* A simple stack machine for evaluation of variable-free expressions
   in postfix form *)

let rec reval (inss : rinstr list) (stack : int list) : int =
  match (inss, stack) with 
  | (RCstI n :: ris,  s            )  ->  reval ris (n :: s)
  | (RAdd    :: ris,  n2 :: n1 :: s)  ->  reval ris ((n1 + n2) :: s)
  | (RSub    :: ris,  n2 :: n1 :: s)  ->  reval ris ((n1 - n2) :: s)
  | (RMul    :: ris,  n2 :: n1 :: s)  ->  reval ris ((n1 * n2) :: s)
  | (RNeg    :: ris,        n1 :: s)  ->  reval ris (-n1 :: s)
  | (RDup    :: ris,        n1 :: s)  ->  reval ris (n1 :: n1 :: s)
  | (RSwap   :: ris,  n2 :: n1 :: s)  ->  reval ris (n1 :: n2 :: s)
  | ([]            ,  n  :: _      )  ->  n
  | ([]            ,  []           )  -> failwith "reval: no result on stack!"
  | _ -> failwith "reval: too few operands on stack"


let rpn1 = reval [RCstI 10; RCstI 3; RDup; RMul; RAdd] []


(* Compilation of variable-free expressions to rinstr lists *)

let rec rcomp (e : expr) : rinstr list =
  match e with
  | CstI n             -> [RCstI n]
  | Prim ("+", e1, e2) -> (rcomp e1) @ (rcomp e2) @ [RAdd]
  | Prim ("*", e1, e2) -> (rcomp e1) @ (rcomp e2) @ [RMul]
  | Prim ("-", e1, e2) -> (rcomp e1) @ (rcomp e2) @ [RSub]
  | Prim _             -> failwith "unknown primitive"
  | Var _              -> failwith "rcomp cannot compile Var"
  | Let _              -> failwith "rcomp cannot compile Let"
            

// (2 * 3) + (4 * 5)

let e8 = Prim ("+", Prim ("*", CstI 2, CstI 3), Prim ("*", CstI 4, CstI 5))

let r8 = rcomp e8

reval r8 []

(* Correctness: eval e []  equals  reval (rcomp e) [] *)


(*
Exercise:

  Manually convert each of the following expression in abstract syntax
  in a term of type expr. Then manually convert each of those terms
  into the corresponding list of stack machine instructions.
  Finally, manually evaluate each of those lists.

  2 * 3 + 4
  2 + 3 * 4
  2 * (3 + 4) 
  2 - 3 - 4 - 5
  2 - (3 - (4 - 5))
  2 + 3 * 4 / 5

  Check all your intermediate results using rcomp and reval
*)


(* Compiling expressions with let binders *)

(*  Main idea: store variable values in the stack! 

So stack contains mixture of 
- intermediate results (as before)
- values of bound variables

To get a variable's value, index off the stack top

*)


type sinstr =
  | SCstI of int    // push integer
  | SVar of int     // index into stack
  | SAdd            // pop args, push sum
  | SSub            // pop args, push difference
  | SMul            // pop args, push product
  | SDup            // push top
  | SPop            // pop top value/unbind var 
  | SSwap           // exchange top and next
 
(*  Example:    2 * (let x = 3 in x + 4)

Instructions                                                  Stack content
---------------------------------------------------------------------------
SCstI 2; SCstI 3; Svar 0; SCstI 4; SAdd; SSwap; SPop; SMul    
         SCstI 3; Svar 0; SCstI 4; SAdd; SSwap; SPop; SMul    2
                  Svar 0; SCstI 4; SAdd; SSwap; SPop; SMul    2 3 
                          SCstI 4; SAdd; SSwap; SPop; SMul    2 3 3
                                   SAdd; SSwap; SPop; SMul    2 3 3 4
                                         SSwap; SPop; SMul    2 3 7
                                                SPop; SMul    2 7 3
                                                      SMul    2 7
                                                              14
*)


(* Storing intermediate results and variable bindings in the same stack *)

let rec seval (inss : sinstr list) (stack : int list) : int =
  match (inss, stack) with 
  | (SCstI n :: ris,  s            )  ->  seval ris (n :: s)
  | (SVar n  :: ris,  s            )  ->  seval ris ((List.item n s) :: s)  // *****
  | (SAdd    :: ris,  n2 :: n1 :: s)  ->  seval ris ((n1 + n2) :: s)
  | (SSub    :: ris,  n2 :: n1 :: s)  ->  seval ris ((n1 - n2) :: s)
  | (SMul    :: ris,  n2 :: n1 :: s)  ->  seval ris ((n1 * n2) :: s)
  | (SDup    :: ris,        n1 :: s)  ->  seval ris (n1 :: n1 :: s)
  | (SPop    :: ris,         _ :: s)  ->  seval ris s                      // *****
  | (SSwap   :: ris,  n2 :: n1 :: s)  ->  seval ris (n1 :: n2 :: s)
  | ([]            ,  n  :: _      )  ->  n
  | ([]            ,  []           )  -> failwith "seval: no result on stack!"
  | _ -> failwith "seval: too few operands on stack"


(* Compiling to stack machine code *)

let scompOp (x : string) : sinstr =
  match x with 
  | "+" -> SAdd
  | "-" -> SSub
  | "*" -> SMul
  | _ -> failwith "unrecognized operator"


(* 
A compile-time environment keeps track of variable positions in the stack

It is an abstraction of the run-time stack
*)
type stackValue =
  | Value             // A computed value
  | Bound of string   // A bound variable


(* Compilation to a list of instructions for a unified-stack machine *)

let rec scomp (e : expr) (cenv : stackValue list) : sinstr list =
  match e with
  | Let(x, e1, e2) -> 
    let l1 = scomp e1 cenv in
    let l2 = scomp e2 (Bound x :: cenv) in
    l1 @ l2 @ [SSwap; SPop]
  | Var x  -> 
    [SVar (getIndex cenv (Bound x))]
  | CstI n -> 
    [SCstI n]
  | Prim (op, e1, e2) -> 
    let l1 = scomp e1 cenv in
    let l2 = scomp e2 (Value :: cenv) in
    l1 @ l2 @ [scompOp op] 


e1 // let z = 17 in z + z
let s1 = scomp e1 []
seval s1 []

e2 // let z = 17 in (let z = 22 in 100 * z) + z 
let s2 = scomp e2 []
seval s2 []

e5 // 2 * (let x = 3 in x + 4)
let s5 = scomp e5 []
seval s5 []


(* compilation to "byte code" *)

let rec byteCode (l : sinstr list) : string =
  match l with
  | []              ->  ""
  | (SCstI n) :: t  ->  "0 " + string n + " " + (byteCode t)
  | (SVar n)  :: t  ->  "1 " + string n + " " + (byteCode t)
  | SAdd      :: t  ->  "3 " + (byteCode t)
  | SSub      :: t  ->  "4 " + (byteCode t) 
  | SMul      :: t  ->  "5 " + (byteCode t)
  | SDup      :: t  ->  "6 " + (byteCode t)
  | SPop      :: t  ->  "7 " + (byteCode t) 
  | SSwap     :: t  ->  "8 " + (byteCode t)

(* Output the integers in list inss to the text file called fname: *)

let intsToFile (inss : sinstr list) (fname : string) = 
    let text = byteCode inss in
    System.IO.File.WriteAllText(fname, text)


intsToFile s2 "exec.txt"