Update: There is a bug which slipped into all of the interpreter, the CPS evaluator and the CPS compiler: expressions like a() && b() or a() || b() always evaluate both sides. This is wrong, of course: whether or not b() is evaluated must depend on whether a() is falsy. The fix is pretty simple (convert || and && binary operations to "if" nodes) but I didn't update these pages yet. Shame on me.
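For reference, here is a minimal sketch of what that fix could look like, assuming the AST node shapes used throughout this series. The fresh β_left name is hypothetical — real code should generate a guaranteed-unique name:

function desugar_bool(exp) {
    // a() && b(): in λanguage only `false` is falsy, so if the left side
    // is false the result is false, otherwise it's the right side.
    if (exp.operator == "&&") return {
        type: "if",
        cond: exp.left,
        then: exp.right,
        else: { type: "bool", value: false },
    };
    // a() || b(): bind the left value to a fresh variable through an
    // IIFE, so we can both test it and return it without re-evaluating.
    if (exp.operator == "||") return {
        type: "call",
        func: {
            type: "lambda",
            vars: [ "β_left" ],
            body: {
                type: "if",
                cond: { type: "var", value: "β_left" },
                then: { type: "var", value: "β_left" },
                else: exp.right,
            },
        },
        args: [ exp.left ],
    };
    return exp;
}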
Optimizer
— The devil is in the details —
Optimizing code in CPS is the topic of much academic research. But even without knowing a lot of theory, we can still implement some obvious improvements. For the fib function, the speedup is about 3x versus the previous version.
The optimizer recursively walks the AST, like other code we wrote, and transforms expressions into simpler ones whenever possible, according to some rules. It works until it finds what mathematicians call a “fixed point”: optimize(ast) == ast. That is, an AST which can no longer be reduced. To achieve this it resets a changes variable to zero before starting, and increments it for every AST reduction. When changes remains zero, we are done.
Examples
The fib function
Below you can see the sequence of transformations that the “fib” function goes through. I'm showing JavaScript code to dramatize, but keep in mind that the optimizer receives an AST and returns an AST; make_js is only called once at the end.

The optimizer itself is not very optimized — it could do more in a single pass, but instead I opted to do a single modification at a time, returning the original expression unchanged when changes > 0 (for reasons which I'll detail later).
Initial code, as returned by to_cps.

fib = function β_CC(β_K1, n) {
    GUARD(arguments, β_CC);
    (function β_CC(β_I2) {
        GUARD(arguments, β_CC);
        n < 2 ? β_I2(n) : fib(function β_CC(β_R4) {
            GUARD(arguments, β_CC);
            fib(function β_CC(β_R5) {
                GUARD(arguments, β_CC);
                β_I2(β_R4 + β_R5);
            }, n - 2);
        }, n - 1);
    })(function β_CC(β_R3) {
        GUARD(arguments, β_CC);
        β_K1(β_R3);
    });
};
Unwraps one IIFE. β_I2 (which is the continuation of an "if" node) becomes a variable in the containing function.

fib = function β_CC(β_K1, n) {
    var β_I2;
    GUARD(arguments, β_CC);
    β_I2 = function β_CC(β_R3) {
        GUARD(arguments, β_CC);
        β_K1(β_R3);
    }, n < 2 ? β_I2(n) : fib(function β_CC(β_R4) {
        GUARD(arguments, β_CC);
        fib(function β_CC(β_R5) {
            GUARD(arguments, β_CC);
            β_I2(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
“Tail call optimization”. TCO is not really what happens here, but the net result is the same. It notices that β_I2 is effectively equivalent to β_K1, so it drops one needless closure.

fib = function β_CC(β_K1, n) {
    var β_I2;
    GUARD(arguments, β_CC);
    β_I2 = β_K1, n < 2 ? β_I2(n) : fib(function β_CC(β_R4) {
        GUARD(arguments, β_CC);
        fib(function β_CC(β_R5) {
            GUARD(arguments, β_CC);
            β_I2(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
Variable elimination. Since β_I2 is the same as β_K1, all occurrences of the former are replaced with the latter, and the assignment dropped.

fib = function β_CC(β_K1, n) {
    var β_I2;
    GUARD(arguments, β_CC);
    β_K1, n < 2 ? β_K1(n) : fib(function β_CC(β_R4) {
        GUARD(arguments, β_CC);
        fib(function β_CC(β_R5) {
            GUARD(arguments, β_CC);
            β_K1(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
Drops unneeded code. Since β_I2 is no longer used, the var statement can be dropped. Also, a pointless β_K1 is discarded.

fib = function β_CC(β_K1, n) {
    GUARD(arguments, β_CC);
    n < 2 ? β_K1(n) : fib(function β_CC(β_R4) {
        GUARD(arguments, β_CC);
        fib(function β_CC(β_R5) {
            GUARD(arguments, β_CC);
            β_K1(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
Discard some GUARD calls. When a function immediately calls another function, chances are that the stack will be reset by that next function, so we can afford to not guard, given that our stack nesting limit is pretty small (200 calls). [XXX: need more testing to make sure this is sound; the speed gain is significant. Also, we lose unlimited recursion, although properly tail-recursive functions will still work.]

fib = function β_CC(β_K1, n) {
    GUARD(arguments, β_CC);
    n < 2 ? β_K1(n) : fib(function(β_R4) {
        fib(function(β_R5) {
            β_K1(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
No more changes. It reached the fixed point, so it stops there.

fib = function β_CC(β_K1, n) {
    GUARD(arguments, β_CC);
    n < 2 ? β_K1(n) : fib(function(β_R4) {
        fib(function(β_R5) {
            β_K1(β_R4 + β_R5);
        }, n - 2);
    }, n - 1);
};
let nodes
I won't show all the intermediate steps for the next examples, only the initial and final form.
Input code:
(λ(){
let (a = 2, b = 5) {
print(a + b);
};
})();
Transformation:
(function β_CC(β_K1) {
GUARD(arguments, β_CC);
(function β_CC(β_K2, a) {
GUARD(arguments, β_CC);
(function β_CC(β_K3, b) {
GUARD(arguments, β_CC);
print(function β_CC(β_R4) {
GUARD(arguments, β_CC);
β_K3(β_R4);
}, a + b);
})(function β_CC(β_R5) {
GUARD(arguments, β_CC);
β_K2(β_R5);
}, 5);
})(function β_CC(β_R6) {
GUARD(arguments, β_CC);
β_K1(β_R6);
}, 2);
})(function β_CC(β_R7) {
GUARD(arguments, β_CC);
β_TOPLEVEL(β_R7);
});
→
(function β_CC(β_K1) {
var a, b;
GUARD(arguments, β_CC);
a = 2, b = 5, print(β_K1, a + b);
})(β_TOPLEVEL);
There are no name clashes, so the let variables simply become plain variables in the containing function.
Input code:
(λ(){
let (a = 2) {
let (a = 3) {
print(a);
};
print(a);
};
})();
Transformation:
(function β_CC(β_K1) {
GUARD(arguments, β_CC);
(function β_CC(β_K2, a) {
GUARD(arguments, β_CC);
(function β_CC(β_K3, a) {
GUARD(arguments, β_CC);
print(function β_CC(β_R4) {
GUARD(arguments, β_CC);
β_K3(β_R4);
}, a);
})(function β_CC(β_R5) {
GUARD(arguments, β_CC);
print(function β_CC(β_R6) {
GUARD(arguments, β_CC);
β_K2(β_R6);
}, a);
}, 3);
})(function β_CC(β_R7) {
GUARD(arguments, β_CC);
β_K1(β_R7);
}, 2);
})(function β_CC(β_R8) {
GUARD(arguments, β_CC);
β_TOPLEVEL(β_R8);
});
→
(function β_CC(β_K1) {
var a, β_K3, β_a$9;
GUARD(arguments, β_CC);
a = 2, β_K3 = function(β_R5) {
print(β_K1, a);
}, β_a$9 = 3, print(β_K3, β_a$9);
})(β_TOPLEVEL);
The inner a would clash with the one in the containing function, so it is renamed to β_a$9.
Overview of the implementation
As already stated, the optimizer will descend the AST and simplify nodes, until no more reductions are possible. Unlike the other AST walkers we wrote, this one is “destructive” — it may modify the input AST, instead of creating a fresh one. The key reductions it makes are:
- Discard some useless closures: λ(x){ y(x) } → y.
- Discard unused variables when possible.
- Unwrap IIFE-s into the containing function.
- Compute constant expressions: a = 2 + 3 * 4 → a = 14.
- As well, if an "if"'s condition is constant, replace the "if" with the appropriate branch.
- Discard side-effect-free expressions from "prog" nodes.
- Hint "lambda" nodes with unguarded: true whenever possible.
Some optimizations may “cascade” — for instance, after replacing a variable with another, the former might be a candidate for discarding. For this reason it's important to repeat the process after any change was made.
make_scope
Because we're messing with the environment (sometimes introducing variables, other times removing them) we need a precise way to figure out the connections between variables. Like, “what are the references to variable x”? I wrote a helper function, make_scope, to provide that. It walks the AST, creating Environment objects as appropriate, and annotates some nodes like this:
"lambda"
— since they create a new environment, each"lambda"
node will get anenv
property that points to its environment. Also,"lambda"
nodes will get alocs
property — an array of variable names “local” to this function (to be defined withvar
in the final code)."var"
— variable nodes will get anenv
property which points to the environment where that variable is defined, and adef
property which points to a definition object.The definition object will be stored for each variable in the environment where it is defined, and, for convenience, in the
def
property of each"var"
node pointing to it. It may contain the following:{ refs: [/* array of "var" nodes pointing to this definition */], assigned: integer, // how many times is this var assigned to? cont: true or false, // is this a continuation argument? }
This information enables the optimizer to do what it does. For example, if a variable is assigned as many times as it is referenced, it means we're not making use of its value anywhere (because each assignment counts as one reference) — hence we can drop it. Or, when we need to replace a variable X with another one Y, we can walk X's refs to change the name to “Y” in each reference node.
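To illustrate, here are hypothetical helpers (not the actual code) for those two operations, given the definition objects described above:

function can_drop(def) {
    // each assignment also counts as one reference, so equal counts
    // mean the variable's value is never actually read
    return def.refs.length == def.assigned;
}

function rename_refs(def, new_name) {
    def.refs.forEach(function(node) {
        node.value = new_name;  // "var" nodes keep their name in `value`
    });
}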
The locs property that we add to "lambda" nodes will serve as a place for the optimizer to put the variables it unwraps from IIFE-s. make_scope will preserve it if already present (and will take care to define those variables in the environment), or otherwise make it an empty array. The JS code generator (in js_lambda) will include a var statement for those names if they are present, so a small modification to js_lambda is needed.
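That modification might look like this (a sketch based on the js_lambda from the plain code generator article; the GUARD-related parts added by the CPS compiler are omitted here):

function js_lambda(exp) {
    var code = "(function ";
    if (exp.name) code += make_var(exp.name);
    code += "(" + exp.vars.map(make_var).join(", ") + ") {";
    // new: declare the locals that the optimizer unwrapped from IIFE-s
    if (exp.locs && exp.locs.length > 0)
        code += "var " + exp.locs.map(make_var).join(", ") + "; ";
    code += "return " + js(exp.body) + " })";
    return code;
}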
My first take was to call make_scope once at the start, and have the optimizer maintain all the env and def extra properties for each change it made, with the advantage that we could do multiple reductions in a pass. But this complicated the code so much that I gave up. The current code calls make_scope before each optimizer pass, and turns the optimizer into a no-op after any reduction (that's necessary so that we don't operate with obsolete scope information).
The optimize function
As said, it'll call make_scope before entering each optimizer pass, and repeat until no more changes are made. Finally, before returning the optimized expression, it saves the toplevel scope in the env property. This is the entry point:
function optimize(exp) {
    do {
        var changes = 0;    // bumped by the opt_* functions on every reduction
        var defun = exp;    // current containing function (used by the opt_* helpers)
        make_scope(exp);    // recompute scope information before each pass
        exp = opt(exp);
    } while (changes);
    return exp;

    function opt(exp) {
        if (changes) return exp;  // one reduction per pass; scope info is stale now
        switch (exp.type) {
          case "num"    :
          case "str"    :
          case "bool"   :
          case "var"    : return exp;
          case "binary" : return opt_binary (exp);
          case "assign" : return opt_assign (exp);
          case "if"     : return opt_if     (exp);
          case "prog"   : return opt_prog   (exp);
          case "call"   : return opt_call   (exp);
          case "lambda" : return opt_lambda (exp);
        }
        throw new Error("I don't know how to optimize " + JSON.stringify(exp));
    }

    ... // the opt_* functions here
}
We don't have to care about "let" nodes, since they will have been desugared to IIFE-s by the CPS transformer.
The “trivial” nodes (constants and variables) are returned as they are. Each opt_* function will have to call opt recursively on members of the current expression.
opt_binary will optimize binary expressions: call opt on the left and right nodes, and then check if they are constant — and if so, evaluate the expression and replace with the result.
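For instance, constant folding might look roughly like this (a hypothetical sketch covering only numeric operands; it lives inside optimize, where changes and opt are visible):

function opt_binary(exp) {
    exp.left = opt(exp.left);
    exp.right = opt(exp.right);
    // both sides are constant numbers: fold the expression at compile time
    if (exp.left.type == "num" && exp.right.type == "num") switch (exp.operator) {
      case "+":
        changes++;
        return { type: "num", value: exp.left.value + exp.right.value };
      case "*":
        changes++;
        return { type: "num", value: exp.left.value * exp.right.value };
      // ... the other operators follow the same pattern
    }
    return exp;
}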
opt_assign will optimize the left and right nodes too. Then check for potential reductions.
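One such reduction, sketched hypothetically under the assumptions of the def objects from make_scope: when the target variable's value is never read, the assignment can be reduced to its right side.

function opt_assign(exp) {
    exp.left = opt(exp.left);
    exp.right = opt(exp.right);
    if (exp.left.type == "var" && exp.left.def
        && exp.left.def.refs.length == exp.left.def.assigned) {
        // the value is never read; keep the right side for its effects only
        changes++;
        return exp.right;
    }
    return exp;
}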
opt_if descends into the cond, then and else nodes. When the condition can be determined (it's a constant expression), it returns the then or the else branch, as appropriate.
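A sketch of that reduction (hypothetical code; remember that in λanguage only false is falsy, so any other constant selects the then branch):

function opt_if(exp) {
    exp.cond = opt(exp.cond);
    exp.then = opt(exp.then);
    if (exp.else) exp.else = opt(exp.else);
    var t = exp.cond.type;
    if (t == "num" || t == "str" || t == "bool") {
        changes++;
        return exp.cond.value !== false
            ? exp.then
            : (exp.else || { type: "bool", value: false });
    }
    return exp;
}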
opt_prog will duplicate some work already done in the CPS transformer, such as discarding expressions that have no side effects; it's useful to do that here because optimizations cascade (so even if to_cps returned tight code, it's possible that after some optimization we end up with a side-effect-free expression here).
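Assuming "prog" nodes carry their expressions in a prog array, and a has_side_effects predicate like the one used by the CPS transformer, the core of it might look like this (a sketch, not the actual code):

function opt_prog(exp) {
    if (exp.prog.length == 0) {
        changes++;
        return { type: "bool", value: false };  // empty sequence reduces to false
    }
    if (exp.prog.length == 1) {
        changes++;
        return opt(exp.prog[0]);                // unwrap a single expression
    }
    if (!has_side_effects(exp.prog[0])) {
        changes++;                              // drop a useless expression
        return opt({ type: "prog", prog: exp.prog.slice(1) });
    }
    exp.prog = exp.prog.map(opt);
    return exp;
}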
opt_call optimizes function calls. In the event the thing we call is an anonymous "lambda" (IIFE), we direct it to opt_iife (see code), which will unwrap that function's arguments and body into the current program. This is probably the trickiest (and most effective) optimization we do.
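The dispatch itself is simple; sketched hypothetically, it amounts to:

function opt_call(exp) {
    var f = exp.func;
    // an anonymous lambda called in place is an IIFE; hand it to opt_iife
    if (f.type == "lambda" && !f.name)
        return opt_iife(exp);
    exp.func = opt(exp.func);
    exp.args = exp.args.map(opt);
    return exp;
}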
opt_lambda will optimize a function whose only purpose is to call another function with the same arguments. When that's not the case, it discards unused local variables from locs, and descends the body.
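That first reduction is the λ(x){ y(x) } → y rule from the list above. A sketch under the same assumptions (hypothetical code; the safety checks in the real version are more involved, and env.get(name) is assumed to return the definition object recorded by make_scope):

function opt_lambda(exp) {
    var body = exp.body;
    // λ(x...){ y(x...) } → y: the wrapper forwards its arguments unchanged
    if (body.type == "call"
        && body.func.type == "var"
        && exp.vars.indexOf(body.func.value) < 0   // must not reduce to our own argument
        && body.args.length == exp.vars.length
        && body.args.every(function(arg, i) {
               return arg.type == "var" && arg.value == exp.vars[i];
           })) {
        changes++;
        return opt(body.func);
    }
    // otherwise: drop locals that are no longer referenced, then descend
    exp.locs = exp.locs.filter(function(name) {
        return exp.env.get(name).refs.length > 0;
    });
    exp.body = opt(exp.body);
    return exp;
}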