package catala

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type
module Runtime = Runtime_ocaml.Runtime
module ModuleName = Catala_utils.Uid.Module
module ScopeName : sig ... end
module TopdefName : sig ... end
module StructName : sig ... end
module StructField : sig ... end
module EnumName : sig ... end
module EnumConstructor : sig ... end

Only used by surface

module RuleName : sig ... end
module LabelName : sig ... end

Used for unresolved structs/maps in desugared

module Ident = Catala_utils.String

Only used by desugared/scopelang

module ScopeVar : sig ... end
module SubScopeName : sig ... end
type scope_var_or_subscope =
  1. | ScopeVar of ScopeVar.t
  2. | SubScope of SubScopeName.t * ScopeName.t
module StateName : sig ... end

Abstract syntax tree

Define a common base type for the expressions in most passes of the compiler

Phantom types used to select relevant cases on the generic AST

We instantiate them with object types (see the AST kind definitions below) to take advantage of sub-typing. The values aren't actually used.

These types make it possible to select the features present in any given expression type.

type yes =
  1. | Yes
type no =
  1. | No
    (*

    Phantom types used in the definitions below. We don't make them abstract, because the typer needs to know that their intersection is empty.

    *)
type desugared = < monomorphic : yes ; polymorphic : yes ; overloaded : yes ; resolved : no ; syntacticNames : yes ; scopeVarStates : yes ; scopeVarSimpl : no ; explicitScopes : yes ; assertions : no ; defaultTerms : yes ; exceptions : no ; custom : no >
type scopelang = < monomorphic : yes ; polymorphic : yes ; overloaded : no ; resolved : yes ; syntacticNames : no ; scopeVarStates : no ; scopeVarSimpl : yes ; explicitScopes : yes ; assertions : no ; defaultTerms : yes ; exceptions : no ; custom : no >
type dcalc = < monomorphic : yes ; polymorphic : yes ; overloaded : no ; resolved : yes ; syntacticNames : no ; scopeVarStates : no ; scopeVarSimpl : no ; explicitScopes : no ; assertions : yes ; defaultTerms : yes ; exceptions : no ; custom : no >
type lcalc = < monomorphic : yes ; polymorphic : yes ; overloaded : no ; resolved : yes ; syntacticNames : no ; scopeVarStates : no ; scopeVarSimpl : no ; explicitScopes : no ; assertions : yes ; defaultTerms : no ; exceptions : yes ; custom : no >
type 'a any = < .. > as 'a

'a any is 'a, but adds the constraint that it should be restricted to valid AST kinds

type dcalc_lcalc_features = < monomorphic : yes ; polymorphic : yes ; overloaded : no ; resolved : yes ; syntacticNames : no ; scopeVarStates : no ; scopeVarSimpl : no ; explicitScopes : no ; assertions : yes >

Features that are common to Dcalc and Lcalc

type ('a, 'b) dcalc_lcalc = < dcalc_lcalc_features ; defaultTerms : 'a ; exceptions : 'b ; custom : no >

This type regroups Dcalc and Lcalc ASTs.

type ('a, 'b, 'c) interpr_kind = < dcalc_lcalc_features ; defaultTerms : 'a ; exceptions : 'b ; custom : 'c >

This type corresponds to the types handled by the interpreter: it regroups Dcalc and Lcalc ASTs and may have custom terms

Types

type typ_lit =
  1. | TBool
  2. | TUnit
  3. | TInt
  4. | TRat
  5. | TMoney
  6. | TDate
  7. | TDuration
and naked_typ =
  1. | TLit of typ_lit
  2. | TTuple of typ list
  3. | TStruct of StructName.t
  4. | TEnum of EnumName.t
  5. | TOption of typ
  6. | TArrow of typ list * typ
  7. | TArray of typ
  8. | TDefault of typ
  9. | TAny
  10. | TClosureEnv
    (*

    Hides an existential type needed for closure conversion

    *)

Constants and operators

type date = Runtime.date
type date_rounding = Runtime.date_rounding
type duration = Runtime.duration
type var_def_log = {
  1. log_typ : naked_typ;
  2. log_io_input : Runtime.io_input;
  3. log_io_output : bool;
}
type log_entry =
  1. | VarDef of var_def_log
    (*

    During code generation, we need to know the type of the variable being logged for embedding as well as its I/O properties.

    *)
  2. | BeginCall
  3. | EndCall
  4. | PosRecordIfTrueBool
module Op : sig ... end

Classification of operators on how they should be typed

type 'a operator = 'a Op.t
type except =
  1. | ConflictError
  2. | EmptyError
  3. | NoValueProvided
  4. | Crash

Markings

type untyped = {
  1. pos : Catala_utils.Pos.t;
}
type typed = {
  1. pos : Catala_utils.Pos.t;
  2. ty : typ;
}
type 'a custom = {
  1. pos : Catala_utils.Pos.t;
  2. custom : 'a;
}
type nil = |

Using empty markings will ensure terms can't be constructed: used for example in interfaces to ensure that they don't contain any expressions

type _ mark =
  1. | Untyped : untyped -> untyped mark
  2. | Typed : typed -> typed mark
  3. | Custom : 'a custom -> 'a custom mark

The generic type of AST markings. Using a GADT allows functions to be polymorphic in the marking, but still do transformations on types when appropriate. The Custom case can be used within passes that need to store specific information, e.g. typing

type ('a, 'm) marked = ('a, 'm mark) Catala_utils.Mark.ed

Type of values marked with the above standard mark GADT

Generic expressions

Define a common base type for the expressions in most passes of the compiler

type lit =
  1. | LBool of bool
  2. | LInt of Runtime.integer
  3. | LRat of Runtime.decimal
  4. | LMoney of Runtime.money
  5. | LUnit
  6. | LDate of date
  7. | LDuration of duration

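Literals are the same throughout compilation. Note that the empty error is not a literal: it is represented by the EEmptyError expression constructor, which is only available in ASTs where defaultTerms is enabled, and is therefore eliminated midway through compilation.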

type external_ref =
  1. | External_value of TopdefName.t
  2. | External_scope of ScopeName.t

External references are resolved to strings that point to functions or constants in the end, but we need to keep different references for typing

type 'a glocation =
  1. | DesugaredScopeVar : {
    1. name : ScopeVar.t Catala_utils.Mark.pos;
    2. state : StateName.t option;
    } -> < scopeVarStates : yes.. > glocation
  2. | ScopelangScopeVar : {
    1. name : ScopeVar.t Catala_utils.Mark.pos;
    } -> < scopeVarSimpl : yes.. > glocation
  3. | SubScopeVar : {
    1. scope : ScopeName.t;
    2. alias : SubScopeName.t Catala_utils.Mark.pos;
    3. var : ScopeVar.t Catala_utils.Mark.pos;
    } -> < explicitScopes : yes.. > glocation
  4. | ToplevelVar : {
    1. name : TopdefName.t Catala_utils.Mark.pos;
    } -> < explicitScopes : yes.. > glocation

Locations are handled differently in desugared and scopelang

type ('a, 'm) gexpr = (('a, 'm) naked_gexpr, 'm) marked
and ('a, 'm) naked_gexpr = ('a, 'a, 'm) base_gexpr

General expressions: groups all expression cases of the different ASTs, and uses a GADT to eliminate irrelevant cases for each one. The 't annotations are also totally unconstrained at this point. The dcalc exprs, for example, are then defined with type naked_expr = dcalc naked_gexpr plus the annotations.

A few tips on using this GADT:

  • To write a function that handles cases from different ASTs, make the type variables explicit: fun (type a) (x: a naked_gexpr) -> ...
  • For recursive functions, you may additionally need to make the generalisation of the variable explicit: let rec f: type a . a naked_gexpr -> ...
  • Always think of using the pre-defined map/fold functions in Expr rather than completely defining your recursion manually.

The first argument of the base_gexpr type characterises the "deep" type of the AST, while the second is the shallow type. They are always equal for well-formed AST types, but differentiating them ephemerally allows us to do well-typed recursive transformations on the AST that change its type.

and ('a, 'b, 'm) base_gexpr =
  1. | ELit : lit -> ('a, < .. >, 'm) base_gexpr
  2. | EApp : {
    1. f : ('a, 'm) gexpr;
    2. args : ('a, 'm) gexpr list;
      (*

      length may be 1 even if arity > 1 in desugared. scopelang performs detuplification, so length = arity afterwards

      *)
    3. tys : typ list;
      (*

      Set to [] before disambiguation

      *)
    } -> ('a, < .. >, 'm) base_gexpr
  3. | EAppOp : {
    1. op : 'b operator;
    2. args : ('a, 'm) gexpr list;
    3. tys : typ list;
    } -> ('a, < .. > as 'b, 'm) base_gexpr
  4. | EArray : ('a, 'm) gexpr list -> ('a, < .. >, 'm) base_gexpr
  5. | EVar : ('a, 'm) naked_gexpr Bindlib.var -> ('a, _, 'm) base_gexpr
  6. | EAbs : {
    1. binder : (('a, 'a, 'm) base_gexpr, ('a, 'm) gexpr) Bindlib.mbinder;
    2. tys : typ list;
    } -> ('a, < .. >, 'm) base_gexpr
  7. | EIfThenElse : {
    1. cond : ('a, 'm) gexpr;
    2. etrue : ('a, 'm) gexpr;
    3. efalse : ('a, 'm) gexpr;
    } -> ('a, < .. >, 'm) base_gexpr
  8. | EStruct : {
    1. name : StructName.t;
    2. fields : ('a, 'm) gexpr StructField.Map.t;
    } -> ('a, < .. >, 'm) base_gexpr
  9. | EInj : {
    1. name : EnumName.t;
    2. e : ('a, 'm) gexpr;
    3. cons : EnumConstructor.t;
    } -> ('a, < .. >, 'm) base_gexpr
  10. | EMatch : {
    1. name : EnumName.t;
    2. e : ('a, 'm) gexpr;
    3. cases : ('a, 'm) gexpr EnumConstructor.Map.t;
    } -> ('a, < .. >, 'm) base_gexpr
  11. | ETuple : ('a, 'm) gexpr list -> ('a, < .. >, 'm) base_gexpr
  12. | ETupleAccess : {
    1. e : ('a, 'm) gexpr;
    2. index : int;
    3. size : int;
    } -> ('a, < .. >, 'm) base_gexpr
  13. | ELocation : 'b glocation -> ('a, < .. > as 'b, 'm) base_gexpr
  14. | EScopeCall : {
    1. scope : ScopeName.t;
    2. args : ('a, 'm) gexpr ScopeVar.Map.t;
    } -> ('a, < explicitScopes : yes.. >, 'm) base_gexpr
  15. | EDStructAccess : {
    1. name_opt : StructName.t option;
    2. e : ('a, 'm) gexpr;
    3. field : Ident.t;
    } -> ('a, < syntacticNames : yes.. >, 'm) base_gexpr
    (*

    desugared has ambiguous struct fields

    *)
  16. | EStructAccess : {
    1. name : StructName.t;
    2. e : ('a, 'm) gexpr;
    3. field : StructField.t;
    } -> ('a, < .. >, 'm) base_gexpr
    (*

    Resolved struct/enums, after name resolution in desugared

    *)
  17. | EExternal : {
    1. name : external_ref Catala_utils.Mark.pos;
    } -> ('a, < explicitScopes : no.. >, 't) base_gexpr
  18. | EAssert : ('a, 'm) gexpr -> ('a, < assertions : yes.. >, 'm) base_gexpr
  19. | EDefault : {
    1. excepts : ('a, 'm) gexpr list;
    2. just : ('a, 'm) gexpr;
    3. cons : ('a, 'm) gexpr;
    } -> ('a, < defaultTerms : yes.. >, 'm) base_gexpr
  20. | EPureDefault : ('a, 'm) gexpr -> ('a, < defaultTerms : yes.. >, 'm) base_gexpr
    (*

    "return" of a pure term, so that it can be typed as default

    *)
  21. | EEmptyError : ('a, < defaultTerms : yes.. >, 'm) base_gexpr
  22. | EErrorOnEmpty : ('a, 'm) gexpr -> ('a, < defaultTerms : yes.. >, 'm) base_gexpr
  23. | ERaise : except -> ('a, < exceptions : yes.. >, 'm) base_gexpr
  24. | ECatch : {
    1. body : ('a, 'm) gexpr;
    2. exn : except;
    3. handler : ('a, 'm) gexpr;
    } -> ('a, < exceptions : yes.. >, 'm) base_gexpr
  25. | ECustom : {
    1. obj : Obj.t;
    2. targs : typ list;
    3. tret : typ;
    } -> ('a, < custom : yes.. >, 't) base_gexpr
    (*

    A function of the given type, as a runtime OCaml object. The specified types for arguments and result must be the Catala types corresponding to the runtime types of the function.

    *)
type any_expr =
  1. | AnyExpr : ('a, _) gexpr -> any_expr

Useful for errors and printing, for example

type ('a, 'm) boxed_gexpr = (('a, 'm) naked_gexpr Bindlib.box, 'm) marked

The annotation is lifted outside of the box for expressions

type 'e boxed = ('a, 'm) boxed_gexpr constraint 'e = ('a, 'm) gexpr

('a, 'm) gexpr boxed is ('a, 'm) boxed_gexpr. The difference with ('a, 'm) gexpr Bindlib.box is that the annotation is outside of the box, and can therefore be accessed without the need to resolve the box.

type ('e, 'b) binder = (('a, 'm) naked_gexpr, 'b) Bindlib.binder constraint 'e = ('a, 'm) gexpr

The expressions use the Bindlib library, based on higher-order abstract syntax

type ('e, 'b) mbinder = (('a, 'm) naked_gexpr, 'b) Bindlib.mbinder constraint 'e = ('a, 'm) gexpr

Higher-level program structure

Constructs scopes and programs on top of expressions. The 'e type parameter throughout is expected to match instances of the gexpr type defined above. Markings are constrained to the mark GADT defined above. Note that this structure is at the moment only relevant for dcalc and lcalc, as scopelang has its own scope structure, as the name implies.

type scope_let_kind =
  1. | DestructuringInputStruct
    (*

    let x = input.field

    *)
  2. | ScopeVarDefinition
    (*

    let x = error_on_empty e

    *)
  3. | SubScopeVarDefinition
    (*

    let s.x = fun _ -> e or let s.x = error_on_empty e for input-only subscope variables.

    *)
  4. | CallingSubScope
    (*

    let result = s ({ x = s.x; y = s.x; ...})

    *)
  5. | DestructuringSubScopeResults
    (*

    let s.x = result.x

    *)
  6. | Assertion
    (*

    let () = assert e

    *)

This kind annotation signals that the let-binding respects a structural invariant. These invariants concern the shape of the expression in the let-binding, and are documented below.

type 'e scope_let = {
  1. scope_let_kind : scope_let_kind;
  2. scope_let_typ : typ;
  3. scope_let_expr : 'e;
  4. scope_let_next : ('e, 'e scope_body_expr) binder;
  5. scope_let_pos : Catala_utils.Pos.t;
} constraint 'e = ('a any, _) gexpr

This type is parametrized by the expression type so it can be reused in later intermediate representations.

and 'e scope_body_expr =
  1. | Result of 'e
  2. | ScopeLet of 'e scope_let
constraint 'e = ('a any, _) gexpr

A scope let-binding has all the information necessary to make a proper let-binding expression, plus an annotation for the kind of the let-binding that comes from the compilation of a Scopelang.Ast statement.

type 'e scope_body = {
  1. scope_body_input_struct : StructName.t;
  2. scope_body_output_struct : StructName.t;
  3. scope_body_expr : ('e, 'e scope_body_expr) binder;
} constraint 'e = ('a any, _) gexpr

Instead of being a single expression, we give a little more ad-hoc structure to the scope body by decomposing it into an ordered list of let-bindings, and a result expression that uses the let-bound variables. The first binder is the argument of type scope_body_input_struct.

type 'e code_item =
  1. | ScopeDef of ScopeName.t * 'e scope_body
  2. | Topdef of TopdefName.t * typ * 'e
type 'e code_item_list =
  1. | Nil
  2. | Cons of 'e code_item * ('e, 'e code_item_list) binder

A chained list, but with a binder for each element into the next: x := let a = e1 in e2 is thus Cons (e1, {a. Cons (e2, {x. Nil})})

type scope_info = {
  1. in_struct_name : StructName.t;
  2. out_struct_name : StructName.t;
  3. out_struct_fields : StructField.t ScopeVar.Map.t;
}
type module_tree =
  1. | M of module_tree ModuleName.Map.t

In practice, this is a DAG: beware of repeated names

type decl_ctx = {
  1. ctx_enums : enum_ctx;
  2. ctx_structs : struct_ctx;
  3. ctx_scopes : scope_info ScopeName.Map.t;
  4. ctx_topdefs : typ TopdefName.Map.t;
  5. ctx_struct_fields : StructField.t StructName.Map.t Ident.Map.t;
    (*

    needed for disambiguation (desugared -> scope)

    *)
  6. ctx_enum_constrs : EnumConstructor.t EnumName.Map.t Ident.Map.t;
  7. ctx_scope_index : ScopeName.t Ident.Map.t;
    (*

    only used to look up scopes (in the root module) specified from the CLI

    *)
  8. ctx_modules : module_tree;
}
type 'e program = {
  1. decl_ctx : decl_ctx;
  2. code_items : 'e code_item_list;
  3. lang : Catala_utils.Cli.backend_lang;
  4. module_name : ModuleName.t option;
}
module Var : sig ... end
module Qident : sig ... end

This module defines module names and path accesses, used to refer to separate compilation units.

module Type : sig ... end
module Operator : sig ... end
module Expr : sig ... end

Functions handling the expressions of shared_ast

module Scope : sig ... end

Functions handling the code item structures of shared_ast, in particular the scopes

module Program : sig ... end
module Print : sig ... end

Printing functions for the default calculus AST

module Typing : sig ... end

Typing for the default calculus. Because of the error terms, we perform type inference using the classical W algorithm with union-find unification.

module Interpreter : sig ... end

Reference interpreter for the default calculus

module Optimizations : sig ... end

Optimization passes for default calculus and lambda calculus programs and expressions