package owl

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type

NLP: Corpus module

(** Corpus handle: a record of eight fields, all declared [mutable].

    NOTE(review): only the field names and types are visible here; the
    per-field descriptions below are inferred from them and must be
    confirmed against the implementation. *)
type t = {
  mutable uri : string;
      (** presumably the base path/URI the corpus files derive from *)
  mutable bin_ofs : int array;
      (** presumably offsets into the file read through [bin_fh] — confirm *)
  mutable tok_ofs : int array;
      (** presumably offsets into the file read through [tok_fh] — confirm *)
  mutable bin_fh : in_channel option;
      (** optional input channel; [None] presumably means "not opened" *)
  mutable tok_fh : in_channel option;
      (** optional input channel; [None] presumably means "not opened" *)
  mutable vocab : Owl_nlp_vocabulary.t option;
      (** optional vocabulary attached to the corpus *)
  mutable minlen : int;
      (** presumably a minimum document length threshold — confirm *)
  mutable docid : int array;
      (** presumably one identifier per document — confirm *)
}
(** Takes an optional input channel and returns unit.
    NOTE(review): presumably closes the channel when it is [Some _] and does
    nothing for [None] — confirm against the implementation. *)
val _close_if_open : in_channel option -> unit
(** Takes a string and returns an optional input channel.
    NOTE(review): presumably the string is a file path and the result is
    [None] when no such file exists — confirm. *)
val _open_if_exists : string -> in_channel option
(** Side-effecting operation on a corpus (returns unit).
    NOTE(review): presumably releases the channels held in [bin_fh] and
    [tok_fh] — confirm. *)
val cleanup : t -> unit
(** Builds a [t] from eight positional arguments. Their types match, in
    order, the fields of [t]: [uri], [bin_ofs], [tok_ofs], [bin_fh],
    [tok_fh], [vocab], [minlen], [docid].
    NOTE(review): presumably each argument initialises the field in the same
    position; several arguments share a type, so argument order matters —
    confirm. *)
val create : string -> int array -> int array -> in_channel option -> in_channel option -> Owl_nlp_vocabulary.t option -> int -> int array -> t
(** Returns a string for the corpus; presumably the [uri] field — confirm. *)
val get_uri : t -> string
(** Returns a string; presumably the path of the file read through [bin_fh],
    derived from [uri] — confirm. *)
val get_bin_uri : t -> string
(** Returns a bare [in_channel], whereas the [bin_fh] field is an option.
    NOTE(review): behaviour when the field is [None] (open on demand vs.
    raise) is not visible here — confirm. *)
val get_bin_fh : t -> in_channel
(** Returns a string; presumably the path of the file read through [tok_fh],
    derived from [uri] — confirm. *)
val get_tok_uri : t -> string
(** Returns a bare [in_channel], whereas the [tok_fh] field is an option.
    NOTE(review): behaviour when the field is [None] is not visible here —
    confirm. *)
val get_tok_fh : t -> in_channel
(** Returns a string; presumably the path where the vocabulary is stored —
    confirm. *)
val get_vocab_uri : t -> string
(** Returns a bare vocabulary, whereas the [vocab] field is an option.
    NOTE(review): behaviour when the field is [None] is not visible here —
    confirm. *)
val get_vocab : t -> Owl_nlp_vocabulary.t
(** Returns an [int array]; presumably the [docid] field — confirm. *)
val get_docid : t -> int array
(** Returns an [int]; presumably the number of documents in the corpus —
    confirm how it is derived. *)
val length : t -> int
(** Returns a string from the corpus.
    NOTE(review): presumably reads the next document from the [bin_fh]
    channel and advances it; end-of-input behaviour is not visible —
    confirm. *)
val next : t -> string
(** Returns an [int array] from the corpus.
    NOTE(review): presumably the tokenised counterpart of [next], read via
    [tok_fh] — confirm. *)
val next_tok : t -> int array
(** Applies an indexed callback over the corpus and returns unit.
    NOTE(review): the callback's element type ['a] and result type ['b] are
    unconstrained in this signature, although [next]/[get] return [string];
    check whether the element type should be [string] and the result
    [unit]. *)
val iteri : (int -> 'a -> 'b) -> t -> unit
(** Same shape as [iteri]; presumably iterates tokenised documents.
    NOTE(review): ['a] is unconstrained here although [next_tok]/[get_tok]
    return [int array] — confirm. *)
val iteri_tok : (int -> 'a -> 'b) -> t -> unit
(** Applies an indexed callback over the corpus and collects the results in
    a ['b array].
    NOTE(review): the element type ['a] is unconstrained in this signature —
    confirm the intended type. *)
val mapi : (int -> 'a -> 'b) -> t -> 'b array
(** Same shape as [mapi]; presumably maps over tokenised documents.
    NOTE(review): ['a] is unconstrained here — confirm. *)
val mapi_tok : (int -> 'a -> 'b) -> t -> 'b array
(** Takes a corpus and an [int] and returns a string; presumably the document
    at that index. Out-of-range behaviour is not visible — confirm. *)
val get : t -> int -> string
(** Takes a corpus and an [int] and returns an [int array]; presumably the
    tokenised document at that index — confirm. *)
val get_tok : t -> int -> int array
(** Side-effecting operation (returns unit).
    NOTE(review): presumably rewinds the state consumed by [next] and
    [next_tok] — confirm. *)
val reset_iterators : t -> unit
(** Returns an array of strings; [?size] is optional and presumably bounds
    the number of documents returned. Its default is not visible — confirm. *)
val next_batch : ?size:int -> t -> string array
(** Returns an array of [int array]; presumably the tokenised counterpart of
    [next_batch]. The default for [?size] is not visible — confirm. *)
val next_batch_tok : ?size:int -> t -> int array array
(** Takes a corpus and a string and returns an [int array].
    NOTE(review): presumably maps the words of the string to indices using
    the corpus vocabulary; handling of unknown words is not visible —
    confirm. *)
val tokenise : t -> string -> int array
(** Builds a [t] from a final positional string, with six optional labelled
    arguments before it ([?docid], [?stopwords], [?lo], [?hi], [?vocab],
    [?minlen]).
    NOTE(review): the string is presumably the path of the raw text to
    index. [?stopwords] has a polymorphic value type, which suggests only
    its keys are consulted. Defaults and the meaning of [?lo]/[?hi] are not
    visible — confirm all of these. *)
val build : ?docid:int array -> ?stopwords:(string, 'a) Hashtbl.t -> ?lo:float -> ?hi:float -> ?vocab:Owl_nlp_vocabulary.t -> ?minlen:int -> string -> t
(** Takes two strings and returns an [int array].
    NOTE(review): presumably an input path and an output path for a
    de-duplication pass, with the array identifying the retained entries —
    confirm; nothing beyond the types is visible here. *)
val unique : string -> string -> int array
(** String-to-string transformation.
    NOTE(review): the exact normalisation applied is not visible — confirm. *)
val simple_process : string -> string
(** Takes a [string -> bytes] function and two strings; returns unit.
    NOTE(review): presumably applies the function to each unit of an input
    file and writes the results to an output file — confirm which string is
    which. *)
val preprocess : (string -> bytes) -> string -> string -> unit
(** Returns a [t] from a [t].
    NOTE(review): which fields are dropped or reset in the result is not
    visible here — confirm. *)
val reduce_model : t -> t
(** Takes a corpus and a string and returns unit; presumably serialises the
    corpus to the file named by the string — confirm the format. *)
val save : t -> string -> unit
(** Takes a string and returns a [t]; presumably the inverse of [save].
    Failure behaviour for a missing or malformed file is not visible —
    confirm. *)
val load : string -> t
(** Takes a corpus and a string and returns unit; presumably writes a
    plain-text rendering of the corpus to the named file — confirm. *)
val save_txt : t -> string -> unit
(** Returns a string for the corpus; presumably a human-readable summary —
    confirm. *)
val to_string : t -> string
(** Side-effecting operation (returns unit); presumably prints the result of
    [to_string] — confirm the output channel. *)
val print : t -> unit