package owl

You can search for identifiers within the package.

in-package search v0.2.0

On This Page

Type definition
Query corpus
Iteration functions
Core functions
I/O functions
Helper functions

package owl

owl
- Owl
  - Arr
  - Mat
- Owl_algodiff
  - D
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
  - S
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
- Owl_algodiff_primal_ops
  - D
    
    Linalg
    
    Mat
  - S
    
    Linalg
    
    Mat
- Owl_cblas
- Owl_cblas_basic
- Owl_cblas_generated
- Owl_cluster
- Owl_core_types
- Owl_dataset
- Owl_dense
- Owl_dense_matrix
  - C
  - D
  - Generic
  - Operator
  - S
  - Z
- Owl_dense_matrix_c
- Owl_dense_matrix_d
- Owl_dense_matrix_generic
- Owl_dense_matrix_intf
  - Common
  - Complex
  - Real
- Owl_dense_matrix_s
- Owl_dense_matrix_z
- Owl_dense_ndarray
  - Any
  - C
  - D
  - Generic
  - Operator
  - S
  - Z
- Owl_dense_ndarray_a
- Owl_dense_ndarray_c
- Owl_dense_ndarray_d
- Owl_dense_ndarray_generic
- Owl_dense_ndarray_intf
  - Common
  - Complex
  - Distribution
  - NN
  - Real
- Owl_dense_ndarray_s
- Owl_dense_ndarray_z
- Owl_distribution
  - Make
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Beta
    
    Cauchy
    
    Chi2
    
    Exponential
    
    F
    
    Gamma
    
    Gaussian
    
    Gumbel1
    
    Gumbel2
    
    Laplace
    
    Logistic
    
    Lognormal
    
    Lomax
    
    Poisson
    
    Rayleigh
    
    Uniform
    
    Weibull
- Owl_distribution_common
- Owl_distribution_generic
- Owl_ext
  - Dense
    
    Matrix
    
    Ndarray
- Owl_ext_binop
  - C_C
  - C_DAC
  - C_DAD
  - C_DAS
  - C_DAZ
  - C_DMC
  - C_DMD
  - C_DMS
  - C_DMZ
  - C_F
  - DAC_C
  - DAC_DAC
  - DAC_DAD
  - DAC_DAS
  - DAC_DAZ
  - DAC_F
  - DAD_C
  - DAD_DAC
  - DAD_DAD
  - DAD_DAS
  - DAD_DAZ
  - DAD_F
  - DAS_C
  - DAS_DAC
  - DAS_DAD
  - DAS_DAS
  - DAS_DAZ
  - DAS_F
  - DAZ_C
  - DAZ_DAC
  - DAZ_DAD
  - DAZ_DAS
  - DAZ_DAZ
  - DAZ_F
  - DMC_C
  - DMC_DMC
  - DMC_DMD
  - DMC_DMS
  - DMC_DMZ
  - DMC_F
  - DMD_C
  - DMD_DMC
  - DMD_DMD
  - DMD_DMS
  - DMD_DMZ
  - DMD_F
  - DMS_C
  - DMS_DMC
  - DMS_DMD
  - DMS_DMS
  - DMS_DMZ
  - DMS_F
  - DMZ_C
  - DMZ_DMC
  - DMZ_DMD
  - DMZ_DMS
  - DMZ_DMZ
  - DMZ_F
  - F_C
  - F_DAC
  - F_DAD
  - F_DAS
  - F_DAZ
  - F_DMC
  - F_DMD
  - F_DMS
  - F_DMZ
  - F_F
- Owl_ext_dense_matrix
  - BasicSig
  - C
  - CZ_Sig
  - D
  - Make_Basic
    
    M
    
    P
  - Make_CZ
    
    M
    
    P
  - Make_SD
    
    M
    
    P
  - PackSig
  - Pack_DMC
  - Pack_DMD
  - Pack_DMS
  - Pack_DMZ
  - S
  - SD_Sig
  - Z
- Owl_ext_dense_ndarray
  - BasicSig
  - C
  - CZ_Sig
  - D
  - Make_Basic
    
    M
    
    P
  - Make_CZ
    
    M
    
    P
  - Make_SD
    
    M
    
    P
  - PackSig
  - Pack_DAC
  - Pack_DAD
  - Pack_DAS
  - Pack_DAZ
  - S
  - SD_Sig
  - Z
- Owl_ext_lifts
  - DAC_DAD
  - DAC_DAZ
  - DAD_DAZ
  - DAS_DAC
  - DAS_DAD
  - DAS_DAZ
  - DMC_DMD
  - DMC_DMZ
  - DMD_DMZ
  - DMS_DMC
  - DMS_DMD
  - DMS_DMZ
  - F_C
- Owl_ext_types
- Owl_ext_uniop
  - C
  - DAC
  - DAD
  - DAS
  - DAZ
  - DMC
  - DMD
  - DMS
  - DMZ
  - F
- Owl_fft
  - D
  - Generic
  - S
- Owl_fft_d
- Owl_fft_generic
- Owl_fft_s
- Owl_fftpack
- Owl_lapacke
- Owl_lapacke_generated
- Owl_linalg
  - C
  - D
  - Generic
  - S
  - Z
- Owl_linalg_c
- Owl_linalg_d
- Owl_linalg_generic
- Owl_linalg_intf
  - Common
  - Real
- Owl_linalg_s
- Owl_linalg_z
- Owl_maths
- Owl_maths_special
- Owl_matrix
- Owl_matrix_check
- Owl_matrix_swap
- Owl_mcmc
- Owl_ndarray
- Owl_ndarray_contract
- Owl_ndarray_conv
- Owl_ndarray_fma
- Owl_ndarray_maths
- Owl_ndarray_pool
- Owl_ndarray_repeat
- Owl_ndarray_slide
- Owl_ndarray_sort
- Owl_ndarray_transpose
- Owl_ndarray_upsampling
- Owl_ndarray_utils
- Owl_neural
  - D
    
    Graph
    
    Neuron
    
    Activation
    
    Add
    
    AlphaDropout
    
    Average
    
    AvgPool1D
    
    AvgPool2D
    
    Concatenate
    
    Conv1D
    
    Conv2D
    
    Conv3D
    
    DilatedConv1D
    
    DilatedConv2D
    
    DilatedConv3D
    
    Dot
    
    Dropout
    
    Embedding
    
    Flatten
    
    FullyConnected
    
    GRU
    
    GaussianDropout
    
    GaussianNoise
    
    GlobalAvgPool1D
    
    GlobalAvgPool2D
    
    GlobalMaxPool1D
    
    GlobalMaxPool2D
    
    Init
    
    Input
    
    LSTM
    
    Lambda
    
    LambdaArray
    
    Linear
    
    LinearNoBias
    
    Masking
    
    Max
    
    MaxPool1D
    
    MaxPool2D
    
    Mul
    
    Normalisation
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
    
    Padding1D
    
    Padding2D
    
    Padding3D
    
    Recurrent
    
    Reshape
    
    Slice
    
    TransposeConv1D
    
    TransposeConv2D
    
    TransposeConv3D
    
    UpSampling1D
    
    UpSampling2D
    
    UpSampling3D
  - S
    
    Graph
    
    Neuron
    
    Activation
    
    Add
    
    AlphaDropout
    
    Average
    
    AvgPool1D
    
    AvgPool2D
    
    Concatenate
    
    Conv1D
    
    Conv2D
    
    Conv3D
    
    DilatedConv1D
    
    DilatedConv2D
    
    DilatedConv3D
    
    Dot
    
    Dropout
    
    Embedding
    
    Flatten
    
    FullyConnected
    
    GRU
    
    GaussianDropout
    
    GaussianNoise
    
    GlobalAvgPool1D
    
    GlobalAvgPool2D
    
    GlobalMaxPool1D
    
    GlobalMaxPool2D
    
    Init
    
    Input
    
    LSTM
    
    Lambda
    
    LambdaArray
    
    Linear
    
    LinearNoBias
    
    Masking
    
    Max
    
    MaxPool1D
    
    MaxPool2D
    
    Mul
    
    Normalisation
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
    
    Padding1D
    
    Padding2D
    
    Padding3D
    
    Recurrent
    
    Reshape
    
    Slice
    
    TransposeConv1D
    
    TransposeConv2D
    
    TransposeConv3D
    
    UpSampling1D
    
    UpSampling2D
    
    UpSampling3D
- Owl_neural_parallel
  - EngineSig
  - Make
    
    E
    
    M
  - ModelSig
- Owl_nlp
- Owl_nlp_corpus
- Owl_nlp_lda
- Owl_nlp_lda0
  - FTreeLDA
  - LightLDA
  - SimpleLDA
  - SparseLDA
- Owl_nlp_similarity
- Owl_nlp_tfidf
- Owl_nlp_utils
- Owl_nlp_vocabulary
- Owl_optimise
  - D
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
  - Make_Embedded
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
  - S
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
- Owl_parallel
  - Make_Distributed
    
    E
    
    M
  - Make_Distributed_Any
    
    E
    
    M
  - Make_Shared
    
    E
    
    M
  - Mapre_Engine
  - Ndarray
  - Ndarray_Any
- Owl_ppl
  - Make
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Beta
    
    Cauchy
    
    Chi2
    
    Exponential
    
    F
    
    Gamma
    
    Gaussian
    
    Gumbel1
    
    Gumbel2
    
    Laplace
    
    Lazy
    
    Logistic
    
    Lognormal
    
    Lomax
    
    Poisson
    
    Rayleigh
    
    Uniform
    
    Weibull
- Owl_ppl_random_variable
  - Make
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Dist
    
    Beta
    
    Cauchy
    
    Chi2
    
    Exponential
    
    F
    
    Gamma
    
    Gaussian
    
    Gumbel1
    
    Gumbel2
    
    Laplace
    
    Logistic
    
    Lognormal
    
    Lomax
    
    Poisson
    
    Rayleigh
    
    Uniform
    
    Weibull
    
    Lazy
- Owl_regression
  - D
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
  - Make_Embedded
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
  - S
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
- Owl_regression_generic
  - Make
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
- Owl_regression_generic_sig
  - Sig
    
    Optimise
    
    Algodiff
    
    A
    
    Linalg
    
    Mat
    
    Scalar
    
    Arr
    
    Builder
    
    Aiso
    
    Piso
    
    Siao
    
    Sipo
    
    Siso
    
    Sito
    
    Linalg
    
    Mat
    
    Maths
    
    NN
    
    Batch
    
    Checkpoint
    
    Clipping
    
    Gradient
    
    Learning_Rate
    
    Loss
    
    Momentum
    
    Params
    
    Regularisation
    
    Stopping
    
    Utils
- Owl_signal
- Owl_slicing
- Owl_slicing_basic
- Owl_slicing_fancy
- Owl_sparse
- Owl_sparse_common
- Owl_sparse_dok_matrix
- Owl_sparse_matrix
  - C
  - D
  - DOK
  - Generic
  - Operator
  - S
  - Z
- Owl_sparse_matrix_c
- Owl_sparse_matrix_d
- Owl_sparse_matrix_generic
- Owl_sparse_matrix_s
- Owl_sparse_matrix_z
- Owl_sparse_ndarray
  - C
  - D
  - Generic
  - Operator
  - S
  - Z
- Owl_sparse_ndarray_c
- Owl_sparse_ndarray_d
- Owl_sparse_ndarray_generic
- Owl_sparse_ndarray_s
- Owl_sparse_ndarray_z
- Owl_stats
- Owl_stats_dist
- Owl_stats_extend
- Owl_stats_prng
- Owl_stats_sampler

Legend:
Library
Module
Module type
Parameter
Class
Class type

NLP: Corpus module

Type definition

type t

Type of a text corpus.

Query corpus

val length : t -> int

Return the size of the corpus, i.e. number of documents.

val get : t -> int -> string

Return the ith document in the corpus.

val get_tok : t -> int -> int array

Return the ith tokenised document in the corpus.

val get_uri : t -> string

Return the path of the corpus.

val get_bin_uri : t -> string

Return the path of the binary format of the corpus.

val get_bin_fh : t -> in_channel

Return the file handle of the binary format of the corpus.

val get_tok_uri : t -> string

Return the path of tokenised corpus.

val get_tok_fh : t -> in_channel

Return the file handle of the tokenised corpus.

val get_vocab_uri : t -> string

Return the path of vocabulary file associated with the corpus.

val get_vocab : t -> Owl_nlp_vocabulary.t

Return the vocabulary associated with the corpus.

val get_docid : t -> int array

Return an array of document ids which map back to the original file from which the corpus was built.

Iteration functions

val next : t -> string

Return the next document in the corpus.

val next_tok : t -> int array

Return the next tokenised document in the corpus.

val iteri : (int -> string -> unit) -> t -> unit

Iterate all the documents in the corpus, the index (line number) is passed in.

val iteri_tok : (int -> int array -> unit) -> t -> unit

Iterate the tokenised documents in the corpus, the index (line number) is passed in.

val mapi : (int -> string -> 'a) -> t -> 'a array

Map all the documents in a corpus into another array. The index (line number) is passed in.

val mapi_tok : (int -> 'a -> 'b) -> t -> 'b array

Map all the tokenised documents in a corpus into another array. The index (line number) is passed in.

val next_batch : ?size:int -> t -> string array

Return the next batch of documents in a corpus as a string array. The default ``size`` is 100.

val next_batch_tok : ?size:int -> t -> int array array

Return the next batch of tokenised documents in a corpus as an array of ``int array``. The default ``size`` is 100.

val reset_iterators : t -> unit

Reset the iterator to the beginning of the corpus.

Core functions

val build : 
  ?docid:int array ->
  ?stopwords:(string, 'a) Hashtbl.t ->
  ?lo:float ->
  ?hi:float ->
  ?vocab:Owl_nlp_vocabulary.t ->
  ?minlen:int ->
  string ->
  t

This function builds up a corpus of type ``t`` from a given raw text corpus. We assume that each line in the raw text corpus represents a document.

Parameters:

* ``?docid``: passed in ``docid`` can be used for tracking back to the original corpus, but this is not compulsory.
* ``?stopwords``: stopwords used in building vocabulary.
* ``?lo``: any word below this lower bound of the frequency is removed from vocabulary.
* ``?hi``: any word above this upper bound of the frequency is removed from vocabulary.
* ``?vocab``: an optional vocabulary, if it is not passed, the vocabulary is built from current corpus.
* ``?(minlen=10)``: threshold of the document length, any document shorter than this is removed from the corpus.
* ``fname``: the file name of the raw text corpus.

val tokenise : t -> string -> int array

``tokenise corpus doc`` tokenises the document ``doc`` using the ``corpus`` and its associated vocabulary.

val unique : string -> string -> int array

Remove the duplicates in a text corpus, the ids of the removed files are returned.

val simple_process : string -> string

Function for simple pre-processing of a given string.

val preprocess : (string -> bytes) -> string -> string -> unit

``preprocess f input_file output_file`` pre-processes a given file ``input_file`` with the passed in function ``f`` then saves the output to ``output_file``.

E.g., you can plug in ``simple_process`` function to clean up the text. Note this function will not change the number of lines in a corpus.

I/O functions

val save : t -> string -> unit

Serialise the corpus and save it to a file of given name.

val load : string -> t

Load a serialised corpus from a file.

val save_txt : t -> string -> unit

Convert the tokenised corpus back to a text file.

val to_string : t -> string

The string representation of a corpus, containing a summary of the corpus.

val print : t -> unit

Pretty print the summary of a text corpus.

Helper functions

val create : 
  string ->
  int array ->
  int array ->
  in_channel option ->
  in_channel option ->
  Owl_nlp_vocabulary.t option ->
  int ->
  int array ->
  t

``create uri bin_ofs tok_ofs bin_fh tok_fh vocab minlen docid`` wraps up the corpus into a record of type ``t``.

val reduce_model : t -> t

Set some fields to ``None`` so it can be safely serialised.

val cleanup : t -> unit

Close the opened file handles associated with the corpus.