package biocaml

  1. Overview
  2. Docs
Legend:
Library
Module
Module type
Parameter
Class
Class type

Parsing of VCF files.

This module implements VCF version 4.1, as defined by the 1000 Genomes Project: http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41

type vcf_id = string
type vcf_description = string
type vcf_number =
  1. | Number of int
  2. | OnePerAllele
  3. | OnePerGenotype
  4. | Unknown
type vcf_format_type = [
  1. | `integer_value
  2. | `float_value
  3. | `character_value
  4. | `string_value
]

Types allowed for the VCF FORMAT meta header.

type vcf_info_type = [
  1. | vcf_format_type
  2. | `flag_value
]

Types allowed for the VCF INFO meta header.

type vcf_info_meta =
  1. | Info of vcf_number * vcf_info_type * vcf_description
type vcf_filter_meta =
  1. | Filter of vcf_description
type vcf_format_meta =
  1. | Format of vcf_number * vcf_format_type * vcf_description
type vcf_alt_meta =
  1. | Alt of vcf_description
type vcf_meta = {
  1. vcfm_version : string;
  2. vcfm_id_cache : vcf_id Core_kernel.Set.Poly.t;
  3. vcfm_info : (vcf_id, vcf_info_meta) Core_kernel.Hashtbl.t;
  4. vcfm_filters : (vcf_id * vcf_filter_meta) list;
  5. vcfm_format : (vcf_id, vcf_format_meta) Core_kernel.Hashtbl.t;
  6. vcfm_alt : (string, vcf_alt_meta) Core_kernel.Hashtbl.t;
  7. vcfm_arbitrary : (string, string) Core_kernel.Hashtbl.t;
  8. vcfm_header : string list;
  9. vcfm_samples : string list;
}
type vcf_format = [
  1. | `integer of int
  2. | `float of float
  3. | `character of char
  4. | `string of string
  5. | `missing
]
type vcf_info = [
  1. | vcf_format
  2. | `flag of string
]
type vcf_row = {
  1. vcfr_chrom : string;
  2. vcfr_pos : int;
  3. vcfr_ids : string list;
  4. vcfr_ref : string;
  5. vcfr_alts : string list;
  6. vcfr_qual : float option;
  7. vcfr_filter : vcf_id list;
  8. vcfr_info : (vcf_id, vcf_info list) Core_kernel.Hashtbl.t;
  9. vcfr_samples : (vcf_id, (vcf_id * vcf_format list) list) Core_kernel.Hashtbl.t;
}
type item = vcf_row
type vcf_parse_row_error = [
  1. | `invalid_int of string
  2. | `invalid_float of string
  3. | `info_type_coersion_failure of vcf_info_type * string
  4. | `format_type_coersion_failure of vcf_format_type * string
  5. | `invalid_dna of string
  6. | `unknown_info of vcf_id
  7. | `unknown_filter of vcf_id
  8. | `unknown_alt of string
  9. | `duplicate_ids of vcf_id list
  10. | `invalid_arguments_length of vcf_id * int * int
  11. | `invalid_row_length of int * int
  12. | `malformed_sample of string
  13. | `unknown_format of vcf_id
]
type vcf_parse_error = [
  1. | `malformed_meta of Pos.t * string
  2. | `malformed_row of Pos.t * vcf_parse_row_error * string
  3. | `malformed_header of Pos.t * string
  4. | `incomplete_input of Pos.t * string list * string option
  5. | `not_ready
]
val parse_error_to_string : vcf_parse_error -> string
module Transform : sig ... end