|
SHOGUN
v1.1.0
|
The class Alphabet implements an alphabet and alphabet utility functions.
These utility functions can be used to remap characters to more (bit-)efficient representations, check whether a string is valid, compute histograms, etc.
Currently supported alphabets are DNA, RAWDNA, RNA, PROTEIN, BINARY, ALPHANUM, CUBE, RAW, IUPAC_NUCLEIC_ACID and IUPAC_AMINO_ACID.
Definition at line 88 of file Alphabet.h.

Public Member Functions | |
| CAlphabet () | |
| CAlphabet (char *alpha, int32_t len) | |
| CAlphabet (EAlphabet alpha) | |
| CAlphabet (CAlphabet *alpha) | |
| virtual | ~CAlphabet () |
| bool | set_alphabet (EAlphabet alpha) |
| EAlphabet | get_alphabet () const |
| int32_t | get_num_symbols () const |
| int32_t | get_num_bits () const |
| uint8_t | remap_to_bin (uint8_t c) |
| uint8_t | remap_to_char (uint8_t c) |
| void | clear_histogram () |
| clear histogram | |
| template<class T > | |
| void | add_string_to_histogram (T *p, int64_t len) |
| void | add_byte_to_histogram (uint8_t p) |
| void | print_histogram () |
| print histogram | |
| SGVector< int64_t > | get_histogram () |
| bool | check_alphabet (bool print_error=true) |
| bool | is_valid (uint8_t c) |
| bool | check_alphabet_size (bool print_error=true) |
| int32_t | get_num_symbols_in_histogram () |
| int32_t | get_max_value_in_histogram () |
| int32_t | get_num_bits_in_histogram () |
| virtual const char * | get_name () const |
| template<> | |
| void | translate_from_single_order (float32_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<> | |
| void | translate_from_single_order (float64_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<> | |
| void | translate_from_single_order (floatmax_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<> | |
| void | translate_from_single_order_reversed (float32_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<> | |
| void | translate_from_single_order_reversed (float64_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<> | |
| void | translate_from_single_order_reversed (floatmax_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
Public Member Functions inherited from CSGObject | |
| CSGObject () | |
| CSGObject (const CSGObject &orig) | |
| virtual | ~CSGObject () |
| virtual bool | is_generic (EPrimitiveType *generic) const |
| template<class T > | |
| void | set_generic () |
| void | unset_generic () |
| virtual void | print_serializable (const char *prefix="") |
| virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
| virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
| void | set_global_io (SGIO *io) |
| SGIO * | get_global_io () |
| void | set_global_parallel (Parallel *parallel) |
| Parallel * | get_global_parallel () |
| void | set_global_version (Version *version) |
| Version * | get_global_version () |
| SGVector< char * > | get_modelsel_names () |
| char * | get_modsel_param_descr (const char *param_name) |
| index_t | get_modsel_param_index (const char *param_name) |
Static Public Member Functions | |
| static const char * | get_alphabet_name (EAlphabet alphabet) |
| template<class ST > | |
| static void | translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val) |
| template<class ST > | |
| static void | translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val) |
| template<class ST > | |
| static void | translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
| template<class ST > | |
| static void | translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
Static Public Attributes | |
| static const uint8_t | B_A = 0 |
| static const uint8_t | B_C = 1 |
| static const uint8_t | B_G = 2 |
| static const uint8_t | B_T = 3 |
| static const uint8_t | B_0 = 4 |
| static const uint8_t | MAPTABLE_UNDEF = 0xff |
| static const char * | alphabet_names [18] |
Protected Member Functions | |
| void | init_map_table () |
| void | copy_histogram (CAlphabet *src) |
| virtual void | load_serializable_post () throw (ShogunException) |
Protected Member Functions inherited from CSGObject | |
| virtual void | load_serializable_pre () throw (ShogunException) |
| virtual void | save_serializable_pre () throw (ShogunException) |
| virtual void | save_serializable_post () throw (ShogunException) |
Protected Attributes | |
| EAlphabet | alphabet |
| int32_t | num_symbols |
| int32_t | num_bits |
| bool | valid_chars [1<< (sizeof(uint8_t)*8)] |
| uint8_t | maptable_to_bin [1<< (sizeof(uint8_t)*8)] |
| uint8_t | maptable_to_char [1<< (sizeof(uint8_t)*8)] |
| int64_t | histogram [1<< (sizeof(uint8_t)*8)] |
Additional Inherited Members | |
Public Attributes inherited from CSGObject | |
| SGIO * | io |
| Parallel * | parallel |
| Version * | version |
| Parameter * | m_parameters |
| Parameter * | m_model_selection_parameters |
| CAlphabet | ( | ) |
default constructor
Definition at line 34 of file Alphabet.cpp.
| CAlphabet | ( | char * | alpha, |
| int32_t | len | ||
| ) |
|
virtual |
Definition at line 103 of file Alphabet.cpp.
| void add_byte_to_histogram | ( | uint8_t | p | ) |
| void add_string_to_histogram | ( | T * | p, |
| int64_t | len | ||
| ) |
make histogram for whole string
| p | string |
| len | length of string |
Definition at line 180 of file Alphabet.h.
| bool check_alphabet | ( | bool | print_error = true | ) |
check whether the symbols in the histogram are valid in the alphabet, e.g. for DNA, whether only the letters A, C, G and T appear
| print_error | if errors shall be printed |
Definition at line 600 of file Alphabet.cpp.
| bool check_alphabet_size | ( | bool | print_error = true | ) |
check whether ALL symbols in the histogram fit in the alphabet
| print_error | if errors shall be printed |
Definition at line 622 of file Alphabet.cpp.
| void clear_histogram | ( | ) |
clear histogram
Definition at line 543 of file Alphabet.cpp.
| void copy_histogram | ( | CAlphabet * | src | ) |
protected |
copy histogram
| src | alphabet to copy histogram from |
Definition at line 639 of file Alphabet.cpp.
| EAlphabet get_alphabet | ( | ) | const |
| const char * get_alphabet_name | ( | EAlphabet | alphabet | ) |
static |
return alphabet name
| alphabet | alphabet type to get name from |
Definition at line 652 of file Alphabet.cpp.
| SGVector< int64_t > get_histogram | ( | ) |
| int32_t get_max_value_in_histogram | ( | ) |
return maximum value in histogram
Definition at line 549 of file Alphabet.cpp.
|
virtual |
| int32_t get_num_bits | ( | ) | const |
get number of bits necessary to store all symbols in alphabet
Definition at line 146 of file Alphabet.h.
| int32_t get_num_bits_in_histogram | ( | ) |
return number of bits required to store all symbols in histogram
Definition at line 576 of file Alphabet.cpp.
| int32_t get_num_symbols | ( | ) | const |
get number of symbols in alphabet
Definition at line 136 of file Alphabet.h.
| int32_t get_num_symbols_in_histogram | ( | ) |
return number of symbols in histogram
Definition at line 564 of file Alphabet.cpp.
| void init_map_table | ( | ) |
protected |
init map table
Definition at line 178 of file Alphabet.cpp.
| bool is_valid | ( | uint8_t | c | ) |
check whether a symbol is valid in the alphabet, e.g. for DNA, whether the symbol is one of A, C, G or T
| c | symbol |
Definition at line 218 of file Alphabet.h.
|
protected virtual |
Can (optionally) be overridden to post-initialize member variables that are not PARAMETER::ADD'ed. Make sure that the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called first.
| ShogunException | Will be thrown if an error occurs. |
Reimplemented from CSGObject.
Definition at line 732 of file Alphabet.cpp.
| void print_histogram | ( | ) |
print histogram
Definition at line 586 of file Alphabet.cpp.
| uint8_t remap_to_bin | ( | uint8_t | c | ) |
remap element, e.g. translate ACGT to 0123
| c | element to remap |
Definition at line 156 of file Alphabet.h.
| uint8_t remap_to_char | ( | uint8_t | c | ) |
remap element, e.g. translate 0123 to ACGT
| c | element to remap |
Definition at line 166 of file Alphabet.h.
| bool set_alphabet | ( | EAlphabet | alpha | ) |
set alphabet and initialize mapping table (for remap)
| alpha | new alphabet |
Definition at line 107 of file Alphabet.cpp.
|
static |
translate from single order
| obs | observation |
| sequence_length | length of sequence |
| start | start |
| p_order | order |
| max_val | maximum value |
Definition at line 743 of file Alphabet.cpp.
|
static |
translate from single order
| obs | observation |
| sequence_length | length of sequence |
| start | start |
| p_order | order |
| max_val | maximum value |
| gap | gap |
Definition at line 819 of file Alphabet.cpp.
| void translate_from_single_order | ( | float32_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 938 of file Alphabet.cpp.
| void translate_from_single_order | ( | float64_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 942 of file Alphabet.cpp.
| void translate_from_single_order | ( | floatmax_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 946 of file Alphabet.cpp.
|
static |
translate from single order reversed
| obs | observation |
| sequence_length | length of sequence |
| start | start |
| p_order | order |
| max_val | maximum value |
Definition at line 781 of file Alphabet.cpp.
|
static |
translate from single order reversed
| obs | observation |
| sequence_length | length of sequence |
| start | start |
| p_order | order |
| max_val | maximum value |
| gap | gap |
Definition at line 881 of file Alphabet.cpp.
| template void translate_from_single_order_reversed< float32_t > | ( | float32_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 950 of file Alphabet.cpp.
| template void translate_from_single_order_reversed< float64_t > | ( | float64_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 954 of file Alphabet.cpp.
| template void translate_from_single_order_reversed< floatmax_t > | ( | floatmax_t * | obs, |
| int32_t | sequence_length, | ||
| int32_t | start, | ||
| int32_t | p_order, | ||
| int32_t | max_val, | ||
| int32_t | gap | ||
| ) |
Definition at line 958 of file Alphabet.cpp.
|
protected |
alphabet
Definition at line 350 of file Alphabet.h.
|
static |
alphabet names
Definition at line 335 of file Alphabet.h.
|
static |
B_0
Definition at line 331 of file Alphabet.h.
|
static |
B_A
Definition at line 323 of file Alphabet.h.
|
static |
B_C
Definition at line 325 of file Alphabet.h.
|
static |
B_G
Definition at line 327 of file Alphabet.h.
|
static |
B_T
Definition at line 329 of file Alphabet.h.
|
protected |
histogram
Definition at line 362 of file Alphabet.h.
|
protected |
maptable to bin
Definition at line 358 of file Alphabet.h.
|
protected |
maptable to char
Definition at line 360 of file Alphabet.h.
|
static |
MAPTABLE UNDEF
Definition at line 333 of file Alphabet.h.
|
protected |
number of bits
Definition at line 354 of file Alphabet.h.
|
protected |
number of symbols
Definition at line 352 of file Alphabet.h.
|
protected |
valid chars
Definition at line 356 of file Alphabet.h.