From 51dcdeb28559d7ef379bac9f5ac90175358342ff Mon Sep 17 00:00:00 2001 From: Jonathan Perdomo Date: Fri, 17 Nov 2023 11:35:03 -0500 Subject: [PATCH] 2 alignment based cnv calling (#11) Add high-recall deletion and duplication detection without clustering --- .github/workflows/build-tests.yml | 2 +- .gitignore | 10 +- doc/Doxyfile => Doxyfile | 859 ++++++++++++++------- README.md | 5 +- __main__.py | 212 ++++- data/chr_cov.txt | 3 + data/hhall.hmm | 36 + data/hhall_loh.hmm | 36 + docs/genome-assembly-guide.md | 43 ++ doc/OUTLINE.md => docs/project-overview.md | 84 +- environment.yml | 4 + include/cli.h | 15 - include/cnv_caller.h | 44 +- include/cnv_data.h | 26 + include/common.h | 45 -- include/contextsv.h | 27 +- include/fasta_query.h | 25 + include/input_data.h | 93 +++ include/integrative_caller.h | 36 - include/kc.h | 9 +- include/khmm.h | 47 +- include/sv_caller.h | 38 + include/sv_data.h | 54 ++ include/swig_interface.h | 17 + include/types.h | 63 ++ include/utils.h | 12 +- include/vcf_writer.h | 23 + python/cnv_plots.py | 263 +++++++ python/scoring_model.py | 6 + python/train_model.py | 69 ++ python/utils.py | 44 ++ samtools/htslib | 1 - scripts/find_cigar_deletions.py | 58 ++ setup.py | 44 +- src/cli.cpp | 40 - src/cnv_caller.cpp | 494 +++++++++--- src/cnv_data.cpp | 114 +++ src/common.cpp | 169 ---- src/contextsv.cpp | 87 +++ src/fasta_query.cpp | 168 ++++ src/input_data.cpp | 324 ++++++++ src/integrative_caller.cpp | 40 - src/kc.cpp | 18 +- src/khmm.cpp | 145 ++-- src/sv_caller.cpp | 350 +++++++++ src/sv_data.cpp | 194 +++++ src/swig_interface.cpp | 31 + src/swig_wrapper.i | 9 +- src/utils.cpp | 36 + src/vcf_writer.cpp | 73 ++ tests/data/ref.fa | 0 tests/test_general.py | 42 +- 52 files changed, 3695 insertions(+), 992 deletions(-) rename doc/Doxyfile => Doxyfile (77%) create mode 100644 data/chr_cov.txt create mode 100644 data/hhall.hmm create mode 100644 data/hhall_loh.hmm create mode 100644 docs/genome-assembly-guide.md rename doc/OUTLINE.md => docs/project-overview.md (80%) delete mode 100644 include/cli.h create mode 100644 include/cnv_data.h delete mode 100644 include/common.h create mode 100644 include/fasta_query.h create mode 100644 include/input_data.h delete mode 100644 include/integrative_caller.h create mode 100644 include/sv_caller.h create mode 100644 include/sv_data.h create mode 100644 include/swig_interface.h create mode 100644 include/types.h create mode 100644 include/vcf_writer.h create mode 100644 python/cnv_plots.py create mode 100644 python/scoring_model.py create mode 100644 python/train_model.py create mode 100644 python/utils.py delete mode 160000 samtools/htslib create mode 100644 scripts/find_cigar_deletions.py delete mode 100644 src/cli.cpp create mode 100644 src/cnv_data.cpp delete mode 100644 src/common.cpp create mode 100644 src/contextsv.cpp create mode 100644 src/fasta_query.cpp create mode 100644 src/input_data.cpp delete mode 100644 src/integrative_caller.cpp create mode 100644 src/sv_caller.cpp create mode 100644 src/sv_data.cpp create mode 100644 src/swig_interface.cpp create mode 100644 src/vcf_writer.cpp create mode 100644 tests/data/ref.fa diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index 18ba20bd..82269bbf 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -34,4 +34,4 @@ jobs: shell: bash --login {0} run: | mkdir -p tests/output - python -m pytest + python -m pytest -s -v tests/test_general.py diff --git a/.gitignore b/.gitignore index ff375ec3..e5b1f803 100644 
--- a/.gitignore +++ b/.gitignore @@ -50,12 +50,14 @@ __pycache__/ *.code-workspace CMakeSettings.json -# Doxygen -doc/doxygen -*.code-workspace - # Shell scripts *.sh # Output folder output/ + +# Doxygen +docs/html/ + +# Singularity images +*.sif diff --git a/doc/Doxyfile b/Doxyfile similarity index 77% rename from doc/Doxyfile rename to Doxyfile index 691382ab..12593cc4 100644 --- a/doc/Doxyfile +++ b/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.8.17 +# Doxyfile 1.9.8 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,16 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -58,18 +68,30 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = doc/doxygen +OUTPUT_DIRECTORY = ./docs -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +103,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -227,6 +241,14 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -250,25 +272,19 @@ TAB_SIZE = 4 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. 
When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -310,18 +326,21 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note see also the list of default file extension mappings. EXTENSION_MAPPING = @@ -344,6 +363,17 @@ MARKDOWN_SUPPORT = YES TOC_INCLUDE_HEADINGS = 5 +# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to +# generate identifiers for the Markdown headings. Note: Every identifier is +# unique. +# Possible values are: DOXYGEN use a fixed 'autotoc_md' string followed by a +# sequence number starting at 0 and GITHUB use the lower case version of title +# with any whitespace replaced by '-' and punctuation characters removed. +# The default value is: DOXYGEN. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +MARKDOWN_ID_STYLE = DOXYGEN + # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or @@ -455,6 +485,27 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will based this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. 
At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + +# If the TIMESTAMP tag is set different from NO then each generated page will +# contain the date or date and time when the page was generated. Setting this to +# NO can help when comparing the output of multiple runs. +# Possible values are: YES, NO, DATETIME and DATE. +# The default value is: NO. + +TIMESTAMP = NO + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -467,7 +518,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. @@ -518,6 +569,13 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation @@ -529,7 +587,8 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO @@ -555,12 +614,20 @@ HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# (including Cygwin) ands Mac users are advised to set this option to NO. -# The default value is: system dependent. +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. 
On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = NO @@ -578,6 +645,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -735,7 +808,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -781,24 +855,50 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. 
+# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves +# like FAIL_ON_WARNINGS but in case no WARN_LOGFILE is defined doxygen will not +# write the warning messages in between other messages but write them at the end +# of a run, in case a WARN_LOGFILE is defined the warning messages will be +# besides being in the defined file also be shown at the end of a run, unless +# the WARN_LOGFILE is defined as - i.e. standard output (stdout) in that case +# the behavior will remain as with the setting FAIL_ON_WARNINGS. +# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT. # The default value is: NO. WARN_AS_ERROR = NO @@ -809,13 +909,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -829,17 +943,30 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = . +INPUT = include \ + src \ + docs/project-overview.md \ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. Doxygen will compare the file +# name with each pattern and apply the encoding instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -848,13 +975,15 @@ INPUT_ENCODING = UTF-8 # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. 
# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen -# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, -# *.vhdl, *.ucf, *.qsf and *.ice. +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cxxm, +# *.cpp, *.cppm, *.c++, *.c++m, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, +# *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, *.h++, *.ixx, *.l, *.cs, *.d, *.php, +# *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be +# provided as doxygen C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f18, *.f, *.for, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ @@ -918,7 +1047,10 @@ RECURSIVE = YES # run. EXCLUDE = build \ - cmake-build-debug + cmake-build-debug \ + lib \ + docs \ + src/swig_wrapper.cpp # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded @@ -940,12 +1072,9 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* +# ANamespace::AClass, ANamespace::*Test -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = swig::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -988,6 +1117,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -1029,6 +1163,15 @@ FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. 
+ +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -1116,16 +1259,24 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the -# cost of reduced performance. This can be particularly helpful with template -# rich C++ code for which doxygen's built-in parser lacks the necessary type -# information. +# clang parser (see: +# http://clang.llvm.org/) for more accurate parsing at the cost of reduced +# performance. This can be particularly helpful with template rich C++ code for +# which doxygen's built-in parser lacks the necessary type information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. +# The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_ADD_INC_PATHS = YES + # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories @@ -1135,10 +1286,13 @@ CLANG_ASSISTED_PARSING = NO CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the -# path to the compilation database (see: -# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files -# were built. This is equivalent to specifying the "-p" option to a clang tool, -# such as clang-check. These options will then be passed to the parser. +# path to the directory containing a file called compile_commands.json. This +# file is the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the +# options used when the source files were built. This is equivalent to +# specifying the -p option to a clang tool, such as clang-check. These options +# will then be passed to the parser. Any options specified with CLANG_OPTIONS +# will be added as well. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. @@ -1155,17 +1309,11 @@ CLANG_DATABASE_PATH = ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. 
The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1244,7 +1392,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = @@ -1259,9 +1412,22 @@ HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow to user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = AUTO_LIGHT + # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1271,7 +1437,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1289,15 +1455,6 @@ HTML_COLORSTYLE_SAT = 100 HTML_COLORSTYLE_GAMMA = 80 -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. 
If disabled, the navigation index will @@ -1317,6 +1474,13 @@ HTML_DYNAMIC_MENUS = YES HTML_DYNAMIC_SECTIONS = NO +# If the HTML_CODE_FOLDING tag is set to YES then classes and functions can be +# dynamically folded and expanded in the generated HTML source code. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_CODE_FOLDING = YES + # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to @@ -1332,10 +1496,11 @@ HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. @@ -1352,6 +1517,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1377,8 +1549,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1408,7 +1584,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. 
# This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1435,6 +1611,16 @@ BINARY_TOC = NO TOC_EXPAND = NO +# The SITEMAP_URL tag is used to specify the full URL of the place where the +# generated documentation will be placed on the server by the user during the +# deployment of the documentation. The generated sitemap is called sitemap.xml +# and placed on the directory specified by HTML_OUTPUT. In case no SITEMAP_URL +# is specified no sitemap is generated. For information about the sitemap +# protocol see https://www.sitemaps.org +# This tag requires that the tag GENERATE_HTML is set to YES. + +SITEMAP_URL = + # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help @@ -1453,7 +1639,8 @@ QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace -# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1461,8 +1648,8 @@ QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1470,16 +1657,16 @@ QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = @@ -1491,9 +1678,9 @@ QHP_CUST_FILTER_ATTRS = QHP_SECT_FILTER_ATTRS = -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = @@ -1536,16 +1723,28 @@ DISABLE_INDEX = NO # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. 
As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1570,6 +1769,24 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1579,17 +1796,6 @@ EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. 
@@ -1607,11 +1813,29 @@ FORMULA_MACROFILE = USE_MATHJAX = NO +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for NathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1624,22 +1848,29 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1686,7 +1917,8 @@ SERVER_BASED_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: https://xapian.org/). 
+# Xapian (see: +# https://xapian.org/). # # See the section "External Indexing and Searching" for details. # The default value is: NO. @@ -1699,8 +1931,9 @@ EXTERNAL_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: https://xapian.org/). See the section "External Indexing and -# Searching" for details. +# Xapian (see: +# https://xapian.org/). See the section "External Indexing and Searching" for +# details. # This tag requires that the tag SEARCHENGINE is set to YES. SEARCHENGINE_URL = @@ -1738,7 +1971,7 @@ EXTRA_SEARCH_MAPPINGS = # If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output. # The default value is: YES. -GENERATE_LATEX = YES +GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of @@ -1809,29 +2042,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. 
See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1864,18 +2099,26 @@ LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES, to get a -# higher quality PDF documentation. +# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. USE_PDFLATEX = YES -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode -# command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# The LATEX_BATCHMODE tag signals the behavior of LaTeX in case of an error. +# Possible values are: NO same as ERROR_STOP, YES same as BATCH, BATCH In batch +# mode nothing is printed on the terminal, errors are scrolled as if is +# hit at every error; missing files that TeX tries to input or request from +# keyboard input (\read on a not open input stream) cause the job to abort, +# NON_STOP In nonstop mode the diagnostic message will appear on the terminal, +# but there is no possibility of user interaction just like in batch mode, +# SCROLL In scroll mode, TeX will stop only for missing files to input or if +# keyboard input is necessary and ERROR_STOP In errorstop mode, TeX will stop at +# each error, asking for user intervention. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1888,16 +2131,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -1906,14 +2139,6 @@ LATEX_SOURCE_CODE = NO LATEX_BIB_STYLE = plain -# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated -# page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_TIMESTAMP = NO - # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) # path from which the emoji images will be read. If a relative path is entered, # it will be relative to the LATEX_OUTPUT directory. If left blank the @@ -1978,16 +2203,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. 
-# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2084,27 +2299,44 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an -# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# AutoGen Definitions (see https://autogen.sourceforge.net/) file that captures # the structure of the code including all documentation. Note that this feature # is still experimental and incomplete at the moment. # The default value is: NO. GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# Configuration options related to Sqlite3 output +#--------------------------------------------------------------------------- + +# If the GENERATE_SQLITE3 tag is set to YES doxygen will generate a Sqlite3 +# database with symbols found by doxygen stored in tables. +# The default value is: NO. + +GENERATE_SQLITE3 = NO + +# The SQLITE3_OUTPUT tag is used to specify where the Sqlite3 database will be +# put. If a relative path is entered the value of OUTPUT_DIRECTORY will be put +# in front of it. +# The default directory is: sqlite3. +# This tag requires that the tag GENERATE_SQLITE3 is set to YES. + +SQLITE3_OUTPUT = sqlite3 + +# The SQLITE3_OVERWRITE_DB tag is set to YES, the existing doxygen_sqlite3.db +# database file will be recreated with each doxygen run. If set to NO, doxygen +# will warn if an a database file is already found and not modify it. +# The default value is: YES. +# This tag requires that the tag GENERATE_SQLITE3 is set to YES. + +SQLITE3_RECREATE_DB = YES + #--------------------------------------------------------------------------- # Configuration options related to the Perl module output #--------------------------------------------------------------------------- @@ -2179,7 +2411,8 @@ SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. INCLUDE_PATH = @@ -2246,15 +2479,15 @@ TAGFILES = GENERATE_TAGFILE = -# If the ALLEXTERNALS tag is set to YES, all external class will be listed in -# the class index. If set to NO, only the inherited external classes will be -# listed. 
+# If the ALLEXTERNALS tag is set to YES, all external classes and namespaces +# will be listed in the class and namespace index. If set to NO, only the +# inherited external classes will be listed. # The default value is: NO. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will be +# in the topic index. If set to NO, only the current project's groups will be # listed. # The default value is: YES. @@ -2268,25 +2501,9 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES #--------------------------------------------------------------------------- -# Configuration options related to the dot tool +# Configuration options related to diagram generator tools #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = YES - -# You can include diagrams made with dia in doxygen documentation. Doxygen will -# then run dia to produce the diagram and insert it in the documentation. The -# DIA_PATH tag allows you to specify the directory where the dia binary resides. -# If left empty dia is assumed to be found in the default search path. - -DIA_PATH = - # If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. # The default value is: YES. @@ -2295,10 +2512,10 @@ HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: -# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# https://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: YES. +# The default value is: NO. HAVE_DOT = YES @@ -2312,49 +2529,73 @@ HAVE_DOT = YES DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. +# For details please see Node, +# Edge and Graph Attributes specification You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. 
-DOT_FONTNAME = Helvetica +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about +# arrows shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification +# The default value is: shape=box,height=0.2,width=0.4. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" + +# You can set the path where dot can find font specified with fontname in +# DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES or GRAPH or BUILTIN then doxygen will +# generate a graph for each documented class showing the direct and indirect +# inheritance relations. In case the CLASS_GRAPH tag is set to YES or GRAPH and +# HAVE_DOT is enabled as well, then dot will be used to draw the graph. In case +# the CLASS_GRAPH tag is set to YES and HAVE_DOT is disabled or if the +# CLASS_GRAPH tag is set to BUILTIN, then the built-in generator will be used. +# If the CLASS_GRAPH tag is set to TEXT the direct and indirect inheritance +# relations will be shown as texts / links. +# Possible values are: NO, YES, TEXT, GRAPH and BUILTIN. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a # graph for each documented class showing the direct and indirect implementation # dependencies (inheritance, containment, and class references variables) of the -# class with other documented classes. +# class with other documented classes. Explicit enabling a collaboration graph, +# when COLLABORATION_GRAPH is set to NO, can be accomplished by means of the +# command \collaborationgraph. Disabling a collaboration graph can be +# accomplished by means of the command \hidecollaborationgraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. Explicit enabling a group +# dependency graph, when GROUP_GRAPHS is set to NO, can be accomplished by means +# of the command \groupgraph. Disabling a directory graph can be accomplished by +# means of the command \hidegroupgraph. See also the chapter Grouping in the +# manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. 
@@ -2377,10 +2618,32 @@ UML_LOOK = NO # but if the number exceeds 15, the total amount of fields shown is limited to # 10. # Minimum value: 0, maximum value: 100, default value: 10. -# This tag requires that the tag HAVE_DOT is set to YES. +# This tag requires that the tag UML_LOOK is set to YES. UML_LIMIT_NUM_FIELDS = 10 +# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and +# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS +# tag is set to YES, doxygen will add type and arguments for attributes and +# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen +# will not generate fields with class member information in the UML graphs. The +# class diagrams will look similar to the default class diagrams but using UML +# notation for the relationships. +# Possible values are: NO, YES and NONE. +# The default value is: NO. +# This tag requires that the tag UML_LOOK is set to YES. + +DOT_UML_DETAILS = NO + +# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters +# to display on a single line. If the actual line length exceeds this threshold +# significantly it will wrapped across multiple lines. Some heuristics are apply +# to avoid ugly line breaks. +# Minimum value: 0, maximum value: 1000, default value: 17. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_WRAP_THRESHOLD = 17 + # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their # instances. @@ -2392,7 +2655,9 @@ TEMPLATE_RELATIONS = NO # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to # YES then doxygen will generate a graph for each documented file showing the # direct and indirect include dependencies of the file with other documented -# files. +# files. Explicit enabling an include graph, when INCLUDE_GRAPH is is set to NO, +# can be accomplished by means of the command \includegraph. Disabling an +# include graph can be accomplished by means of the command \hideincludegraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2401,7 +2666,10 @@ INCLUDE_GRAPH = YES # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are # set to YES then doxygen will generate a graph for each documented file showing # the direct and indirect include dependencies of the file with other documented -# files. +# files. Explicit enabling an included by graph, when INCLUDED_BY_GRAPH is set +# to NO, can be accomplished by means of the command \includedbygraph. Disabling +# an included by graph can be accomplished by means of the command +# \hideincludedbygraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2441,28 +2709,36 @@ GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the # dependencies a directory has on other directories in a graphical way. The # dependency relations are determined by the #include relations between the -# files in the directories. +# files in the directories. Explicit enabling a directory graph, when +# DIRECTORY_GRAPH is set to NO, can be accomplished by means of the command +# \directorygraph. Disabling a directory graph can be accomplished by means of +# the command \hidedirectorygraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. 
DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 10 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: -# http://www.graphviz.org/)). +# https://www.graphviz.org/)). # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, -# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_IMAGE_FORMAT = png +DOT_IMAGE_FORMAT = svg # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. @@ -2489,11 +2765,12 @@ DOT_PATH = DOTFILE_DIRS = -# The MSCFILE_DIRS tag can be used to specify one or more directories that -# contain msc files that are included in the documentation (see the \mscfile -# command). +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. +# If left empty dia is assumed to be found in the default search path. -MSCFILE_DIRS = +DIA_PATH = # The DIAFILE_DIRS tag can be used to specify one or more directories that # contain dia files that are included in the documentation (see the \diafile @@ -2502,10 +2779,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2541,19 +2818,7 @@ DOT_GRAPH_MAX_NODES = 50 # Minimum value: 0, maximum value: 1000, default value: 0. # This tag requires that the tag HAVE_DOT is set to YES. -MAX_DOT_GRAPH_DEPTH = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO +MAX_DOT_GRAPH_DEPTH = 1000 # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This @@ -2567,14 +2832,34 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. GENERATE_LEGEND = YES -# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. +# +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. DOT_CLEANUP = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. If the MSCGEN_TOOL tag is left empty (the default), then doxygen will +# use a built-in version of mscgen tool to produce the charts. Alternatively, +# the MSCGEN_TOOL tag can also specify the name an external tool. For instance, +# specifying prog as the value, doxygen will call the tool as prog -T +# <outfile_format> -o <outfile> <infile>. The external tool should support +# output file formats "png", "eps", "svg", and "ismap". + +MSCGEN_TOOL = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = diff --git a/README.md b/README.md index 04c7b886..a053f78d 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,7 @@ An alignment-based, generalized structural variant caller for long-read sequencing/mapping data -[![build tests](https://github.com/WGLab/ContextSV/actions/workflows/build-tests.yml/badge.svg)](https://github.com/WGLab/ContextSV/actions/workflows/build-tests.yml) \ No newline at end of file +[![build +tests](https://github.com/WGLab/ContextSV/actions/workflows/build-tests.yml/badge.svg)](https://github.com/WGLab/ContextSV/actions/workflows/build-tests.yml) + +Documentation available at [https://wglab.openbioinformatics.org/ContextSV](https://wglab.openbioinformatics.org/ContextSV) diff --git a/__main__.py b/__main__.py index 137e7109..4d946d5f 100644 --- a/__main__.py +++ b/__main__.py @@ -3,52 +3,222 @@ """ import os +import sys import argparse +import logging as log from lib import contextsv +from python import cnv_plots + +# Set up logging. +log.basicConfig( + level=log.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[ + log.StreamHandler(sys.stdout) + ] +) def main(): + """Entry point and user interface for the program.""" # Grab the command line arguments using argparse. parser = argparse.ArgumentParser( description="ContextSV: A tool for integrative structural variant detection." ) + + # Add common arguments.
+ parser.add_argument( + "-r", "--region", + help="The region to analyze.", + required=True + ) + + parser.add_argument( + "-o", "--output", + help="The path to the output directory.", + required=True + ) + + parser.add_argument( + "-v", "--version", + help="Print the version number and exit.", + action="version", + version="%(prog)s 0.0.1" + ) + + # Thread count. + parser.add_argument( + "-t", "--threads", + help="The number of threads to use.", + required=False, + default=1, + type=int + ) + + # CNV data path. + parser.add_argument( + "--cnv", + help="The path to the CNV data in TSV format.", + required=False + ) + + # Mode 1: SV detection mode. parser.add_argument( "-b", "--bam", help="The path to the BAM file.", - required=True + required=False + ) + parser.add_argument( + "-g", "--reference", + help="The path to the reference genome.", + required=False ) parser.add_argument( "-s", "--snps", help="The path to the SNPs file.", - required=True + required=False ) + + # PFB file of population allele frequencies. parser.add_argument( - "-o", "--output", - help="The path to the output file.", - required=True + "-p", "--pfb", + help="The path to the PFB file of population allele frequencies.", + required=False ) + + # HMM file path. parser.add_argument( - "-r", "--region", - help="The region to analyze.", - required=True + "--hmm", + help="The path to the HMM file.", + required=False + ) + + # Pass in chromosome mean coverage data for speediness. + parser.add_argument( + "--chr-cov", + help="Chromosome mean coverage values passed in as a comma-separated list (e.g. chr1:100,chr2:200,chr3:300).", + required=False + ) + + # Turn off CIGAR string SV detection. This is for debugging purposes (speeds + # up the program). + parser.add_argument( + "--disable-cigar", + help="Turn off CIGAR string SV detection.", + required=False, + action="store_true", + default=False + ) + + # Mode 2: CNV plots mode. If only the VCF file is provided, then the program + # will run in this mode. + parser.add_argument( + "--vcf", + help="The path to the VCF file of SV calls.", + required=False ) # Get the command line arguments. args = parser.parse_args() - # Run the program. - print("Running contextsv with the following arguments:") - print("BAM: {}".format(args.bam)) - print("SNPs: {}".format(args.snps)) - print("Output: {}".format(args.output)) - print("Region: {}".format(args.region)) - - contextsv.run( - args.bam, - args.snps, - args.output, - args.region - ) + # Remove commas or spaces from the region. + region = args.region + region = region.replace(",", "") + region = region.replace(" ", "") + + # Determine the selected program mode (SV detection or CNV plots). + if (args.vcf is not None): + # Run SV analysis mode. + + # Ensure that the CNV data path is provided. + arg_error = False + if (args.cnv is None): + log.error("Please provide the CNV data path.") + arg_error = True + + # Exit if there are any errors. + if (arg_error): + sys.exit(1) + + # Create the output directory if it doesn't exist. + if (not os.path.exists(args.output)): + os.makedirs(args.output) + + # Set the data paths from user input for downstream analysis. + vcf_path = args.vcf + cnv_data_path = args.cnv + output_dir = args.output + + log.info("Analyzing SV calls in VCF file %s...", args.vcf) + + else: + # Run SV detection mode. + + # Ensure BAM, reference, and SNPs files are provided. 
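+        # For example, a minimal detection-mode run supplies all three
+        # (illustrative invocation only; the file names are placeholders,
+        # run from the repository root):
+        #   python __main__.py -b sample.bam -g ref.fa -s snps.vcf.gz -o out/ -r chr1
+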
+        arg_error = False
+        if (args.bam is None):
+            log.error("Please provide the BAM file.")
+            arg_error = True
+
+        if (args.reference is None):
+            log.error("Please provide the reference genome.")
+            arg_error = True
+
+        if (args.snps is None):
+            log.error("Please provide the SNPs file.")
+            arg_error = True
+
+        # Exit with error code 1 if any required input is missing.
+        if (arg_error):
+            sys.exit(1)
+
+        # Log the provided arguments, and set all unset optional arguments to
+        # empty strings before passing them to the C++ library.
+        for key, value in vars(args).items():
+            if value is None:
+                setattr(args, key, "")
+            else:
+                log.info("Setting %s to %s", key, value)
+
+        log.info("Chromosome mean coverage: %s", args.chr_cov)
+        log.info("PFB filepath: %s", args.pfb)
+
+        # Set input parameters.
+        input_data = contextsv.InputData()
+        input_data.setBAMFilepath(args.bam)
+        input_data.setRefGenome(args.reference)
+        input_data.setSNPFilepath(args.snps)
+        input_data.setRegion(region)  # Use the normalized region string.
+        input_data.setThreadCount(args.threads)
+        input_data.setChrCov(args.chr_cov)
+        input_data.setPFBFilepath(args.pfb)
+        input_data.setHMMFilepath(args.hmm)
+        input_data.setOutputDir(args.output)
+        input_data.setDisableCIGAR(args.disable_cigar)
+        input_data.setCNVFilepath(args.cnv)
+
+        # Run the analysis.
+        contextsv.run(input_data)
+
+        # Determine the data paths for downstream analysis.
+        vcf_path = os.path.join(args.output, "sv_calls.vcf")
+        output_dir = args.output
+
+        if (args.cnv == ""):
+            cnv_data_path = os.path.join(args.output, "cnv_data.tsv")
+        else:
+            cnv_data_path = args.cnv
+
+    # Run the python-based analysis.
+    log.info("Running python-based analysis...")
+    cnv_plots.run(vcf_path, cnv_data_path, output_dir, region)
+    log.info("Done.")

 if __name__ == '__main__':
diff --git a/data/chr_cov.txt b/data/chr_cov.txt
new file mode 100644
index 00000000..89fe909d
--- /dev/null
+++ b/data/chr_cov.txt
@@ -0,0 +1,3 @@
+# This text file includes chromosome coverage values for speeding up runtime
+chr3=39.561
+chr6=39.4096
diff --git a/data/hhall.hmm b/data/hhall.hmm
new file mode 100644
index 00000000..139e6361
--- /dev/null
+++ b/data/hhall.hmm
@@ -0,0 +1,36 @@
+M=6
+N=6
+A:
+0.936719716 0.006332139 0.048770575 0.000000001 0.008177573 0.000000001
+0.000801036 0.949230924 0.048770575 0.000000001 0.001168245 0.000029225
+0.000004595 0.000047431 0.999912387 0.000000001 0.000034971 0.000000621
+0.000049998 0.000049998 0.000049998 0.999750015 0.000049998 0.000049998
+0.000916738 0.001359036 0.048770575 0.000000001 0.948953653 0.000000002
+0.000000001 0.000000001 0.027257213 0.000000001 0.000000004 0.972742785
+B:
+0.950000 0.000001 0.050000 0.000001 0.000001 0.000001
+0.000001 0.950000 0.050000 0.000001 0.000001 0.000001
+0.000001 0.000001 0.999995 0.000001 0.000001 0.000001
+0.000001 0.000001 0.050000 0.950000 0.000001 0.000001
+0.000001 0.000001 0.050000 0.000001 0.950000 0.000001
+0.000001 0.000001 0.050000 0.000001 0.000001 0.950000
+pi:
+0.000001 0.000500 0.999000 0.000001 0.000500 0.000001
+B1_mean:
+-3.527211 -0.664184 0.000000 100.000000 0.395621 0.678345
+B1_sd:
+1.329152 0.284338 0.159645 0.211396 0.209089 0.191579
+B1_uf:
+0.010000
+B2_mean:
+0.000000 0.250000 0.333333 0.500000 0.500000
+B2_sd:
+0.016372 0.042099 0.045126 0.034982 0.304243
+B2_uf:
+0.010000
+B3_mean:
+-2.051407 -0.572210 0.000000 0.000000 0.361669 0.626711
+B3_sd:
+2.132843 0.382025 0.184001 0.200297 0.253551 0.353183
+B3_uf:
+0.010000
diff --git a/data/hhall_loh.hmm b/data/hhall_loh.hmm
new file
mode 100644
index 00000000..2c7a2ff7
--- /dev/null
+++ b/data/hhall_loh.hmm
@@ -0,0 +1,36 @@
+M=6
+N=6
+A:
+0.936719716 0.006332139 0.048770575 0.000000001 0.008177573 0.000000001
+0.000801036 0.949230924 0.048770575 0.000000001 0.001168245 0.000029225
+0.000004595 0.000047431 0.999912387 0.000000001 0.000034971 0.000000621
+0.000049998 0.000049998 0.000049998 0.999750015 0.000049998 0.000049998
+0.000916738 0.001359036 0.048770575 0.000000001 0.948953653 0.000000002
+0.000000001 0.000000001 0.027257213 0.000000001 0.000000004 0.972742785
+B:
+0.950000 0.000001 0.050000 0.000001 0.000001 0.000001
+0.000001 0.950000 0.050000 0.000001 0.000001 0.000001
+0.000001 0.000001 0.999995 0.000001 0.000001 0.000001
+0.000001 0.000001 0.050000 0.950000 0.000001 0.000001
+0.000001 0.000001 0.050000 0.000001 0.950000 0.000001
+0.000001 0.000001 0.050000 0.000001 0.000001 0.950000
+pi:
+0.000001 0.000500 0.999000 0.000001 0.000500 0.000001
+B1_mean:
+-3.527211 -0.664184 0.000000 0.000000 0.395621 0.678345
+B1_sd:
+1.329152 0.284338 0.159645 0.211396 0.209089 0.191579
+B1_uf:
+0.010000
+B2_mean:
+0.000000 0.250000 0.333333 0.500000 0.500000
+B2_sd:
+0.016372 0.042099 0.045126 0.034982 0.304243
+B2_uf:
+0.010000
+B3_mean:
+-2.051407 -0.572210 0.000000 0.000000 0.361669 0.626711
+B3_sd:
+2.132843 0.382025 0.184001 0.200297 0.253551 0.353183
+B3_uf:
+0.010000
diff --git a/docs/genome-assembly-guide.md b/docs/genome-assembly-guide.md
new file mode 100644
index 00000000..c37821b9
--- /dev/null
+++ b/docs/genome-assembly-guide.md
@@ -0,0 +1,43 @@
+# Hybrid Optical Map-Guided Assembly
+
+## Download Test Data
+Input data is from GIAB: Oxford Nanopore ultralong (guppy-V3.2.4_2020-01-22)
+
+https://github.com/genome-in-a-bottle/giab_data_indexes
+
+## De novo Assembly
+We will perform de novo assembly of the HG002 data using miniasm (instructions from https://github.com/lh3/miniasm).
+
+- Also see Kermit's optical map-guided assembly script, which uses the
+same tools and approach (https://github.com/Denopia/kermit-optical-maps/blob/master/genome-assembly.sh)
+
+### Set up the software environment:
+
+```
+export PATH="$software_dir"/minimap2:$PATH
+export PATH="$software_dir"/miniasm:$PATH
+```
+
+### Use minimap2 to self-align the HG002 data:
+
+```
+echo "Running minimap2..."
+minimap2 -x ava-ont -t 12 input_data/HG002_hs37d5_ONT_GIAB.fastq input_data/HG002_hs37d5_ONT_GIAB.fastq | gzip -1 > outputs/HG002_hs37d5_ONT_GIAB.paf.gz
+```
+
+### Use miniasm to assemble the self-aligned data:
+
+```
+echo "Running miniasm..."
+miniasm -f input_data/HG002_hs37d5_ONT_GIAB.fastq outputs/HG002_hs37d5_ONT_GIAB.paf.gz > outputs/HG002_hs37d5_ONT_GIAB.gfa
+```
+
+### Convert GFA file to FASTA for downstream analysis
+(instructions from https://www.biostars.org/p/169516/)
+
+```
+echo "Converting GFA to FASTA..."
+awk '/^S/{print ">"$2"\n"$3}' outputs/HG002_hs37d5_ONT_GIAB.gfa > outputs/HG002_hs37d5_ONT_GIAB.fasta
+echo "Complete."
+```
+
+This yields an assembly which can be used as input to BioNano's hybrid scaffolding pipeline to generate a more accurate hybrid assembly using the BioNano data.
diff --git a/doc/OUTLINE.md b/docs/project-overview.md
similarity index 80%
rename from doc/OUTLINE.md
rename to docs/project-overview.md
index f10c4872..995ea731
--- a/doc/OUTLINE.md
+++ b/docs/project-overview.md
@@ -1,44 +1,40 @@
-# ContextSV Outline
-
-## External dependencies
-
-| Dependency | Use | Installation |
-| - | - | - |
-| HTSLib | Reading BAM files.
| [https://www.biostars.org/p/328831/](https://www.biostars.org/p/328831/)<br>
http://www.htslib.org/download/ | -| Tensorflow | Importing the SV scoring model. | [https://www.tensorflow.org/install/lang_c](https://www.tensorflow.org/install/lang_c)| - -## SV scoring model -The SV scoring model is trained and saved as a Tensorflow weighted graph in PB file format. It can be trained via the [Python API](https://www.tensorflow.org/api_docs/python/tf). The [C++ API](https://www.tensorflow.org/api_docs/cc) is used to import and utilize this model for scoring SVs. - -### Training the model - -## Project structure - -``` -ContextSV -├── include -│ └── ContextSV.h -├── src -│ └── ContextSV.cpp -└── tests -│ ├── BasicTests.cpp -│ ├── AccuracyTests.cpp -│ └── ScoringModelTests.cpp -└── lib - ├── tensorflow - | ├── LICENSE - │ ├── include - │ │ └── ... - │ └── lib - │ └── ... - └── htslib - ├── LICENSE - ├── include - │ └── ... - └── lib - └── ... -``` - -## Tasks by order -- [x] Draft project structure -- [ ] CLI with BAM file input +# Project Overview + +## External dependencies + +| Dependency | Use | Installation | +| - | - | - | +| HTSLib | Reading BAM files. | [https://www.biostars.org/p/328831/](https://www.biostars.org/p/328831/)
http://www.htslib.org/download/ |
+| scikit-learn | Training and scoring the SV classification model. | [https://scikit-learn.org/stable/install.html](https://scikit-learn.org/stable/install.html)|
+
+## SV scoring model
+The SV scoring model is a binary classifier trained with scikit-learn and saved in PKL format with joblib (see `python/train_model.py`). The trained model is then loaded to score candidate structural variants (see the `python/scoring_model.py` stub).
+
+### Training the model
+
+## Project structure
+
+```
+ContextSV
+├── include
+│   └── ContextSV.h
+├── src
+│   └── ContextSV.cpp
+├── tests
+│   ├── BasicTests.cpp
+│   ├── AccuracyTests.cpp
+│   └── ScoringModelTests.cpp
+└── lib
+    ├── tensorflow
+    │   ├── LICENSE
+    │   ├── include
+    │   │   └── ...
+    │   └── lib
+    │       └── ...
+    └── htslib
+        ├── LICENSE
+        ├── include
+        │   └── ...
+        └── lib
+            └── ...
+```
diff --git a/environment.yml b/environment.yml
index 726c3c89..8820b657 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,3 +10,7 @@ dependencies:
 - htslib
 - swig
 - pytest
+ - plotly
+ - pandas
+ - scikit-learn
+ - joblib
diff --git a/include/cli.h b/include/cli.h
deleted file mode 100644
index 716c8cb1..00000000
--- a/include/cli.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//
-// cli.h:
-// Command-line interface entrypoint.
-//
-
-#ifndef CONTEXTSV_CLI_H
-#define CONTEXTSV_CLI_H
-
-#include "common.h"
-
-#include
-
-int run(std::string bam, std::string snps, std::string outdir, std::string region);
-
-#endif //CONTEXTSV_CLI_H
diff --git a/include/cnv_caller.h b/include/cnv_caller.h
index 2672e86d..4ad68927 100644
--- a/include/cnv_caller.h
+++ b/include/cnv_caller.h
@@ -1,37 +1,34 @@
-//
-// cnv_caller.h:
-// Detect CNVs
-//
+// CNVCaller: Detect CNVs and return the state sequence by SNP position
+// (key = [chromosome, SNP position], value = state)

#ifndef CNV_CALLER_H
#define CNV_CALLER_H

#include "khmm.h"
-#include "common.h"
+#include "input_data.h"
+#include "cnv_data.h"
+#include "types.h"

+/// @cond
#include
#include
+/// @endcond

-struct RegionCoverage {
-    bool valid = false;
-    uint64_t length = 0;
-    double mean = 0;
-    double baf = 0;
-};

class CNVCaller {
    private:
-        Common common;
+        InputData* input_data;
        double chr_mean_coverage = 0;

    public:
-        CNVCaller(Common common);
+        CNVCaller(InputData& input_data);

-        // Detect CNVs
-        std::vector run();
+        // Detect CNVs and return the state sequence by SNP position
+        // (key = [chromosome, SNP position], value = state)
+        CNVData run();

-        // Calculate Log R Ratios
-        std::vector calculateLogRRatiosAtSNPS(std::vector snp_positions);
+        // Calculate coverage log2 ratios at SNP positions
+        std::vector calculateLog2RatioAtSNPS(std::vector snp_positions);

        // Calculate the mean chromosome coverage
        double calculateMeanChromosomeCoverage();

@@ -40,10 +37,17 @@ class CNVCaller {
        double calculateWindowLogRRatio(double mean_chr_cov, int start_pos, int end_pos);

        // Read SNP positions and BAF values from the VCF file
-        std::pair, std::vector> readSNPBAFs();
+        SNPData readSNPBAFs(std::string filepath);
+
+        // Read SNP population frequencies from the PFB file and return a vector
+        // of population frequencies for each SNP location
+        std::vector getSNPPopulationFrequencies(std::vector snp_locations);
+
+        // Save a BED file with predicted copy number states
+        void saveToBED(SNPData& snp_data, std::string filepath);

-        // Save a CSV with SNP positions, BAF values and Log R Ratios
-        void saveSNPLRRBAFCSV(std::string filepath, std::vector snp_positions, std::vector bafs, std::vector logr_ratios,
std::vector state_sequence); + // Save a TSV with B-allele frequencies, log 2 ratios, and copy number predictions + void saveToTSV(SNPData& snp_data, std::string filepath); }; #endif // CNV_CALLER_H diff --git a/include/cnv_data.h b/include/cnv_data.h new file mode 100644 index 00000000..39e33cd4 --- /dev/null +++ b/include/cnv_data.h @@ -0,0 +1,26 @@ +#ifndef CNV_DATA_H +#define CNV_DATA_H + +#include "types.h" + +/// @cond +#include +/// @endcond + + +class CNVData { + private: + SNPToCNVMap cnv_calls; // Map of SNP positions to CNV types + + public: + // Add a CNV call to the map + void addCNVCall(std::string chr, int snp_pos, int cnv_type); + + // Get the most common CNV type within the SV region start and end positions + int getMostCommonCNV(std::string chr, int start, int end); + + // Load CNV calls from file + void loadFromFile(std::string filepath); +}; + +#endif // CNV_DATA_H diff --git a/include/common.h b/include/common.h deleted file mode 100644 index 24689b2a..00000000 --- a/include/common.h +++ /dev/null @@ -1,45 +0,0 @@ -// -// common.h: -// Manage common parameters and functions - -#ifndef CONTEXTSV_COMMON_H -#define CONTEXTSV_COMMON_H - -#include -#include -#include - -class Common { - public: - std::string get_bam_filepath(); - void set_bam_filepath(std::string filepath); - std::string get_ref_filepath(); - void set_ref_filepath(std::string filepath); - std::string get_output_dir(); - void set_output_dir(std::string dirpath); - std::string get_region(); - void set_region(std::string region); - int get_window_size(); - void set_window_size(int window_size); - std::string get_snp_vcf_filepath(); - void set_snp_vcf_filepath(std::string filepath); - std::string get_region_chr(); - int get_region_start(); - int get_region_end(); - bool get_region_set(); - void print_progress(int progress, int total); - - private: - std::string bam_filepath = ""; - std::string ref_filepath = ""; - std::string output_dir = ""; - std::string region = ""; - int window_size = 10000; - std::string snp_vcf_filepath = ""; - std::string region_chr = ""; - int region_start = 0; - int region_end = 0; - bool region_set = false; -}; - -#endif //CONTEXTSV_COMMON_H diff --git a/include/contextsv.h b/include/contextsv.h index d5304c1e..b90877ce 100644 --- a/include/contextsv.h +++ b/include/contextsv.h @@ -1,15 +1,28 @@ // -// contextsv.h -// Main header file for the contextsv library. +// contextsv.h: +// Main class for ContextSV // #ifndef CONTEXTSV_H #define CONTEXTSV_H -#include "khmm.h" -#include "common.h" +#include "input_data.h" +#include "cnv_data.h" +#include "sv_data.h" -#include -#include -#endif // CONTEXTSV_H +class ContextSV { + private: + InputData* input_data; + + // Label SVs based on CNV calls + void labelCNVs(CNVData cnv_calls, SVData& sv_calls); + + public: + ContextSV(InputData& input_data); + + // Entry point + int run(); +}; + +#endif // CONTEXTSV_H diff --git a/include/fasta_query.h b/include/fasta_query.h new file mode 100644 index 00000000..4050aff5 --- /dev/null +++ b/include/fasta_query.h @@ -0,0 +1,25 @@ +// FASTAQuery: A class for querying a FASTA file. 
+
+#ifndef FASTA_QUERY_H
+#define FASTA_QUERY_H
+
+/// @cond
+#include
+#include
+/// @endcond
+
+class FASTAQuery {
+    private:
+        std::string fasta_filepath;
+        std::map chr_to_seq;
+
+    public:
+        int setFilepath(std::string fasta_filepath);
+        std::string getFilepath();
+        std::string query(std::string chr, int64_t pos_start, int64_t pos_end);
+
+        // Get the chromosome contig lengths in VCF header format
+        std::string getContigHeader();
+};
+
+#endif // FASTA_QUERY_H
diff --git a/include/input_data.h b/include/input_data.h
new file mode 100644
index 00000000..9af673a4
--- /dev/null
+++ b/include/input_data.h
@@ -0,0 +1,93 @@
+//
+// input_data.h:
+// Manage common types, parameters, and functions
+
+#ifndef INPUT_DATA_H
+#define INPUT_DATA_H
+
+#include "fasta_query.h"
+
+/// @cond
+#include
+#include
+#include
+/// @endcond
+
+class InputData {
+    public:
+        InputData();
+
+        // Set the filepath to the BAM file with long read alignments for SV detection.
+        void setBAMFilepath(std::string filepath);
+        std::string getBAMFilepath();
+
+        // Set the filepath to the HMM parameters.
+        void setHMMFilepath(std::string filepath);
+        std::string getHMMFilepath();
+
+        // Set the filepath to the reference genome FASTA file.
+        void setRefGenome(std::string fasta_filepath);
+        FASTAQuery getRefGenome();
+
+        // Set the filepath to the tab-delimited file with SNP population frequencies.
+        void setPFBFilepath(std::string filepath);
+        std::string getPFBFilepath();
+
+        // Set the filepath to the VCF file with SNP calls used for CNV
+        // detection with the HMM.
+        void setSNPFilepath(std::string filepath);
+        std::string getSNPFilepath();
+
+        // Set the genomic region to analyze.
+        void setRegion(std::string region);
+        std::string getRegion();
+        std::string getRegionChr();
+        int getRegionStart();
+        int getRegionEnd();
+        bool getRegionSet();
+
+        // Set the window size for the log2 ratio calculation.
+        void setWindowSize(int window_size);
+        int getWindowSize();
+
+        // Set entire-chromosome mean coverage values to speed up the log2 ratio calculations.
+        void setChrCov(std::string chr_cov);
+        int getChrCov(std::string chr, double& cov);
+
+        // Set the output directory where the results will be written.
+        void setOutputDir(std::string dirpath);
+        std::string getOutputDir();
+
+        // Set the number of threads to use when parallelization is possible.
+        void setThreadCount(int thread_count);
+        int getThreadCount();
+
+        // Disable CIGAR string SV calling. This is useful for testing.
+        void setDisableCIGAR(bool disable_cigar);
+        bool getDisableCIGAR();
+
+        // Set the filepath to the TSV file with the CNV predictions.
+ void setCNVFilepath(std::string filepath); + std::string getCNVFilepath(); + + private: + std::string bam_filepath; + std::string ref_filepath; + std::string snp_vcf_filepath; + std::string pfb_filepath; + FASTAQuery fasta_query; + std::string output_dir; + std::string region; + int window_size; + std::string region_chr; + int region_start; + int region_end; + bool region_set; + std::map chr_cov; + int thread_count; + std::string hmm_filepath; + bool disable_cigar; + std::string cnv_filepath; +}; + +#endif // INPUT_DATA_H diff --git a/include/integrative_caller.h b/include/integrative_caller.h deleted file mode 100644 index 4f6e23ea..00000000 --- a/include/integrative_caller.h +++ /dev/null @@ -1,36 +0,0 @@ -// -// integrative_caller.h: -// Integrate SV calls -// - -#ifndef CONTEXTSV_INTEGRATIVE_CALLER_H -#define CONTEXTSV_INTEGRATIVE_CALLER_H - -#include "common.h" - -#include - -class IntegrativeCaller { - private: - Common common; - - public: - IntegrativeCaller(Common common); - - // Entry point - int run(); - - // Check if the bam file uses chr prefix notation - int bamHasChrPrefix(std::string filepath, bool& uses_chr_prefix); - - // Set the bam file path - void set_bam_filepath(std::string bam_filepath); - - // Set the reference file path - void set_ref_filepath(std::string ref_filepath); - void set_output_dir(std::string output_dir); - void set_region(std::string region); - void set_window_size(int window_size); -}; - -#endif diff --git a/include/kc.h b/include/kc.h index 0c9bedbc..a47e393b 100644 --- a/include/kc.h +++ b/include/kc.h @@ -1,5 +1,10 @@ +#ifndef KC_H +#define KC_H +/// @cond #include +/// @endcond + // #include "EXTERN.h" /* std perl include */ // #include "perl.h" /* std perl include */ // #include "XSUB.h" /* XSUB include */ @@ -234,7 +239,7 @@ double beta(double z, double w); double betai(double a, double b, double x); double betacf(double a, double b, double x); double inverff (double x); -double erf(double x); +double errorf (double x); /**************** random number @@ -249,3 +254,5 @@ double random_stdnormal (int *idum); 3rd party code ******************/ double SNPHWE(int obs_hets, int obs_hom1, int obs_hom2); + +#endif /* KC_H */ diff --git a/include/khmm.h b/include/khmm.h index 179abf80..416c543d 100644 --- a/include/khmm.h +++ b/include/khmm.h @@ -1,11 +1,14 @@ #ifndef _KHMM_H #define _KHMM_H +/// @cond #include #include #include -#include +#include #include +#include +/// @endcond typedef struct { int N; /* number of states; Q={1,2,...,N} */ @@ -38,47 +41,19 @@ typedef struct { /// Read an HMM from a file CHMM ReadCHMM (const char *filename); -/// Generate an HMM with default parameters -CHMM GetDefaultCHMM(); - -void PrintCHMM(FILE *fp, CHMM *phmm); -void CopyCHMM(CHMM *phmm1, CHMM *phmm2); +/// Free the memory allocated for an HMM void FreeCHMM(CHMM *phmm); +/// Run the main HMM algorithm std::vector testVit_CHMM (CHMM hmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double *plogproba); -void tumorVit_CHMM (CHMM hmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double *plogproba, double stroma); -std::vector ViterbiLogNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double **delta, int **psi, double *pprob); -void ViterbiLogNPStroma_CHMM(CHMM *phmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double **delta, int **psi, int *q, double *pprob, double stroma); -void estHMMFromFile_CHMM (CHMM hmm, int T, FILE *fp, int *niter, double *logprobinit, double *logprobfinal); -void 
BaumWelchNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double **alpha, double **beta, double **gamma, int *pniter, double *plogprobinit, double *plogprobfinal); -void ComputeXi_CHMM (CHMM* phmm, int T, double *O1, double *O2, double **biot, int *snpdist, double **alpha, double **beta, double ***xi); -void ComputeGamma_CHMM (CHMM *phmm, int T, double **alpha, double **beta, double **gamma); -void BackwardWithScale_CHMM (CHMM *phmm, int T, double *O1, double *O2, double **biot, int *snpdist, double **beta, double *scale, double *pprob); -void ForwardWithScale_CHMM (CHMM *phmm, int T, double *O1, double *O2, double **biot, int *snpdist, double **alpha, double *scale, double *pprob); - -void GetStateProb_CHMM(CHMM *phmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double *pprob, int state); -void convertHMMTransition (CHMM *phmm, double **A1, int dist); -void adjustBSD (CHMM *phmm, double sdo); +/// Viterbi algorithm +std::vector ViterbiLogNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, double *pfb, int *snpdist, double **delta, int **psi, double *pprob); +/// O1 emission probability double b1iot (int state, double *mean, double *sd, double uf, double o); -double b2iot (int state, double *mean, double *sd, double uf, double pfb, double b); -double b5iot (int state, double *mean, double *sd, double uf, double o, double stroma); -double b6iot (int state, double *mean, double *sd, double uf, double pfb, double b, double stroma); - -void testVitTrio_CHMM (CHMM *phmm, int T, double *Of1, double *Of2, double *Om1, double *Om2, double *Oo1, double *Oo2, double *pfb, int *snpdist, double *plogproba, double *trio_lrr_sd, int osex, int chrx_flag); -double trioTransition (double cimcell, double chit, double **A1, double e1, double e2, int dist, int pdn, int pifat, int pimot, int pioff, int dn, int ifat, int imot, int ioff); -void calculateCHIT (double a, double M[5][5][5][5][5][5]); -void calculateCHIT_male_x (double a, double M[5][5][5][5][5][5]); -void calculateCHIT_female_x (double a, double M[5][5][5][5][5][5]); -double *** AllocXi(int T, int N); -void FreeXi(double *** xi, int T, int N); - -void callCNVFromFile_SEQ (CHMM hmm, int T, FILE *fp, int *mlstate, double gamma_k, double gamma_theta); -void ViterbiLogNP_SEQ (CHMM *phmm, int T, double k, double theta, double *O1, int *O2, double *pfb, int *length, double **delta, int **psi, int *q); -double b4iot (int state, double *mean, double *sd, double uf, double pfb, int o1, int o2); -double b3iot (int state, double *mean, double *sd, double uf, double o1, int length); -void adjustHMMExpected (CHMM *phmm, double coverage); +/// O2 emission probability +double b2iot (int state, double *mean, double *sd, double uf, double pfb, double b); #endif // _HMM_H_ diff --git a/include/sv_caller.h b/include/sv_caller.h new file mode 100644 index 00000000..e9aaebfd --- /dev/null +++ b/include/sv_caller.h @@ -0,0 +1,38 @@ +// SVCaller: Detect SVs from long read alignments + +#ifndef SV_CALLER_H +#define SV_CALLER_H + +#include "input_data.h" +#include "cnv_data.h" +#include "sv_data.h" +#include "types.h" + +#include + +class SVCaller { + private: + //int max_indel_dist = 1000; // Maximum distance between two indels to + //be considered as a single SV + int max_indel_dist = 10; // Maximum distance between two indels to be considered as a single SV + //int min_sv_size = 50; // Minimum SV size to be considered + //int min_sv_size = 30; // Minimum SV size to be considered + int min_sv_size = 50; // Minimum SV size to be 
considered + int min_mapq = 20; // Minimum mapping quality to be considered + InputData* input_data; + + // Detect SVs from long read alignments in the CIGAR string + void detectSVsFromCIGAR(bam_hdr_t* header, bam1_t* alignment, SVData& sv_calls); + //void detectSVsFromCIGAR(SVData& sv_calls, std::string chr, int32_t pos, uint32_t* cigar, int cigar_len, bool debug_mode); + + // Detect SVs from split-read alignments (primary and supplementary) + SVData detectSVsFromSplitReads(); + + public: + SVCaller(InputData& input_data); + + // Detect SVs and predict SV type from long read alignments and CNV calls + SVData run(); +}; + +#endif // SV_CALLER_H diff --git a/include/sv_data.h b/include/sv_data.h new file mode 100644 index 00000000..73145fb5 --- /dev/null +++ b/include/sv_data.h @@ -0,0 +1,54 @@ +#ifndef SV_DATA_H +#define SV_DATA_H + +#include "fasta_query.h" // For querying the reference genome +#include "types.h" + +/// @cond +#include +#include +/// @endcond + + +class SVData { + private: + // SV candidate to read depth map + SVDepthMap sv_calls; + + // Store a reference to the reference genome + FASTAQuery *ref_genome; + + // SV type to string map (DEL, DUP, INV, INS, BND) + // DUPs [1] are INS with INFO/REPTYPE=DUP + std::map sv_type_map = { + {0, "DEL"}, + {1, "INS"}, + {2, "INV"}, + {3, "INS"}, + {4, "BND"} + }; + + public: + SVData(FASTAQuery& ref_genome); + void add(std::string chr, int64_t start, int64_t end, int sv_type, std::string alt_allele, std::string data_type); + + std::string getRefGenome(); + + // Query the reference genome for a given sequence + std::string getSequence(std::string chr, int64_t pos_start, int64_t pos_end); + + // Update the SV type for a given SV candidate + void updateSVType(SVCandidate key, int sv_type); + + // Save SV calls to VCF + void saveToVCF(FASTAQuery& ref_genome, std::string output_dir); + + // Begin and end iterators for the SV candidate map + SVDepthMap::iterator begin() { return this->sv_calls.begin(); } + SVDepthMap::iterator end() { return this->sv_calls.end(); } + + // Define the size of the SV candidate map + int size() { return this->sv_calls.size(); } +}; + +#endif // SV_DATA_H diff --git a/include/swig_interface.h b/include/swig_interface.h new file mode 100644 index 00000000..c7f163ae --- /dev/null +++ b/include/swig_interface.h @@ -0,0 +1,17 @@ +// +// swig_interface.h: +// Declare the C++ functions that will be wrapped by SWIG +// + +#ifndef SWIG_INTERFACE_H +#define SWIG_INTERFACE_H + +#include "input_data.h" + +/// @cond +#include +/// @endcond + +int run(InputData input_data); + +#endif // SWIG_INTERFACE_H diff --git a/include/types.h b/include/types.h new file mode 100644 index 00000000..46896f09 --- /dev/null +++ b/include/types.h @@ -0,0 +1,63 @@ +// This file contains all the type definitions used in the project + +#ifndef TYPES_H +#define TYPES_H + +/// @cond +#include +#include +#include +#include +/// @endcond + +// SNP data is a struct containing vectors used in predicting copy number states +struct SNPData { + std::vector locations; + std::vector pfbs; + std::vector bafs; + std::vector log2_ratios; + std::vector state_sequence; + + SNPData(): + locations({}),\ + pfbs({}), \ + bafs({}), \ + log2_ratios({}), \ + state_sequence({}) {} +}; + +// CNV candidate location map +// (chr, snp_pos) : cnv_type +using SNPLocation = std::pair; +using SNPToCNVMap = std::map; + +// SV candidate read depth map. An SV is defined by its location, type, and +// alternate allele. 
+// (chr, start, end, sv_type, alt_allele) : (ref_allele, num_reads)
+using SVCandidate = std::tuple; // chr, start, end, alt_allele
+
+// Create a struct for storing SV information
+struct SVInfo {
+    int sv_type;
+    int read_depth;
+    std::set data_type;
+    int sv_length;
+
+    SVInfo() :
+        sv_type(-1), read_depth(0), data_type({}), sv_length(0) {}
+
+    SVInfo(int sv_type, int read_depth, std::string data_type, int sv_length) :
+        sv_type(sv_type), read_depth(read_depth), data_type({data_type}), sv_length(sv_length) {}
+};
+
+using SVDepthMap = std::map; // Map for getting type and read depth of SV candidates
+
+// SV calling:
+// Alignment location (chr, start, end, depth)
+using AlignmentData = std::tuple;
+using AlignmentVector = std::vector;
+
+// Query map (query name, alignment vector)
+using QueryMap = std::map;
+
+#endif // TYPES_H
diff --git a/include/utils.h b/include/utils.h
index e566781b..516018a6 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -1,10 +1,10 @@
-//
-// utils.h:
// Utility functions
-//
-#ifndef CONTEXTSV_UTILS_H
-#define CONTEXTSV_UTILS_H
+#ifndef UTILS_H
+#define UTILS_H
-#endif //CONTEXTSV_UTILS_H
\ No newline at end of file
+// Print the progress of a task
+void printProgress(int progress, int total);
+
+#endif // UTILS_H
diff --git a/include/vcf_writer.h b/include/vcf_writer.h
new file mode 100644
index 00000000..800df144
--- /dev/null
+++ b/include/vcf_writer.h
@@ -0,0 +1,23 @@
+#ifndef VCF_WRITER_H
+#define VCF_WRITER_H
+
+/// @cond
+#include
+#include
+#include
+/// @endcond
+
+class VcfWriter {
+public:
+    // Constructor
+    VcfWriter(const std::string& filename);
+    void writeHeader(const std::vector& headerLines);
+    void writeRecord(const std::string& chrom, int pos, const std::string& id,
+                     const std::string& ref, const std::string& alt,
+                     const std::string& qual, const std::string& filter,
+                     const std::string& info, const std::string& format,
+                     const std::vector& samples);
+
+    // Close the VCF file
+    void close();
+
+private:
+    std::ofstream file_stream;
+};
+
+#endif // VCF_WRITER_H
diff --git a/python/cnv_plots.py b/python/cnv_plots.py
new file mode 100644
index 00000000..df1b1ca7
--- /dev/null
+++ b/python/cnv_plots.py
@@ -0,0 +1,263 @@
+"""Plot the copy number variants and their log2_ratio, BAF values."""
+
+import os
+import sys
+import logging as log
+import plotly
+from plotly.subplots import make_subplots
+import pandas as pd
+
+from .utils import parse_region, get_info_field_column, get_info_field_value
+
+# Set up logging.
+log.basicConfig(
+    level=log.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        log.StreamHandler(sys.stdout)
+    ]
+)
+
+def run(vcf_file, cnv_data_file, output_path, region):
+    """
+    Saves a plot of the CNVs and their log2 ratio and B-allele frequency
+    values.
+
+    Args:
+        vcf_file (str): The path to the VCF file.
+        cnv_data_file (str): The path to the CNV data file.
+        output_path (str): The path to the output directory.
+        region (str): The region to plot (e.g. chr1 or chr1:1000-2000).
+
+    Returns:
+        None
+    """
+    # Set the maximum number of CNVs to plot.
+    max_cnvs = 10
+
+    # Set the plot range for each CNV to 3 times its length on each side.
+    plot_range = 3
+
+    # Parse the region.
+    chromosome, start_position, end_position = parse_region(region)
+
+    if start_position is not None and end_position is not None:
+        log.info("Plotting CNVs in region %s:%d-%d.", chromosome, start_position, end_position)
+    else:
+        log.info("Plotting CNVs in region %s.", chromosome)
+
+    # Set up the CNV type string list for the 6 CNV states.
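+    # The HMM states are 1-based, so index 0 is a placeholder; states 1-2 map
+    # to DEL, 3-4 to copy-neutral (NEUT), and 5-6 to DUP, matching the
+    # red/black/blue marker colors assigned further below.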
+ cnv_types = ["NAN", "DEL", "DEL", "NEUT", "NEUT", "DUP", "DUP"] + + # Filter the VCF file to the region using pandas, and make the chromosome + # column a string. + vcf_data = pd.read_csv(vcf_file, sep="\t", comment="#", header=None, dtype={0: str}) + if start_position is not None and end_position is not None: + vcf_data = vcf_data[(vcf_data[0] == chromosome) & (vcf_data[1] >= start_position) & (vcf_data[1] <= end_position)] + else: + vcf_data = vcf_data[(vcf_data[0] == chromosome)] + + log.info("Loaded %d variants from %s", len(vcf_data), vcf_file) + + # Filter the CNV data to the region using pandas, and make the chromosome + # column a string. + log.info("Loading CNV data from %s", cnv_data_file) + cnv_data = pd.read_csv(cnv_data_file, sep="\t", header=0, dtype={"chromosome": str}) + if start_position is not None and end_position is not None: + cnv_data = cnv_data[(cnv_data["chromosome"] == chromosome) & (cnv_data["position"] >= start_position) & (cnv_data["position"] <= end_position)] + else: + cnv_data = cnv_data[(cnv_data["chromosome"] == chromosome)] + + log.info("Loaded %d CNVs.", len(cnv_data)) + + # Create an output html file where we will append the CNV plots. + if start_position is not None and end_position is not None: + html_filename = f"cnv_plots_{chromosome}_{start_position}_{end_position}.html" + else: + html_filename = f"cnv_plots_{chromosome}.html" + + # Create the output directory if it doesn't exist. + if not os.path.exists(output_path): + os.makedirs(output_path) + + # Create the output html file. + output_html_filepath = os.path.join(output_path, html_filename) + if os.path.exists(output_html_filepath): + os.remove(output_html_filepath) + + with open(output_html_filepath, "w", encoding="utf-8") as output_html_file: + + # Get the INFO field column by searching for the first column that + # contains SVTYPE=. + info_column = get_info_field_column(vcf_data) + + # Loop through the VCF data and plot each CNV (DEL or DUP) along with log2 + # ratio and BAF values for the SNPs in the CNV. + cnv_count = 0 + for _, sv_data in vcf_data.iterrows(): + + # Get the INFO field + info_field = sv_data[info_column] + + # Get the SVTYPE field value. + svtype = get_info_field_value(info_field, "SVTYPE") + if svtype == "INS" and get_info_field_value(info_field, "REPTYPE") == "DUP": + svtype = "DUP" + + # Analyze the CNV if it is a DEL or DUP (=INS with INFO/REPTYE=DUP) + if svtype in ("DEL", "DUP"): + + # Get the read depth (DP) value. + read_depth = int(get_info_field_value(info_field, "DP")) + + # Get the start position. + start_position = int(sv_data[1]) + + # Get the SV length. + cnv_length = int(get_info_field_value(info_field, "SVLEN")) + + # Get the end position using the start position and SV length. + end_position = start_position + cnv_length - 1 + + # Get the chromosome. + chromosome = sv_data[0] + + # Get the plot range. + plot_start_position = start_position - (plot_range * cnv_length) + plot_end_position = end_position + (plot_range * cnv_length) + + # Get the CNV state, log2 ratio, and BAF values for all SNPs in the + # plot range. + sv_data = cnv_data[(cnv_data["position"] >= plot_start_position) & (cnv_data["position"] <= plot_end_position)] + + # Get the marker colors for the state sequence. + marker_colors = [] + for state in sv_data["cnv_state"]: + if state in [1, 2]: + marker_colors.append("red") + elif state in [3, 4]: + marker_colors.append("black") + elif state in [5, 6]: + marker_colors.append("blue") + + # Get the hover text for the state sequence markers. 
+                hover_text = []
+                for _, row in sv_data.iterrows():
+                    hover_text.append(f"TYPE: {cnv_types[row['cnv_state']]}<br>CHR: {row['chromosome']}<br>POS: {row['position']}<br>L2R: {row['log2_ratio']}<br>BAF: {row['b_allele_freq']}<br>
PFB: {row['population_freq']}") + + # Create the log2 ratio trace. + log2_ratio_trace = plotly.graph_objs.Scatter( + x = sv_data["position"], + y = sv_data["log2_ratio"], + mode = "markers+lines", + name = r'Log2 Ratio', + text = hover_text, + hoverinfo = "text", + marker = dict( + color = marker_colors, + size = 10, + ), + line = dict( + color = "black", + width = 0 + ), + showlegend = False + ) + + # Create the B-allele frequency trace. + baf_trace = plotly.graph_objs.Scatter( + x = sv_data["position"], + y = sv_data["b_allele_freq"], + mode = "markers+lines", + name = "B-Allele Frequency", + text = hover_text, + marker = dict( + color = marker_colors, + size = 10, + ), + line = dict( + color = "black", + width = 0 + ), + showlegend = False + ) + + # Create a subplot for the CNV plot and the BAF plot. + fig = make_subplots( + rows=2, + cols=1, + shared_xaxes=True, + vertical_spacing=0.05, + subplot_titles=(r"SNP Log2 Ratio", "SNP B-Allele Frequency") + ) + + # Add the traces to the figure. + fig.append_trace(log2_ratio_trace, 1, 1) + fig.append_trace(baf_trace, 2, 1) + + # Set the x-axis title. + fig.update_xaxes( + title_text = "Chromosome Position", + row = 2, + col = 1 + ) + + # Set the y-axis titles. + fig.update_yaxes( + title_text = r"Log2 Ratio", + row = 1, + col = 1 + ) + + fig.update_yaxes( + title_text = "B-Allele Frequency", + row = 2, + col = 1 + ) + + # Set the figure title. + fig.update_layout( + title_text = f"{svtype} (DP={read_depth}, LEN={cnv_length}bp) at {chromosome}:{start_position}-{end_position}", + ) + + # Create a shaded rectangle for the CNV, layering it below the CNV + # trace and labeling it with the CNV type. + fig.add_vrect( + x0 = start_position, + x1 = end_position, + fillcolor = "Black", + layer = "below", + line_width = 0, + opacity = 0.1, + annotation_text = svtype, + annotation_position = "top left", + annotation_font_size = 20, + annotation_font_color = "black" + ) + + # Add vertical lines at the start and end positions of the CNV. + fig.add_vline( + x = start_position, + line_width = 2, + line_color = "black", + layer = "below" + ) + + fig.add_vline( + x = end_position, + line_width = 2, + line_color = "black", + layer = "below" + ) + + # Append the figure to the output html file. + output_html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn")) + log.info("Plotted CNV %s %s:%d-%d.", svtype, chromosome, start_position, end_position) + + # Increment the CNV count. + cnv_count += 1 + + # Break if the maximum number of CNVs has been reached. + if cnv_count == max_cnvs: + break + + log.info("Saved CNV plots to %s.", output_html_filepath) diff --git a/python/scoring_model.py b/python/scoring_model.py new file mode 100644 index 00000000..d38f22fa --- /dev/null +++ b/python/scoring_model.py @@ -0,0 +1,6 @@ +""" +scoring_model.py: Score the structural variants using the binary classification model. +""" + +import os +import sys diff --git a/python/train_model.py b/python/train_model.py new file mode 100644 index 00000000..e5408313 --- /dev/null +++ b/python/train_model.py @@ -0,0 +1,69 @@ +""" +train_model.py: Train the binary classification model. +""" + +import os +import sys +import joblib +from sklearn.linear_model import LogisticRegression +import pandas as pd + + +def train(true_positives_filepath, false_positives_filepath): + """Train the binary classification model.""" + # Read in the true positive and false positive data. 
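+    # Note: both TSV files are expected to provide "lrr" and "baf" columns,
+    # since those are the only features selected for training below.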
+ tp_data = pd.read_csv(true_positives_filepath, sep="\t") + fp_data = pd.read_csv(false_positives_filepath, sep="\t") + + # Get the training data. + tp_data["label"] = 1 + fp_data["label"] = 0 + training_data = pd.concat([tp_data, fp_data]) + + # Get the features and labels. + features = training_data[["lrr", "baf"]] + labels = training_data["label"] + + # Train the model. + model = LogisticRegression() + model.fit(features, labels) + + # Return the model. + return model + +# Run the program. +def run(true_positives_filepath, false_positives_filepath, output_directory): + """Run the program.""" + # Train the model. + model = train(true_positives_filepath, false_positives_filepath) + + # Save the model + model_path = os.path.join(output_directory, "model.pkl") + joblib.dump(model, model_path) + + # Print the model. + print(model) + + # Return the model. + return model + +def score(model, cnv_data): + """Score the structural variants.""" + # Get the features. + features = cnv_data[["lrr", "baf"]] + + # Score the structural variants. + scores = model.predict_proba(features) + + # Return the scores. + return scores + + +if __name__ == '__main__': + # Get the command line arguments. + tp_filepath = sys.argv[1] + fp_filepath = sys.argv[2] + output_dir = sys.argv[3] + + # Run the program. + run(tp_filepath, fp_filepath, output_dir) diff --git a/python/utils.py b/python/utils.py new file mode 100644 index 00000000..9176dc9f --- /dev/null +++ b/python/utils.py @@ -0,0 +1,44 @@ +"""Utility functions for genome data analysis.""" + +def parse_region(region): + """Parse a region string into its chromosome and start and end positions.""" + region_parts = region.split(":") + chromosome = str(region_parts[0]) + + try: + start_position = int(region_parts[1].split("-")[0]) + end_position = int(region_parts[1].split("-")[1]) + except IndexError: + start_position, end_position = None, None + + return chromosome, start_position, end_position + +def get_info_field_column(vcf_data): + """Return the column index of the INFO field in a VCF file.""" + index = vcf_data.apply(lambda col: col.astype(str).str.contains("SVTYPE=").any(), axis=0).idxmax() + return index + +def get_info_field_value(info_field, field_name): + """ + Get the value of a field in the INFO field of a VCF file. + + Args: + info_field (str): The INFO field. + field_name (str): The name of the field to get the value of. + + Returns: + str: The value of the field. + """ + + # Split the INFO field into its parts. + info_field_parts = info_field.split(";") + + # Get the field value. + field_value = "" + for info_field_part in info_field_parts: + if info_field_part.startswith("{}=".format(field_name)): + field_value = info_field_part.split("=")[1] + break + + # Return the field value. + return field_value diff --git a/samtools/htslib b/samtools/htslib deleted file mode 160000 index 4e61c128..00000000 --- a/samtools/htslib +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4e61c128238f3e7cbb3b1f4e9c0fdb4880aa9a10 diff --git a/scripts/find_cigar_deletions.py b/scripts/find_cigar_deletions.py new file mode 100644 index 00000000..f361570c --- /dev/null +++ b/scripts/find_cigar_deletions.py @@ -0,0 +1,58 @@ +import re + + +def find_deletions(cigar_string, ref_pos): + """ + Given a cigar string and a reference position, returns a list of all deletions in reference coordinates. 
+ """ + query_pos = 0 + deletions = [] + + # Parse CIGAR string + for length, operation in parse_cigar(cigar_string): + + if operation == 'D': # Deletion + # deletions.append((ref_pos, ref_pos + length, length)) + + # Print out the current operation and length + print(operation, length, ref_pos, ref_pos + length) + + if operation in ['M', 'D', 'N', '=', 'X']: + ref_pos += length + if operation not in ['I', 'S', 'H', 'P']: # Not Insertion, Soft Clip, Hard Clip, Padding + query_pos += length + + return deletions + +# Function to parse CIGAR string into (length, operation) tuples +def parse_cigar(cigar): + import re + cigar_tuples = re.findall(r'(\d+)([MIDNSHP=X])', cigar) + return [(int(length), operation) for length, operation in cigar_tuples] + +# Take user input (cigar string and reference position) +# cigar_string = input("Enter cigar string: ") +# ref_pos = int(input("Enter reference position: ")) +# # Call function +# deletions = find_deletions(cigar_string, ref_pos) + +# cigar_string = "9S13M1D27M1I389M1I171M1I178M1D3M1D79M1D25M7D248M1D310M1D10M2D7M1D147M12I25M1D24M1D148M1I51M1D8M2D127M3D47M2I4M1D22M1D119M1D390M4D177M1I9M1D34M1D74M1D84M1I1M1I62M2I160M1D56M1D20M3D403M2D12M1I77M1D12M2D72M1I135M1I18M4D14M4I36M1I36M1D177M1D61M4I347M2D36M1D42M1D82M1D4M1I41M1I198M1I160M1D83M1D32M1D78M1D340M1D106M5D180M2D120M1D23M2D47M1D232M2D1M1D198M1D107M1D98M1D282M3I8M2D216M1D41M2D5M1D8M1I205M1I18M2I244M1D74M1D5M1D310M3D45M2I79M1D204M1I29M1I33M1I164M1I97M1D295M2I42M3I69M1D473M2D22M1I59M4I36M1D20M2D...1I2M4I1M1I8M1D25M2I37M1I23M3I8M1D56M1I66M3D4M1D12M1D55M1D22M1D16M2I25M1I27M2I14M1D5M2D13M1I7M1I64M1I16M1D13M1D52M2I45M1D38M1I2M1I53M1D1M2D5M2D82M1D28M3I26M1I57M2I83M1D85M1D161M1I11M2I20M4I14M1D16M5I70M2D51M1D13M3D53M1D19M1I150M1D101M2I22M1D2M1D3M2D176M2I118M3D75M1D7M1I34M2I9M3I68M2I57M4I102M1I38M1I1M4D8M2D35M1D100M1D4M1D76M1D7M1D143M1I69M1D133M2D68M1D149M1D90M1D81M3D1M1D5M1D1M2D12M2I5M1I45M1D75M1D78M1D5M2D68M2I22M1I7M2D299M1I5M2I142M2D17M1D113M62D12M1D140M1D4M3D78M1D17M1D443M1D5M2D136M1I19M28S" +# ref_pos = 41191391 # 0-based leftmost coordinate of the reference sequence +# deletions = find_deletions(cigar_string, ref_pos) + +# cigar_string= "6S150M1I11M2D127M1D25M2D11M1D29M1D30M2I100M2I7M2I54M1I15M1D12M5D30M1D8M2I57M1D29M1D61M1I88M1D42M1D8M1I52M1D7M5I148M2I31M1D33M3D15M1D324M2I42M3I4M1D26M5I30M1D18M1I55M1D88M1D41M3D1M2D12M1D37M1D7M1D31M1I26M1D31M4I111M3D5M2D4M4D37M1D3M1D16M2I47M1D2M3D160M2D7M1I70M1D142M3D30M4I76M1D25M1I25M1I15M4D115M1D56M1D26M1D21M1I69M2I6M1I29M1D63M2I1M3D182M3D30M1D14M1D16M2D3M2D30M1I22M3D2M2D46M2D81M1D6M2I114M2D10M1D66M3D2M2D142M2I112M1D143M1D39M2D3M1D78M1D21M1D54M1D29M2D84M1I13M4I51M4D76M66D148M1D39M1D6M1D4M1D53M1I40M3I32M1D204M1D45M2I15M1I190M1D71M2D12M2D53M1D55M1I54M31S" +# ref_pos=41186795 +# deletions = find_deletions(cigar_string, ref_pos) + 
+cigar_string="4S68M1D157M2D36M1I174M1D7M1D64M1D35M4I98M2D212M1D54M2I102M1I139M1D331M1D81M1I12M1I211M2D102M1D126M1I270M1D148M1D164M1I16M1D58M1D212M1D16M1I75M1I62M1I39M2D43M1D144M3I45M1I73M2I79M2I428M1D8M1I28M1D73M1I407M1D81M1D242M2I68M2D175M2I6M3I15M4D85M1D39M1D152M1I54M1D55M3D223M3I4M3D564M1I3M1I4M1D20M1D77M1D348M1D104M1D20M3D48M1D120M1I22M1D3M1I61M2D69M1I1930M1I135M3D41M2I4M5D91M1D179M1I122M3D339M2I66M1D123M1D99M1I42M1D182M1I23M1D139M1D107M1D219M1I65M1I22M1D107M1D36M1I21M1D152M1I26M1D33M1I107M1D5M1D13M1D56M...1I37M1I43M1D2M1D124M1I37M1D251M1D100M2I22M1D2M2D4M1D20M4D4M2I320M1I97M1D21M2I10M3I6M1I142M1I39M2I145M1D76M2I175M1D273M1D74M1D21M2D8M1D32M1D94M1I47M2I397M1D318M2D264M2D49M2I32M2D254M2I95M1I50M1D100M2D11M3I32M1I121M2D38M1I6M1D52M1I45M1D277M2I200M1D98M1I73M1I33M1I52M1I72M2I233M1D22M3D64M2I139M3D260M1D497M1D1M2D91M22I136M2D27M1D292M1I409M1I1M1I132M1I3M1D112M2I18M1D117M1D8M2D30M1I63M1D8M1D16M4I8M2I12M1D42M1D19M1D50M1I189M1D160M1I178M1I125M3I2M2D63M3D240M1I123M1I14M1I304M1I151M1D3M1D14M1D238M1D6M27S" +ref_pos=41149857 +deletions = find_deletions(cigar_string, ref_pos) + +# Print output +# print("Deletions:") +# for deletion in deletions: +# print(deletion) + +# print("Mismatches:") +# for mismatch in mismatches: +# print(mismatch) \ No newline at end of file diff --git a/setup.py b/setup.py index 7b354654..13477948 100644 --- a/setup.py +++ b/setup.py @@ -3,44 +3,40 @@ This file is used to install the package. """ -print("Running setup.py...") - import os import glob from setuptools import setup, find_packages, Extension +print("Running setup.py...") + # Set the project metadata -name = "contextsv" -version = "0.0.1" -author = "WGLab" -description = "ContextSV: A tool for integrative structural variant detection." +NAME = "contextsv" +VERSION = "0.0.1" +AUTHOR = "WGLab" +DESCRIPTION = "ContextSV: A tool for integrative structural variant detection." 
# Get the conda environment's include path conda_prefix = os.environ.get("CONDA_PREFIX") if conda_prefix is None: - raise Exception("CONDA_PREFIX is not set.") -conda_include_dir = os.path.join(conda_prefix, "include") + raise AssertionError("CONDA_PREFIX is not set.") -print("CONDA_PREFIX: {}".format(conda_prefix)) # DEBUG -print("include_dir: {}".format(conda_include_dir)) # DEBUG +conda_include_dir = os.path.join(conda_prefix, "include") # Get the conda environment's lib path conda_lib_dir = os.path.join(conda_prefix, "lib") -print("lib_dir: {}".format(conda_lib_dir)) # DEBUG - # Set the project dependencies -src_dir = "src" -src_files = glob.glob(os.path.join(src_dir, "*.cpp")) -include_dir = "include" -include_files = glob.glob(os.path.join(include_dir, "*.h")) +SRC_DIR = "src" +SRC_FILES = glob.glob(os.path.join(SRC_DIR, "*.cpp")) +INCLUDE_DIR = "include" +INCLUDE_FILES = glob.glob(os.path.join(INCLUDE_DIR, "*.h")) # Set up the extension ext = Extension( - name="_" + name, - sources=src_files, - include_dirs=[include_dir, conda_include_dir], + name="_" + NAME, + sources=SRC_FILES, + include_dirs=[INCLUDE_DIR, conda_include_dir], extra_compile_args=["-std=c++11"], language="c++", libraries=["hts"], @@ -49,12 +45,12 @@ # Set up the module setup( - name=name, - version=version, - author=author, - description=description, + name=NAME, + version=VERSION, + author=AUTHOR, + description=DESCRIPTION, ext_modules=[ext], - py_modules=[name], + py_modules=[NAME], packages=find_packages(), test_suite="tests", entry_points={ diff --git a/src/cli.cpp b/src/cli.cpp deleted file mode 100644 index 86a21399..00000000 --- a/src/cli.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Created by jperdomo on 1/8/2023. -// - -#include "cli.h" -#include "integrative_caller.h" - -#include -#include -#include -#include -#include -#include - - -// Run the CLI with the given parameters -int run(std::string bam, std::string snps, std::string outdir, std::string region) -{ - // Create the common parameters - Common common; - common.set_bam_filepath(bam); - common.set_snp_vcf_filepath(snps); - common.set_output_dir(outdir); - common.set_region(region); - - // Run the integrative caller - IntegrativeCaller caller_obj(common); - try - { - caller_obj.run(); - } - - catch (std::exception& e) - { - std::cerr << e.what() << std::endl; - return -1; - } - - return 0; -} diff --git a/src/cnv_caller.cpp b/src/cnv_caller.cpp index 7c02a2b9..f6c69967 100644 --- a/src/cnv_caller.cpp +++ b/src/cnv_caller.cpp @@ -1,62 +1,76 @@ -// #include "khmm.h" #include "cnv_caller.h" +#include "utils.h" +#include + +/// @cond #include #include #include #include #include #include /* log2 */ -#include #include #include #include #include // Progress bar -#include "common.h" #define BUFFER_SIZE 1024 +// #define MIN_PFB 0.00001 // Minimum PFB value +// #define MAX_PFB 0.99999 // Maximum PFB value +#define MIN_PFB 0.01 +#define MAX_PFB 0.99 +/// @endcond -CNVCaller::CNVCaller(Common common) + +CNVCaller::CNVCaller(InputData& input_data) { - this->common = common; + this->input_data = &input_data; } -std::vector CNVCaller::run() +CNVData CNVCaller::run() { // Read SNP positions and BAF values from the VCF file std::cout << "Reading SNP positions and BAF values from the VCF file..." << std::endl; - std::pair, std::vector> bafs_by_pos = readSNPBAFs(); - std::cout << "Complete." 
<< std::endl; + std::string snp_filepath = this->input_data->getSNPFilepath(); + SNPData snp_data = readSNPBAFs(snp_filepath); + int snp_count = (int) snp_data.locations.size(); + std::cout << "Complete. Number of SNPs: " << snp_count << std::endl; - // Extract the SNP positions and BAF values - std::vector snp_positions = bafs_by_pos.first; - std::vector baf = bafs_by_pos.second; + // Get the population frequencies for each SNP + if (this->input_data->getPFBFilepath() == "") + { + std::cout << "No PFB file provided. Using the minimum PFB value of " << MIN_PFB << " for all SNPs." << std::endl; + snp_data.pfbs = std::vector(snp_count, MIN_PFB); + } else { + std::cout << "Using PFB file: " << this->input_data->getPFBFilepath() << std::endl; + std::cout << "Getting population frequencies for each SNP..." << std::endl; + snp_data.pfbs = getSNPPopulationFrequencies(snp_data.locations); + std::cout << "Population frequencies retrieved." << std::endl; + } // Calculate LRRs - std::cout << "Calculating LRRs at SNP positions..." << std::endl; - std::vector lrr = calculateLogRRatiosAtSNPS(snp_positions); - std::cout << "LRRs calculated." << std::endl; + std::cout << "Calculating log2 ratio of coverage at SNP positions..." << std::endl; + snp_data.log2_ratios = calculateLog2RatioAtSNPS(snp_data.locations); + std::cout << "Log2 ratios calculated." << std::endl; // Read the HMM from file //std::string hmm_filepath = "data/wgs.hmm"; - std::string hmm_filepath = "data/hh550.hmm"; + std::string hmm_filepath = this->input_data->getHMMFilepath(); std::cout << "Reading HMM from file: " << hmm_filepath << std::endl; CHMM hmm = ReadCHMM(hmm_filepath.c_str()); std::cout << "HMM read from file." << std::endl; - // Set up the input variables - int num_probes = lrr.size(); - double *lrr_ptr = lrr.data(); - double *baf_ptr = baf.data(); - - // Create a double array for pop. frequency and snp distance (not used), and log probabilities + // Set up the data for the Viterbi algorithm + double *lrr_ptr = snp_data.log2_ratios.data(); + double *baf_ptr = snp_data.bafs.data(); + double *pfb_ptr = snp_data.pfbs.data(); int *snpdist = NULL; - double *pfb = NULL; double *logprob = NULL; // Run the Viterbi algorithm - // Each of the 6 states corresponds to a CNV call: + // Each of the 6 state predictions corresponds to a copy number state: // 1: 0/0 (Two copy loss) // 2: 1/0 (One copy loss) // 3: 1/1 (Normal) @@ -65,47 +79,75 @@ std::vector CNVCaller::run() // 6: 2/2 (Two copy gain) std::cout << "Running the Viterbi algorithm..." << std::endl; std::vector state_sequence; // Create the output state sequence - state_sequence = testVit_CHMM(hmm, num_probes, lrr_ptr, baf_ptr, pfb, snpdist, logprob); - std::cout << "Viterbi algorithm complete." << std::endl; + state_sequence = testVit_CHMM(hmm, snp_count, lrr_ptr, baf_ptr, pfb_ptr, snpdist, logprob); + snp_data.state_sequence = state_sequence; + std::cout << "Complete." << std::endl; + + // Save a TSV of the positions, LRRs, BAFs, and state sequence + std::cout << "Saving TSV of copy number prediction data..." << std::endl; + std::string output_tsv = this->input_data->getOutputDir() + "/cnv_data.tsv"; + saveToTSV(snp_data, output_tsv); + std::cout << "Saved to: " << output_tsv << std::endl; + + // Save a BED file of the CNV state sequence + std::cout << "Saving BED of CNV states..." 
<< std::endl; + std::string output_bed = this->input_data->getOutputDir() + "/cnv_states.bed"; + saveToBED(snp_data, output_bed); + std::cout << "Saved to: " << output_bed << std::endl; + + // Get the chromosome as a C string + std::string chr = this->input_data->getRegionChr(); + char *chr_cstr = new char[chr.length() + 1]; + strcpy(chr_cstr, chr.c_str()); + + // Create a map of the state sequence by position + CNVData cnv_data; + for (int i = 0; i < snp_count; i++) + { + cnv_data.addCNVCall(chr_cstr, snp_data.locations[i], snp_data.state_sequence[i]); + } - // Save a CSV of the positions, LRRs, BAFs, and state sequence - std::cout << "Saving CSV of positions, LRRs, and BAFs..." << std::endl; - std::string output_filepath = this->common.get_output_dir() + "/snp_lrr_baf.csv"; - saveSNPLRRBAFCSV(output_filepath, snp_positions, baf, lrr, state_sequence); - std::cout << "CSV saved to: " << output_filepath << std::endl; + // Free the C string + delete[] chr_cstr; - return std::vector(); + return cnv_data; } -std::vector CNVCaller::calculateLogRRatiosAtSNPS(std::vector snp_positions) +std::vector CNVCaller::calculateLog2RatioAtSNPS(std::vector snp_locations) { - std::string target_chr = this->common.get_region_chr(); - - // Calculate mean chromosome coverage - std::string input_filepath = this->common.get_bam_filepath(); - std::cout << "\nCalculating coverage for chromosome: " << target_chr.c_str() << std::endl; - //double mean_chr_cov = calculateMeanChromosomeCoverage(); // Commented out for testing - // double mean_chr_cov = 39.4096; // Chr6 mean coverage from test data - double mean_chr_cov = 39.561; // Chr3 mean coverage from test data + std::string input_filepath = this->input_data->getBAMFilepath(); + std::string chr = this->input_data->getRegionChr(); - std::cout << "Mean coverage: " << mean_chr_cov << std::endl; + // Check if the chromosome coverage was passed in + double mean_chr_cov = -1; + if (this->input_data->getChrCov(chr, mean_chr_cov) == -1) + { + // Calculate the mean chromosome coverage + std::cout << "\nCalculating coverage for chromosome: " << chr << std::endl; + mean_chr_cov = calculateMeanChromosomeCoverage(); + std::cout << "Mean coverage for chromosome " << chr << ": " << mean_chr_cov << std::endl; + } else { + std::cout << "Using user-provided mean coverage for chromosome " << chr << ": " << mean_chr_cov << std::endl; + } // Set the region start and end from the first and last SNPs - int region_start = snp_positions.front(); - int region_end = snp_positions.back(); - if (this->common.get_region_set()) { - region_start = std::max(region_start, this->common.get_region_start()); - region_end = std::min(region_end, this->common.get_region_end()); + std::cout << "Getting region start and end..." << std::endl; + int region_start = snp_locations.front(); + int region_end = snp_locations.back(); + std::cout << "Fixing region start and end..." 
<< std::endl; + if (this->input_data->getRegionSet()) { + region_start = std::max(region_start, this->input_data->getRegionStart()); + region_end = std::min(region_end, this->input_data->getRegionEnd()); } - std::cout << "Beginning analysis of region: " << target_chr << ":" << region_start << "-" << region_end << std::endl; + std::cout << "Predicting CNV states for SNPs in region: " << chr << ":" << region_start << "-" << region_end << std::endl; // Loop through each SNP and calculate the LRR std::vector snp_lrr; - int window_size = this->common.get_window_size(); - int snp_count = (int) snp_positions.size(); + int window_size = this->input_data->getWindowSize(); + int snp_count = (int) snp_locations.size(); for (int i = 0; i < snp_count; i++) { - int pos = snp_positions[i]; + int pos = snp_locations[i]; // Skip SNPs outside of the region if (pos < region_start || pos > region_end) { @@ -120,9 +162,13 @@ std::vector CNVCaller::calculateLogRRatiosAtSNPS(std::vector snp_po // Set the LRR value snp_lrr.push_back(lrr); - // Update the progress bar - this->common.print_progress(i, snp_positions.size()-1); + // Update the progress bar every 1000 SNPs + if (i % 1000 == 0) { + printProgress(i, snp_count-1); + } + //printProgress(i, snp_count-1); } + std::cout << std::endl; // End the progress bar return snp_lrr; } @@ -130,36 +176,27 @@ std::vector CNVCaller::calculateLogRRatiosAtSNPS(std::vector snp_po /// Calculate the mean chromosome coverage double CNVCaller::calculateMeanChromosomeCoverage() { - std::string chr = this->common.get_region_chr(); - std::string input_filepath = this->common.get_bam_filepath(); + std::string chr = this->input_data->getRegionChr(); + std::string input_filepath = this->input_data->getBAMFilepath(); char cmd[BUFFER_SIZE]; FILE *fp; char line[BUFFER_SIZE]; - RegionCoverage cov; // Open a SAMtools process to calculate cumulative read depth and position // counts (non-zero depths only) for a single chromosome // Run the entire chromosome - snprintf(cmd, BUFFER_SIZE,\ - "samtools depth -r %s %s | awk '{c++;s+=$3}END{print c, s}'",\ - chr.c_str(), input_filepath.c_str()); // Remove '-a' for debugging + snprintf(cmd, BUFFER_SIZE,\ + "samtools depth -r %s %s | awk '{c++;s+=$3}END{print c, s}'",\ + chr.c_str(), input_filepath.c_str()); // Remove '-a' for debugging + + std::cout << "Running command: " << cmd << std::endl; // Parse the output fp = popen(cmd, "r"); if (fp == NULL) { - fprintf(stderr, "Failed to run command\n"); - exit(1); - } - - fprintf(stdout, "%s\n", cmd); // Print the command - fflush(stdout); - - fp = popen(cmd, "r"); - if (fp == NULL) - { - fprintf(stderr, "Failed to run command\n"); + std::cerr << "ERROR: Could not open pipe for command: " << cmd << std::endl; exit(EXIT_FAILURE); } @@ -172,6 +209,9 @@ double CNVCaller::calculateMeanChromosomeCoverage() { // Calculate the mean chromosome coverage mean_chr_cov = (double) cum_depth / (double) pos_count; + } else { + std::cerr << "ERROR: Could not parse output from command: " << cmd << std::endl; + exit(EXIT_FAILURE); } } pclose(fp); // Close the process @@ -181,8 +221,8 @@ double CNVCaller::calculateMeanChromosomeCoverage() double CNVCaller::calculateWindowLogRRatio(double mean_chr_cov, int start_pos, int end_pos) { - std::string chr = this->common.get_region_chr(); - std::string input_filepath = this->common.get_bam_filepath(); + std::string chr = this->input_data->getRegionChr(); + std::string input_filepath = this->input_data->getBAMFilepath(); char cmd[BUFFER_SIZE]; FILE *fp; @@ -192,13 +232,11 @@ double 
CNVCaller::calculateWindowLogRRatio(double mean_chr_cov, int start_pos, int end_pos)
     // counts (non-zero depths only) for a single region
 
     // Run the entire chromosome
-    snprintf(cmd, BUFFER_SIZE,\
-        "samtools depth -r %s:%d-%d %s | awk '{c++;s+=$3}END{print c, s}'",\
+    snprintf(cmd, BUFFER_SIZE,\
+        "samtools depth -r %s:%d-%d %s | awk '{c++;s+=$3}END{print c, s}'",\
         chr.c_str(), start_pos, end_pos, input_filepath.c_str());
-    //fprintf(stdout, "%s\n", cmd);  // Print the command
-    //fflush(stdout);
-
-    fp = popen(cmd, "r");
+
+    fp = popen(cmd, "r");
     if (fp == NULL)
     {
         fprintf(stderr, "Failed to run command\n");
@@ -222,18 +260,15 @@ double CNVCaller::calculateWindowLogRRatio(double mean_chr_cov, int start_pos, int end_pos)
     return region_lrr;
 }
 
-std::pair<std::vector<int>, std::vector<double>> CNVCaller::readSNPBAFs()
+SNPData CNVCaller::readSNPBAFs(std::string snp_filepath)
 {
-    // Get the VCF filepath with SNPs
-    std::string snp_filepath = this->common.get_snp_vcf_filepath();
-
     // Create a VCF filepath of filtered SNPs
-    std::string filtered_snp_vcf_filepath = this->common.get_output_dir() + "/filtered_snps.vcf";
+    std::string filtered_snp_vcf_filepath = this->input_data->getOutputDir() + "/filtered_snps.vcf";
 
     std::cout << "Parsing SNPs from " << snp_filepath << std::endl;
 
     // Filter variants by depth and quality and SNPs only
-    std::string region = this->common.get_region();
+    std::string region = this->input_data->getRegion();
     std::string cmd;
     if (region == "")
     {
@@ -246,10 +281,9 @@ std::pair<std::vector<int>, std::vector<double>> CNVCaller::readSNPBAFs()
     std::cout << "Filtered SNPs written to " << filtered_snp_vcf_filepath << std::endl;
 
-    // Extract all BAFs from the filtered SNPs and store in the pair vector
-    std::cout << "Extracting BAFs from filtered SNPs" << std::endl;
-    cmd = "bcftools query -f '%CHROM,%POS,[%VAF]\n' " + filtered_snp_vcf_filepath;
-    std::cout << "Command: " << cmd << std::endl;
+    // Extract B-allele frequency data from the VCF file
+    std::cout << "Extracting allelic depth data from filtered SNPs..." << std::endl;
+    cmd = "bcftools query -f '%CHROM,%POS,[%AD]\n' " + filtered_snp_vcf_filepath;
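The hunk above switches from querying a precomputed VAF tag to deriving BAF from the AD (allelic depth) field. A minimal Python sketch of the same computation on one line of the bcftools output (editorial illustration; not part of the patch):

    # One output line of: bcftools query -f '%CHROM,%POS,[%AD]\n' ...
    # has the comma-separated form "chrom,pos,ref_depth,alt_depth".
    def baf_from_query_line(line):
        chrom, pos, ref_ad, alt_ad = line.strip().split(",")
        # B-allele frequency = alternate-allele depth / total depth at the SNP
        return chrom, int(pos), int(alt_ad) / (int(ref_ad) + int(alt_ad))

    print(baf_from_query_line("chr3,12345,10,8\n"))  # ('chr3', 12345, 0.444...)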
     FILE *fp = popen(cmd.c_str(), "r");
     if (fp == NULL)
     {
@@ -257,12 +291,10 @@ std::pair<std::vector<int>, std::vector<double>> CNVCaller::readSNPBAFs()
         exit(1);
     }
 
-    // Read the BAFs
+    // Read the AD (allelic depth) values for the reference and alternate alleles
     char line[BUFFER_SIZE];
-
-    // Loop through the lines
-    std::vector<int> snp_positions;
-    std::vector<double> snp_bafs;
+    std::vector<int> locations;
+    std::vector<double> bafs;
     while (fgets(line, BUFFER_SIZE, fp) != NULL)
     {
         // Parse the line
@@ -270,7 +302,9 @@ std::pair<std::vector<int>, std::vector<double>> CNVCaller::readSNPBAFs()
         int col = 0;  // Column index
         std::string chr = "";
         uint64_t pos = 0;
-        double baf = 0.0;
+        int ref_ad = 0;
+        int alt_ad = 0;
         while (tok != NULL)
         {
             // Get the chromosome from column 1
@@ -285,45 +319,291 @@ std::pair<std::vector<int>, std::vector<double>> CNVCaller::readSNPBAFs()
                 pos = atoi(tok);
             }
 
-            // Get the BAF from column 3
+            // Get the AD for the reference allele from column 3
             else if (col == 2)
             {
-                baf = atof(tok);
+                ref_ad = atoi(tok);
             }
 
+            // Get the AD for the non-reference allele from column 4
+            else if (col == 3)
+            {
+                alt_ad = atoi(tok);
+            }
+
+            // Move to the next token
             tok = strtok(NULL, ",");
             col++;
         }
 
-        // Store the position and BAF
-        snp_positions.push_back(pos);
-        snp_bafs.push_back(baf);
+        // Calculate the BAF
+        double baf = (double) alt_ad / (double) (ref_ad + alt_ad);
+
+        // Store the position and BAF
+        locations.push_back(pos);
+        bafs.push_back(baf);
     }
 
     // Close the pipe
     pclose(fp);
 
-    // Create the pair vector
-    std::pair<std::vector<int>, std::vector<double>> snp_data = std::make_pair(snp_positions, snp_bafs);
+    // Exit if no SNPs were found
+    if (locations.size() == 0)
+    {
+        std::cerr << "ERROR: No SNPs found in the VCF file: " << snp_filepath << std::endl;
+        exit(1);
+    }
+
+    // Create the SNP data struct
+    SNPData snp_data;
+    snp_data.locations = locations;
+    snp_data.bafs = bafs;
 
     return snp_data;
 }
 
-void CNVCaller::saveSNPLRRBAFCSV(std::string filepath, std::vector<int> snp_positions, std::vector<double> bafs, std::vector<double> logr_ratios, std::vector<int> state_sequence)
+std::vector<double> CNVCaller::getSNPPopulationFrequencies(std::vector<int> snp_locations)
 {
-    // Open the CSV file for writing
-    std::ofstream csv_file(filepath);
+    // Get the PFB filepath
+    std::string pfb_filepath = this->input_data->getPFBFilepath();
+
+    // Open the PFB file
+    FILE *fp = fopen(pfb_filepath.c_str(), "r");
+    if (fp == NULL)
+    {
+        std::cerr << "ERROR: Could not open PFB file: " << pfb_filepath << std::endl;
+        exit(1);
+    }
+
+    // Read the AF values (AF for population frequency) into a map
+    std::map<std::string, std::map<uint64_t, double>> pfb_map;  // Map of population frequencies by SNP position (chr -> pos -> pfb)
+    char line[BUFFER_SIZE];
+    int pfb_count = 0;
+    int pfb_min_hit = 0;
+    int pfb_max_hit = 0;
+    while (fgets(line, BUFFER_SIZE, fp) != NULL)
+    {
+        // Parse the tab-delimited line
+        char *tok = strtok(line, "\t");  // Tokenize the line
+        int col = 0;  // Column index
+        std::string chr = "";
+        uint64_t pos = 0;
+        double pfb = 0;
+        while (tok != NULL)
+        {
+            // Get the chromosome from column 1
+            if (col == 0)
+            {
+                chr = tok;
+            }
+
+            // Get the position from column 2
+            else if (col == 1)
+            {
+                pos = atoi(tok);
+            }
+
+            // Get the PFB from column 3
+            else if (col == 2)
+            {
+                pfb = atof(tok);
+            }
+
+            tok = strtok(NULL, "\t");
+            col++;
+        }
+
+        // Store the PFB value in the map, and fix within the range
+        //std::cout <<
"PFB: " << std::fixed << std::setprecision(6) << pfb << std::endl; + //std::cout << "PFB at " << chr << ":" << pos << " -> " << std::fixed << std::setprecision(6) << pfb << std::endl; + if (pfb < MIN_PFB) + { + pfb_map[chr][pos] = MIN_PFB; + pfb_min_hit++; + } else if (pfb > MAX_PFB) { + pfb_map[chr][pos] = MAX_PFB; + pfb_max_hit++; + } else { + pfb_map[chr][pos] = pfb; + + // Print values for debugging + //std::cout << "PFB at " << chr << ":" << pos << " -> " << std::fixed << std::setprecision(6) << pfb << std::endl; + } + pfb_count++; + } + + // Close the file + fclose(fp); + + // Log the percentage of PFB values that were fixed + std::cout << "PFB values fixed: " << (double) (pfb_min_hit + pfb_max_hit) / (double) pfb_count * 100 << "%" << std::endl; + std::cout << "PFB min hit: " << (double) pfb_min_hit / (double) pfb_count * 100 << "%" << std::endl; + std::cout << "PFB max hit: " << (double) pfb_max_hit / (double) pfb_count * 100 << "%" << std::endl; + + // Determine whether the chromosome is in chr notation (e.g. chr1) or not (e.g. 1) + bool chr_notation = false; + std::string first_chr = pfb_map.begin()->first; + if (first_chr.find("chr") != std::string::npos) + { + chr_notation = true; + } + + // Create a vector of population frequencies for each SNP + std::cout << "Getting SNPs population frequencies..." << std::endl; + std::vector snp_pfb; + + // Get the chromosome + std::string chr = this->input_data->getRegionChr(); + + // Make sure the chromosome follows the same format as the PFB file + if (chr_notation) + { + // Add "chr" to the chromosome if it is not already there + if (chr.find("chr") == std::string::npos) + { + chr = "chr" + chr; + } + } else { + // Remove "chr" from the chromosome if it is there + if (chr.find("chr") != std::string::npos) + { + chr = chr.substr(3); + } + } + + // Loop through each SNP position and get the population frequency + int snp_count = (int) snp_locations.size(); + int found_count = 0; + for (int i = 0; i < snp_count; i++) + { + // Get the SNP position + int pos = snp_locations[i]; + + // Get the population frequency from the map + auto it = pfb_map[chr].find(pos); + if (it != pfb_map[chr].end()) + { + // Store the population frequency + snp_pfb.push_back(it->second); + found_count++; + } + else + { + // Use 0.5 as the population frequency if it is not found + snp_pfb.push_back(0.5); + } + } + + // Print the percentage of SNPs with population frequencies + std::cout << "Found population frequencies for " << found_count << " of " << snp_count << " SNPs (" << (double) found_count / (double) snp_count * 100 << "%)" << std::endl; + + return snp_pfb; +} + +void CNVCaller::saveToTSV(SNPData& snp_data, std::string filepath) +{ + // Open the TSV file for writing + std::ofstream tsv_file(filepath); // Write the header - csv_file << "position,baf,log2_ratio,cnv_state" << std::endl; + tsv_file << "chromosome\tposition\tb_allele_freq\tlog2_ratio\tcnv_state\tpopulation_freq" << std::endl; + + // Write the data + std::string chr = this->input_data->getRegionChr(); + int snp_count = (int) snp_data.locations.size(); + for (int i = 0; i < snp_count; i++) + { + // Get the SNP data + int64_t pos = snp_data.locations[i]; + double pfb = snp_data.pfbs[i]; + double baf = snp_data.bafs[i]; + double log2_ratio = snp_data.log2_ratios[i]; + int cn_state = snp_data.state_sequence[i]; + + // Write the TSV line (chrom, pos, baf, lrr, state) + tsv_file << \ + chr << "\t" << \ + pos << "\t" << \ + baf << "\t" << \ + log2_ratio << "\t" << \ + cn_state << "\t" << \ + pfb << \ + 
std::endl; + } + + // Close the file + tsv_file.close(); +} + +void CNVCaller::saveToBED(SNPData& snp_data, std::string filepath) +{ + // Each of the 6 copy number states corresponds to the following: + // 1: 0/0 (Two copy loss) + // 2: 1/0 (One copy loss) + // 3: 1/1 (Normal) + // 4: 1/1 (Copy neutral LOH) + // 5: 2/1 (One copy gain) + // 6: 2/2 (Two copy gain) + + // Create a map of descriptions and RGB colors for each CNV state + // Descriptions: + std::map description_map = { + {1, "DEL"}, // DEL + {2, "DEL"}, // DEL + {3, "NEUT"}, // NEUTRAL + {4, "NEUT"}, // NEUTRAL + {5, "DUP"}, // DUP + {6, "DUP"} // DUP + }; + + // RGB colors: + std::map color_map = { + {1, "255,0,0"}, // DEL + {2, "255,0,0"}, // DEL + {3, "0,0,0"}, // NEUTRAL + {4, "0,0,0"}, // NEUTRAL + {5, "0,0,255"}, // DUP + {6, "0,0,255"} // DUP + }; + + // Open the BED file for writing + std::ofstream bed_file(filepath); + + // Write the track line (track type, name, description) + bed_file << "track name=\"CNV States\" description=\"CNV state predictions from ContextSV\" itemRgb=\"On\" visibility=\"2\" useScore=\"0\"" << std::endl; // Write the data - int snp_count = (int) snp_positions.size(); + std::string chr = this->input_data->getRegionChr(); + int snp_count = (int) snp_data.locations.size(); for (int i = 0; i < snp_count; i++) { - csv_file << snp_positions[i] << "," << bafs[i] << "," << logr_ratios[i] << "," << state_sequence[i] << std::endl; + // Get the SNP position and state + int64_t pos = snp_data.locations[i]; + int state = snp_data.state_sequence[i]; + + // Get the state description + std::string state_str = description_map[state]; + + // Get the RGB color + std::string rgb_color = color_map[state]; + + // Write the BED line (chrom, start, end, name (state), score, strand, thickStart, thickEnd, itemRgb) + bed_file << \ + chr << "\t" << \ + pos << "\t" << \ + pos << "\t" << \ + state_str << "\t" << \ + "0" << "\t" << \ + "." 
<< "\t" << \ + pos << "\t" << \ + pos << "\t" << \ + rgb_color << \ + std::endl; } // Close the file - csv_file.close(); + bed_file.close(); } diff --git a/src/cnv_data.cpp b/src/cnv_data.cpp new file mode 100644 index 00000000..9e869faa --- /dev/null +++ b/src/cnv_data.cpp @@ -0,0 +1,114 @@ +#include "cnv_data.h" + +/// @cond +#include +#include +#include +#include +/// @endcond + +void CNVData::addCNVCall(std::string chr, int snp_pos, int cnv_type) +{ + // Add the CNV call to the map + SNPLocation key(chr, snp_pos); + this->cnv_calls[key] = cnv_type; +} + +int CNVData::getMostCommonCNV(std::string chr, int start, int end) +{ + // Get the majority CNV type within the SV region start and end positions + // (0 = deletion, 1 = duplication, -1 = no CNV call) + int dup_count = 0; + int del_count = 0; + int no_call_count = 0; + int total_count = 0; + //int sv_len = end - start + 1; + + //std::cout << "Checking for CNV calls in " << chr << ":" << start << "-" << end << " (SVLEN=" << sv_len << ")" << std::endl; + + for (int pos = start; pos <= end; pos++) { + SNPLocation key(chr, pos); + //std::cout << "Checking for CNV call at " << chr << ":" << pos << + //std::endl; + + if (this->cnv_calls.find(key) != this->cnv_calls.end()) { + //std::cout << "State = " << this->cnv_calls[key] << std::endl; + if (this->cnv_calls[key] == 5 || this->cnv_calls[key] == 6) { + dup_count++; + } else if (this->cnv_calls[key] == 1 || this->cnv_calls[key] == 2) { + del_count++; + } else { + no_call_count++; + } + total_count++; + } + } + + // Check if the SV region is mostly covered by CNV calls (at least 50%) and + // if the majority CNV type is an insertion or deletion + int cnv_type = -1; + if (total_count > 0) { + if (dup_count > del_count && (double) dup_count / total_count > 0.5) { + cnv_type = 1; + //std::cout << "CNV type is DUP, SVLEN=" << sv_len << std::endl; + } else if (del_count > dup_count && (double) del_count / total_count > 0.5) { + cnv_type = 0; + //std::cout << "CNV type is DEL, SVLEN=" << sv_len << std::endl; + } else { + //std::cout << "CNV type is no call, SVLEN=" << sv_len << std::endl; + } + } + + return cnv_type; +} + +void CNVData::loadFromFile(std::string filepath) +{ + // Load CNV calls from file + std::ifstream cnv_file(filepath); + std::string line; + std::string chr; + int snp_pos; + int cnv_type; + + // Check if the file was opened successfully + if (!cnv_file.is_open()) { + std::cerr << "Error: Could not open CNV file " << filepath << std::endl; + exit(1); + } + + // Skip the first line (header) + std::getline(cnv_file, line); + + // Read the file line by line + int line_num = 1; + while (std::getline(cnv_file, line)) { + + // Parse the line + std::istringstream iss(line); + + // Get columns 1, 2, and 5 (chr, pos, cnv_type) + std::string chr; + std::getline(iss, chr, '\t'); + + std::string pos_str; + std::getline(iss, pos_str, '\t'); + snp_pos = std::stoi(pos_str); + + std::string skip_str; + std::getline(iss, skip_str, '\t'); + std::getline(iss, skip_str, '\t'); + + std::string cnv_type_str; + std::getline(iss, cnv_type_str, '\t'); + cnv_type = std::stoi(cnv_type_str); + + // Add the CNV call to the map + this->addCNVCall(chr, snp_pos, cnv_type); + + line_num++; + } + cnv_file.close(); + + std::cout << "Loaded " << line_num << " CNV calls" << std::endl; +} diff --git a/src/common.cpp b/src/common.cpp deleted file mode 100644 index e5cc3e04..00000000 --- a/src/common.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include "common.h" - -#include -#include -#include - -#define BUFFER_SIZE 1024 - 
-std::string Common::get_bam_filepath() -{ - return this->bam_filepath; -} - -void Common::set_bam_filepath(std::string filepath) -{ - this->bam_filepath = filepath; -} - -std::string Common::get_ref_filepath() -{ - return this->ref_filepath; -} - -void Common::set_ref_filepath(std::string filepath) -{ - this->ref_filepath = filepath; -} - -std::string Common::get_output_dir() -{ - return this->output_dir; -} - -void Common::set_output_dir(std::string dirpath) -{ - this->output_dir = dirpath; - - // Create the output directory - std::string cmd = "mkdir -p " + output_dir; - system(cmd.c_str()); -} - -std::string Common::get_region() -{ - return this->region; -} - -void Common::set_region(std::string region) -{ - this->region = region; - - // Parse the region - char *tok = strtok((char *)region.c_str(), ":"); - int col = 0; - while (tok != NULL) - { - // Get the chromosome - if (col == 0) - { - this->region_chr = tok; - } - - // Get the start and end positions - else if (col == 1) - { - // Check if empty - if (strcmp(tok, "") == 0) - { - break; - } - - // Split the start and end positions - char *start_tok = strtok(tok, "-"); - char *end_tok = strtok(NULL, "-"); - - // Get the start position - if (start_tok != NULL) - { - this->region_start = atoi(start_tok); - } - - // Get the end position - if (end_tok != NULL) - { - this->region_end = atoi(end_tok); - } - } - tok = strtok(NULL, ":"); - col++; - } - - // Print the region - if (this->region_start == 0 && this->region_end == 0) - { - this->region_set = false; - std::cout << "Parsed region = " << this->region_chr << std::endl; - } else { - this->region_set = true; - std::cout << "Parsed region = " << this->region_chr << ":" << this->region_start << "-" << this->region_end << std::endl; - } -} - -int Common::get_window_size() -{ - return this->window_size; -} - -void Common::set_window_size(int window_size) -{ - this->window_size = window_size; -} - -std::string Common::get_snp_vcf_filepath() -{ - return this->snp_vcf_filepath; -} - -void Common::set_snp_vcf_filepath(std::string filepath) -{ - this->snp_vcf_filepath = filepath; -} - -std::string Common::get_region_chr() -{ - return this->region_chr; -} - -int Common::get_region_start() -{ - return this->region_start; -} - -int Common::get_region_end() -{ - return this->region_end; -} - -bool Common::get_region_set() -{ - return this->region_set; -} - -void Common::print_progress(int progress, int total) -{ - // Get the percentage - float percent = (float)progress / (float)total * 100.0; - - // Get the number of hashes - int num_hashes = (int)(percent / 2.0); - - // Print the progress bar - printf("\r["); - for (int i = 0; i < num_hashes; i++) - { - printf("#"); - } - for (int i = 0; i < 50 - num_hashes; i++) - { - printf(" "); - } - printf("] %3.2f%%", percent); - fflush(stdout); - - // Print a new line if finished - if (progress == total) - { - printf("\n"); - } -} diff --git a/src/contextsv.cpp b/src/contextsv.cpp new file mode 100644 index 00000000..b3bceddb --- /dev/null +++ b/src/contextsv.cpp @@ -0,0 +1,87 @@ +#include "contextsv.h" +#include "cnv_caller.h" +#include "sv_caller.h" + +#include + +/// @cond +#include +#include +#include +/// @endcond + +ContextSV::ContextSV(InputData& input_data) +{ + this->input_data = &input_data; +} + +// Entry point +int ContextSV::run() +{ + // Check if a file with CNV data was provided + CNVData cnv_calls; + if (this->input_data->getCNVFilepath() != "") { + // Load CNV data + std::cout << "Loading CNV data..." 
<< std::endl; + cnv_calls.loadFromFile(this->input_data->getCNVFilepath()); + } else { + // Call CNVs using the SNP positions + std::cout << "Calling CNVs..." << std::endl; + CNVCaller cnv_caller(*this->input_data); + cnv_calls = cnv_caller.run(); + } + + // Call SVs from long read alignments and CNV calls + std::cout << "Calling SVs..." << std::endl; + SVCaller sv_caller(*this->input_data); + SVData sv_calls = sv_caller.run(); + + // Check if the ref genome was set + if (sv_calls.getRefGenome() == "") { + std::cout << "Warning: Reference genome not set in SVData" << std::endl; + } + + // Classify SVs based on CNV calls + std::cout << "Labeling CNVs..." << std::endl; + this->labelCNVs(cnv_calls, sv_calls); + + // Write SV calls to file + std::cout << "Writing SV calls to file..." << std::endl; + // Get the reference genome + FASTAQuery ref_genome = this->input_data->getRefGenome(); + std::string output_dir = this->input_data->getOutputDir(); + sv_calls.saveToVCF(ref_genome, output_dir); + + std::cout << "Done!" << std::endl; + + return 0; +} + +// Label SVs based on CNV calls +void ContextSV::labelCNVs(CNVData cnv_calls, SVData& sv_calls) +{ + // Iterate over SV calls + for (auto const& sv_call : sv_calls) { + + SVCandidate candidate = sv_call.first; + + // Get the SV coordinates + std::string chr = std::get<0>(candidate); + int start_pos = std::get<1>(candidate); + int end_pos = std::get<2>(candidate); + + // Get CNV calls within the SV coordinate range and identify the most + // common call + int cnv_call = cnv_calls.getMostCommonCNV(chr, start_pos, end_pos); + + // Update the SV call's type if the CNV call is not unknown + if (cnv_call != -1) { + sv_calls.updateSVType(candidate, cnv_call); + } + + // // Print the updated SV call if the type was changed, and if it is not unknown + // if (cnv_call != -1 && cnv_call != sv_type) { + // std::cout << "Updated SV call from " << sv_type << " to " << cnv_call << std::endl; + // } + } +} diff --git a/src/fasta_query.cpp b/src/fasta_query.cpp new file mode 100644 index 00000000..1ca2d732 --- /dev/null +++ b/src/fasta_query.cpp @@ -0,0 +1,168 @@ +#include "fasta_query.h" + +/// @cond +#include +#include +#include +#include +#include +#include +#include +#include +/// @endcond + + +int FASTAQuery::setFilepath(std::string fasta_filepath) +{ + if (fasta_filepath == "") + { + std::cout << "No FASTA filepath provided" << std::endl; + return 1; + } + + this->fasta_filepath = fasta_filepath; + + std::cout << "Reading FASTA file " << fasta_filepath << std::endl; + + // Parse the FASTA file + std::ifstream fasta_file(fasta_filepath); + if (!fasta_file.is_open()) + { + std::cout << "Could not open FASTA file " << fasta_filepath << std::endl; + exit(1); + } + + // Get the sequence + std::map chr_to_seq; + std::string current_chr = ""; + std::string sequence = ""; + std::string line_str = ""; + + while (std::getline(fasta_file, line_str)) + { + // Check if the line is a header + if (line_str[0] == '>') + { + // Header line, indicating a new chromosome + // Store the previous chromosome and sequence + if (current_chr != "") + { + chr_to_seq[current_chr] = sequence; + //std::cout << "Read chromosome " << current_chr << " with length " << sequence.length() << std::endl; + sequence = ""; // Reset the sequence + } + + // Get the new chromosome + current_chr = line_str.substr(1); + + // Remove the description + size_t space_pos = current_chr.find(" "); + if (space_pos != std::string::npos) + { + current_chr.erase(space_pos); + } + + // Check if the chromosome is 
already in the map
+            if (chr_to_seq.find(current_chr) != chr_to_seq.end())
+            {
+                std::cout << "Duplicate chromosome " << current_chr << std::endl;
+                exit(1);
+            }
+        } else {
+            // Sequence line
+            sequence += line_str;
+        }
+    }
+
+    // Add the last chromosome at the end of the file
+    if (current_chr != "")
+    {
+        chr_to_seq[current_chr] = sequence;
+    }
+
+    // Close the file
+    std::cout << "Closing FASTA file..." << std::endl;
+    fasta_file.close();
+
+    // Set the map
+    this->chr_to_seq = chr_to_seq;
+
+    std::cout << "Done." << std::endl;
+
+    return 0;
+}
+
+std::string FASTAQuery::getFilepath()
+{
+    return this->fasta_filepath;
+}
+
+// Function to get the reference sequence at a given position range
+std::string FASTAQuery::query(std::string chr, int64_t pos_start, int64_t pos_end)
+{
+    // Check if a FASTA file has been set
+    if (this->fasta_filepath == "")
+    {
+        std::cout << "No FASTA file set" << std::endl;
+        return "";
+    }
+
+    // Check if the chromosome is in the map
+    if (this->chr_to_seq.find(chr) == this->chr_to_seq.end())
+    {
+        std::cout << "Chromosome " << chr << " not found in FASTA file" << std::endl;
+        return "";
+    }
+
+    // Get the sequence
+    std::string sequence = this->chr_to_seq[chr];
+
+    // Get the substring
+    std::string subsequence = sequence.substr(pos_start, pos_end - pos_start);  // (start, length)
+
+    // If the subsequence is empty, return the VCF missing data character
+    if (subsequence == "")
+    {
+        return ".";
+    }
+
+    return subsequence;
+}
+
+// Function to get the chromosome contig lengths in VCF header format
+std::string FASTAQuery::getContigHeader()
+{
+    std::string contig_header = "";
+
+    // Sort the chromosomes
+    std::vector<std::string> chromosomes;
+    for (auto const& chr_seq : this->chr_to_seq)
+    {
+        chromosomes.push_back(chr_seq.first);
+    }
+    std::sort(chromosomes.begin(), chromosomes.end());
+
+    // Iterate over the chromosomes and add them to the contig header
+    for (auto const& chr : chromosomes)
+    {
+        // Add the contig header line
+        contig_header += "##contig=<ID=" + chr + ",length=" + std::to_string(this->chr_to_seq[chr].length()) + ">\n";
+    }
+
+    return contig_header;
+}
\ No newline at end of file
diff --git a/src/input_data.cpp b/src/input_data.cpp
new file mode 100644
index 00000000..65ec3b78
--- /dev/null
+++ b/src/input_data.cpp
@@ -0,0 +1,324 @@
+#include "input_data.h"
+
+/// @cond
+#include
+#include
+#include
+#include
+/// @endcond
+
+#define BUFFER_SIZE 1024
+
+// Constructor
+InputData::InputData()
+{
+    this->bam_filepath = "";
+    this->ref_filepath = "";
+    this->snp_vcf_filepath = "";
+    this->pfb_filepath = "";
+    this->output_dir = "";
+    this->region = "";
+    this->window_size = 10000;
+    this->region_chr = "";
+    this->region_start = 0;
+    this->region_end = 0;
+    this->region_set = false;
+    this->thread_count = 1;
+    this->hmm_filepath = "data/wgs.hmm";
+}
+
+std::string InputData::getBAMFilepath()
+{
+    return this->bam_filepath;
+}
+
+void InputData::setBAMFilepath(std::string filepath)
+{
+    this->bam_filepath = filepath;
+}
+
+void InputData::setRefGenome(std::string fasta_filepath)
+{
+    // Set the reference genome
+
this->fasta_query.setFilepath(fasta_filepath); +} + +FASTAQuery InputData::getRefGenome() +{ + return this->fasta_query; +} + +std::string InputData::getOutputDir() +{ + return this->output_dir; +} + +void InputData::setOutputDir(std::string dirpath) +{ + this->output_dir = dirpath; + + // Create the output directory + std::string cmd = "mkdir -p " + output_dir; + system(cmd.c_str()); +} + +std::string InputData::getRegion() +{ + return this->region; +} + +void InputData::setRegion(std::string region) +{ + this->region = region; + + // Parse the region + char *tok = strtok((char *)region.c_str(), ":"); + int col = 0; + while (tok != NULL) + { + // Get the chromosome + if (col == 0) + { + this->region_chr = tok; + } + + // Get the start and end positions + else if (col == 1) + { + // Check if empty + if (strcmp(tok, "") == 0) + { + break; + } + + // Split the start and end positions + char *start_tok = strtok(tok, "-"); + char *end_tok = strtok(NULL, "-"); + + // Get the start position + if (start_tok != NULL) + { + this->region_start = atoi(start_tok); + } + + // Get the end position + if (end_tok != NULL) + { + this->region_end = atoi(end_tok); + } + } + tok = strtok(NULL, ":"); + col++; + } + + // Check if a valid chromosome was parsed + if (this->region_chr == "") + { + std::cerr << "Error: Region chromosome not set" << std::endl; + exit(1); + } + + // Check if only a chromosome was parsed + if (this->region_start == 0 && this->region_end == 0) + { + // Use the entire chromosome as the region + this->region_set = false; + std::cout << "Parsed region = " << this->region_chr << std::endl; + } else { + // Check if a valid chromosome start and end position were parsed + if (this->region_start == 0 || this->region_end == 0 || this->region_start > this->region_end) + { + std::cerr << "Error: Region start and end positions not set" << std::endl; + exit(1); + } else { + // Set the region + this->region_set = true; + std::cout << "Parsed region = " << this->region_chr << ":" << this->region_start << "-" << this->region_end << std::endl; + } + } +} + +int InputData::getWindowSize() +{ + return this->window_size; +} + +void InputData::setWindowSize(int window_size) +{ + this->window_size = window_size; +} + +std::string InputData::getSNPFilepath() +{ + return this->snp_vcf_filepath; +} + +void InputData::setSNPFilepath(std::string filepath) +{ + this->snp_vcf_filepath = filepath; +} + +std::string InputData::getRegionChr() +{ + return this->region_chr; +} + +int InputData::getRegionStart() +{ + return this->region_start; +} + +int InputData::getRegionEnd() +{ + return this->region_end; +} + +bool InputData::getRegionSet() +{ + return this->region_set; +} + +void InputData::setChrCov(std::string chr_cov) +{ + // Update the chromosome coverage map if the string is not empty + if (chr_cov != "") + { + // Split the string by commas + std::istringstream ss(chr_cov); + std::string token; + std::vector chr_cov_pairs; + + while (std::getline(ss, token, ',')) + { + chr_cov_pairs.push_back(token); + } + + // Iterate over the pairs + for (auto const &pair : chr_cov_pairs) + { + // Split the pair by colon + std::istringstream ss(pair); + std::string token; + std::vector chr_cov; + + while (std::getline(ss, token, ':')) + { + chr_cov.push_back(token); + } + + // Check if the pair is valid + if (chr_cov.size() == 2) + { + // Get the chromosome and coverage + std::string chr = chr_cov[0]; + double cov = std::stod(chr_cov[1]); + + // Add the pair to the map + this->chr_cov[chr] = cov; + + // Print the pair + 
std::cout << "Set mean coverage for " << chr << " to " << cov << std::endl; + } + } + } +} + +int InputData::getChrCov(std::string chr, double &cov) +{ + // Check if the chromosome is in the map + if (this->chr_cov.find(chr) != this->chr_cov.end()) + { + // Get the coverage + cov = this->chr_cov[chr]; + + // Return 0 if the chromosome is in the map + return 0; + } + else + { + // Return -1 if the chromosome is not in the map + return -1; + } +} + +std::string InputData::getPFBFilepath() +{ + return this->pfb_filepath; +} + +void InputData::setPFBFilepath(std::string filepath) +{ + this->pfb_filepath = filepath; + + // Check if empty string + if (filepath == "") + { + return; + + } else { + // Check if the file exists + FILE *fp = fopen(filepath.c_str(), "r"); + if (fp == NULL) + { + std::cerr << "PFB file does not exist: " << filepath << std::endl; + exit(1); + } + } +} + +void InputData::setThreadCount(int thread_count) +{ + this->thread_count = thread_count; +} + +int InputData::getThreadCount() +{ + return this->thread_count; +} + +std::string InputData::getHMMFilepath() +{ + return this->hmm_filepath; +} + +void InputData::setHMMFilepath(std::string filepath) +{ + // Check if empty string + if (filepath == "") + { + std::cout << "Using default HMM file: " << this->hmm_filepath << std::endl; + return; + + } else { + // Check if the file exists + FILE *fp = fopen(filepath.c_str(), "r"); + if (fp == NULL) + { + std::cerr << "HMM file does not exist: " << filepath << std::endl; + exit(1); + } else { + this->hmm_filepath = filepath; + std::cout << "Using HMM file: " << this->hmm_filepath << std::endl; + } + } +} + +void InputData::setDisableCIGAR(bool disable_cigar) +{ + this->disable_cigar = disable_cigar; +} + +bool InputData::getDisableCIGAR() +{ + return this->disable_cigar; +} + +void InputData::setCNVFilepath(std::string filepath) +{ + this->cnv_filepath = filepath; +} + +std::string InputData::getCNVFilepath() +{ + return this->cnv_filepath; +} diff --git a/src/integrative_caller.cpp b/src/integrative_caller.cpp deleted file mode 100644 index 37b326f6..00000000 --- a/src/integrative_caller.cpp +++ /dev/null @@ -1,40 +0,0 @@ - -#include "integrative_caller.h" -#include "cnv_caller.h" -#include "common.h" - -#include -#include -#include - - -IntegrativeCaller::IntegrativeCaller(Common common) -{ - this->common = common; -} - -/// Entry point -int IntegrativeCaller::run() -{ - // Call SNVs - // SNVCaller snv_obj(this->common); - // snv_obj.run(); - - // Get the SNP VCF file - std::string snp_vcf_filename = this->common.get_snp_vcf_filepath(); - std::cout << "SNP VCF file = " << snp_vcf_filename << std::endl; - - // Get the positions of SNPs in the region - //snv_obj.run(filepath); - - // Print the size of the SNP positions vector - // std::cout << "SNP positions vector size = " << snv_obj.get_snp_positions().size() << std::endl; - - // Call CNVs - //CNVCaller cnv_obj(this->common, snv_obj.get_snp_positions()); - //std::vector snp_positions; - CNVCaller cnv_obj(this->common); - cnv_obj.run(); - - return 0; -} diff --git a/src/kc.cpp b/src/kc.cpp index abffdb14..453af803 100644 --- a/src/kc.cpp +++ b/src/kc.cpp @@ -1,8 +1,10 @@ +#include "kc.h" + +/// @cond #include #include #include - -#include "kc.h" +/// @endcond /************************ DESCRIPTION OF THE kc.c FILE ******************* @@ -2570,13 +2572,13 @@ cumulative normal distribution -inf */ { - return (1+erf((x-mu)/(sigma*sqrt(2))))/2; + return (1+errorf((x-mu)/(sigma*sqrt(2))))/2; } double cdf_stdnormal (double x) /*Returns 
the cumulative density function for a standard normal distribution (see http://en.wikipedia.org/wiki/Normal_distribution)*/ { - return (1+erf(x/sqrt(2)))/2; + return (1+errorf(x/sqrt(2)))/2; } @@ -3698,18 +3700,18 @@ the formula is adapted from http://www.theorie.physik.uni-muenchen.de/~serge/erf return y; } -double erf(double x) +double errorf(double x) /* calculate the error function (http://en.wikipedia.org/wiki/Error_function) The error function is defined by x 2 2 / -t - erf(x) = -------- | e dt + errorf(x) = -------- | e dt sqrt(pi) / 0 */ { - double gammp(double a, double x); - return (x < 0.0) ? (-gammp(0.5,x*x)) : gammp(0.5,x*x); + double gammp(double a, double x); + return (x < 0.0) ? (-gammp(0.5,x*x)) : gammp(0.5,x*x); } diff --git a/src/khmm.cpp b/src/khmm.cpp index b4a421d7..88075dd9 100644 --- a/src/khmm.cpp +++ b/src/khmm.cpp @@ -1,8 +1,14 @@ -#include - #include "khmm.h" #include "kc.h" +/// @cond +#include +#include +#include +#include +/// @endcond + +#define BUFFER_SIZE 1024 #define STATE_CHANGE 100000.0 /*this is the expected changes (D value) in the transition matrix*/ #define VITHUGE 100000000000.0 #define FLOAT_MINIMUM 1.175494351e-38; /*this is indeed machine dependent*/ @@ -27,26 +33,23 @@ std::vector testVit_CHMM(CHMM hmm, int T, double *O1, double *O2, double *p // Note: PLOGPROBA is replaced with DELTA (delta matrix) after running the // HMM, which is used to calculate the probability of the most likely state - // int *q; /* Array for the state sequence q[1..T] */ double **delta; // Matrix int **psi; // Matrix - - // These 3 variables will be updated with the HMM results - // q = ivector(1, T); // Allocate an int vector for each - // probe delta = dmatrix(1, T, 1, hmm.N); // Allocate a TxN double matrix (N=6 states) psi = imatrix(1, T, 1, hmm.N); // Allocate a TxN int matrix (N=6 states) - - // Allocate pfb and plogproba - pfb = dvector(1, T); + + // Set initial log probability for each state plogproba = dvector(1, hmm.N); + for (int i = 1; i <= hmm.N; i++) + { + plogproba[i] = -VITHUGE; + } // Run the HMM std::vector q; // State sequence q = ViterbiLogNP_CHMM(&hmm, T, O1, O2, pfb, snpdist, delta, psi, plogproba); // Free the variables - // free_ivector(q, 1, T); free_imatrix(psi, 1, T, 1, hmm.N); free_dmatrix(delta, 1, T, 1, hmm.N); @@ -57,7 +60,9 @@ std::vector testVit_CHMM(CHMM hmm, int T, double *O1, double *O2, double *p return q; } -// Emission probability: Calculate the state observationn likelihood b_j(O_t) of the observation symbol O_t given the current state j +// Emission probability: Calculate the state observationn likelihood b_j(O_t) of +// the observation symbol O_t given the current state j +// O_t is the LRR value double b1iot(int state, double *mean, double *sd, double uf, double o) { // UF = previous alpha @@ -65,7 +70,6 @@ double b1iot(int state, double *mean, double *sd, double uf, double o) p = uf; // PDF normal is the transition probability distrubution a_ij (initialized as pi_n) from state i to j - // P += (1-alpha_t-1) * p += (1 - uf) * pdf_normal(o, mean[state], sd[state]); // Prevent divide by zero error @@ -76,21 +80,32 @@ double b1iot(int state, double *mean, double *sd, double uf, double o) return log(p); } +// Emission probability: Calculate the state observationn likelihood b_j(O_t) of +// the observation symbol O_t given the current state j +// O_t is the BAF value double b2iot(int state, double *mean, double *sd, double uf, double pfb, double b) { double p = 0; - double mean0 = mean[1]; - double mean25 = mean[2]; - double mean33 
diff --git a/src/khmm.cpp b/src/khmm.cpp
index b4a421d7..88075dd9 100644
--- a/src/khmm.cpp
+++ b/src/khmm.cpp
@@ -1,8 +1,14 @@
-#include <stdio.h>
-
 #include "khmm.h"
 #include "kc.h"
 
+/// @cond
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <string>
+/// @endcond
+
+#define BUFFER_SIZE 1024
 #define STATE_CHANGE 100000.0 /*this is the expected changes (D value) in the transition matrix*/
 #define VITHUGE 100000000000.0
 #define FLOAT_MINIMUM 1.175494351e-38; /*this is indeed machine dependent*/
@@ -27,26 +33,23 @@ std::vector<int> testVit_CHMM(CHMM hmm, int T, double *O1, double *O2, double *p
     // Note: PLOGPROBA is replaced with DELTA (delta matrix) after running the
     // HMM, which is used to calculate the probability of the most likely state
 
-    // int *q; /* Array for the state sequence q[1..T] */
     double **delta; // Matrix
     int **psi;      // Matrix
-
-    // These 3 variables will be updated with the HMM results
-    // q = ivector(1, T); // Allocate an int vector for each
-    // probe
     delta = dmatrix(1, T, 1, hmm.N); // Allocate a TxN double matrix (N=6 states)
     psi = imatrix(1, T, 1, hmm.N);   // Allocate a TxN int matrix (N=6 states)
-
-    // Allocate pfb and plogproba
-    pfb = dvector(1, T);
+
+    // Set the initial log probability for each state
    plogproba = dvector(1, hmm.N);
+    for (int i = 1; i <= hmm.N; i++)
+    {
+        plogproba[i] = -VITHUGE;
+    }
 
     // Run the HMM
     std::vector<int> q; // State sequence
     q = ViterbiLogNP_CHMM(&hmm, T, O1, O2, pfb, snpdist, delta, psi, plogproba);
 
     // Free the variables
-    // free_ivector(q, 1, T);
     free_imatrix(psi, 1, T, 1, hmm.N);
     free_dmatrix(delta, 1, T, 1, hmm.N);
@@ -57,7 +60,9 @@ std::vector<int> testVit_CHMM(CHMM hmm, int T, double *O1, double *O2, double *p
     return q;
 }
 
-// Emission probability: Calculate the state observationn likelihood b_j(O_t) of the observation symbol O_t given the current state j
+// Emission probability: Calculate the state observation likelihood b_j(O_t) of
+// the observation symbol O_t given the current state j.
+// O_t is the LRR value.
 double b1iot(int state, double *mean, double *sd, double uf, double o)
 {
     // UF = previous alpha
-
     double p;
     p = uf;
 
-    // PDF normal is the transition probability distrubution a_ij (initialized as pi_n) from state i to j
-    // P += (1-alpha_t-1) *
+    // PDF normal is the transition probability distribution a_ij (initialized as pi_n) from state i to j
     p += (1 - uf) * pdf_normal(o, mean[state], sd[state]);
 
     // Prevent divide by zero error
@@ -76,21 +80,32 @@
     return log(p);
 }
 
+// Emission probability: Calculate the state observation likelihood b_j(O_t) of
+// the observation symbol O_t given the current state j.
+// O_t is the BAF value.
 double b2iot(int state, double *mean, double *sd, double uf, double pfb, double b)
 {
     double p = 0;
-    double mean0 = mean[1];
-    double mean25 = mean[2];
-    double mean33 = mean[3];
-    double mean50 = mean[4];
-    double mean50_state1 = mean[5];
-    double sd0 = sd[1];
-    double sd25 = sd[2];
-    double sd33 = sd[3];
-    double sd50 = sd[4];
-    double sd50_state1 = sd[5];
+    double mean0 = mean[1];         // mean[1] = 0
+    double mean25 = mean[2];        // mean[2] = 0.25
+    double mean33 = mean[3];        // mean[3] = 0.33
+    double mean50 = mean[4];        // mean[4] = 0.5
+    double mean50_state1 = mean[5]; // mean[5] = 0.5
+    double sd0 = sd[1];             // sd[1] = 0
+    double sd25 = sd[2];            // sd[2] = 0.25
+    double sd33 = sd[3];            // sd[3] = 0.33
+    double sd50 = sd[4];            // sd[4] = 0.5
+    double sd50_state1 = sd[5];     // sd[5] = 0.5
+
+    p = uf; // UF = previous alpha (transition probability)
+
+    // PDF normal is the transition probability distribution a_ij (initialized
+    // as pi_n) from state i to j.
+    // Here, we calculate the probability of the observation symbol O_t given
+    // the current state j, where the observation symbol is the BAF value b:
+    // P += (1-alpha_t-1) * pdf_normal(b, mean[state], sd[state]);
 
-    p = uf;
     if (state == 1)
     {
         if (b == 0)
@@ -192,9 +207,10 @@ double b2iot(int state, double *mean, double *sd, double uf, double pfb, double
             p += (1 - uf) * pfb * pfb * pfb * pfb * pdf_normal(b, 1 - mean0, sd0);
         }
     }
-    if (p == 0)
+    if (p == 0) // Prevent divide by zero error
         p = FLOAT_MINIMUM;
-    return log(p);
+
+    return log(p); // Return the log probability of the observation symbol O_t
 }
 
 // SV calling with the HMM via the Viterbi algorithm
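For context on the emission functions above: both b1iot and b2iot build a mixture density, starting from a uniform-noise weight `uf` and adding `(1 - uf)` times normal densities centered on the expected signal clusters, weighted for BAF by the population B-allele frequency `pfb`. A self-contained toy sketch of the diploid BAF case (all names and numbers illustrative; `pdf_normal` is re-implemented here so the sketch compiles on its own):

    #include <cmath>
    #include <cstdio>

    // Toy stand-in for kc.cpp's pdf_normal()
    static double pdf_normal(double x, double mean, double sd)
    {
        const double PI = 3.14159265358979323846;
        double z = (x - mean) / sd;
        return std::exp(-0.5 * z * z) / (sd * std::sqrt(2.0 * PI));
    }

    int main()
    {
        double uf = 0.01;  // uniform-noise weight (the "previous alpha" above)
        double pfb = 0.4;  // population frequency of the B allele
        double sd = 0.03;  // BAF cluster standard deviation
        double b = 0.52;   // observed BAF

        // Diploid state: Hardy-Weinberg mixture over genotypes AA, AB, BB,
        // with BAF clusters at 0, 0.5 and 1
        double p = uf;
        p += (1 - uf) * ((1 - pfb) * (1 - pfb) * pdf_normal(b, 0.0, sd)
                       + 2 * pfb * (1 - pfb)   * pdf_normal(b, 0.5, sd)
                       + pfb * pfb             * pdf_normal(b, 1.0, sd));
        std::printf("log emission = %f\n", std::log(p));
        return 0;
    }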
@@ -204,7 +220,6 @@ std::vector<int> ViterbiLogNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, do
     int t; /* time index */
 
     int snp_count = 0;
-    int cn_count = 0;
 
     int maxvalind;
     double maxval, val;
@@ -232,58 +247,63 @@ std::vector<int> ViterbiLogNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, do
     {
         if (phmm->pi[i] == 0)
             phmm->pi[i] = 1e-9; /*eliminate problems with zero probability*/
-        phmm->pi[i] = log(phmm->pi[i]);
+        phmm->pi[i] = log(phmm->pi[i]); // Convert to log probability to avoid underflow
     }
 
     // Bi(Ot) is an NxT matrix of emission probabilities (observation likelihoods)
-    // expressing the probability of an observation Ot being generated from a state i
-    biot = dmatrix(1, phmm->N, 1, T);
+    // expressing the probability of an observation Ot being generated from a
+    // state i. Ot is the observation symbol at time t (in this case, the LRR
+    // and BAF values).
+
+    // Initialize the emission probability matrix
+    biot = dmatrix(1, phmm->N, 1, T); // Allocate an NxT double matrix
+
+    std::cout << "[HMM] Running Viterbi algorithm with " << phmm->N << " states and " << T << " probes\n";
 
     // Loop through each state N
+    // (start at 1 because states are 1-based: 1-6)
     for (i = 1; i <= phmm->N; i++)
     {
-        // Loop through each probe T
+        // Loop through each probe T. The biot/delta/psi matrices are 1-based,
+        // while the observation arrays O1, O2 and pfb are 0-based, hence the
+        // t-1 indexing below.
         for (t = 1; t <= T; t++)
         {
-            // If it's non-polymorphic (BAF > 1)
-            if (O2[t] > 1)
-            { /*normally BAF>=0 and BAF<=1; we use BAF>1 to indicate a Non-Polymorphic marker*/
-                if (!phmm->NP_flag)
-                    std::cerr << "FATAL ERROR: CN probe detected but HMM model does not contain parameters for them\n";
-
-                // Update emissions based on LRR
-                biot[i][t] = b1iot(i, phmm->B3_mean, phmm->B3_sd, phmm->B3_uf, O1[t]);
-                cn_count++;
-
-                // If it's a regular SNP
-            }
-            else
-            { /*a regular SNP marker; we use both LRR and BAF information to calculate logProb for the marker*/
-                // Update emissions based on LRR (O1)
-                biot[i][t] = b1iot(i, phmm->B1_mean, phmm->B1_sd, phmm->B1_uf, O1[t]);
-
-                // Update emissions based on BAF (O2)
-                // *Set pfb to all 1's to ignore BAF population frequency in
-                // this implementation
-                biot[i][t] += b2iot(i, phmm->B2_mean, phmm->B2_sd, phmm->B2_uf, pfb[t], O2[t]);
-                snp_count++;
-            }
+            // A regular SNP marker; we use both LRR and BAF information to
+            // calculate the joint probability of the marker being in state i
+
+            // Get the state observation likelihood b_j(O_t) of the observation
+            // symbol O_t given the current state j.
+            // B1_uf is the previous alpha (transition probability).
+            double O1_val = O1[t - 1];
+            double O1_logprob = b1iot(i, phmm->B1_mean, phmm->B1_sd, phmm->B1_uf, O1_val);
+
+            double O2_val = O2[t - 1];
+            double O2_logprob = b2iot(i, phmm->B2_mean, phmm->B2_sd, phmm->B2_uf, pfb[t - 1], O2_val);
+
+            // Update the emission probability matrix with the joint probability
+            // of the marker being in state i at time t (log probability) based
+            // on both LRR and BAF values
+            biot[i][t] = O1_logprob + O2_logprob;
+
+            // Update the SNP count
+            snp_count++;
         }
     }
     /*fprintf(stderr, "NOTICE: Encountered %i snp probes and %i cn probes\n", snp_count, cn_count);*/
 
     /* 1. Initialization */
     for (i = 1; i <= phmm->N; i++)
     {
-        delta[1][i] = phmm->pi[i] + biot[i][1];
-        psi[1][i] = 0;
+        delta[1][i] = phmm->pi[i] + biot[i][1]; // Initialize the delta matrix (log probability)
+        psi[1][i] = 0; // Initialize the psi matrix (backpointers) to 0 (no state)
 
-        pprob[1] = -VITHUGE;
+        pprob[i] = -VITHUGE; // Initialize the log probability for each state to -inf
     }
 
     /* 2. Recursion */
-
     for (t = 2; t <= T; t++)
     {
@@ -298,16 +318,16 @@ std::vector<int> ViterbiLogNP_CHMM(CHMM *phmm, int T, double *O1, double *O2, do
             maxvalind = 1;
             for (i = 1; i <= phmm->N; i++)
             {
-                val = delta[t - 1][i] + log(A1[i][j]);
-                if (val > maxval)
+                val = delta[t - 1][i] + log(A1[i][j]); // Path log probability through previous state i
+                if (val > maxval) // Track the most likely previous state
                 {
                     maxval = val;
                     maxvalind = i;
                 }
             }
 
-            delta[t][j] = maxval + biot[j][t];
-            psi[t][j] = maxvalind;
+            delta[t][j] = maxval + biot[j][t]; // Update the delta matrix (log probability)
+            psi[t][j] = maxvalind;             // Update the psi matrix (backpointer to the best previous state)
         }
     }
@@ -519,4 +539,3 @@ void FreeCHMM(CHMM *phmm)
         free_dvector(phmm->B3_sd, 1, phmm->N);
     }
 }
-
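The delta/psi recursion above is the standard log-space Viterbi update, `delta[t][j] = max_i(delta[t-1][i] + log A[i][j]) + log b_j(O_t)`, with psi recording the argmax for backtracking. A minimal standalone sketch on a toy two-state model (all values illustrative):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const int N = 2, T = 4;
        // Toy transition and emission log probabilities (emissions play the
        // role of biot[j][t] above)
        double logA[2][2] = {{std::log(0.9), std::log(0.1)},
                             {std::log(0.2), std::log(0.8)}};
        double logB[2][4] = {{-0.1, -2.0, -2.0, -0.1},
                             {-2.0, -0.1, -0.1, -2.0}};
        std::vector<std::vector<double>> delta(T, std::vector<double>(N));
        std::vector<std::vector<int>> psi(T, std::vector<int>(N));

        // 1. Initialization (uniform prior)
        for (int j = 0; j < N; j++) delta[0][j] = std::log(0.5) + logB[j][0];

        // 2. Recursion: best previous state for each (t, j)
        for (int t = 1; t < T; t++) {
            for (int j = 0; j < N; j++) {
                double maxval = -1e300; int argmax = 0;
                for (int i = 0; i < N; i++) {
                    double val = delta[t - 1][i] + logA[i][j];
                    if (val > maxval) { maxval = val; argmax = i; }
                }
                delta[t][j] = maxval + logB[j][t];
                psi[t][j] = argmax;
            }
        }

        // 3. Backtrack from the best final state
        int q = delta[T - 1][0] > delta[T - 1][1] ? 0 : 1;
        std::vector<int> path = {q};
        for (int t = T - 1; t > 0; t--) { q = psi[t][q]; path.insert(path.begin(), q); }
        for (int s : path) std::printf("%d ", s);
        std::printf("\n");
        return 0;
    }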
diff --git a/src/sv_caller.cpp b/src/sv_caller.cpp
new file mode 100644
index 00000000..483801f9
--- /dev/null
+++ b/src/sv_caller.cpp
@@ -0,0 +1,350 @@
+//
+// sv_caller.cpp:
+// Detect SVs from long read alignments
+//
+
+#include "sv_caller.h"
+
+#include <htslib/sam.h>
+
+/// @cond
+#include <iostream>
+#include <string>
+#include <vector>
+#include <cstdlib>
+/// @endcond
+
+SVCaller::SVCaller(InputData& input_data)
+{
+    this->input_data = &input_data;
+}
+
+// Main function for SV detection
+SVData SVCaller::run()
+{
+    // Get SV calls from split read alignments (primary and supplementary) and
+    // directly from the CIGAR string
+    SVData sv_calls = this->detectSVsFromSplitReads();
+
+    // Return the SV calls
+    return sv_calls;
+}
+
+// Detect SVs from the CIGAR string of a read alignment
+void SVCaller::detectSVsFromCIGAR(bam_hdr_t* header, bam1_t* alignment, SVData& sv_calls)
+{
+    // Get the chromosome
+    std::string chr = header->target_name[alignment->core.tid];
+
+    // Get the position
+    int32_t pos = alignment->core.pos;
+
+    // Get the CIGAR string
+    uint32_t* cigar = bam_get_cigar(alignment);
+
+    // Get the CIGAR length
+    int cigar_len = alignment->core.n_cigar;
+
+    // Track the query position
+    int query_pos = 0;
+
+    // Loop through the CIGAR string and detect insertions and deletions in
+    // reference coordinates.
+    // POS is the leftmost position of where the alignment maps to the reference:
+    // https://genome.sph.umich.edu/wiki/SAM
+    for (int i = 0; i < cigar_len; i++) {
+
+        // Get the CIGAR operation
+        int op = bam_cigar_op(cigar[i]);
+
+        // Get the CIGAR operation length
+        int op_len = bam_cigar_oplen(cigar[i]);
+
+        // Check if the CIGAR operation is an insertion
+        if (op == BAM_CINS) {
+
+            // Add the SV if greater than the minimum SV size
+            if (op_len >= this->min_sv_size) {
+
+                // Get the sequence of the insertion from the query
+                std::string ins_seq_str = "";
+                uint8_t* seq_ptr = bam_get_seq(alignment);
+                for (int j = 0; j < op_len; j++) {
+                    ins_seq_str += seq_nt16_str[bam_seqi(seq_ptr, query_pos + j)];
+                }
+
+                // Add the insertion to the SV calls
+                sv_calls.add(chr, pos, pos + op_len, 3, ins_seq_str, "CIGARINS");
+            }
+
+        // Check if the CIGAR operation is a deletion
+        } else if (op == BAM_CDEL) {
+
+            // Add the SV if greater than the minimum SV size
+            if (op_len >= this->min_sv_size) {
+
+                // Add the deletion to the SV calls
+                sv_calls.add(chr, pos, pos + op_len, 0, ".", "CIGARDEL");
+            }
+        }
+
+        // Update the reference coordinate based on the CIGAR operation (M, D,
+        // N, =, X):
+        // https://samtools.github.io/hts-specs/SAMv1.pdf
+        if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) {
+            pos += op_len;
+        } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP || op == BAM_CPAD) {
+            // Do nothing
+        } else {
+            std::cerr << "ERROR: Unknown CIGAR operation " << op << std::endl;
+            exit(1);
+        }
+
+        // Update the query position based on the CIGAR operation (M, I, S, =, X)
+        if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CEQUAL || op == BAM_CDIFF) {
+            query_pos += op_len;
+        } else if (op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CHARD_CLIP || op == BAM_CPAD) {
+            // Do nothing
+        } else {
+            std::cerr << "ERROR: Unknown CIGAR operation " << op << std::endl;
+            exit(1);
+        }
+    }
+}
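The two explicit operation lists above (reference-consuming: M, D, N, =, X; query-consuming: M, I, S, =, X) follow the SAM specification table. htslib encodes the same table in `bam_cigar_type()`, so the coordinate bookkeeping could equivalently be written as in the sketch below (shown for comparison only, not how the patch does it):

    #include <cstdint>
    #include <htslib/sam.h>

    // Equivalent coordinate bookkeeping via htslib's CIGAR type lookup.
    // bam_cigar_type(op): bit 0 set if the op consumes the query,
    // bit 1 set if it consumes the reference.
    static void advance_coordinates(const bam1_t *aln, int64_t &ref_pos, int64_t &query_pos)
    {
        const uint32_t *cigar = bam_get_cigar(aln);
        for (uint32_t i = 0; i < aln->core.n_cigar; i++) {
            int op = bam_cigar_op(cigar[i]);
            int op_len = bam_cigar_oplen(cigar[i]);
            int type = bam_cigar_type(op);
            if (type & 2) ref_pos += op_len;    // M, D, N, =, X
            if (type & 1) query_pos += op_len;  // M, I, S, =, X
        }
    }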
".", "CIGARDEL"); + } + } + + // Update the reference coordinate based on the CIGAR operation (M, D, + // N, =, X) + // https://samtools.github.io/hts-specs/SAMv1.pdf + if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { + pos += op_len; + } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP || op == BAM_CPAD) { + // Do nothing + } else { + std::cerr << "ERROR: Unknown CIGAR operation " << op << std::endl; + exit(1); + } + + // Update the query position based on the CIGAR operation (M, I, S, H) + if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CEQUAL || op == BAM_CDIFF) { + query_pos += op_len; + } else if (op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CHARD_CLIP || op == BAM_CPAD) { + // Do nothing + } else { + std::cerr << "ERROR: Unknown CIGAR operation " << op << std::endl; + exit(1); + } + } +} + +// Detect SVs from split read alignments (primary and supplementary) and +// directly from the CIGAR string +SVData SVCaller::detectSVsFromSplitReads() +{ + bool debug_cigar = false; + + // Open the BAM file + samFile *fp_in = sam_open(this->input_data->getBAMFilepath().c_str(), "r"); + if (fp_in == NULL) { + std::cerr << "ERROR: failed to open " << this->input_data->getBAMFilepath() << std::endl; + exit(1); + } + + // Get the header + bam_hdr_t *bamHdr = sam_hdr_read(fp_in); + if (bamHdr == NULL) { + std::cerr << "ERROR: failed to read header for " << this->input_data->getBAMFilepath() << std::endl; + exit(1); + } + + // Get the region + std::string region = this->input_data->getRegion(); + + // Get the index + hts_idx_t *idx = sam_index_load(fp_in, this->input_data->getBAMFilepath().c_str()); + if (idx == NULL) { + std::cerr << "ERROR: failed to load index for " << this->input_data->getBAMFilepath() << std::endl; + exit(1); + } + + // Set up the iterator + hts_itr_t *itr = sam_itr_querys(idx, bamHdr, region.c_str()); + + // Set up the read + bam1_t *bam1 = bam_init1(); + + // Create a map of primary and supplementary alignments by QNAME (query template name) + int num_alignments = 0; + //int num_sv_calls = 0; + FASTAQuery ref_genome = this->input_data->getRefGenome(); + SVData sv_calls(ref_genome); + QueryMap primary_alignments; // TODO: Add depth to primary alignments + QueryMap supplementary_alignments; + std::cout << "Reading alignments..." 
+    while (sam_itr_next(fp_in, itr, bam1) >= 0) {
+
+        // Get the QNAME (query template name) for associating split reads
+        std::string qname = bam_get_qname(bam1);
+
+        // Skip secondary and unmapped alignments
+        if (bam1->core.flag & BAM_FSECONDARY || bam1->core.flag & BAM_FUNMAP) {
+            // Do nothing
+
+        // Skip alignments with low mapping quality
+        } else if (bam1->core.qual < this->min_mapq) {
+            // Do nothing
+        } else {
+
+            // Process primary alignments
+            if (!(bam1->core.flag & BAM_FSUPPLEMENTARY)) {
+
+                // Add the primary alignment to the map
+                std::string chr = bamHdr->target_name[bam1->core.tid];
+                int64_t start = bam1->core.pos;
+                int64_t end = bam_endpos(bam1);
+                int depth = 0; // Placeholder for now
+                AlignmentData alignment(chr, start, end, depth);
+                primary_alignments[qname].push_back(alignment);
+
+                // Print the entire CIGAR string
+                if (debug_cigar) {
+                    uint32_t *cigar = bam_get_cigar(bam1);
+                    std::cout << "Full CIGAR string: " << std::endl;
+                    for (uint32_t i = 0; i < bam1->core.n_cigar; i++) {
+                        std::cout << bam_cigar_opchr(cigar[i]) << bam_cigar_oplen(cigar[i]) << " ";
+                    }
+                    std::cout << std::endl;
+                }
+
+                // Finally, call SVs directly from the CIGAR string
+                if (!this->input_data->getDisableCIGAR()) {
+                    this->detectSVsFromCIGAR(bamHdr, bam1, sv_calls);
+                }
+
+            // Process supplementary alignments
+            } else if (bam1->core.flag & BAM_FSUPPLEMENTARY) {
+
+                // Add the supplementary alignment to the map
+                std::string chr = bamHdr->target_name[bam1->core.tid];
+                int32_t start = bam1->core.pos;
+                int32_t end = bam_endpos(bam1);
+                int32_t depth = 0; // Placeholder for now
+                AlignmentData alignment(chr, start, end, depth);
+
+                // Add the supplementary alignment to the map
+                supplementary_alignments[qname].push_back(alignment);
+            }
+        }
+
+        // Increment the number of alignment records processed
+        num_alignments++;
+
+        // Print the number of alignments processed every 10 thousand
+        if (num_alignments % 10000 == 0) {
+            std::cout << num_alignments << " alignments processed" << std::endl;
+        }
+    }
+
+    // Print the number of alignments processed
+    std::cout << num_alignments << " alignments processed" << std::endl;
+
+    // Loop through the map of primary alignments by QNAME and find gaps and
+    // overlaps with the supplementary alignments
+    std::cout << "Running split read analysis..." << std::endl;
+    for (const auto& entry : primary_alignments) {
+
+        // Get the QNAME
+        std::string qname = entry.first;
+
+        // Get the first primary alignment
+        AlignmentData primary_alignment = entry.second[0];
+
+        // Get the primary alignment chromosome
+        std::string primary_chr = std::get<0>(primary_alignment);
+
+        // Get the start and end positions of the primary alignment
+        int32_t primary_start = std::get<1>(primary_alignment);
+        int32_t primary_end = std::get<2>(primary_alignment);
+
+        // Loop through the supplementary alignments and find gaps and overlaps
+        AlignmentVector supp_alignments = supplementary_alignments[qname];
+        for (const auto& supp_alignment : supp_alignments) {
+
+            // Get the supplementary alignment chromosome
+            std::string supp_chr = std::get<0>(supp_alignment);
+
+            // Skip supplementary alignments that are on a different chromosome
+            // for now (TODO: Use for translocations)
+            if (primary_chr != supp_chr) {
+                std::cout << "Supplementary alignment on different chromosome" << std::endl;
+                continue;
+            }
+
+            // Get the start and end positions of the supplementary alignment
+            int32_t supp_start = std::get<1>(supp_alignment);
+            int32_t supp_end = std::get<2>(supp_alignment);
+
+            // Determine the type of gap between the alignments
+            if (supp_start < primary_start && supp_end < primary_start) {
+                // Gap with the supplementary alignment before the primary:
+                // [supp_start] [supp_end] -- [primary_start] [primary_end]
+
+                // Use the gap ends as the SV endpoints (deletion)
+                if (primary_start - supp_end >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, supp_end, primary_start, -1, ".", "GAPINNER_1");
+                }
+
+                // Use the alignment ends as the SV endpoints (insertion)
+                if (primary_end - supp_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, supp_start, primary_end, -1, ".", "GAPOUTER_1");
+                }
+
+            } else if (supp_start > primary_end && supp_end > primary_end) {
+                // Gap with the supplementary alignment after the primary:
+                // [primary_start] [primary_end] -- [supp_start] [supp_end]
+
+                // Use the gap ends as the SV endpoints (deletion)
+                if (supp_start - primary_end >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, primary_end, supp_start, -1, ".", "GAPINNER_2");
+                }
+
+                // Use the alignment ends as the SV endpoints (insertion)
+                if (supp_end - primary_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, primary_start, supp_end, -1, ".", "GAPOUTER_2");
+                }
+
+            } else {
+                // Overlap between the alignments:
+                // [supp_start] [primary_start] [supp_end] [primary_end]
+                // [primary_start] [supp_start] [primary_end] [supp_end]
+
+                // Use the overlap ends as the SV endpoints (deletion)
+                if (supp_end - primary_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, primary_start, supp_end, -1, ".", "OVERLAP_1");
+                } else if (primary_end - supp_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, supp_start, primary_end, -1, ".", "OVERLAP_2");
+                }
+
+                // Use the alignment ends as the SV endpoints (insertion)
+                if (primary_end - supp_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, supp_start, primary_end, -1, ".", "OVERLAP_3");
+                } else if (supp_end - primary_start >= this->min_sv_size) {
+                    sv_calls.add(supp_chr, primary_start, supp_end, -1, ".", "OVERLAP_4");
+                }
+            }
+        }
+    }
+
+    // Destroy objects and close files
+    bam_destroy1(bam1);      // Destroy the read
+    hts_itr_destroy(itr);    // Destroy the iterator
+    sam_close(fp_in);        // Close the BAM file
+    bam_hdr_destroy(bamHdr); // Destroy the header
+    hts_idx_destroy(idx);    // Destroy the index
+
+    // Print the number of SV calls
+    std::cout << sv_calls.size() << " SV calls" << std::endl;
+
+    // Return the SV calls
+    return sv_calls;
+}
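The split-read branch structure above reduces to a three-way classification of each primary/supplementary pair; the patch then applies `min_sv_size` thresholds and records both inner (gap) and outer (alignment-span) endpoints for each case. The core classification, as a standalone sketch with invented coordinates:

    #include <cstdint>
    #include <cstdio>

    // The three cases above, reduced to a pure classification (size
    // thresholds and endpoint choices omitted).
    static const char* classify(int64_t p_start, int64_t p_end, int64_t s_start, int64_t s_end)
    {
        if (s_start < p_start && s_end < p_start) return "gap, supplementary before primary";
        if (s_start > p_end && s_end > p_end)     return "gap, supplementary after primary";
        return "overlap";
    }

    int main() {
        std::printf("%s\n", classify(1000, 2000, 100, 500));    // gap before
        std::printf("%s\n", classify(1000, 2000, 2500, 3000));  // gap after
        std::printf("%s\n", classify(1000, 2000, 1500, 2500));  // overlap
        return 0;
    }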
diff --git a/src/sv_data.cpp b/src/sv_data.cpp
new file mode 100644
index 00000000..a79d82c7
--- /dev/null
+++ b/src/sv_data.cpp
@@ -0,0 +1,194 @@
+#include "sv_data.h"
+#include "vcf_writer.h"
+
+/// @cond
+#include <iostream>
+#include <algorithm>
+/// @endcond
+
+void SVData::add(std::string chr, int64_t start, int64_t end, int sv_type, std::string alt_allele, std::string data_type)
+{
+    // Add the SV call to the map of candidate locations
+    SVCandidate candidate(chr, start, end, alt_allele);
+    if (this->sv_calls.find(candidate) != this->sv_calls.end()) {
+
+        // Update the read depth
+        SVInfo& sv_info = this->sv_calls[candidate];
+        sv_info.read_depth += 1;
+
+        // Update the SV type if it is unknown
+        if (sv_info.sv_type == -1) {
+            sv_info.sv_type = sv_type;
+        }
+
+        // Update the alignment type used to call the SV
+        sv_info.data_type.insert(data_type);
+
+    } else {
+        // Determine the SV length
+        int sv_length = end - start;
+
+        // Create a new SVInfo object
+        SVInfo sv_info(sv_type, 1, data_type, sv_length);
+
+        // Add the SV candidate to the map
+        this->sv_calls[candidate] = sv_info;
+    }
+}
+
+SVData::SVData(FASTAQuery &ref_genome)
+{
+    // Set the reference genome
+    this->ref_genome = &ref_genome;
+}
+
+std::string SVData::getRefGenome()
+{
+    return this->ref_genome->getFilepath();
+}
+
+std::string SVData::getSequence(std::string chr, int64_t pos_start, int64_t pos_end)
+{
+    // Query the reference genome
+    return this->ref_genome->query(chr, pos_start, pos_end);
+}
+
+void SVData::updateSVType(SVCandidate candidate, int sv_type)
+{
+    // Update the SV type for a given SV candidate
+    if (this->sv_calls.find(candidate) != this->sv_calls.end()) {
+        SVInfo& sv_info = this->sv_calls[candidate];
+        sv_info.sv_type = sv_type;
+    } else {
+        std::cerr << "Error: Unable to update SV type for SV candidate (" << std::get<0>(candidate) << ", " << std::get<1>(candidate) << ", " << std::get<2>(candidate) << ", " << std::get<3>(candidate) << ")" << std::endl;
+    }
+}
+
+void SVData::saveToVCF(FASTAQuery& ref_genome, std::string output_dir)
+{
+    // Create a VCF writer
+    std::string output_vcf = output_dir + "/sv_calls.vcf";
+    VcfWriter vcf_writer(output_vcf);
+
+    // Set the sample name
+    std::string sample_name = "SAMPLE";
+
+    // Set the header lines
+    std::vector<std::string> header_lines = {
+        std::string("##reference=") + ref_genome.getFilepath(),
+        ref_genome.getContigHeader(),
+        "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant\">",
+        "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">",
+        "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the structural variant\">",
+        "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Read depth\">",
+        "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"SV calling method\">",
+        "##INFO=<ID=ALN,Number=.,Type=String,Description=\"Alignment evidence used to call the SV\">",
+        "##INFO=<ID=REPTYPE,Number=1,Type=String,Description=\"Repeat type\">",
+        "##FILTER=<ID=PASS,Description=\"All filters passed\">",
+        "##FILTER=<ID=LowQual,Description=\"Low quality\">",
+        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
+        "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read depth\">"
+    };
+
+    // Write the header lines
+    vcf_writer.writeHeader(header_lines);
+
+    // Iterate over the SV calls
+    std::cout << "Saving SV calls to " << output_vcf << "..." << std::endl;
+    std::string sv_method = "CONTEXTSVv0.1";
+    for (auto const& sv_call : this->sv_calls) {
+
+        // Get the SV candidate and SV info
+        SVCandidate candidate = sv_call.first;
+        SVInfo info = sv_call.second;
+        int sv_type = info.sv_type;
+        int depth = info.read_depth;
+        int sv_length = info.sv_length;
+        std::set<std::string> data_type = info.data_type;
+
+        // Convert the data type set to a comma-separated string
+        std::string data_type_str = "";
+        for (auto const& type : data_type) {
+            data_type_str += type + ",";
+        }
+
+        // Get the CHROM, POS, END, and ALT
+        std::string chr = std::get<0>(candidate);
+        int pos = std::get<1>(candidate);
+        int end = std::get<2>(candidate);
+
+        // If the SV type is unknown, skip it
+        if (sv_type == -1) {
+            continue;
+        }
+
+        // Process by SV type
+        std::string ref_allele = ".";
+        std::string alt_allele = ".";
+        std::string repeat_type = "NA";
+
+        // Deletion
+        if (sv_type == 0) {
+            // Get the reference allele from the reference genome as well as
+            // the base preceding the SV
+            int preceding_pos = std::max(1, pos - 1); // Make sure the position is not negative
+            ref_allele = ref_genome.query(chr, preceding_pos, end);
+
+            // Use the preceding base as the alternate allele
+            alt_allele = ref_allele.substr(0, 1);
+
+            // Make the SV length negative
+            sv_length = -1 * sv_length;
+
+            repeat_type = "CONTRAC"; // Deletion
+
+        // Duplications and insertions
+        } else if (sv_type == 1 || sv_type == 3) {
+            // Use the preceding base as the reference allele
+            int preceding_pos = std::max(1, pos - 1); // Make sure the position is not negative
+            ref_allele = ref_genome.query(chr, preceding_pos, preceding_pos + 1);
+
+            // Use the insertion sequence as the alternate allele
+            alt_allele = std::get<3>(candidate);
+
+            // Prepend the reference base to the alternate allele
+            alt_allele.insert(0, ref_allele);
+
+            // Update the end position to the start position to change from
+            // query to reference coordinates
+            end = pos;
+
+            if (sv_type == 1) {
+                repeat_type = "DUP"; // Duplication
+            }
+        }
+
+        // Get the SV type string
+        std::string sv_type_str = this->sv_type_map[sv_type];
+
+        // For now, set the QUAL and FILTER as unknown
+
+        // Set the genotype as unspecified for now (homozygous and
+        // heterozygous calls are not yet distinguished)
+        std::string genotype = "./.";
+
+        // Create the INFO string
+        std::string info_str = "END=" + std::to_string(end) + ";SVTYPE=" + sv_type_str + ";SVLEN=" + std::to_string(sv_length) + ";DP=" + std::to_string(depth) + ";SVMETHOD=" + sv_method + ";ALN=" + data_type_str + ";REPTYPE=" + repeat_type;
+
+        // Create the FORMAT string
+        std::string format_str = "GT:DP";
+
+        // Create the sample string
+        std::string sample_str = genotype + ":" + std::to_string(depth);
+        std::vector<std::string> samples = {sample_str};
+
+        // Write the SV call to the file
+        vcf_writer.writeRecord(chr, pos, ".", ref_allele, alt_allele, ".", ".", info_str, format_str, samples);
+    }
+
+    // Close the output stream
+    vcf_writer.close();
+    std::cout << "Saved SV calls to " << output_vcf << std::endl;
+}
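The deletion branch above follows the usual VCF convention for explicit SV records: anchor on the base preceding the event, put the anchor plus the deleted sequence in REF, the anchor base alone in ALT, and report a negative SVLEN. A toy standalone sketch (reference sequence and coordinates invented; `FASTAQuery::query()` is replaced by `substr` so the sketch runs on its own):

    #include <cstdio>
    #include <string>

    int main() {
        // Toy 1-based reference fragment standing in for the genome query
        std::string chrom_seq = "ACGTACGTACGT";

        int pos = 5, len = 3;  // deletion of the 3 bases at positions 5-7
        int anchor = pos - 1;  // left anchor base (VCF convention)

        // REF = anchor base + deleted sequence; ALT = anchor base alone
        std::string ref_allele = chrom_seq.substr(anchor - 1, len + 1);
        std::string alt_allele = ref_allele.substr(0, 1);
        int svlen = -len;
        int end = anchor + (int)ref_allele.size() - 1; // last deleted base

        // Prints: chr1  4  .  TACG  T  .  .  END=7;SVTYPE=DEL;SVLEN=-3
        std::printf("chr1\t%d\t.\t%s\t%s\t.\t.\tEND=%d;SVTYPE=DEL;SVLEN=%d\n",
                    anchor, ref_allele.c_str(), alt_allele.c_str(), end, svlen);
        return 0;
    }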
diff --git a/src/swig_interface.cpp b/src/swig_interface.cpp
new file mode 100644
index 00000000..87334fec
--- /dev/null
+++ b/src/swig_interface.cpp
@@ -0,0 +1,31 @@
+//
+// Created by jperdomo on 1/8/2023.
+//
+
+#include "swig_interface.h"
+#include "contextsv.h"
+
+/// @cond
+#include <iostream>
+/// @endcond
+
+// Run the CLI with the given parameters
+int run(InputData input_data)
+{
+    // Run ContextSV
+    ContextSV contextsv(input_data);
+    try
+    {
+        contextsv.run();
+    }
+    catch (std::exception& e)
+    {
+        std::cerr << e.what() << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/swig_wrapper.i b/src/swig_wrapper.i
index f42ae4ed..d398087c 100644
--- a/src/swig_wrapper.i
+++ b/src/swig_wrapper.i
@@ -6,7 +6,8 @@ SWIG wrapper for C++ code.
 // Include header
 %{
-#include "cli.h"
+#include "swig_interface.h"
+#include "input_data.h"
 %}
 
 // Set up types
@@ -21,5 +22,9 @@ SWIG wrapper for C++ code.
     }
 }
 
+// Expose the InputData class
+%include "input_data.h"
+
 // Include functions
-int run(const std::string& bam, const std::string& snps, const std::string& outdir, const std::string& region);
+int run(InputData input_data);
+
diff --git a/src/utils.cpp b/src/utils.cpp
index e69de29b..a307a663 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -0,0 +1,36 @@
+#include "utils.h"
+
+/// @cond
+#include <cstdio>
+#include <iostream>
+#include <string>
+/// @endcond
+
+// Print a progress bar
+void printProgress(int progress, int total)
+{
+    // Get the percentage
+    float percent = (float)progress / (float)total * 100.0;
+
+    // Get the number of hashes
+    int num_hashes = (int)(percent / 2.0);
+
+    // Print the progress bar
+    printf("\r[");
+    for (int i = 0; i < num_hashes; i++)
+    {
+        printf("#");
+    }
+    for (int i = 0; i < 50 - num_hashes; i++)
+    {
+        printf(" ");
+    }
+    printf("] %3.2f%%", percent);
+    fflush(stdout);
+
+    // Print a new line if finished
+    if (progress == total)
+    {
+        printf("\n");
+    }
+}
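For completeness, `printProgress()` above renders a 50-character bar, one `#` per two percent. A minimal driver (assumes `printProgress` is declared in `utils.h`, which this patch suggests but does not show; the sleep merely simulates work):

    #include <chrono>
    #include <thread>

    #include "utils.h"  // assumed to declare printProgress(int, int)

    int main() {
        const int total = 200;
        for (int i = 1; i <= total; i++) {
            std::this_thread::sleep_for(std::chrono::milliseconds(5)); // simulated work
            printProgress(i, total);
        }
        return 0;
    }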
diff --git a/src/vcf_writer.cpp b/src/vcf_writer.cpp
new file mode 100644
index 00000000..013816f2
--- /dev/null
+++ b/src/vcf_writer.cpp
@@ -0,0 +1,73 @@
+#include "vcf_writer.h"
+
+/// @cond
+#include <iostream>
+#include <ctime>
+/// @endcond
+
+VcfWriter::VcfWriter(const std::string &filename)
+{
+    // Remove the file if it already exists
+    std::cout << "Removing previous VCF..." << std::endl;
+    std::remove(filename.c_str());
+
+    // Open the VCF file
+    std::cout << "Opening VCF file..." << std::endl;
+    this->file_stream.open(filename);
+    if (!this->file_stream.is_open()) {
+        std::cerr << "Error: Unable to open " << filename << std::endl;
+        exit(1);
+    } else {
+        std::cout << "Done." << std::endl;
+    }
+}
+
+void VcfWriter::writeHeader(const std::vector<std::string> &headerLines)
+{
+    std::cout << "Writing VCF header..." << std::endl;
+
+    // Add the file format
+    std::string file_format = "##fileformat=VCFv4.2";
+    this->file_stream << file_format << std::endl;
+
+    // Add the date
+    time_t rawtime;
+    struct tm *timeinfo;
+    char buffer[80];
+    time(&rawtime);
+    timeinfo = localtime(&rawtime);
+    strftime(buffer, sizeof(buffer), "%Y%m%d", timeinfo);
+    file_stream << "##fileDate=" << buffer << std::endl;
+
+    // Add the source
+    std::string source = "##source=ContextSV";
+    this->file_stream << source << std::endl;
+
+    // Loop over the header metadata lines
+    for (auto &line : headerLines) {
+        this->file_stream << line << std::endl;
+    }
+
+    // Add the header line
+    std::string header_line = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE";
+    this->file_stream << header_line << std::endl;
+
+    // Flush the stream to ensure that the header is written
+    this->file_stream.flush();
+
+    std::cout << "Done." << std::endl;
+}
+
+void VcfWriter::writeRecord(const std::string &chrom, int pos, const std::string &id, const std::string &ref, const std::string &alt, const std::string &qual, const std::string &filter, const std::string &info, const std::string &format, const std::vector<std::string> &samples)
+{
+    // Write a record to the VCF file
+    this->file_stream << chrom << "\t" << pos << "\t" << id << "\t" << ref << "\t" << alt << "\t" << qual << "\t" << filter << "\t" << info << "\t" << format << "\t" << samples[0] << std::endl;
+}
+
+void VcfWriter::close()
+{
+    // Close the VCF file
+    std::cout << "Closing VCF file..." << std::endl;
+    this->file_stream.close();
+    std::cout << "Done." << std::endl;
+}
diff --git a/tests/data/ref.fa b/tests/data/ref.fa
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_general.py b/tests/test_general.py
index f71cfa29..a268df3b 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -16,24 +16,33 @@
 # Get the path to the test data files.
 TEST_BAM_FILE = os.path.join(TEST_DATA_DIR, 'test.bam')
+TEST_REF_FILE = ""  # Empty string for now.
 TEST_SNPS_FILE = os.path.join(TEST_DATA_DIR, 'snps.vcf.gz')
+TEST_PFB_FILE = ""
 
 # Set the output directory.
 TEST_OUTDIR = os.path.join(os.path.dirname(__file__), 'output')
 
-
+# Test the main function.
 def test_run():
-    """Test the run function."""
-    # Run the program.
-    contextsv.run(
-        TEST_BAM_FILE,
-        TEST_SNPS_FILE,
-        TEST_OUTDIR,
-        "chr3:60380533-60390533"
-    )
+
+    # Set the input parameters.
+    input_data = contextsv.InputData()
+    input_data.setBAMFilepath(TEST_BAM_FILE)
+    input_data.setRefGenome(TEST_REF_FILE)
+    input_data.setSNPFilepath(TEST_SNPS_FILE)
+    input_data.setRegion("chr3:60380533-60390533")
+    input_data.setThreadCount(1)
+    input_data.setChrCov("chr3:39.561,chr6:39.4096")
+    input_data.setPFBFilepath("")
+    input_data.setHMMFilepath("")
+    input_data.setOutputDir(TEST_OUTDIR)
+
+    # Run the analysis.
+    contextsv.run(input_data)
 
     # Check that the output file exists.
-    output_file = os.path.join(TEST_OUTDIR, 'snp_lrr_baf.csv')
+    output_file = os.path.join(TEST_OUTDIR, 'cnv_data.tsv')
     assert os.path.exists(output_file)
 
     # Check that the output file is not empty.
@@ -45,9 +54,14 @@ def test_run():
 
     # Check that the output file has the correct header.
     with open(output_file, 'r') as f:
-        assert f.readline().strip() == "position,baf,log2_ratio,cnv_state"
+        assert f.readline().strip() == "chromosome\tposition\tb_allele_freq\tlog2_ratio\tcnv_state\tpopulation_freq"
 
-    # Check that the output file has the correct last line.
+    # Check that the output file has the correct SNP values in the last line.
     with open(output_file, 'r') as f:
-        last_line = f.readlines()[-1].strip()
-        assert last_line == "60389325,0.590909,-0.048852,1"
+        last_line = f.readlines()[-1].strip('\n')
+        print(last_line)
+        expected_line = "chr3\t60389325\t0.590909\t-0.048852\t6\t0.01"
+        print(expected_line)
+        assert last_line == expected_line
+
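Finally, the new VcfWriter is small enough to exercise on its own. A minimal usage sketch (path and record values illustrative only; signatures taken from vcf_writer.cpp above):

    #include "vcf_writer.h"

    int main() {
        VcfWriter writer("example.vcf");
        writer.writeHeader({
            "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">"
        });
        writer.writeRecord("chr1", 1000, ".", "A", "<DEL>", ".", ".",
                           "END=2000;SVTYPE=DEL;SVLEN=-1000", "GT:DP", {"./.:10"});
        writer.close();
        return 0;
    }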