diff --git a/.gitmodules b/.gitmodules
index 017fa01..7a8a2a7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,3 +6,6 @@
 	path = cld2
 	url = https://github.com/bitextor/cld2.git
 	ignore = dirty
+[submodule "fasttext"]
+	path = fasttext
+	url = https://github.com/kpuatfb/fastText.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e90b00..f3c3ed5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,9 +2,12 @@ cmake_minimum_required(VERSION 2.8.3)
 
 project(warc2text)
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_FLAGS_DEBUG "-g")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3")
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -DBOOST_LOG_DYN_LINK ${CMAKE_CXX_FLAGS}")
 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I /usr/local/opt/icu4c/include")
@@ -41,10 +44,12 @@ if (NOT SKIP_PREPROCESS_BUILD)
 endif()
 
 target_include_directories(warc2text_lib PUBLIC ${PREPROCESS_PATH})
+target_include_directories(warc2text_lib PRIVATE fasttext/src)
 
 #
 # add libcld2.so
 add_subdirectory(cld2)
+add_subdirectory(fasttext EXCLUDE_FROM_ALL)
 
 #
 # define executables
@@ -53,6 +58,7 @@ target_link_libraries(warc2text
     warc2text_lib
     ${Boost_LIBRARIES}
     cld2_full
+    fasttext-static
 )
 
 include(GNUInstallDirs)
diff --git a/README.md b/README.md
index 2aad126..93c2efe 100644
--- a/README.md
+++ b/README.md
@@ -28,11 +28,15 @@ mkdir build
 cd build
 cmake -DCMAKE_INSTALL_PREFIX=/your/prefix/path ..
 # cmake .. -DCMAKE_BUILD_TYPE=Debug # for debug
+# cmake .. -DICU_ROOT_DIR=$(brew --prefix icu4c)/lib # for macOS
 make -j
 make install
 ```
 
 ## Usage
+
+**Note:** for WARCs with many languages you might hit the open file limit quite quickly, so it is advisable to raise it, e.g. `ulimit -n 8192`.
+
 ```
 warc2text -o <output_folder> [ -f <output_files> ] [ --pdfpass <output_warc> ] [ --paragraph-identification ] [ --tag-filters <filters_file> ] <warc_file> ...
 ```
@@ -41,6 +45,8 @@
 * `--files`/`-f` list of output files separated by commas (and without `.gz`); `text` and `url` are always written, while `mime` and `html` are optional
 * `--pdfpass` WARC file where PDF records will be stored
 * `--paragraph-identification` print the paragraph identifier for each sentence extracted from the HTML
+* `--classifier` classifier to use: `cld2` or `fasttext`
+* `--fasttext-model` path to the FastText model for the `fasttext` classifier
 * `--tag-filters` file containing filters that are used to eliminate matching documents
 * `--invert-tag-filters` output only documents that match the filter
 * `--url-filters` file containing regular expressions that match urls of documents to eliminate
diff --git a/fasttext b/fasttext
new file mode 160000
index 0000000..ffee8e4
--- /dev/null
+++ b/fasttext
@@ -0,0 +1 @@
+Subproject commit ffee8e4d72a4d2ecd859575007877d12acbee5b3
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cfdd65b..6db72e0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,6 +26,8 @@ add_library(warc2text_lib
     record.cc
     html.cc
     lang.cc
+    lang_cld2.cc
+    lang_fasttext.cc
    util.cc
     bilangwriter.cc
     xh_scanner.cc
diff --git a/src/bilangwriter.cc b/src/bilangwriter.cc
index a6f1936..e1b71cb 100644
--- a/src/bilangwriter.cc
+++ b/src/bilangwriter.cc
@@ -1,5 +1,6 @@
 #include "bilangwriter.hh"
 #include "util.hh"
+#include "util/exception.hh"
 #include <cassert>
 #include <string.h>
@@ -50,6 +51,7 @@ namespace warc2text{
 
     void GzipWriter::open(const std::string& filename) {
         dest = std::fopen(filename.c_str(), "wb");
+        UTIL_THROW_IF(!dest, util::ErrnoException, "while creating " << filename);
     }
 
     void GzipWriter::write(const char* text, std::size_t size) {
@@ -108,39 +110,22 @@ namespace warc2text{
         return result;
     }
 
-    void BilangWriter::write(const Record& record, bool multilang, bool paragraph_identification) {
+    void BilangWriter::write(const Record& record, bool paragraph_identification) {
         std::string base64text;
         std::string base64html;
-        if (multilang) {
+        if (output_files.count("html") == 1)
+            util::encodeBase64(record.getPayload(), base64html);
 
-            if (output_files.count("html") == 1)
-                util::encodeBase64(record.getPayload(), base64html);
-
-            for (const auto& it : record.getTextByLangs()) {
-                std::string payload = it.second;
-
-                if (paragraph_identification) {
-                    payload = get_paragraph_id(payload);
-                }
-
-                util::encodeBase64(payload, base64text);
-                this->write(it.first, base64text, record.getURL(), record.getHTTPcontentType(), base64html);
-            }
-
-        } else {
-            std::string payload = record.getPlainText();
+        for (const auto& it : record.getTextByLangs()) {
+            std::string payload = it.second;
 
             if (paragraph_identification) {
                 payload = get_paragraph_id(payload);
             }
 
             util::encodeBase64(payload, base64text);
-
-            if (output_files.count("html") == 1)
-                util::encodeBase64(record.getPayload(), base64html);
-
-            this->write(record.getLanguage(), base64text, record.getURL(), record.getHTTPcontentType(), base64html);
+            this->write(it.first, base64text, record.getURL(), record.getHTTPcontentType(), base64html);
         }
     }
 
diff --git a/src/bilangwriter.hh b/src/bilangwriter.hh
index 3b520f6..1b7f83e 100644
--- a/src/bilangwriter.hh
+++ b/src/bilangwriter.hh
@@ -57,7 +57,7 @@ namespace warc2text {
                 output_files(output_files)
             {};
 
-            void write(const Record& record, bool multilang = false, bool paragraph_identification = false);
+            void write(const Record& record, bool paragraph_identification = false);
 
     };
 
diff --git a/src/lang.cc b/src/lang.cc
index ff57913..19e52f0 100644
--- a/src/lang.cc
+++ b/src/lang.cc
@@ -1,70 +1,7 @@
-#include "src/lang.hh"
+#include "lang.hh"
 
 namespace warc2text {
-    // hint = {content language code(s), tld, original encoding, CLD2::Language}
-    const CLD2::CLDHints NO_HINT = {nullptr, nullptr, CLD2::UNKNOWN_ENCODING, CLD2::UNKNOWN_LANGUAGE};
+
+const std::string LanguageDetector::kUnknownLanguageLabel = "unk";
 
-    bool detectLanguage(const std::string& text, std::unordered_map<std::string, std::string>& text_by_lang){
-        CLD2::Language langs[3] = {CLD2::UNKNOWN_LANGUAGE, CLD2::UNKNOWN_LANGUAGE, CLD2::UNKNOWN_LANGUAGE};
-        int percents[3] = {0,0,0};
-        double scores[3] = {0.0, 0.0, 0.0};
-
-        bool reliable = false;
-        int text_bytes;
-        int valid_prefix_bytes;
-
-        CLD2::ResultChunkVector chunks;
-
-        CLD2::ExtDetectLanguageSummaryCheckUTF8(text.data(), text.size(), true, &NO_HINT, 0, &langs[0], &percents[0], &scores[0], &chunks, &text_bytes, &reliable, &valid_prefix_bytes);
-
-        text_by_lang.clear();
-
-        if (not reliable) return reliable;
-
-        std::string* top1 = nullptr;
-        std::string* top2 = nullptr;
-        std::string* top3 = nullptr;
-
-        if (langs[0] != CLD2::UNKNOWN_LANGUAGE and percents[0] > 0) {
-            top1 = &text_by_lang[CLD2::LanguageCode(langs[0])];
-            top1->reserve(text.size() * (percents[0] + 1));
-        }
-
-        if (langs[1] != CLD2::UNKNOWN_LANGUAGE and percents[1] > 0) {
-            top2 = &text_by_lang[CLD2::LanguageCode(langs[1])];
-            top2->reserve(text.size() * (percents[1] + 1));
-        }
-
-        if (langs[2] != CLD2::UNKNOWN_LANGUAGE and percents[2] > 0) {
-            top3 = &text_by_lang[CLD2::LanguageCode(langs[2])];
-            top3->reserve(text.size() * (percents[2] + 1));
-        }
-
-        for (const CLD2::ResultChunk& chunk : chunks) {
-            std::string* ref = static_cast<CLD2::Language>(chunk.lang1) == langs[0] ? top1 :
-                               static_cast<CLD2::Language>(chunk.lang1) == langs[1] ? top2 :
-                               static_cast<CLD2::Language>(chunk.lang1) == langs[2] ? top3 : nullptr;
-            if (ref == nullptr) continue;
-            ref->append(text, chunk.offset, chunk.bytes);
-        }
-
-        // remove empty texts from text_by_lang
-        // apparently it is possible that the reported percentage is > 0, but the language does not appear in chunks
-        for (auto it = text_by_lang.cbegin(); it != text_by_lang.cend(); ){
-            if (it->second.size() == 0) text_by_lang.erase(it++);
-            else ++it;
-        }
-
-        // TODO: do something with the scores?
-
-        return reliable;
-    }
-
-    bool detectLanguage(const std::string& text, std::string& lang){
-        bool reliable = false;
-        int valid_prefix_bytes = 0;
-        CLD2::Language l = CLD2::DetectLanguageCheckUTF8(text.data(), text.size(), true, &reliable, &valid_prefix_bytes);
-        lang = CLD2::LanguageCode(l);
-        return reliable;
-    }
 } // namespace warc2text
diff --git a/src/lang.hh b/src/lang.hh
index a00a4bb..3552892 100644
--- a/src/lang.hh
+++ b/src/lang.hh
@@ -1,18 +1,49 @@
 #ifndef WARC2TEXT_LANG_HH
 #define WARC2TEXT_LANG_HH
 
+#include <memory>
 #include <string>
 #include <unordered_map>
-#include <unordered_set>
-#include "cld2/public/compact_lang_det.h"
-#include "cld2/public/encodings.h"
+
+namespace fasttext {
+class FastText;
+} // namespace fasttext
 
 namespace warc2text {
-    // detect language of plain text, return top 3 languages
-    bool detectLanguage(const std::string& text, std::unordered_map<std::string, std::string>& chunks);
-    // detect top language of plain text
-    bool detectLanguage(const std::string& text, std::string& lang);
-}
+class LanguageDetector {
+  public:
+    virtual ~LanguageDetector() {};
+
+    // detect language of plain text, return top languages
+    virtual void detect(const std::string& text, std::unordered_map<std::string, std::string>& chunks) const = 0;
+
+    // Label used for text (chunks) that cannot reliably be identified
+    static const std::string kUnknownLanguageLabel;
+};
+
+class FastTextDetector : public LanguageDetector {
+  public:
+    explicit FastTextDetector(const std::string &filename);
+    virtual ~FastTextDetector();
+    virtual void detect(const std::string& text, std::unordered_map<std::string, std::string>& chunks) const;
+
+  private:
+    std::unique_ptr<fasttext::FastText> classifier_;
+};
+
+class CLD2Detector : public LanguageDetector {
+public:
+    virtual void detect(const std::string& text, std::unordered_map<std::string, std::string>& chunks) const;
+    virtual ~CLD2Detector();
+};
+
+class CLD2MultiLangDetector : public LanguageDetector {
+public:
+    virtual void detect(const std::string& text, std::unordered_map<std::string, std::string>& chunks) const;
+    virtual ~CLD2MultiLangDetector();
+};
+
+} // namespace warc2text
 
 #endif
diff --git a/src/lang_cld2.cc b/src/lang_cld2.cc
new file mode 100644
index 0000000..30e8cd9
--- /dev/null
+++ b/src/lang_cld2.cc
@@ -0,0 +1,76 @@
+#include "src/lang.hh"
+#include "cld2/public/compact_lang_det.h"
+#include "cld2/public/encodings.h"
+
+namespace warc2text {
+    // hint = {content language code(s), tld, original encoding, CLD2::Language}
+    const CLD2::CLDHints NO_HINT = {nullptr, nullptr, CLD2::UNKNOWN_ENCODING, CLD2::UNKNOWN_LANGUAGE};
+
+    CLD2Detector::~CLD2Detector() {}
+
+    void CLD2Detector::detect(const std::string& text, std::unordered_map<std::string, std::string>& text_by_lang) const {
+        bool reliable = false;
+        int valid_prefix_bytes = 0;
+        CLD2::Language l = CLD2::DetectLanguageCheckUTF8(text.data(), text.size(), true, &reliable, &valid_prefix_bytes);
+        text_by_lang[reliable ? CLD2::LanguageCode(l) : kUnknownLanguageLabel] = text;
+    }
+
+    CLD2MultiLangDetector::~CLD2MultiLangDetector() {}
+
+    void CLD2MultiLangDetector::detect(const std::string& text, std::unordered_map<std::string, std::string>& text_by_lang) const {
+        CLD2::Language langs[3] = {CLD2::UNKNOWN_LANGUAGE, CLD2::UNKNOWN_LANGUAGE, CLD2::UNKNOWN_LANGUAGE};
+        int percents[3] = {0,0,0};
+        double scores[3] = {0.0, 0.0, 0.0};
+
+        bool reliable = false;
+        int text_bytes;
+        int valid_prefix_bytes;
+
+        CLD2::ResultChunkVector chunks;
+
+        CLD2::ExtDetectLanguageSummaryCheckUTF8(text.data(), text.size(), true, &NO_HINT, 0, &langs[0], &percents[0], &scores[0], &chunks, &text_bytes, &reliable, &valid_prefix_bytes);
+
+        text_by_lang.clear();
+
+        if (not reliable) {
+            text_by_lang[kUnknownLanguageLabel] = text;
+            return;
+        }
+
+        std::string* top1 = nullptr;
+        std::string* top2 = nullptr;
+        std::string* top3 = nullptr;
+
+        if (langs[0] != CLD2::UNKNOWN_LANGUAGE and percents[0] > 0) {
+            top1 = &text_by_lang[CLD2::LanguageCode(langs[0])];
+            top1->reserve(text.size() * (percents[0] + 1));
+        }
+
+        if (langs[1] != CLD2::UNKNOWN_LANGUAGE and percents[1] > 0) {
+            top2 = &text_by_lang[CLD2::LanguageCode(langs[1])];
+            top2->reserve(text.size() * (percents[1] + 1));
+        }
+
+        if (langs[2] != CLD2::UNKNOWN_LANGUAGE and percents[2] > 0) {
+            top3 = &text_by_lang[CLD2::LanguageCode(langs[2])];
+            top3->reserve(text.size() * (percents[2] + 1));
+        }
+
+        for (const CLD2::ResultChunk& chunk : chunks) {
+            std::string* ref = static_cast<CLD2::Language>(chunk.lang1) == langs[0] ? top1 :
+                               static_cast<CLD2::Language>(chunk.lang1) == langs[1] ? top2 :
+                               static_cast<CLD2::Language>(chunk.lang1) == langs[2] ? top3 : nullptr;
+            if (ref == nullptr) continue;
+            ref->append(text, chunk.offset, chunk.bytes);
+        }
+
+        // remove empty texts from text_by_lang
+        // apparently it is possible that the reported percentage is > 0, but the language does not appear in chunks
+        for (auto it = text_by_lang.cbegin(); it != text_by_lang.cend(); ){
+            if (it->second.size() == 0) text_by_lang.erase(it++);
+            else ++it;
+        }
+
+        // TODO: do something with the scores?
+    }
+} // namespace warc2text
diff --git a/src/lang_fasttext.cc b/src/lang_fasttext.cc
new file mode 100644
index 0000000..0f6f955
--- /dev/null
+++ b/src/lang_fasttext.cc
@@ -0,0 +1,40 @@
+#include "src/lang.hh"
+
+#include "fasttext.h"
+#include "util/exception.hh"
+
+#include <cstring>
+#include <vector>
+
+namespace warc2text {
+
+FastTextDetector::FastTextDetector(const std::string &filename)
+  : classifier_(new fasttext::FastText) {
+    classifier_->loadModel(filename);
+}
+
+FastTextDetector::~FastTextDetector() {}
+
+const char kLabelPrefix[] = "__label__";
+
+void FastTextDetector::detect(const std::string& text, std::unordered_map<std::string, std::string>& chunks) const {
+    const float kThreshold = 0.5f;
+    std::vector<std::string> words, labels;
+    classifier_->getDictionary()->getStringNoNewline(text, words, labels);
+    fasttext::Predictions predictions;
+    classifier_->predict(1, words, predictions, kThreshold);
+    if (predictions.empty()) {
+        chunks[kUnknownLanguageLabel] = text;
+        return;
+    }
+
+    // Labels look like __label__eng
+    std::string label = classifier_->getDictionary()->getLabel(predictions[0].second);
+    UTIL_THROW_IF2(strncmp(label.c_str(), kLabelPrefix, sizeof(kLabelPrefix) - 1), "Was expecting text classifier labels to begin with " << kLabelPrefix << " but they look like " << label);
+    label.erase(0, sizeof(kLabelPrefix) - 1);
+
+    // For better or worse, we're currently doing everything as one chunk.
+    chunks[label] = text;
+}
+
+} // namespace warc2text
diff --git a/src/record.cc b/src/record.cc
index b398f61..57d2b75 100644
--- a/src/record.cc
+++ b/src/record.cc
@@ -239,10 +239,8 @@ namespace warc2text {
         return text_by_langs;
     }
 
-    int Record::detectLanguage(bool multilang){
-        if (not multilang) return warc2text::detectLanguage(plaintext, language);
-
-        warc2text::detectLanguage(plaintext, text_by_langs);
+    int Record::detectLanguage(LanguageDetector const &detector){
+        detector.detect(plaintext, text_by_langs);
         return text_by_langs.size();
     }
 
diff --git a/src/record.hh b/src/record.hh
index b66a740..13d9c7e 100644
--- a/src/record.hh
+++ b/src/record.hh
@@ -38,7 +38,7 @@ namespace warc2text {
             int cleanPayload();
             int cleanPayload(const util::umap_tag_filters_regex& tagFilters);
-            int detectLanguage(bool multilang);
+            int detectLanguage(LanguageDetector const &detector);
 
             static std::string readZipPayload(const std::string& content_type, const std::string& payload);
             static std::string isPayloadZip(const std::string& content_type, const std::string& uri);
diff --git a/src/warcpreprocessor.cc b/src/warcpreprocessor.cc
index f87fef0..1d90079 100644
--- a/src/warcpreprocessor.cc
+++ b/src/warcpreprocessor.cc
@@ -1,4 +1,5 @@
 #include "warcpreprocessor.hh"
+#include "src/lang.hh"
 #include "zipreader.hh"
 #include "util/compress.hh"
 #include <boost/log/trivial.hpp>
@@ -8,10 +9,12 @@ namespace warc2text {
     const std::unordered_set<std::string> WARCPreprocessor::removeExtensions = {".jpg", ".jpeg", ".gif", ".png", ".css", ".js", ".mp3", ".mp4", ".flv", ".wmv", ".gz", ".zip", ".rar" };
 
-    WARCPreprocessor::WARCPreprocessor(const std::string& outputFolder, const std::unordered_set<std::string>& output_files,
+    WARCPreprocessor::WARCPreprocessor(const LanguageDetector &detector,
+                                       const std::string& outputFolder, const std::unordered_set<std::string>& output_files,
                                        const std::string& pdf_warc_filename, const std::string& tagFiltersFile, bool invert,
-                                       const std::string& urlFiltersFile, bool multilang, bool encodeURLs,
+                                       const std::string& urlFiltersFile, bool encodeURLs,
                                        bool paragraph_identification) :
+        detector(detector),
         writer(outputFolder, output_files),
         totalRecords(0),
         textRecords(0),
@@ -22,7 +25,6 @@ namespace warc2text {
         tagFilters(),
         pdf_warc_filename(pdf_warc_filename),
         invert(invert),
-        multilang(multilang),
         encodeURLs(encodeURLs),
         paragraph_identification(paragraph_identification) {
         if (!tagFiltersFile.empty())
@@ -146,21 +148,28 @@ namespace warc2text {
             ++textRecords;
             textBytes += record.getPlainText().size();
 
-            n_langs = record.detectLanguage(multilang);
-            if (n_langs == 1) {
-                langBytes += record.getPlainText().size();
-            } else if (n_langs > 1) {
+            record.detectLanguage(detector);
+            n_langs = 0;
+            for (auto const &chunk : record.getTextByLangs()) {
+                // Don't count the unknown language chunks
+                if (chunk.first == LanguageDetector::kUnknownLanguageLabel)
+                    continue;
+
+                langBytes += chunk.second.size();
+                ++n_langs;
+            }
+
+            if (n_langs > 1) {
                 BOOST_LOG_TRIVIAL(trace) << "Record " << record.getURL() << ": multiple (" << n_langs << ") languages detected";
-                for (auto it : record.getTextByLangs())
-                    langBytes += it.second.size();
+            } else if (n_langs == 1) {
+            } else {
                 BOOST_LOG_TRIVIAL(trace) << "Record " << record.getURL() << ": language not detected";
-                continue;
             }
 
             langRecords += n_langs;
 
-            writer.write(record, multilang, paragraph_identification);
+            writer.write(record, paragraph_identification);
         }
 
         pdf_warc_writer.close();
     }
diff --git a/src/warcpreprocessor.hh b/src/warcpreprocessor.hh
index 8e8548f..b332523 100644
--- a/src/warcpreprocessor.hh
+++ b/src/warcpreprocessor.hh
@@ -2,6 +2,7 @@
 #define WARC2TEXT_WARCPREPROCESSOR_HH
 
 #include "record.hh"
+#include "src/lang.hh"
 #include "warcreader.hh"
 #include "bilangwriter.hh"
 #include "util.hh"
@@ -24,6 +25,7 @@ namespace warc2text {
 
     class WARCPreprocessor {
         private:
+            LanguageDetector const &detector;
             BilangWriter writer;
             unsigned int totalRecords;
             unsigned int textRecords;
@@ -35,7 +37,6 @@ namespace warc2text {
             boost::regex urlFilter;
             std::string pdf_warc_filename;
             bool invert;
-            bool multilang;
             bool encodeURLs;
             bool paragraph_identification;
 
@@ -43,9 +44,10 @@ namespace warc2text {
             bool URLfilter(const std::string& url);
 
         public:
-            explicit WARCPreprocessor(const std::string& outputFolder, const std::unordered_set<std::string>& output_files = {},
+            explicit WARCPreprocessor(LanguageDetector const &detector,
+                                      const std::string& outputFolder, const std::unordered_set<std::string>& output_files = {},
                                       const std::string& pdf_warc_filename = "", const std::string& tagFiltersFile = "",
-                                      bool invert = false, const std::string& urlFiltersFile = "", bool multilang = false,
+                                      bool invert = false, const std::string& urlFiltersFile = "",
                                       bool encodeURLs = false, bool paragraph_identification = false);
 
             void process(const std::string &filename);
             void printStatistics() const;
diff --git a/warc2text_main.cc b/warc2text_main.cc
index dac24e4..1f165ac 100644
--- a/warc2text_main.cc
+++ b/warc2text_main.cc
@@ -8,6 +8,7 @@
 #include <boost/algorithm/string/split.hpp>
 #include <boost/log/trivial.hpp>
 #include <boost/program_options.hpp>
+#include "src/lang.hh"
 #include "src/warcpreprocessor.hh"
 
 using namespace warc2text;
@@ -25,6 +26,8 @@ struct Options {
     std::string url_filters_filename;
     bool multilang{};
     bool encodeURLs{};
+    std::string classifier;
+    std::string fasttext_model;
 };
 
 void parseArgs(int argc, char *argv[], Options& out) {
@@ -43,6 +46,8 @@ void parseArgs(int argc, char *argv[], Options& out) {
         ("verbose,v", po::bool_switch(&out.verbose)->default_value(false), "Verbosity level")
         ("silent,s", po::bool_switch(&out.silent)->default_value(false))
         ("multilang", po::bool_switch(&out.multilang)->default_value(false), "Detect multiple languages in a single record")
+        ("classifier", po::value(&out.classifier)->default_value("cld2"), "Language classifier: cld2 or fasttext (default cld2)")
+        ("fasttext-model", po::value(&out.fasttext_model)->default_value(""), "Path to fasttext model")
         ("encode-urls", po::bool_switch(&out.encodeURLs)->default_value(false), "Encode URLs obtained from WARC records");
 
     po::positional_options_description pd;
@@ -58,6 +63,8 @@ void parseArgs(int argc, char *argv[], Options& out) {
             "    -f <output_files>          List of output files separated by commas\n"
             "                               Default (mandatory): \"url,text\"\n"
             "                               Optional values: \"mime,html\"\n"
+            "    --classifier <name>        Classifier to use: cld2 or fasttext\n"
+            "    --fasttext-model <file>    Path to FastText model for fasttext classifier\n"
             "    --multilang                Detect multiple languages in documents (up to 3),\n"
             "                               write as many text records as languages detected\n"
             "    --tag-filters <file>       File containing html tag filters\n"
@@ -94,9 +101,29 @@ int main(int argc, char *argv[]) {
     boost::algorithm::split(files_list, options.files, [](char c) {return c == ',';});
     std::unordered_set<std::string> output_files(files_list.begin(), files_list.end());
 
+    std::unique_ptr<LanguageDetector> detector;
+
+    if (options.classifier == "cld2") {
+        if (options.multilang) {
+            detector.reset(new CLD2MultiLangDetector());
+        } else {
+            detector.reset(new CLD2Detector());
+        }
+    } else if (options.classifier == "fasttext") {
+        if (options.multilang) {
+            BOOST_LOG_TRIVIAL(error) << "FastText classifier doesn't do multilang at the moment";
+            abort();
+        } else {
+            detector.reset(new FastTextDetector(options.fasttext_model));
+        }
+    } else {
+        BOOST_LOG_TRIVIAL(error) << "Unsupported classifier option";
+        abort();
+    }
+
     std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
-    WARCPreprocessor warcpproc(options.output, output_files, options.pdf_warc_filename, options.tag_filters_filename,
-                               options.tag_filters_invert, options.url_filters_filename, options.multilang,
+    WARCPreprocessor warcpproc(*detector, options.output, output_files, options.pdf_warc_filename, options.tag_filters_filename,
+                               options.tag_filters_invert, options.url_filters_filename,
                                options.encodeURLs, options.paragraph_identification);
     for (const std::string& file : options.warcs){
         warcpproc.process(file);
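
Usage note: with this change the language classifier is selected at runtime. A minimal invocation of the new options might look like the following sketch (the model path is hypothetical; any FastText language-identification model whose labels follow the `__label__xxx` convention should work):

```
warc2text --classifier fasttext --fasttext-model /path/to/lid_model.bin -o output_dir input.warc.gz
```

With the default `--classifier cld2`, the `--multilang` flag selects the CLD2MultiLangDetector; the `fasttext` classifier currently rejects `--multilang` and always emits a single chunk per record.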