diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml
new file mode 100644
index 0000000..5e041c6
--- /dev/null
+++ b/.github/workflows/mkdocs.yml
@@ -0,0 +1,26 @@
+name: Build Docs with MkDocs
+on:
+  push:
+    branches:
+      - develop
+
+jobs:
+  build:
+    name: Deploy docs
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout develop
+        uses: actions/checkout@v2
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7
+      - name: Install dependencies
+        run: python -m pip install -e '.[docs]'
+      - name: Build
+        run: mkdocs build
+      - name: Deploy
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./site
\ No newline at end of file
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..7ba9c7b
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.7'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload dist/*
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index ef5ea86..9f4cd89 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -1,4 +1,4 @@
-name: unit-tests
+name: Unit Tests
 
 on: [push]
 
@@ -7,17 +7,21 @@ jobs:
 
     runs-on: ubuntu-latest
 
+    strategy:
+      matrix:
+        python-version: ["3.7"]
+
     steps:
-      - uses: actions/checkout@v1
-      - name: Set up Python 3.7
-        uses: actions/setup-python@v1
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
         with:
-          python-version: 3.7
+          python-version: "${{ matrix.python-version }}"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install torch
-          python -m pip install -e '.[dev]'
+          python -m pip install -e '.[dev, ctx]'
      - name: Test with pytest
        run: |
          python -m pytest tests
diff --git a/.gitignore b/.gitignore
index a4b0cc6..5149fdb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -640,3 +640,4 @@ GitHub.sublime-settings
 *.ptx
 *.cubin
 *.fatbin
+!/site/
diff --git a/README.md b/README.md
index 5365e39..eb43acc 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # TorchGlyph
 
-[![Actions Status](https://github.com/speedcell4/torchglyph/workflows/unit-tests/badge.svg)](https://github.com/speedcell4/torchglyph/actions)
+![Unit Tests](https://github.com/speedcell4/torchglyph/workflows/Unit%20Tests/badge.svg)
+![Upload Python Package](https://github.com/speedcell4/torchglyph/workflows/Upload%20Python%20Package/badge.svg)
 
 ## Requirements
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..f1a0311
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,59 @@
+# Welcome to TorchGlyph
+
+Data Processor Combinators for Natural Language Processing
+
+[![Actions Status](https://github.com/speedcell4/torchglyph/workflows/Unit%20Tests/badge.svg)](https://github.com/speedcell4/torchglyph/actions)
+
+## Installation
+
+Simply run this command in your terminal,
+
+```bash
+pip install torchglyph
+```
+
+## Quickstart
+
+The atomic data processor of TorchGlyph is called a `Proc`. The composition operator `+` is provided to build a complex `Proc` out of two simpler `Proc`s.
+
+```python
+ToLower() + ReplaceDigits(repl_token='<digits>')
+```
+
+Composed `Proc`s act like data `Pipe`lines, where raw textual data is processed incrementally. According to their stages, they are roughly categorized into four groups:
+
++ `pre` for processing *before* building the vocabulary;
++ `vocab` for building and updating the *vocabulary*;
++ `post` for processing *after* building the vocabulary;
++ `batch` for collating examples into *batches*.
+
+Defining the `Pipe`s of your dataset is the first step in building it. You can build a `Pipe` from scratch,
+
+```python
+class PackedIdxSeqPipe(Pipe):
+    def __init__(self, device, dtype=torch.long) -> None:
+        super(PackedIdxSeqPipe, self).__init__(
+            pre=None,
+            vocab=None,
+            post=ToTensor(dtype=dtype),
+            batch=PackSeq(enforce_sorted=False) + ToDevice(device=device),
+        )
+```
+
+or you can simply manipulate existing `Pipe`s by calling the `.with_` method.
+
+```python
+class PackedTokSeqPipe(PackedIdxSeqPipe):
+    def __init__(self, device, unk_token, special_tokens=(),
+                 threshold=THRESHOLD, dtype=torch.long) -> None:
+        super(PackedTokSeqPipe, self).__init__(device=device, dtype=dtype)
+        self.with_(
+            pre=UpdateCounter(),
+            vocab=[
+                BuildVocab(unk_token=unk_token, pad_token=None,
+                           special_tokens=special_tokens),
+                StatsVocab(threshold=threshold),
+            ],
+            post=Numbering() + ...,
+        )
+```
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..07f06a4
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,4 @@
+site_name: TorchGlyph
+nav:
+  - Home: index.md
+theme: alabaster
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 736ca61..eddd429 100644
--- a/setup.py
+++ b/setup.py
@@ -1,18 +1,16 @@
 from setuptools import setup, find_packages
 
-with open('README.md', 'r', encoding='utf-8') as fp:
-    long_description = fp.read()
+name = 'torchglyph'
 
 setup(
-    name='torchglyph',
-    version='0.1.0',
-    packages=find_packages(),
-    url='https://github.com/speedcell4/torchglyph',
+    name=name,
+    version='0.1.1',
+    packages=[package for package in find_packages() if package.startswith(name)],
+    url='https://speedcell4.github.io/torchglyph',
     license='MIT',
     author='speedcell4',
     author_email='speedcell4@gmail.com',
     description='Data Processor Combinators for Natural Language Processing',
-    long_description=long_description,
     install_requires=[
         'tqdm',
         'numpy',
@@ -23,5 +21,14 @@
         'pytest',
         'hypothesis',
     ],
+        'ctx': [
+            'transformers',
+            'allennlp',
+            'elmoformanylangs',
+        ],
+        'docs': [
+            'mkdocs',
+            'mkdocs-alabaster',
+        ]
     }
 )
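The Quickstart above composes `Proc`s with `+` and splices them into an existing `Pipe` via `.with_`. As a minimal sketch (not part of the patch, assuming the pipe defaults used by the datasets in this diff, e.g. `device=-1` and the `'<unk>'`/`'<digits>'` tokens):

```python
# Sketch: composing Procs as described in docs/index.md above.
from torchglyph.pipe import PackedTokSeqPipe
from torchglyph.proc import ReplaceDigits

word = PackedTokSeqPipe(device=-1, unk_token='<unk>').with_(
    # '+' composes Procs; '...' splices in the Procs already registered on the pipe
    pre=ReplaceDigits(repl_token='<digits>') + ...,
)
```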
diff --git a/tests/test_datasets/test_sequential_labeling.py b/tests/test_datasets/test_sequential_labeling.py
index d76f233..fab9eb7 100644
--- a/tests/test_datasets/test_sequential_labeling.py
+++ b/tests/test_datasets/test_sequential_labeling.py
@@ -1,14 +1,29 @@
-from torchglyph.datasets.sequential_labeling import CoNLL2000Chunking, CoNLL2003NER
+from torchglyph.datasets import CoNLL2000Chunking, CoNLL2003NER
+from torchglyph.datasets import SemEval2010T1NERCatalan, SemEval2010T1NERSpanish
 
 
-def test_conll2000_chunking() -> None:
-    train, test = CoNLL2000Chunking.new(batch_size=1, word_dim=None)
-    assert len(train) == 8936
-    assert len(test) == 2012
+def test_conll2000_chunking():
+    train, test = CoNLL2000Chunking.new(batch_size=1, word_dim=None, remove_missing=True)
+    assert len(train.dataset) == 8936
+    assert len(test.dataset) == 2012
 
 
-def test_conll2003_ner() -> None:
-    train, dev, test = CoNLL2003NER.new(batch_size=1, word_dim=None)
-    assert len(train) == 14987
-    assert len(dev) == 3466
-    assert len(test) == 3684
+def test_conll2003_ner():
+    train, dev, test = CoNLL2003NER.new(batch_size=1, word_dim=None, remove_missing=True)
+    assert len(train.dataset) == 14987
+    assert len(dev.dataset) == 3466
+    assert len(test.dataset) == 3684
+
+
+def test_semeval2010_catalan():
+    train, dev, test = SemEval2010T1NERCatalan.new(batch_size=1, word_dim=None, remove_missing=True)
+    assert len(train.dataset) == 8709
+    assert len(dev.dataset) == 1445
+    assert len(test.dataset) == 1698
+
+
+def test_semeval2010_spanish():
+    train, dev, test = SemEval2010T1NERSpanish.new(batch_size=1, word_dim=None, remove_missing=True)
+    assert len(train.dataset) == 9022
+    assert len(dev.dataset) == 1419
+    assert len(test.dataset) == 1705
diff --git a/tests/test_datasets/test_text_classification.py b/tests/test_datasets/test_text_classification.py
index 587e0c1..f130e80 100644
--- a/tests/test_datasets/test_text_classification.py
+++ b/tests/test_datasets/test_text_classification.py
@@ -2,6 +2,6 @@
 
 
 def test_agnews():
-    train, test = AgNews.new(batch_size=1, word_dim=None)
+    train, test = AgNews.new(batch_size=1, word_dim=None, remove_missing=True)
     assert len(train) == 120000
     assert len(test) == 7600
diff --git a/tests/test_nn/test_connection.py b/tests/test_nn/test_connection.py
new file mode 100644
index 0000000..05550a3
--- /dev/null
+++ b/tests/test_nn/test_connection.py
@@ -0,0 +1,44 @@
+import torch
+from hypothesis import given, strategies as st
+from torch import nn
+
+from torchglyph.nn.connection import ResNorm, DenseNorm, ReZero
+
+
+@given(
+    batch_sizes=st.lists(st.integers(1, 10), min_size=0, max_size=4),
+    input_dim=st.integers(1, 20),
+)
+def test_resnorm_shape_grad(batch_sizes, input_dim):
+    layer = ResNorm(input_dim=input_dim, sub_layer=nn.Linear(input_dim, input_dim))
+    x = torch.rand((*batch_sizes, input_dim), requires_grad=True)
+    y = layer(x)
+
+    assert y.size() == (*batch_sizes, layer.output_dim)
+    assert y.requires_grad
+
+
+@given(
+    batch_sizes=st.lists(st.integers(1, 10), min_size=0, max_size=4),
+    input_dim=st.integers(1, 20),
+)
+def test_densenorm_shape_grad(batch_sizes, input_dim):
+    layer = DenseNorm(input_dim=input_dim, sub_layer=nn.Linear(input_dim, input_dim))
+    x = torch.rand((*batch_sizes, input_dim), requires_grad=True)
+    y = layer(x)
+
+    assert y.size() == (*batch_sizes, layer.output_dim)
+    assert y.requires_grad
+
+
+@given(
+    batch_sizes=st.lists(st.integers(1, 10), min_size=0, max_size=4),
+    input_dim=st.integers(1, 20),
+)
+def test_rezero_shape_grad(batch_sizes, input_dim):
+    layer = ReZero(input_dim=input_dim, sub_layer=nn.Linear(input_dim, input_dim))
+    x = torch.rand((*batch_sizes, input_dim), requires_grad=True)
+    y = layer(x)
+
+    assert y.size() == (*batch_sizes, layer.output_dim)
+    assert y.requires_grad
diff --git a/torchglyph/__init__.py b/torchglyph/__init__.py
index f1f93f6..3c126bf 100644
--- a/torchglyph/__init__.py
+++ b/torchglyph/__init__.py
@@ -4,7 +4,7 @@
 import torch
 from torch.nn.utils.rnn import PackedSequence
 
-data_path = Path.home() / '.torchglyph'
+data_path = (Path.home() / '.torchglyph').expanduser().absolute()
 if not data_path.exists():
     data_path.mkdir(parents=True, exist_ok=True)
diff --git a/torchglyph/dataset.py b/torchglyph/dataset.py
index e451fbe..e7f11d3 100644
--- a/torchglyph/dataset.py
+++ b/torchglyph/dataset.py
@@ -2,7 +2,7 @@
 import uuid
 from collections import namedtuple
 from pathlib import Path
-from typing import Iterable, Any, TextIO
+from typing import Iterable, Any, TextIO, Optional
 from typing import Union, List, Type, Tuple, NamedTuple, Dict
 
 from torch.utils import data
@@ -14,6 +14,7 @@
 
 
 class Dataset(data.Dataset):
+    name: Optional[str]
     urls: List[Union[Tuple[str, ...]]]
 
     def __init__(self, pipes: List[Dict[str, Pipe]], **load_kwargs) -> None:
@@ -62,14 +63,16 @@ def collate_fn(self, batch: List[NamedTuple]) -> NamedTuple:
 
     @classmethod
     def paths(cls, root: Path = data_path) -> Tuple[Path, ...]:
+        root = root / getattr(cls, 'name', cls.__name__).lower()
+
         ans = []
         for url, name, *filenames in cls.urls:
             if len(filenames) == 0:
                 filenames = [name]
-            if any(not (root / cls.__name__.lower() / n).exists() for n in filenames):
-                download_and_unzip(url, root / cls.__name__.lower() / name)
-            for n in filenames:
-                ans.append(root / cls.__name__.lower() / n)
+            if any(not (root / filename).exists() for filename in filenames):
+                download_and_unzip(url, root / name)
+            for filename in filenames:
+                ans.append(root / filename)
 
         return tuple(ans)
diff --git a/torchglyph/datasets/__init__.py b/torchglyph/datasets/__init__.py
index b5b186c..0c2ab4f 100644
--- a/torchglyph/datasets/__init__.py
+++ b/torchglyph/datasets/__init__.py
@@ -1,2 +1,3 @@
 from torchglyph.datasets.sequential_labeling import CoNLL2000Chunking, CoNLL2003NER
+from torchglyph.datasets.sequential_labeling import SemEval2010T1NERCatalan, SemEval2010T1NERSpanish
 from torchglyph.datasets.text_classification import AgNews
diff --git a/torchglyph/datasets/sequential_labeling.py b/torchglyph/datasets/sequential_labeling.py
index f91ed5c..edaa85a 100644
--- a/torchglyph/datasets/sequential_labeling.py
+++ b/torchglyph/datasets/sequential_labeling.py
@@ -1,14 +1,19 @@
 import logging
 from pathlib import Path
-from typing import Iterable, List, Any, Tuple, Optional, NamedTuple, TextIO
+from typing import Iterable, Any
+from typing import Optional, List, Tuple, NamedTuple
+from typing import TextIO
 
 from tqdm import tqdm
 
 from torchglyph.dataset import Dataset, DataLoader
 from torchglyph.formats import conllx
-from torchglyph.pipe import PackedTokSeqPipe, SeqLengthTensorPipe, RawPipe, PackedTokPtrSeqPipe
+from torchglyph.pipe import PackedTokSeqPipe, SeqLengthTensorPipe, RawPipe, PackedTokPtrSeqPipe, PackedPtrSeqPipe, \
+    ToSubList, UpdateCounter, Lift
 from torchglyph.pipe import PaddedTokSeqPipe, PackedTokBlockPipe
-from torchglyph.proc import ToLower, ReplaceDigits, Identity, LoadGlove
+from torchglyph.proc import ReplaceDigits, Identity, LoadGlove, LoadFastText, Prepend
+
+logger = logging.getLogger(__name__)
 
 
 class CoNLL2000Chunking(Dataset):
@@ -20,7 +25,7 @@ class CoNLL2000Chunking(Dataset):
     @classmethod
     def load(cls, path: Path) -> Iterable[List[Any]]:
         for sent in tqdm(conllx.load(path, sep=' '), desc=f'reading {path}'):
-            word, pos, chunk = list(zip(*sent))
+            word, pos, chunk = map(list, zip(*sent))
             yield [word, pos, chunk]
 
     def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[List[int]], *args, **kwargs) -> None:
@@ -31,10 +36,15 @@ def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[List[int]], *args
         conllx.dump(zip(raw_word, raw_pos, raw_chunk, pred_chunk), fp, sep=' ')
 
     @classmethod
-    def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tuple[DataLoader, ...]:
+    def new(cls, batch_size: int, word_dim: Optional[int],
+            remove_missing: bool, device: int = -1) -> Tuple[DataLoader, ...]:
+        if word_dim is not None:
+            vectors = LoadGlove(name='6B', dim=word_dim, remove_missing=remove_missing)
+        else:
+            vectors = Identity()
         word = PackedTokSeqPipe(device=device, unk_token='<unk>').with_(
-            pre=ToLower() + ReplaceDigits(repl_token='<digits>') + ...,
-            vocab=... + (Identity() if word_dim is None else LoadGlove('6B', word_dim, str.lower)),
+            pre=ReplaceDigits(repl_token='<digits>') + ...,
+            vocab=... + vectors,
         )
         length = SeqLengthTensorPipe(device=device)
         char = PackedTokBlockPipe(device=device, unk_token='<unk>')
@@ -53,7 +63,7 @@ def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tupl
         test = cls(path=test, pipes=pipes)
 
         for name, pipe in train.pipes.items():
-            logging.info(f'{name} => {pipe}')
+            logger.info(f'{name} => {pipe}')
 
         word.build_vocab(train, test, name='word')
         char.build_vocab(train, test, name='char')
@@ -76,7 +86,7 @@ class CoNLL2003NER(Dataset):
     @classmethod
     def load(cls, path: Path) -> Iterable[List[Any]]:
         for sent in tqdm(conllx.load(path, sep=' '), desc=f'reading {path}', unit=' sents'):
-            word, pos, chunk, ner = list(zip(*sent))
+            word, pos, chunk, ner = map(list, zip(*sent))
             yield [word, pos, chunk, ner]
 
     def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[List[int]], *args, **kwargs) -> None:
@@ -87,10 +97,15 @@ def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[List[int]], *args
         conllx.dump(zip(raw_word, raw_pos, raw_chunk, raw_ner, pred_ner), fp, sep=' ')
 
     @classmethod
-    def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tuple[DataLoader, ...]:
+    def new(cls, batch_size: int, word_dim: Optional[int],
+            remove_missing: bool, device: int = -1) -> Tuple[DataLoader, ...]:
+        if word_dim is not None:
+            vectors = LoadGlove(name='6B', dim=word_dim, remove_missing=remove_missing)
+        else:
+            vectors = Identity()
         word = PackedTokSeqPipe(device=device, unk_token='<unk>').with_(
-            pre=ToLower() + ReplaceDigits(repl_token='<digits>') + ...,
-            vocab=... + (Identity() if word_dim is None else LoadGlove(name='6B', dim=word_dim)),
+            pre=ReplaceDigits(repl_token='<digits>') + ...,
+            vocab=... + vectors,
        )
        length = SeqLengthTensorPipe(device=device)
        char = PackedTokBlockPipe(device=device, unk_token='<unk>')
@@ -112,7 +127,7 @@ def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tupl
         test = cls(path=test, pipes=pipes)
 
         for name, pipe in train.pipes.items():
-            logging.info(f'{name} => {pipe}')
+            logger.info(f'{name} => {pipe}')
 
         word.build_vocab(train, dev, test, name='word')
         char.build_vocab(train, dev, test, name='char')
@@ -124,3 +139,88 @@
         (train, dev, test),
         batch_size=batch_size, shuffle=True,
     )
+
+
+class SemEval2010T1NER(Dataset):
+    lang: str
+
+    @classmethod
+    def load(cls, path: Path, **kwargs) -> Iterable[Any]:
+        for sent in tqdm(conllx.load(path, sep='\t'), desc=f'reading {path}', unit=' sentences'):
+            _, word, _, pos, _, _, head, drel, _, _, ner = map(list, zip(*sent))
+            yield [word, pos, [int(h) for h in head], drel, ner]
+
+    def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[Any], *args, **kwargs) -> None:
+        ner_vocab = self.pipes['ner'].vocab.itos
+        for raw_word, raw_pos, raw_ner, pred in \
+                zip(batch.raw_word, batch.raw_pos, batch.raw_ner, prediction):
+            assert len(raw_word) == len(raw_pos) == len(raw_ner) == len(pred)
+
+            pred_ner = [ner_vocab[p] for p in pred]
+            conllx.dump(zip(raw_word, raw_pos, raw_ner, pred_ner), fp, sep=' ')
+
+    @classmethod
+    def new(cls, batch_size: int, word_dim: Optional[int],
+            remove_missing: bool, device: int = -1) -> Tuple['DataLoader', ...]:
+        if word_dim is not None:
+            vectors = LoadFastText(str.lower, lang=cls.lang, remove_missing=remove_missing)
+        else:
+            vectors = Identity()
+        word = PackedTokSeqPipe(device=device, unk_token='<unk>').with_(
+            pre=Prepend('<root>', 1) + ReplaceDigits(repl_token='<digits>') + ...,
+            vocab=... + vectors,
+        )
+        length = SeqLengthTensorPipe(device=device).with_(pre=Prepend('<root>', 1) + ...)
+        char = PackedTokBlockPipe(device=device, unk_token='<unk>').with_(
+            pre=ToSubList() + Lift(Prepend('<root>', 1)) + Lift(UpdateCounter()),
+        )
+        word_ptr = PackedTokPtrSeqPipe(device=device, reverse=False).with_(pre=Prepend(0, 1) + ...)
+        pos = PackedTokSeqPipe(device=device, unk_token='<unk>').with_(pre=Prepend('<root>', 1) + ...)
+        head = PackedPtrSeqPipe(device=device).with_(pre=Prepend(0, 1) + ...)
+        drel = PackedTokSeqPipe(device=device, unk_token='root').with_(pre=Prepend('<root>', 1) + ...)
+        ner = PaddedTokSeqPipe(device=device, unk_token='O', pad_token='O')
+
+        pipes = [
+            dict(word=word, length=length, char=char, word_ptr=word_ptr, raw_word=RawPipe()),
+            dict(pos=pos, raw_pos=RawPipe()),
+            dict(head=head),
+            dict(drel=drel, raw_drel=RawPipe()),
+            dict(ner=ner, raw_ner=RawPipe()),
+        ]
+
+        train, dev, test = cls.paths()
+        train = cls(path=train, pipes=pipes)
+        dev = cls(path=dev, pipes=pipes)
+        test = cls(path=test, pipes=pipes)
+
+        for name, pipe in train.pipes.items():
+            logger.info(f'{name} => {pipe}')
+
+        word.build_vocab(train, dev, test, name='word')
+        char.build_vocab(train, dev, test, name='char')
+        pos.build_vocab(train, name='pos')
+        drel.build_vocab(train, name='drel')
+        ner.build_vocab(train, name='ner')
+
+        return DataLoader.new(
+            (train, dev, test),
+            batch_size=batch_size, shuffle=True,
+        )
+
+
+class SemEval2010T1NERCatalan(SemEval2010T1NER):
+    urls = [
+        ('https://www.dropbox.com/s/nqedh3zmk5k80n7/train.sd.conllx?dl=1', 'train.sd.conllx'),
+        ('https://www.dropbox.com/s/027umbuks3njwry/dev.sd.conllx?dl=1', 'dev.sd.conllx'),
+        ('https://www.dropbox.com/s/ldwn6z1xl5vki4y/test.sd.conllx?dl=1', 'test.sd.conllx'),
+    ]
+    lang = 'ca'
+
+
+class SemEval2010T1NERSpanish(SemEval2010T1NER):
+    urls = [
+        ('https://www.dropbox.com/s/lyxgvc161ai20v0/train.sd.conllx?dl=1', 'train.sd.conllx'),
+        ('https://www.dropbox.com/s/8tmbi7ki6ctasez/dev.sd.conllx?dl=1', 'dev.sd.conllx'),
+        ('https://www.dropbox.com/s/nnj94hdmlq3jjm8/test.sd.conllx?dl=1', 'test.sd.conllx'),
+    ]
+    lang = 'es'
diff --git a/torchglyph/datasets/text_classification.py b/torchglyph/datasets/text_classification.py
index 013709f..a228479 100644
--- a/torchglyph/datasets/text_classification.py
+++ b/torchglyph/datasets/text_classification.py
@@ -12,6 +12,8 @@
 from torchglyph.pipe import PackedTokSeqPipe, TokTensorPipe, RawPipe
 from torchglyph.proc import Identity, LoadGlove
 
+logger = logging.getLogger(__name__)
+
 
 class AgNews(Dataset):
     urls = [
@@ -39,9 +41,14 @@ def dump(self, fp: TextIO, batch: NamedTuple, prediction: List[int], *args, **kw
         csv.dump((' '.join(raw_title), ' '.join(raw_text), raw_target, vocab.itos[pred]), fp)
 
     @classmethod
-    def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tuple['DataLoader', ...]:
+    def new(cls, batch_size: int, word_dim: Optional[int],
+            remove_missing: bool, device: int = -1) -> Tuple['DataLoader', ...]:
+        if word_dim is not None:
+            vectors = LoadGlove(name='6B', dim=word_dim, remove_missing=remove_missing)
+        else:
+            vectors = Identity()
         word = PackedTokSeqPipe(device=device, unk_token='<unk>').with_(
-            vocab=... + (Identity() if word_dim is None else LoadGlove(name='6B', dim=word_dim)),
+            vocab=... + vectors,
        )
        target = TokTensorPipe(device=device, unk_token=None)
@@ -56,7 +63,7 @@ def new(cls, batch_size: int, word_dim: Optional[int], device: int = -1) -> Tupl
         test = cls(path=test, target_vocab=target_vocab, pipes=pipes)
 
         for name, pipe in train.pipes.items():
-            logging.info(f'{name} => {pipe}')
+            logger.info(f'{name} => {pipe}')
 
         word.build_vocab(train, test, name='word')
         target.build_vocab(train, test, name='target')
diff --git a/torchglyph/functional.py b/torchglyph/functional.py
index ce2ea73..5462117 100644
--- a/torchglyph/functional.py
+++ b/torchglyph/functional.py
@@ -1,9 +1,12 @@
 import functools
-from typing import Union, Tuple, Dict, Any
+from typing import Any
+from typing import Union, Tuple, Dict
 
 import torch
 from torch import Tensor
-from torch.nn.utils.rnn import PackedSequence
+from torch import nn
+from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence
+from torch.nn.utils.rnn import pack_padded_sequence
 
 
 def support_pack(fn):
@@ -17,9 +20,21 @@ def wrap(x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, Pac
     return wrap
 
 
-class SupportPack(type):
+class SupportPack(nn.Module):
+    def __init__(self, module: nn.Module) -> None:
+        super(SupportPack, self).__init__()
+        self.module = module
+
+    def __repr__(self) -> str:
+        return f'Packed{self.module.__repr__()}'
+
+    def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, PackedSequence]:
+        return support_pack(self.module)(x, *args, **kwargs)
+
+
+class SupportPackMeta(type):
     def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]):
-        forward_fn = bases[0].forward
+        forward_fn = attrs.get('forward', bases[0].forward)
 
         @functools.wraps(forward_fn)
         def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, PackedSequence]:
@@ -29,3 +44,33 @@ def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Te
             return x._replace(data=forward_fn(self, x.data, *args, **kwargs))
 
         return type(name, bases, {**attrs, 'forward': forward})
+
+
+def head_pack(pack: PackedSequence) -> Tensor:
+    return pack.data[:pack.batch_sizes[0].item()]
+
+
+def prepend_pack(pack: PackedSequence, value: Union[int, bool, float, Tensor]) -> PackedSequence:
+    if not torch.is_tensor(value):
+        value = torch.full_like(head_pack(pack), fill_value=value)
+    return pack._replace(
+        data=torch.cat([value, pack.data], dim=0),
+        batch_sizes=torch.cat([pack.batch_sizes[:1], pack.batch_sizes], dim=0),
+    )
+
+
+def tail_pack(pack: PackedSequence) -> Tensor:
+    data, lengths = pad_packed_sequence(pack, batch_first=True)  # type: (Tensor, Tensor)
+    indices = torch.arange(lengths.size(0), dtype=torch.long, device=data.device)
+    return data[indices, lengths - 1]
+
+
+def append_pack(pack: PackedSequence, value: Union[int, bool, float, Tensor]) -> PackedSequence:
+    if not torch.is_tensor(value):
+        value = torch.full_like(head_pack(pack), fill_value=value)
+    data, lengths = pad_packed_sequence(pack, batch_first=True)  # type: (Tensor, Tensor)
+    indices = torch.arange(lengths.size(0), dtype=torch.long, device=data.device)
+    return pack_padded_sequence(
+        torch.cat([data, value[:, None]], dim=1).index_put((indices, lengths), value),
+        lengths + 1, batch_first=True, enforce_sorted=False,
+    )
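The new `PackedSequence` helpers are easiest to see on a tiny batch. A quick sanity check (not part of the patch):

```python
# head_pack reads the first timestep of every sequence; prepend_pack adds one.
import torch
from torch.nn.utils.rnn import pack_sequence

from torchglyph.functional import head_pack, prepend_pack, tail_pack

pack = pack_sequence([torch.tensor([1, 2, 3]), torch.tensor([4, 5])], enforce_sorted=False)
pack = prepend_pack(pack, 0)  # sequences become [0, 1, 2, 3] and [0, 4, 5]
assert head_pack(pack).tolist() == [0, 0]
assert tail_pack(pack).tolist() == [3, 5]
```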
diff --git a/torchglyph/io.py b/torchglyph/io.py
index fc018d1..e2b7025 100644
--- a/torchglyph/io.py
+++ b/torchglyph/io.py
@@ -1,30 +1,32 @@
 import gzip
 import logging
 import os
+import re
 import shutil
 import tarfile
 import zipfile
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Union, TextIO
+from typing import Union, TextIO, Pattern
 from urllib.request import urlretrieve
 
 from tqdm import tqdm
 
+logger = logging.getLogger(__name__)
+
 IO = Union[str, Path, TextIO]
 
 
 @contextmanager
 def open_io(f: IO, mode: str, encoding: str):
-    if isinstance(f, (str, Path)):
-        fp = open(f, mode=mode, encoding=encoding)
-    else:
-        fp = f
     try:
-        yield fp
+        if isinstance(f, (str, Path)):
+            with open(f, mode=mode, encoding=encoding) as fp:
+                yield fp
+        else:
+            yield f
     finally:
-        if isinstance(f, Path):
-            fp.close()
+        pass
 
 
 # copied and modified from https://github.com/pytorch/text
@@ -50,7 +52,10 @@ def inner(b=1, bsize=1, tsize=None) -> None:
 
 
 # copied and modified from https://github.com/pytorch/text
-def download_and_unzip(url: str, dest: Path) -> None:
+def download_and_unzip(url: str, dest: Path) -> Path:
+    if dest.exists():
+        return dest
+
     if not dest.parent.exists():
         dest.parent.mkdir(parents=True, exist_ok=True)
 
@@ -62,14 +67,23 @@ def download_and_unzip(url: str, dest: Path) -> None:
         raise err
 
     if dest.suffix == '.zip':
-        logging.info(f'extracting {dest}')
+        logger.info(f'extracting {dest}')
         with zipfile.ZipFile(dest, "r") as fp:
             fp.extractall(path=dest.parent)
-    elif dest.suffixes[:-2] == ['.tar', '.gz']:
-        logging.info(f'extracting {dest}')
+    elif dest.suffixes[-2:] == ['.tar', '.gz']:
+        logger.info(f'extracting {dest}')
         with tarfile.open(dest, 'r:gz') as fp:
             fp.extractall(path=dest.parent)
     elif dest.suffix == '.gz':
-        with gzip.open(dest, mode='rb') as fsrc:
-            with dest.with_suffix('').open(mode='wb') as fdst:
-                shutil.copyfileobj(fsrc, fdst)
+        logger.info(f'extracting {dest}')
+        with gzip.open(dest, mode='rb') as fs:
+            with dest.with_suffix('').open(mode='wb') as fd:
+                shutil.copyfileobj(fs, fd)
+
+    return dest
+
+
+def toggle_loggers(pattern: Union[str, Pattern], enable: bool) -> None:
+    for name in logging.root.manager.loggerDict:  # type: str
+        if re.match(pattern, name) is not None:
+            logging.getLogger(name).disabled = not enable
diff --git a/torchglyph/nn/__init__.py b/torchglyph/nn/__init__.py
index a879623..249b291 100644
--- a/torchglyph/nn/__init__.py
+++ b/torchglyph/nn/__init__.py
@@ -1,2 +1,2 @@
-from torchglyph.nn.embedding import *
-from torchglyph.nn.rnn import *
+from torchglyph.nn.embedding import TokEmbedding, SubLstmEmbedding
+from torchglyph.nn.rnn import ContextualLSTM
diff --git a/torchglyph/nn/connection.py b/torchglyph/nn/connection.py
new file mode 100644
index 0000000..81b2df2
--- /dev/null
+++ b/torchglyph/nn/connection.py
@@ -0,0 +1,81 @@
+from typing import Union
+
+import torch
+from torch import Tensor
+from torch import nn
+from torch.nn.utils.rnn import PackedSequence
+
+
+class ResNorm(nn.Module):
+    """
+    https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf
+    """
+
+    def __init__(self, input_dim: int, *, sub_layer: nn.Module) -> None:
+        super(ResNorm, self).__init__()
+        self.input_dim = input_dim
+        self.output_dim = input_dim
+
+        self.sub_layer = sub_layer
+        self.layer_norm = nn.LayerNorm(input_dim)
+
+    def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, PackedSequence]:
+        z = self.sub_layer(x, *args, **kwargs)
+        if torch.is_tensor(z):
+            return self.layer_norm(x + z)
+        elif isinstance(z, PackedSequence):
+            return z._replace(data=self.layer_norm(x.data + z.data))
+        else:
+            raise NotImplementedError
+
+
+class DenseNorm(nn.Module):
+    """
+    http://openaccess.thecvf.com/content_cvpr_2017/papers/Huang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf
+    """
+
+    def __init__(self, input_dim: int, *, sub_layer: nn.Module) -> None:
+        super(DenseNorm, self).__init__()
+        self.input_dim = input_dim
+        self.output_dim = input_dim * 2
+
+        self.sub_layer = sub_layer
+        self.layer_norm = nn.LayerNorm(input_dim * 2)
+
+    def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, PackedSequence]:
+        z = self.sub_layer(x, *args, **kwargs)
+        if torch.is_tensor(z):
+            return self.layer_norm(torch.cat([x, z], dim=-1))
+        elif isinstance(z, PackedSequence):
+            return z._replace(data=self.layer_norm(torch.cat([x.data, z.data], dim=-1)))
+        else:
+            raise NotImplementedError
+
+
+class ReZero(nn.Module):
+    """
+    https://arxiv.org/pdf/2003.04887.pdf
+    """
+
+    def __init__(self, input_dim: int, *, sub_layer: nn.Module) -> None:
+        super(ReZero, self).__init__()
+        self.input_dim = input_dim
+        self.output_dim = input_dim
+
+        self.sub_layer = sub_layer
+        self.scale = nn.Parameter(
+            torch.tensor([0.], dtype=torch.float32),
+            requires_grad=True,
+        )
+
+    def extra_repr(self) -> str:
+        return f'(scale): Parameter({self.scale.data})'
+
+    def forward(self, x: Union[Tensor, PackedSequence], *args, **kwargs) -> Union[Tensor, PackedSequence]:
+        z = self.sub_layer(x, *args, **kwargs)
+        if torch.is_tensor(z):
+            return x + z * self.scale
+        elif isinstance(z, PackedSequence):
+            return z._replace(data=x.data + z.data * self.scale)
+        else:
+            raise NotImplementedError
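Usage sketch for the new connection wrappers (not part of the patch): because `ReZero` initializes its residual scale to zero, a freshly constructed block is the identity map.

```python
import torch
from torch import nn

from torchglyph.nn.connection import ReZero

layer = ReZero(input_dim=8, sub_layer=nn.Linear(8, 8))
x = torch.rand(4, 8)
assert torch.equal(layer(x), x)  # scale == 0 at initialization
assert layer.output_dim == 8     # ResNorm/ReZero keep the width; DenseNorm doubles it
```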
diff --git a/torchglyph/nn/contextual.py b/torchglyph/nn/contextual.py
new file mode 100644
index 0000000..9ec87c4
--- /dev/null
+++ b/torchglyph/nn/contextual.py
@@ -0,0 +1,222 @@
+import json
+import logging
+from pathlib import Path
+from typing import List
+from typing import Union
+
+from allennlp.modules import Elmo as AllenELMo
+from elmoformanylangs.elmo import read_list, create_batches, recover
+from elmoformanylangs.frontend import Model
+from elmoformanylangs.modules.embedding_layer import EmbeddingLayer
+from torch import Tensor
+from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence
+from torch.nn.utils.rnn import pack_sequence
+
+from torchglyph import data_path
+from torchglyph.io import download_and_unzip, toggle_loggers
+
+toggle_loggers('allennlp', False)
+toggle_loggers('elmoformanylangs', False)
+
+logger = logging.getLogger(__name__)
+
+
+class ELMoModel(AllenELMo):
+    root = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/'
+    name = {
+        'small': '2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_',
+        'medium': '2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_',
+        'original': '2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_',
+        '5.5B': '2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_',
+    }
+
+    def __init__(self, *, options_file: str, weight_file: str, pack_output, **kwargs) -> None:
+        logger.info(f'loading pretrained {self.__class__.__name__} from {weight_file}')
+
+        super(ELMoModel, self).__init__(
+            options_file=options_file, weight_file=weight_file, **kwargs,
+        )
+
+        self.pack_output = pack_output
+        self.encoding_dim = self.get_output_dim()
+
+    @classmethod
+    def fetch(cls, weight: str):
+        elmo_path = data_path / cls.__name__.lower()
+        options_file = download_and_unzip(
+            url=cls.root + (cls.name[weight] + 'options.json'),
+            dest=elmo_path / (cls.name[weight] + 'options.json'),
+        )
+        weight_file = download_and_unzip(
+            url=cls.root + (cls.name[weight] + 'weights.hdf5'),
+            dest=elmo_path / (cls.name[weight] + 'weights.hdf5'),
+        )
+        return options_file, weight_file
+
+    @classmethod
+    def from_pretrained(cls, weight: str, pack_output: bool = True,
+                        num_output_representations: int = 1,
+                        dropout: float = 0., freeze: bool = True) -> 'ELMoModel':
+        options_file, weight_file = cls.fetch(weight=weight)
+        return cls(
+            options_file=str(options_file), weight_file=str(weight_file),
+            num_output_representations=num_output_representations,
+            requires_grad=not freeze, dropout=dropout, pack_output=pack_output,
+        )
+
+    def extra_repr(self) -> str:
+        args = [
+            f'encoding_dim={self.encoding_dim}',
+            f'num_layers={self._elmo_lstm.num_layers}',
+            f'dropout={self._dropout.p}',
+        ]
+        if not self._elmo_lstm._requires_grad:
+            args.append('frozen')
+        return ', '.join(args)
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}({self.extra_repr()})'
+
+    def forward(self, batch: Tensor, word_inputs: Tensor = None) -> Union[Tensor, PackedSequence]:
+        outputs = super(ELMoModel, self).forward(batch, word_inputs=word_inputs)
+        elmo_representations, *_ = outputs['elmo_representations']
+        if not self.pack_output:
+            return elmo_representations
+        else:
+            lengths = outputs['mask'].long().sum(dim=-1)
+            return pack_padded_sequence(
+                elmo_representations, lengths,
+                batch_first=True, enforce_sorted=False,
+            )
+
+
+class ELMoForManyLanguages(Model):
+    root = 'http://vectors.nlpl.eu/repository/11/'
+    configs = [
+        'https://raw.githubusercontent.com/HIT-SCIR/ELMoForManyLangs/master/configs/cnn_0_100_512_4096_sample.json',
+        'https://raw.githubusercontent.com/HIT-SCIR/ELMoForManyLangs/master/configs/cnn_50_100_512_4096_sample.json',
+    ]
+    names = {
+        'ca': '138',
+        'es': '145',
+        'zh': '179',
+    }
+
+    def __init__(self, *, options_file: Path, weight_file: Path, pack_output: bool, requires_grad: bool) -> None:
+        with options_file.open('r', encoding='utf-8') as fp:
+            config = json.load(fp)
+
+        if config['token_embedder']['char_dim'] > 0:
+            char_lexicon = {}
+            with (weight_file / 'char.dic').open('r', encoding='utf-8') as fp:
+                for raw in fp:
+                    tokens = raw.strip().split('\t')
+                    if len(tokens) == 1:
+                        tokens.insert(0, '\u3000')
+                    token, index = tokens
+                    char_lexicon[token] = int(index)
+            char_emb_layer = EmbeddingLayer(
+                config['token_embedder']['char_dim'], char_lexicon,
+                fix_emb=False, embs=None,
+            )
+        else:
+            char_lexicon = None
+            char_emb_layer = None
+        if config['token_embedder']['word_dim'] > 0:
+            word_lexicon = {}
+            with (weight_file / 'word.dic').open('r', encoding='utf-8') as fp:
+                for raw in fp:
+                    tokens = raw.strip().split('\t')
+                    if len(tokens) == 1:
+                        tokens.insert(0, '\u3000')
+                    token, index = tokens
+                    word_lexicon[token] = int(index)
+            word_emb_layer = EmbeddingLayer(
+                config['token_embedder']['word_dim'], word_lexicon,
+                fix_emb=False, embs=None,
+            )
+        else:
+            word_lexicon = None
+            word_emb_layer = None
+
+        super(ELMoForManyLanguages, self).__init__(
+            config=config, word_emb_layer=word_emb_layer,
+            char_emb_layer=char_emb_layer, use_cuda=False,
+        )
+        self.load_model(path=weight_file)
+        self.char_lexicon = char_lexicon
+        self.word_lexicon = word_lexicon
+
+        self.lang = weight_file.name
+        self.requires_grad = requires_grad
+        self.pack_output = pack_output
+        self.encoding_dim = self.output_dim * 2
+
+    @classmethod
+    def fetch(cls, lang: str):
+        download_and_unzip(
+            url=cls.configs[0],
+            dest=data_path / cls.__name__.lower() / 'configs' / Path(cls.configs[0]).name,
+        )
+        download_and_unzip(
+            url=cls.configs[1],
+            dest=data_path / cls.__name__.lower() / 'configs' / Path(cls.configs[1]).name,
+        )
+        return download_and_unzip(
+            url=cls.root + f'{cls.names[lang]}.zip',
+            dest=data_path / cls.__name__.lower() / lang / f'{lang}.zip',
+        ).parent
+
+    @classmethod
+    def from_pretrained(cls, lang: str, pack_output: bool = True, freeze: bool = True) -> 'ELMoForManyLanguages':
+        path = cls.fetch(lang=lang)
+
+        with (path / 'config.json').open('r', encoding='utf-8') as fp:
+            args = json.load(fp)
+        return cls(
+            options_file=path / args['config_path'], requires_grad=not freeze,
+            weight_file=path, pack_output=pack_output,
+        )
+
+    def extra_repr(self) -> str:
+        args = [
+            f'lang={self.lang}', f'encoding_dim={self.encoding_dim}',
+            f'word_vocab={len(self.word_lexicon) if self.word_lexicon is not None else None}',
+            f'char_vocab={len(self.char_lexicon) if self.char_lexicon is not None else None}',
+        ]
+        if not self.requires_grad:
+            args.append('frozen')
+        return ', '.join(args)
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}({self.extra_repr()})'
+
+    def forward(self, batch: List[List[str]], output_layer: int = -1) -> Union[Tensor, PackedSequence]:
+        if self.config['token_embedder']['name'].lower() == 'cnn':
+            pad, text = read_list(batch, self.config['token_embedder']['max_characters_per_token'])
+        else:
+            pad, text = read_list(batch)
+
+        pad_w, pad_c, pad_ln, pad_mask, pad_text, recover_idx = create_batches(
+            pad, len(text), self.word_lexicon, self.char_lexicon, self.config, text=text)
+
+        ans = []
+        for word, char, length, mask, pads in zip(pad_w, pad_c, pad_ln, pad_mask, pad_text):
+            output = super(ELMoForManyLanguages, self).forward(word, char, mask)
+            for index, text in enumerate(pads):
+                if self.config['encoder']['name'].lower() == 'lstm':
+                    data = output[index, 1:length[index] - 1, :]
+                elif self.config['encoder']['name'].lower() == 'elmo':
+                    data = output[:, index, 1:length[index] - 1, :]
+
+                if output_layer == -1:
+                    payload = data.mean(dim=0)
+                else:
+                    payload = data[output_layer]
+                ans.append(payload if self.requires_grad else payload.detach())
+
+        ans = recover(ans, recover_idx)
+        if self.pack_output:
+            ans = pack_sequence(ans, enforce_sorted=False)
+        return ans
diff --git a/torchglyph/nn/embedding.py b/torchglyph/nn/embedding.py
index b227825..87fd302 100644
--- a/torchglyph/nn/embedding.py
+++ b/torchglyph/nn/embedding.py
@@ -1,22 +1,35 @@
-from typing import Union
+from typing import Union, Tuple
 
 import torch
 from einops import rearrange
-from torch import nn, Tensor
-from torch.nn.utils.rnn import pack_padded_sequence, PackedSequence
+from torch import Tensor
+from torch import nn
+from torch.nn.utils.rnn import PackedSequence, pack_sequence, pack_padded_sequence
 
-from torchglyph.functional import SupportPack
+from torchglyph.functional import SupportPackMeta
 
 
-class TokEmbedding(nn.Embedding, metaclass=SupportPack):
-    pass
+class TokEmbedding(nn.Embedding, metaclass=SupportPackMeta):
+    def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int = None, unk_idx: int = None,
+                 max_norm: float = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
+                 sparse: bool = False, _weight: Tensor = None):
+        super(TokEmbedding, self).__init__(
+            num_embeddings=num_embeddings, embedding_dim=embedding_dim,
+            padding_idx=padding_idx, max_norm=max_norm, norm_type=norm_type,
+            scale_grad_by_freq=scale_grad_by_freq, sparse=sparse, _weight=_weight,
+        )
+        self.unk_idx = unk_idx
+
+    @property
+    def unk(self) -> Tensor:
+        return self.weight[self.unk_idx]
 
 
 class SubLstmEmbedding(nn.Module):
     def __init__(self, num_embeddings: int, embedding_dim: int,
                  hidden_dim: int, dropout: float, num_layers: int = 1,
                  bias: bool = True, batch_first: bool = True,
-                 bidirectional: bool = True, padding_idx: int = None) -> None:
+                 bidirectional: bool = True, padding_idx: int = None, unk_idx: int = None) -> None:
         super(SubLstmEmbedding, self).__init__()
 
         self.embedding = nn.Embedding(
@@ -32,6 +45,13 @@ def __init__(self, num_embeddings: int, embedding_dim: int,
         )
 
         self.embedding_dim = self.rnn.hidden_size * (2 if self.rnn.bidirectional else 1)
+        self.unk_idx = unk_idx
+
+    @property
+    def unk(self) -> Tensor:
+        embedding = self.embedding.weight[None, self.unk_idx]
+        _, (encoding, _) = self.rnn(pack_sequence([embedding], enforce_sorted=True))
+        return rearrange(encoding, '(l d) a h -> l a (d h)', l=self.rnn.num_layers)[0, 0, :]
 
     def _padded_forward(self, sub: Tensor, tok_lengths: Tensor) -> Tensor:
         pack = pack_padded_sequence(
@@ -39,7 +59,7 @@ def _padded_forward(self, sub: Tensor, tok_lengths: Tensor) -> Tensor:
             rearrange(tok_lengths.clamp_min(1), 'a b -> (a b)'),
             batch_first=self.rnn.batch_first, enforce_sorted=False,
         )
-
+
         embedding = pack._replace(data=self.dropout(self.embedding(pack.data)))
         _, (encoding, _) = self.rnn(embedding)
 
@@ -57,3 +77,33 @@ def forward(self, sub: Union[Tensor, PackedSequence], *args) -> Union[Tensor, Pa
             return self._padded_forward(sub, *args)
         else:
             return self._packed_forward(sub, *args)
+
+
+class ContiguousSubLstmEmbedding(nn.Module):
+    def __init__(self, num_embeddings: int, embedding_dim: int,
+                 hidden_dim: int, dropout: float, num_layers: int = 1,
+                 bias: bool = True, batch_first: bool = True,
+                 bidirectional: bool = True, padding_idx: int = None) -> None:
+        super(ContiguousSubLstmEmbedding, self).__init__()
+
+        self.embedding = nn.Embedding(
+            num_embeddings=num_embeddings,
+            embedding_dim=embedding_dim,
+            padding_idx=padding_idx,
+        )
+        self.dropout = nn.Dropout(dropout)
+        self.rnn = nn.LSTM(
+            input_size=self.embedding.embedding_dim,
+            hidden_size=hidden_dim, num_layers=num_layers, bias=bias,
+            batch_first=batch_first, bidirectional=bidirectional,
+        )
+
+        self.embedding_dim = self.rnn.hidden_size * (2 if self.rnn.bidirectional else 1)
+
+    def forward(self, sub: PackedSequence, indices: Tuple[PackedSequence, PackedSequence]) -> PackedSequence:
+        embedding = sub._replace(data=self.dropout(self.embedding(sub.data)))
+        encoding, _ = self.rnn(embedding)  # type: (PackedSequence, _)
+
+        fidx, bidx = indices
+        fenc, benc = encoding.data.chunk(2, dim=-1)
+        return fidx._replace(data=torch.cat([fenc[fidx.data], benc[bidx.data]], dim=-1))
diff --git a/torchglyph/pipe/contiguous.py b/torchglyph/pipe/contiguous.py
new file mode 100644
index 0000000..25e0ae8
--- /dev/null
+++ b/torchglyph/pipe/contiguous.py
@@ -0,0 +1,33 @@
+from typing import Union, Optional, Tuple
+
+import torch
+
+from torchglyph.pipe import PackedTokSeqPipe
+from torchglyph.pipe import Pipe
+from torchglyph.pipe import THRESHOLD
+from torchglyph.proc import GetLength, Lift, ToTensor
+from torchglyph.proc.collecting import ToDevice
+from torchglyph.proc.contiguous import BuildContiguousSub, BuildContiguousSubPtr, PackContiguousSubPtr
+
+
+class PackedContiguousSubPipe(PackedTokSeqPipe):
+    def __init__(self, device: Union[int, torch.device], unk_token: Optional[str],
+                 seq_token: str, special_tokens: Tuple[Optional[str], ...] = (),
+                 threshold: int = THRESHOLD, dtype: torch.dtype = torch.long) -> None:
+        super(PackedContiguousSubPipe, self).__init__(
+            device=device, unk_token=unk_token, special_tokens=special_tokens,
+            threshold=threshold, dtype=dtype,
+        )
+        self.with_(
+            pre=BuildContiguousSub(seq_token=seq_token) + ...,
+        )
+
+
+class PackedContiguousSubPtrPipe(Pipe):
+    def __init__(self, device: Union[int, torch.device], dtype: torch.dtype = torch.long) -> None:
+        super(PackedContiguousSubPtrPipe, self).__init__(
+            pre=Lift(GetLength()) + BuildContiguousSubPtr() + Lift(ToTensor(dtype=dtype)),
+            vocab=None,
+            post=None,
+            batch=PackContiguousSubPtr(enforce_sorted=False) + ToDevice(device=device),
+        )
diff --git a/torchglyph/pipe/ctx.py b/torchglyph/pipe/ctx.py
new file mode 100644
index 0000000..58ae589
--- /dev/null
+++ b/torchglyph/pipe/ctx.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+import torch
+
+from torchglyph.pipe import Pipe
+from torchglyph.proc import ToDevice
+from torchglyph.proc.ctx import PadELMo
+from torchglyph.proc.tokenizer import ELMoTokenizer
+
+
+class ELMoPipe(Pipe):
+    def __init__(self, device: Union[int, torch.device]):
+        super(ELMoPipe, self).__init__(
+            pre=ELMoTokenizer(),
+            vocab=None,
+            post=None,
+            batch=PadELMo() + ToDevice(device=device),
+        )
diff --git a/torchglyph/pipe/seq.py b/torchglyph/pipe/seq.py
index a5b9ff2..f0f7c6e 100644
--- a/torchglyph/pipe/seq.py
+++ b/torchglyph/pipe/seq.py
@@ -90,7 +90,7 @@ class PackedSeqPtrSeqPipe(PackedIdxSeqPipe):
     def __init__(self, device: Union[int, torch.device], dtype: torch.dtype = torch.long) -> None:
         super(PackedSeqPtrSeqPipe, self).__init__(device=device, dtype=dtype)
         self.with_(
-            pre=GetMask(token=0),
+            post=GetMask(token=0) + ...,
             batch=Scan(fn=cum_seq, init=0) + ...,
         )
diff --git a/torchglyph/proc/collecting.py b/torchglyph/proc/collecting.py
index 1ce3fc5..2783c6f 100644
--- a/torchglyph/proc/collecting.py
+++ b/torchglyph/proc/collecting.py
@@ -1,5 +1,6 @@
 from typing import Any, Union, List, Tuple
 
+import numpy as np
 import torch
 from torch import Tensor
 from torch.nn.utils.rnn import pad_sequence, PackedSequence, pack_sequence, pad_packed_sequence
@@ -9,7 +10,8 @@
 
 
 class ToDevice(Proc):
-    Batch = Union[Tensor, PackedSequence, Tuple[Union[Tensor, PackedSequence], ...]]
+    Item = Union[int, float, bool, Tensor, PackedSequence]
+    Batch = Union[Item, Tuple[Item, ...]]
 
     def __init__(self, device: Union[int, torch.device]) -> None:
         super(ToDevice, self).__init__()
@@ -24,9 +26,11 @@ def extra_repr(self) -> str:
         return f'{self.device}'
 
     def __call__(self, batch: Batch, vocab: Vocab, **kwargs) -> Batch:
-        if isinstance(batch, (PackedSequence, Tensor)):
-            return batch.to(self.device)
-        return type(batch)([self(e, vocab=vocab) for e in batch])
+        if isinstance(batch, (Tensor, PackedSequence)):
+            return batch.to(device=self.device)
+        if isinstance(batch, (list, tuple)):
+            return type(batch)([self(e, vocab=vocab) for e in batch])
+        return batch
 
 
 class ToTensor(Proc):
@@ -39,7 +43,9 @@ def extra_repr(self) -> str:
 
     def __call__(self, data: Any, **kwargs) -> Tensor:
         try:
-            return torch.tensor(data, dtype=self.dtype, requires_grad=False)
+            if isinstance(data, np.ndarray):
+                return torch.from_numpy(data).to(dtype=self.dtype).requires_grad_(False)
+            return torch.tensor(data, dtype=self.dtype).requires_grad_(False)
         except ValueError as err:
             if err.args[0] == "too many dimensions 'str'":
                 raise ValueError(f"'{data}' can not be converted to {Tensor.__name__}")
diff --git a/torchglyph/proc/contiguous.py b/torchglyph/proc/contiguous.py
new file mode 100644
index 0000000..8cd5b11
--- /dev/null
+++ b/torchglyph/proc/contiguous.py
@@ -0,0 +1,59 @@
+from typing import List, Tuple
+
+import torch
+from torch import Tensor
+from torch.nn.utils.rnn import PackedSequence
+from torch.nn.utils.rnn import pack_sequence
+from torch.nn.utils.rnn import pad_packed_sequence
+
+from torchglyph.proc.abc import Proc
+
+
+class BuildContiguousSub(Proc):
+    def __init__(self, seq_token: str) -> None:
+        super(BuildContiguousSub, self).__init__()
+        self.seq_token = seq_token
+
+    def extra_repr(self) -> str:
+        return repr(self.seq_token)
+
+    def __call__(self, tokens: List[str], **kwargs) -> List[str]:
+        zs = []
+        for token in tokens:
+            zs.extend(list(token))
+            zs.append(self.seq_token)
+        return zs[:-1]
+
+
+class BuildContiguousSubPtr(Proc):
+    def __call__(self, lengths: List[int], **kwargs) -> Tuple[List[int], List[int]]:
+        indices = [0]
+        for length in lengths:
+            indices.append(indices[-1] + length + 1)
+        return [index - 2 for index in indices[1:]], indices[:-1]
+
+
+class PackContiguousSubPtr(Proc):
+    def __init__(self, enforce_sorted: bool) -> None:
+        super(PackContiguousSubPtr, self).__init__()
+        self.enforce_sorted = enforce_sorted
+
+    def extra_repr(self) -> str:
+        return f'enforce_sorted={self.enforce_sorted}'
+
+    def __call__(self, indices: List[Tuple[Tensor, Tensor]], **kwargs) -> Tuple[PackedSequence, PackedSequence]:
+        fidx, bidx = zip(*indices)
+
+        pack = pack_sequence([
+            torch.empty((f.max().item() + 1,), dtype=torch.long) for f in fidx
+        ], enforce_sorted=self.enforce_sorted)
+        indices = pack._replace(data=torch.arange(pack.data.size(0), device=pack.data.device))
+        indices, _ = pad_packed_sequence(indices, batch_first=True)
+
+        fidx = pack_sequence([
+            indices[i, f] for i, f in enumerate(fidx)
+        ], enforce_sorted=self.enforce_sorted)
+        bidx = pack_sequence([
+            indices[i, b] for i, b in enumerate(bidx)
+        ], enforce_sorted=self.enforce_sorted)
+        return fidx, bidx
diff --git a/torchglyph/proc/ctx.py b/torchglyph/proc/ctx.py
new file mode 100644
index 0000000..0e2b786
--- /dev/null
+++ b/torchglyph/proc/ctx.py
@@ -0,0 +1,17 @@
+from typing import List
+
+from allennlp.data import Instance as AllenInstance, Vocabulary as AllenVocabulary
+from allennlp.data.dataset import Batch as AllenBatch
+from torch import Tensor
+
+from torchglyph.io import toggle_loggers
+from torchglyph.proc import Proc
+
+toggle_loggers('allennlp', False)
+
+
+class PadELMo(Proc):
+    def __call__(self, data: List[AllenInstance], *args, **kwargs) -> Tensor:
+        batch = AllenBatch(data)
+        batch.index_instances(AllenVocabulary())
+        return batch.as_tensor_dict()['elmo']['character_ids']
diff --git a/torchglyph/proc/infer.py b/torchglyph/proc/infer.py
new file mode 100644
index 0000000..d83faf1
--- /dev/null
+++ b/torchglyph/proc/infer.py
@@ -0,0 +1,9 @@
+from typing import List
+
+from torchglyph.proc import Proc
+from torchglyph.vocab import Vocab
+
+
+class RevVocab(Proc):
+    def __call__(self, xs: List[int], vocab: Vocab, **kwargs) -> List[str]:
+        return [vocab.itos[x] for x in xs]
diff --git a/torchglyph/proc/tokenizer.py b/torchglyph/proc/tokenizer.py
new file mode 100644
index 0000000..8d244dc
--- /dev/null
+++ b/torchglyph/proc/tokenizer.py
@@ -0,0 +1,102 @@
+from typing import Union, List
+
+import transformers
+from allennlp.data import Token as AllenToken, Instance as AllenInstance
+from allennlp.data.fields import TextField as AllenTextField
+from allennlp.data.token_indexers import ELMoTokenCharactersIndexer
+
+from torchglyph.io import toggle_loggers
+from torchglyph.proc import Proc
+
+toggle_loggers('allennlp', False)
+toggle_loggers('transformers', False)
+
+
+class ELMoTokenizer(Proc):
+    def __init__(self) -> None:
+        super(ELMoTokenizer, self).__init__()
+        self.tokenizer = ELMoTokenCharactersIndexer()
+
+    def __call__(self, data: List[str], *args, **kwargs):
+        data = [AllenToken(token) for token in data]
+        return AllenInstance({"elmo": AllenTextField(data, {'character_ids': self.tokenizer})})
+
+
+class TransformerTokenizerProc(Proc):
+    def __init__(self, weight: str) -> None:
+        super(TransformerTokenizerProc, self).__init__()
+        self.weight = weight
+
+    def extra_repr(self) -> str:
+        return f'weight={self.weight}'
+
+    def __call__(self, data: Union[str, List[str]], **kwargs) -> List[int]:
+        if not isinstance(data, str):
+            data = ' '.join(data)
+        return self.tokenizer.encode(data)
+
+
+class BertTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'bert-base-uncased') -> None:
+        super(BertTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.BertTokenizer.from_pretrained(weight)
+
+
+class OpenAIGPTTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'openai-gpt') -> None:
+        super(OpenAIGPTTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.OpenAIGPTTokenizer.from_pretrained(weight)
+
+
+class GPT2Tokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'gpt2') -> None:
+        super(GPT2Tokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.GPT2Tokenizer.from_pretrained(weight)
+
+
+class CTRLTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'ctrl') -> None:
+        super(CTRLTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.CTRLTokenizer.from_pretrained(weight)
+
+
+class TransfoXLTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'transfo-xl-wt103') -> None:
+        super(TransfoXLTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.TransfoXLTokenizer.from_pretrained(weight)
+
+
+class XLNetTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'xlnet-base-cased') -> None:
+        super(XLNetTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.XLNetTokenizer.from_pretrained(weight)
+
+
+class XLMTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'xlm-mlm-enfr-1024') -> None:
+        super(XLMTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.XLMTokenizer.from_pretrained(weight)
+
+
+class DistilBertTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'distilbert-base-cased') -> None:
+        super(DistilBertTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.DistilBertTokenizer.from_pretrained(weight)
+
+
+class RobertaTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'roberta-base') -> None:
+        super(RobertaTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.RobertaTokenizer.from_pretrained(weight)
+
+
+class XLMRobertaTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'xlm-roberta-base') -> None:
+        super(XLMRobertaTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.XLMRobertaTokenizer.from_pretrained(weight)
+
+
+class BartTokenizer(TransformerTokenizerProc):
+    def __init__(self, weight: str = 'bart-large') -> None:
+        super(BartTokenizer, self).__init__(weight=weight)
+        self.tokenizer = transformers.BartTokenizer.from_pretrained(weight)
diff --git a/torchglyph/proc/vocab.py b/torchglyph/proc/vocab.py
index f42f2a3..ad47e9c 100644
--- a/torchglyph/proc/vocab.py
+++ b/torchglyph/proc/vocab.py
@@ -5,6 +5,8 @@
 from torchglyph.proc import Proc
 from torchglyph.vocab import Vocab, Vectors, Glove, FastTest
 
+logger = logging.getLogger(__name__)
+
 
 class UpdateCounter(Proc):
     def __call__(self, data: Union[str, List[str]], counter: Counter, *args, **kwargs) -> Union[str, List[str]]:
@@ -61,53 +63,59 @@ def __call__(self, vocab: Vocab, name: str, *args, **kwargs) -> Vocab:
         occ_avg = sum(vocab.freq.values()) / max(1, tok_cnt)
 
         name = f"{vocab.__class__.__name__} '{name}'"
-        logging.info(f"{name} has {tok_cnt} token(s) => "
-                     f"{occ_avg:.1f} occurrence(s)/token ["
-                     f"{occ_max} :: '{tok_max}', "
-                     f"{occ_min} :: '{tok_min}']")
+        logger.info(f"{name} has {tok_cnt} token(s) => "
+                    f"{occ_avg:.1f} occurrence(s)/token ["
+                    f"{occ_max} :: '{tok_max}', "
+                    f"{occ_min} :: '{tok_min}']")
 
         if tok_cnt <= self.threshold:
-            logging.info(f'{name} => [{", ".join(vocab.itos)}]')
+            logger.info(f'{name} => [{", ".join(vocab.itos)}]')
         else:
-            logging.info(f'{name} => ['
-                         f'{", ".join(vocab.itos[:self.threshold // 2])}, ..., '
-                         f'{", ".join(vocab.itos[-self.threshold // 2:])}]')
+            logger.info(f'{name} => ['
+                        f'{", ".join(vocab.itos[:self.threshold // 2])}, ..., '
+                        f'{", ".join(vocab.itos[-self.threshold // 2:])}]')
 
         return vocab
 
 
 class LoadVectors(Proc):
-    def __init__(self, vectors: Vectors, *fallbacks) -> None:
+    def __init__(self, *fallback_fns, vectors: Vectors, remove_missing: bool) -> None:
         super(LoadVectors, self).__init__()
+        self.fallback_fns = fallback_fns
         self.vectors = vectors
-        self.fallbacks = fallbacks
+        self.remove_missing = remove_missing
 
     def extra_repr(self) -> str:
         return ', '.join([
+            *[f'{f.__name__}' for f in self.fallback_fns],
             f'{self.vectors.extra_repr()}',
-            *[f'{f.__name__}' for f in self.fallbacks],
+            f'remove_missing={self.remove_missing}',
         ])
 
     def __call__(self, vocab: Vocab, name: str, *args, **kwargs) -> Vocab:
         assert vocab is not None, f"did you forget '{BuildVocab.__name__}' before '{LoadVectors.__name__}'?"
-        tok, occ = vocab.load_vectors(self.vectors, *self.fallbacks)
+        if self.remove_missing:
+            vocab = vocab.union(self.vectors, *self.fallback_fns)
+        tok, occ = vocab.load_vectors(*self.fallback_fns, vectors=self.vectors)
         tok = tok / max(1, len(vocab.freq.values())) * 100
         occ = occ / max(1, sum(vocab.freq.values())) * 100
-        logging.info(f"{self.vectors} hits {tok:.1f}% tokens and {occ:.1f}% occurrences of {Vocab.__name__} '{name}'")
+        logger.info(f"{self.vectors} hits {tok:.1f}% tokens and {occ:.1f}% occurrences of {Vocab.__name__} '{name}'")
         return vocab
 
 
 class LoadGlove(LoadVectors):
-    def __init__(self, name: str, dim: int, *fallbacks) -> None:
+    def __init__(self, *fallback_fns, name: str, dim: int, remove_missing: bool) -> None:
         super(LoadGlove, self).__init__(
-            Glove(name=name, dim=dim),
-            *fallbacks,
+            *fallback_fns,
+            vectors=Glove(name=name, dim=dim),
+            remove_missing=remove_missing,
         )
 
 
 class LoadFastText(LoadVectors):
-    def __init__(self, lang: str, *fallbacks) -> None:
+    def __init__(self, *fallback_fns, lang: str, remove_missing: bool) -> None:
         super(LoadFastText, self).__init__(
-            FastTest(lang=lang),
-            *fallbacks,
+            *fallback_fns,
+            vectors=FastTest(lang=lang),
+            remove_missing=remove_missing,
         )
diff --git a/torchglyph/vocab.py b/torchglyph/vocab.py
index ca7b0f6..428dea8 100644
--- a/torchglyph/vocab.py
+++ b/torchglyph/vocab.py
@@ -2,7 +2,7 @@
 from collections import Counter
 from collections import defaultdict
 from pathlib import Path
-from typing import Union, Optional, Tuple, Callable, List
+from typing import Optional, Tuple, Callable, List
 
 import torch
 from torch import Tensor
@@ -12,6 +12,8 @@
 from torchglyph import data_path
 from torchglyph.io import download_and_unzip
 
+logger = logging.getLogger(__name__)
+
 
 class Vocab(object):
     def __init__(self, counter: Counter,
@@ -77,48 +79,26 @@ def __len__(self) -> int:
     def __contains__(self, token: str) -> bool:
         return token in self.stoi
 
-    def __and__(self, rhs: Union['Counter', 'Vocab']) -> 'Vocab':
-        if isinstance(rhs, Vocab):
-            rhs = rhs.freq
-        return Vocab(
-            counter=Counter({
-                token: freq
-                for token, freq in self.freq.items()
-                if token in rhs
-            }),
-            unk_token=self.unk_token,
-            pad_token=self.pad_token,
-            special_tokens=self.special_tokens,
-            max_size=self.max_size, min_freq=self.min_freq,
-        )
+    def union(self, rhs: 'Vocab', *fallback_fns) -> 'Vocab':
+        counter = Counter()
 
-    def __add__(self, rhs: Union['Counter', 'Vocab']) -> 'Vocab':
-        if isinstance(rhs, Vocab):
-            rhs = rhs.freq
-        return Vocab(
-            counter=Counter({
-                token: self.freq[token] + rhs[token]
-                for token in {*self.freq.keys(), *rhs.keys()}
-            }),
-            unk_token=self.unk_token,
-            pad_token=self.pad_token,
-            special_tokens=self.special_tokens,
-            max_size=self.max_size, min_freq=self.min_freq,
-        )
+        for token, freq in self.freq.items():
+            if token in rhs.stoi:
+                counter[token] = freq
+            else:
+                for fallback_fn in fallback_fns:
+                    new_token = fallback_fn(token)
+                    if new_token in rhs.stoi:
+                        counter[new_token] = freq
+                        break
 
-    def __sub__(self, rhs: Union['Counter', 'Vocab']) -> 'Vocab':
-        if isinstance(rhs, Vocab):
-            rhs = rhs.freq
         return Vocab(
-            counter=Counter({
-                token: freq
-                for token, freq in self.freq.items()
-                if token not in rhs
-            }),
+            counter=counter,
             unk_token=self.unk_token,
             pad_token=self.pad_token,
             special_tokens=self.special_tokens,
-            max_size=self.max_size, min_freq=self.min_freq,
+            max_size=self.max_size,
+            min_freq=self.min_freq,
         )
 
     @property
@@ -133,12 +113,12 @@ def vec_dim(self) -> int:
             return 0
         return self.vectors.size(1)
 
-    def load_vectors(self, vectors: 'Vectors', *fallbacks) -> Tuple[int, int]:
+    def load_vectors(self, *fallback_fns, vectors: 'Vectors') -> Tuple[int, int]:
         self.vectors = torch.empty((len(self), vectors.vec_dim), dtype=torch.float32)
 
         tok, occ = 0, 0
         for token, index in self.stoi.items():
-            if vectors.query_(token, self.vectors[index], *fallbacks):
+            if vectors.query_(token, self.vectors[index], *fallback_fns):
                 tok += 1
                 occ += self.freq[token]
 
@@ -148,63 +128,64 @@ def load_vectors(self, vectors: 'Vectors', *fallbacks) -> Tuple[int, int]:
         return tok, occ
 
     def save(self, path: Path) -> None:
-        logging.info(f'saving {self.__class__.__name__} to {path}')
+        logger.info(f'saving {self.__class__.__name__} to {path}')
         torch.save((self.stoi, self.itos, self.vectors), path)
 
     def load(self, path: Path) -> None:
-        logging.info(f'loading {self.__class__.__name__} from {path}')
+        logger.info(f'loading {self.__class__.__name__} from {path}')
         self.stoi, self.itos, self.vectors = torch.load(path)
 
 
 class Vectors(Vocab):
     def __init__(self, urls_dest: List[Tuple[str, Path]], path: Path,
-                 has_head_info: bool, unk_init_: Callable[[Tensor], Tensor] = init.normal_) -> None:
+                 heading: bool, unicode_error: str = 'replace', dtype: torch.dtype = torch.float32,
+                 unk_init_: Callable[[Tensor], Tensor] = init.normal_) -> None:
         super(Vectors, self).__init__(
             counter=Counter(), unk_token=None, pad_token=None,
             special_tokens=(), max_size=None, min_freq=1,
         )
 
-        self.vectors = []
+        vectors = []
         self.unk_init_ = unk_init_
 
-        pt_path = path.with_suffix('.pt')
-        if not pt_path.exists():
+        dump_path = path.with_suffix('.pt')
+        if not dump_path.exists():
             if not path.exists():
                 for url, dest in urls_dest:
                     download_and_unzip(url, dest)
 
             with path.open('rb') as fp:
-                vec_dim = None
+                vector_dim = None
 
-                iteration = tqdm(fp, desc=f'reading {path}', unit=' tokens')
-                for raw in iteration:  # type:bytes
-                    if has_head_info:
-                        _, vec_dim = map(int, raw.strip().split(b' '))
-                        has_head_info = False
+                for raw in tqdm(fp, desc=f'reading {path}', unit=' lines'):  # type: bytes
+                    if heading:
+                        _, vector_dim = map(int, raw.rstrip().split(b' '))
+                        heading = False
                         continue
 
                     token, *vs = raw.rstrip().split(b' ')
-                    if vec_dim is None:
-                        vec_dim = len(vs)
-                    elif vec_dim != len(vs):
-                        raise ValueError(f'vector dimensions are not consistent, {vec_dim} != {len(vs)}')
+                    if vector_dim is None:
+                        vector_dim = len(vs)
+                    elif vector_dim != len(vs):
+                        logger.error(f'vector dimensions are not consistent, {vector_dim} != {len(vs)} :: {token}')
+                        continue
 
-                    self.add_token_(str(token, encoding='utf-8'))
-                    self.vectors.append(torch.tensor([float(v) for v in vs], dtype=torch.float32))
+                    self.add_token_(str(token, encoding='utf-8', errors=unicode_error))
+                    vectors.append(torch.tensor([float(v) for v in vs], dtype=dtype))
 
-            self.vectors = torch.stack(self.vectors, 0)
-            self.save(pt_path)
+            self.vectors = torch.stack(vectors, 0)
+            self.save(dump_path)
         else:
-            self.load(pt_path)
+            self.load(dump_path)
 
     @torch.no_grad()
-    def query_(self, token: str, vector: Tensor, *fallbacks) -> bool:
+    def query_(self, token: str, vector: Tensor, *fallback_fns) -> bool:
         if token in self:
             vector[:] = self.vectors[self.stoi[token]]
             return True
 
-        for fallback in fallbacks:
-            new_token = fallback(token)
+        for fallback_fn in fallback_fns:
+            new_token = fallback_fn(token)
             if new_token in self:
                 vector[:] = self.vectors[self.stoi[new_token]]
                 return True
@@ -214,23 +195,35 @@ def query_(self, token: str, vector: Tensor, *fallbacks) -> bool:
 
 
 class Glove(Vectors):
     def __init__(self, name: str, dim: int) -> None:
+        path = data_path / f'glove.{name}'
         super(Glove, self).__init__(
             urls_dest=[(
                 f'http://nlp.stanford.edu/data/glove.{name}.zip',
-                data_path / f'glove.{name}' / f'glove.{name}.zip'
+                path / f'glove.{name}.zip'
             )],
-            path=data_path / f'glove.{name}' / f'glove.{name}.{dim}d.txt',
-            has_head_info=False,
+            path=path / f'glove.{name}.{dim}d.txt', heading=False,
         )
 
 
 class FastTest(Vectors):
     def __init__(self, lang: str) -> None:
+        path = data_path / 'fasttext'
         super(FastTest, self).__init__(
             urls_dest=[(
                 f'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.{lang}.vec',
-                data_path / 'fasttext' / f'wiki.{lang}.vec',
+                path / f'wiki.{lang}.vec',
+            )],
+            path=path / f'wiki.{lang}.vec', heading=True,
+        )
+
+
+class NLPLVectors(Vectors):
+    def __init__(self, index: int, repository: str = '20', name: str = 'model.txt', heading: bool = False) -> None:
+        path = data_path / 'nlpl' / f'{index}'
+        super(NLPLVectors, self).__init__(
+            urls_dest=[(
+                f'http://vectors.nlpl.eu/repository/{repository}/{index}.zip',
+                path / f'{index}.zip',
             )],
-            path=data_path / 'fasttext' / f'wiki.{lang}.vec',
-            has_head_info=True,
+            path=path / name, heading=heading,
         )
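Taken together, the reworked vectors flow reads as follows. A minimal sketch (not part of the patch), reusing the `'<unk>'` token and `device=-1` defaults from the datasets above: with `remove_missing=True`, `LoadVectors` first intersects the vocabulary with the pretrained vectors via `Vocab.union` (using `str.lower` as a fallback match) and only then copies the embeddings in with `load_vectors`.

```python
from torchglyph.pipe import PackedTokSeqPipe
from torchglyph.proc import LoadGlove

# tokens missing from GloVe (even after lowercasing) are dropped from the vocab
word = PackedTokSeqPipe(device=-1, unk_token='<unk>').with_(
    vocab=... + LoadGlove(str.lower, name='6B', dim=100, remove_missing=True),
)
```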