# fairseq.tconf

##################################################################################################
# Packages used
##################################################################################################

# External packages declared for this workflow. Each declaration names a versioner:
# the git entries give a .repo and a .ref, the pip entries give a .package and a .tag.
# The empty { } body means no build commands are run for the package.
# NOTE(review): the git packages below use .ref=HEAD rather than a fixed tag or commit,
# so two runs may pick up different revisions -- consider pinning for reproducibility.
package sockeye :: .versioner=git .repo="https://github.com/mjpost/sockeye" .ref=HEAD { }
package sacrebleu :: .versioner=pip .package="sacrebleu" .tag="1.5.0" { }
package subword_nmt :: .versioner=pip .package="subword-nmt" .tag="0.3.5" { }
package mosesdecoder :: .versioner=git .repo="https://github.com/moses-smt/mosesdecoder" .ref=HEAD { }
package wmt16_scripts :: .versioner=git .repo="https://github.com/rsennrich/wmt16-scripts" .ref=HEAD { }

# SentencePiece, compiled from source with CMake in a separate build/ directory.
# Pinned to tag v0.1.5 because v0.1.6 throws a segfault.
package sentencepiece :: .versioner=git .repo="https://github.com/google/sentencepiece" .ref="tags/v0.1.5" {
  # -p: do not fail if build/ already exists (e.g. when the build is re-run in the same checkout)
  mkdir -p build
  cd build
  cmake ..
  # compile using one make job per available processor
  make -j $(nproc)
}

# The tape4nmt-tools repository (tracked at HEAD); the build step installs the
# Python dependencies listed in its requirements.txt.
package tools :: .versioner=git .repo="https://github.com/shuoyangd/tape4nmt-tools" .ref=HEAD {
  pip install --requirement requirements.txt
}

# fairseq is taken from a personal fork (ref dev0101) for now, since fairseq itself
# evolves pretty fast.
package fairseq :: .versioner=git .repo="https://github.com/shuoyangd/fairseq" .ref=dev0101 {
  # build the package, then install it in development mode
  python setup.py build develop
}

# Global variables for the workflow: data locations, training and test settings,
# grid-job flags and preprocessing options.
# Values written as (Name: ...) are ducttape branch points: each listed alternative is one
# branch, either as name=value or as a bare value.
global {

  ##################################################################################################
  # Data-related stuff
  ##################################################################################################

  # Source and target language code for each training data source.
  SRC=(TrainDataSource:
    iwslt_deen_2014="de"
  )
  TRG=(TrainDataSource:
    iwslt_deen_2014="en"
  )
  trg_lang=en  # FIXME (only used by wrap_xml, under some rare cases)

  # Training corpus, one file per side (src/trg) for each training data source.
  # The "/path/to/iwslt" prefixes here and below look like placeholders -- presumably they
  # must be replaced with the real data location before running.
  train_data=(TrainDataSource:
    iwslt_deen_2014=(side:
      src="/path/to/iwslt/train.tags.nourl.de-en.de"
      trg="/path/to/iwslt/train.tags.nourl.de-en.en"
    )
  )

  # Development sets (XML files), one branch per set, each with a src and a trg side.
  dev_data=(DevDataSource:
    iwslt_deen_dev2010=(side:
      src="/path/to/iwslt/IWSLT14.TED.dev2010.de-en.de.xml"
      trg="/path/to/iwslt/IWSLT14.TED.dev2010.de-en.en.xml"
    )
    iwslt_deen_dev2012=(side:
      src="/path/to/iwslt/IWSLT14.TEDX.dev2012.de-en.de.xml"
      trg="/path/to/iwslt/IWSLT14.TEDX.dev2012.de-en.en.xml"
    )
  )

  # Test sets (XML files), one branch per set, each with a src and a trg side.
  test_data=(TestDataSource:
    iwslt_deen_test2010=(side:
      src="/path/to/iwslt/IWSLT14.TED.tst2010.de-en.de.xml"
      trg="/path/to/iwslt/IWSLT14.TED.tst2010.de-en.en.xml"
    )
    iwslt_deen_test2011=(side:
      src="/path/to/iwslt/IWSLT14.TED.tst2011.de-en.de.xml"
      trg="/path/to/iwslt/IWSLT14.TED.tst2011.de-en.en.xml"
    )
    iwslt_deen_test2012=(side:
      src="/path/to/iwslt/IWSLT14.TED.tst2012.de-en.de.xml"
      trg="/path/to/iwslt/IWSLT14.TED.tst2012.de-en.en.xml"
    )
  )

  ##################################################################################################
  # General options you should set for your environment
  ##################################################################################################

  # All ducttape files will be written underneath this directory
  ducttape_output="out"

  # TRAINING CONFIGURATIONS
  # All defaults are consistent with nematus.
  # An empty string presumably means "do not pass this option" -- confirm against the
  # training task that consumes these variables.
  train_train_from="" # if there is a previous model to start with
  train_train_from_state_dict="" # if there is a previous dict to start with
  train_start_epoch="" # if trained for certain amount of epochs previously

  train_batch_size="80"
  train_optim="adam"
  # Branch point over dropout values.
  train_dropout=(Dropout: 0.1 0.3 0.5)
  train_lr="5e-4"
  train_lr_min="9e-8"
  # train_lr_min=""
  train_lr_shrink="0.5"
  # The settings below that offer a "Transformer" branch leave the value empty on their
  # other branch ("Default", or "CE" for the criterion).
  train_lr_scheduler=(LrScheduler: Default="" Transformer="inverse_sqrt")
  train_warmup_init_lr=(WarmUpLr: Default="" Transformer="1e-07")
  train_warmup_updates=(WarmUpUpdates: Default="" Transformer="4000")
  train_criterion=(Criterion: CE="" Transformer="label_smoothed_cross_entropy")
  train_label_smoothing=(LabelSmoothing: Default="" Transformer="0.1")
  # Branch point over gradient clipping norms.
  train_clip_norm=(ClipNorm: 0.0 0.1 0.5 1 5)
  train_max_tokens="4000"
  train_max_epochs="50"
  train_keep_last_epochs="8"
  train_weight_decay=(WeightDecay: Default="" Transformer="0.0001")
  train_update_freq=(UpdateFreq: Default="" Transformer="16")
  train_seed="2"
  # Model architecture name; each branch maps a short label to the architecture string.
  train_arch=(Architecture: conv="fconv" transformer="transformer" fconv_iwslt_de_en="fconv_iwslt_de_en" transformer_iwslt_de_en="transformer_iwslt_de_en" transformer_wmt_en_de="transformer_wmt_en_de" fconv_wmt_en_de="fconv_wmt_en_de" lstm_wiseman_iwslt_de_en="lstm_wiseman_iwslt_de_en")
  train_share_input_output_embed=""
  train_skip_invalid_size_inputs_valid_test="yes"

  # TEST CONFIGURATIONS
  test_model_selection_strategy="acc"
  test_max_sent_length="300"
  test_beam_size="12"
  test_batch_size="32"
  test_replace_unk="True"
  test_remove_bpe=""

  ##################################################################################################
  # Job parameters
  ##################################################################################################

  # SGE: generic job flags
  resource_flags="-l mem_free=2g"

  # SGE: larger job flags
  resource_flags_16g="-l mem_free=16g"

  # SGE: flags for training a model
  resource_flags_train="-q g.q -l gpu=1,mem_free=4g"

  # SGE: flags for decoding
  resource_flags_decode="-q g.q -l gpu=1,mem_free=4g"

  # SGE: flags for notifying about job completion (put in your email address!)
  action_flags="-m ae -M YOUR_EMAIL_HERE"

  # The default submitter: shell (run locally) or sge (run on a grid).
  # Selected by the TestMode branch point: TestMode=yes runs locally, TestMode=no uses the grid.
  submitter=(TestMode: no="sge" yes="shell")

  # Virtual env location. This should be a file path to the virtual env you want loaded before tasks.
  # This variable supports both conda and Python's virtualenv. For conda, use "conda:ENV" as the value,
  # where "ENV" is the name of the conda environment that should be loaded. For virtualenv, supply
  # the path to the script that should be loaded.
  # NOTE(review): both environments are named after sockeye although this config installs
  # fairseq -- confirm these are the intended environments.
  pyenv=(TestMode: no="conda:sockeye" yes="conda:sockeye-cpu")

  ##################################################################################################
  # Preprocessing options
  ##################################################################################################

  # sentencepiece options
  sentencepiece_vocab_size=8000
  sentencepiece_model_type="unigram"

  # Number of BPE merge operations (branch point over two settings)
  bpe_operations=(BpeMergeOps: 49500 9500)

  # options for cleaning training data
  # (presumably a maximum sentence length and a length-ratio limit -- confirm in the cleaning task)
  MaxLen=80
  Ratio=1

  # flags for moses tokenizer
  tokenizer_flags="-no-escape -a -q"

  # Takes the value "no" or "yes" following the same TestMode branch point as
  # submitter and pyenv above.
  use_cpu=(TestMode: no yes)
}