diff --git a/.travis.yml b/.travis.yml index 6c95c83..62a309c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,6 @@ python: install: - pip install -U pip - pip -q install -r requirements.txt - - pip -q install "http://download.pytorch.org/whl/cu75/torch-0.2.0.post1-cp27-cp27mu-manylinux1_x86_64.whl; python_version == '2.7'" - - pip -q install "http://download.pytorch.org/whl/cu75/torch-0.2.0.post1-cp36-cp36m-manylinux1_x86_64.whl; python_version == '3.6'" # dev dependencies - pip install flake8 @@ -32,4 +30,4 @@ script: # Unit test - nosetests --with-coverage --cover-erase --cover-package=seq2seq # Integration test - - "if [[ $TRAVIS_BRANCH =~ (master|develop) ]]; then python setup.py install && scripts/integration_test.sh; fi" \ No newline at end of file + - "if [[ $TRAVIS_BRANCH =~ (master|develop) ]]; then python setup.py install && scripts/integration_test.sh; fi" diff --git a/docs/public/_modules/index.html b/docs/public/_modules/index.html index c5eae05..34c956b 100644 --- a/docs/public/_modules/index.html +++ b/docs/public/_modules/index.html @@ -8,7 +8,7 @@ - Overview: module code — pytorch-seq2seq 0.1.5 documentation + Overview: module code — pytorch-seq2seq 0.1.6 documentation @@ -35,7 +35,7 @@ - + @@ -64,7 +64,7 @@
- 0.1.5 + 0.1.6
@@ -164,8 +164,6 @@

All modules for which code is available

@@ -215,7 +212,7 @@

All modules for which code is available

- - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.dataset.dataset
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.dataset.dataset

-import random
-from seq2seq.dataset import Vocabulary, utils
-
-
[docs]class Dataset(object): - """ - A class that encapsulates a dataset. - - Warning: - Do not use this constructor directly, use one of the class methods to initialize. - - Note: - Source or target sequences that are longer than the respective - max length will be filtered. - - Args: - src_max_len (int): maximum source sequence length - tgt_max_len (int): maximum target sequence length - """ - - def __init__(self, src_max_len, tgt_max_len): - # Prepare data - self.src_max_len = src_max_len - self.tgt_max_len = tgt_max_len - - # Declare vocabulary objects - self.input_vocab = None - self.output_vocab = None - - self.data = None - - -
[docs] @classmethod - def from_file(cls, path, src_max_len, tgt_max_len, src_vocab=None, tgt_vocab=None, src_max_vocab=50000, - tgt_max_vocab=50000): - """ - Initialize a dataset from the file at given path. The file - must contains a list of TAB-separated pairs of sequences. - - Note: - Source or target sequences that are longer than the respective - max length will be filtered. - As specified by maximum vocabulary size, source and target - vocabularies will be sorted in descending token frequency and cutoff. - Tokens that are in the dataset but not retained in the vocabulary - will be dropped in the sequences. - - Args: - path (str): path to the dataset file - src_max_len (int): maximum source sequence length - tgt_max_len (int): maximum target sequence length - src_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the source language, - default `None`. If a pre-populated Vocabulary object, `src_max_vocab` wouldn't be used. - tgt_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the target language, - default `None`. If a pre-populated Vocabulary object, `tgt_max_vocab` wouldn't be used. - src_max_vocab (int): maximum source vocabulary size - tgt_max_vocab (int): maximum target vocabulary size - """ - obj = cls(src_max_len, tgt_max_len) - pairs = utils.prepare_data(path, src_max_len, tgt_max_len) - return cls._encode(obj, pairs, src_vocab, tgt_vocab, src_max_vocab, tgt_max_vocab)
- -
[docs] @classmethod - def from_list(cls, src_data, tgt_data, src_max_len, tgt_max_len, src_vocab=None, tgt_vocab=None, src_max_vocab=50000, - tgt_max_vocab=50000): - """ - Initialize a dataset from the source and target lists of sequences. - - Note: - Source or target sequences that are longer than the respective - max length will be filtered. - As specified by maximum vocabulary size, source and target - vocabularies will be sorted in descending token frequency and cutoff. - Tokens that are in the dataset but not retained in the vocabulary - will be dropped in the sequences. - - Args: - src_data (list): list of source sequences - tgt_data (list): list of target sequences - src_max_len (int): maximum source sequence length - tgt_max_len (int): maximum target sequence length - src_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the source language, - default `None`. If a pre-populated Vocabulary object, `src_max_vocab` wouldn't be used. - tgt_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the target language, - default `None`. If a pre-populated Vocabulary object, `tgt_max_vocab` wouldn't be used. - src_max_vocab (int): maximum source vocabulary size - tgt_max_vocab (int): maximum target vocabulary size - """ - obj = cls(src_max_len, tgt_max_len) - pairs = utils.prepare_data_from_list(src_data, tgt_data, src_max_len, tgt_max_len) - return cls._encode(obj, pairs, src_vocab, tgt_vocab, src_max_vocab, tgt_max_vocab)
- - def _encode(self, pairs, src_vocab=None, tgt_vocab=None, src_max_vocab=50000, tgt_max_vocab=50000): - """ - Encodes the source and target lists of sequences using source and target vocabularies. - - Note: - Source or target sequences that are longer than the respective - max length will be filtered. - As specified by maximum vocabulary size, source and target - vocabularies will be sorted in descending token frequency and cutoff. - Tokens that are in the dataset but not retained in the vocabulary - will be dropped in the sequences. - - Args: - pairs (list): list of tuples (source sequences, target sequence) - src_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the source language, - default `None`. If a pre-populated Vocabulary object, `src_max_vocab` wouldn't be used. - tgt_vocab (Vocabulary): pre-populated Vocabulary object or a path of a file containing words for the target language, - default `None`. If a pre-populated Vocabulary object, `tgt_max_vocab` wouldn't be used. - src_max_vocab (int): maximum source vocabulary size - tgt_max_vocab (int): maximum target vocabulary size - """ - # Read in vocabularies - self.input_vocab = self._init_vocab(zip(*pairs)[0], src_max_vocab, src_vocab) - self.output_vocab = self._init_vocab(zip(*pairs)[1], tgt_max_vocab, tgt_vocab) - - # Translate input sequences to token ids - self.data = [] - for pair in pairs: - src = self.input_vocab.indices_from_sequence(pair[0]) - dst = self.output_vocab.indices_from_sequence(pair[1]) - self.data.append((src, dst)) - return self - - def _init_vocab(self, sequences, max_num_vocab, vocab): - resp_vocab = Vocabulary(max_num_vocab) - if vocab is None: - for sequence in sequences: - resp_vocab.add_sequence(sequence) - resp_vocab.trim() - elif isinstance(vocab, Vocabulary): - resp_vocab = vocab - elif isinstance(vocab, str): - for tok in utils.read_vocabulary(vocab, max_num_vocab): - resp_vocab.add_token(tok) - else: - raise AttributeError('{} is not a valid instance on a vocabulary. None, instance of Vocabulary class \ - and str are only supported formats for the vocabulary'.format(vocab)) - return resp_vocab - - def __len__(self): - return len(self.data) - -
[docs] def num_batches(self, batch_size): - """ - Get the number of batches given batch size. - - Args: - batch_size(int): number of examples in a batch - - Returns: - int: number of batches - """ - return len(range(0, len(self.data), batch_size))
- -
[docs] def make_batches(self, batch_size): - """ - Create a generator that generates batches in batch_size over data. - - Args: - batch_size (int): number of pairs in a mini-batch - - Yields: - (list(str), list(str)): next pair of source and target variable in a batch - - """ - if len(self.data) < batch_size: - raise OverflowError("batch size = {} cannot be larger than data size = {}". - format(batch_size, len(self.data))) - for i in range(0, len(self.data), batch_size): - cur_batch = self.data[i:i + batch_size] - source_variables = [pair[0] for pair in cur_batch] - target_variables = [pair[1] for pair in cur_batch] - - yield (source_variables, target_variables)
- -
[docs] def shuffle(self, seed=None): - """ - Shuffle the data. - - Args: - seed(int): provide a value for the random seed; default seed=None is truly random - """ - if seed is not None: - random.seed(seed) - random.shuffle(self.data)
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/dataset/utils.html b/docs/public/_modules/seq2seq/dataset/utils.html deleted file mode 100644 index 04eb4ed..0000000 --- a/docs/public/_modules/seq2seq/dataset/utils.html +++ /dev/null @@ -1,358 +0,0 @@ - - - - - - - - - - - seq2seq.dataset.utils — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.dataset.utils
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.dataset.utils

-import logging
-from tqdm import tqdm
-
-logger = logging.getLogger(__name__)
-
-
[docs]def filter_pair(pair, src_max_len, tgt_max_len): - """ - Returns true if a sentence pair meets the length requirements, false otherwise. - - Args: - pair ((str, str)): (source, target) sentence pair - src_max_len (int): maximum length cutoff for sentences in the source language - tgt_max_len (int): maximum length cutoff for sentences in the target language - Returns: - bool: true if the pair is shorter than the length cutoffs, false otherwise - """ - return len(pair[0]) <= src_max_len and len(pair[1]) <= tgt_max_len
- - -
[docs]def space_tokenize(text): - """ - Tokenizes a piece of text by splitting it up based on single spaces (" "). - - Args: - text (str): input text as a single string - - Returns: - list(str): list of tokens obtained by splitting the text on single spaces - """ - return text.split(" ")
- - -
[docs]def prepare_data(path, src_max_len, tgt_max_len, tokenize_func=space_tokenize): - """ - Reads a tab-separated data file where each line contains a source sentence and a target sentence. Pairs containing - a sentence that exceeds the maximum length allowed for its language are not added. - - Args: - path (str): path to the data file - src_max_len (int): maximum length cutoff for sentences in the source language - tgt_max_len (int): maximum length cutoff for sentences in the target language - tokenize_func (func): function for splitting words in a sentence (default is single-space-delimited) - - Returns: - list((str, str)): list of (source, target) string pairs - """ - - logger.info("Reading Lines from {}".format(path)) - # Read the file and split into lines - pairs = [] - with open(path) as fin: - for line in tqdm(fin): - try: - src, dst = line.strip().split("\t") - pair = map(tokenize_func, [src, dst]) - if filter_pair(pair, src_max_len, tgt_max_len): - pairs.append(pair) - except: - logger.error("Error when reading line: {0}".format(line)) - raise - - logger.info("Number of pairs: %s" % len(pairs)) - return pairs
- - -
[docs]def prepare_data_from_list(src_list, tgt_list, src_max_len, tgt_max_len, tokenize_func=space_tokenize): - """ - Reads a tab-separated data file where each line contains a source sentence and a target sentence. Pairs containing - a sentence that exceeds the maximum length allowed for its language are not added. - - Args: - src_list (list): list of source sequences - tgt_list (list): list of target sequences - src_max_len (int): maximum length cutoff for sentences in the source language - tgt_max_len (int): maximum length cutoff for sentences in the target language - tokenize_func (func): function for splitting words in a sentence (default is single-space-delimited) - - Returns: - list((str, str)): list of (source, target) string pairs - """ - if not len(src_list) == len(tgt_list): - raise ValueError('source sequence list and target sequence list has different number of entries.') - - logger.info("Preparing pairs...") - - # Read the file and split into lines - pairs = [] - - for index, _ in tqdm(enumerate(src_list)): - pair = map(tokenize_func, [src_list[index], tgt_list[index]]) - if filter_pair(pair, src_max_len, tgt_max_len): - pairs.append(pair) - - logger.info("Number of pairs: %s" % len(pairs)) - return pairs
- - -
[docs]def read_vocabulary(path, max_num_vocab=50000): - """ - Helper function to read a vocabulary file. - - Args: - path (str): filepath to raw vocabulary file - max_num_vocab (int): maximum number of words to read from vocabulary file - - Returns: - set: read words from vocabulary file - """ - logger.info("Reading vocabulary from {}".format(path)) - # Read the file and create list of tokens in vocabulary - vocab = set() - with open(path) as fin: - for line in fin: - if len(vocab) >= max_num_vocab: - break - try: - vocab.add(line.strip()) - except: - logger.error("Error when reading line: {0}".format(line)) - raise - - logger.info("Size of Vocabulary: %s" % len(vocab)) - return vocab
- -
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/dataset/vocabulary.html b/docs/public/_modules/seq2seq/dataset/vocabulary.html deleted file mode 100644 index 9b503ab..0000000 --- a/docs/public/_modules/seq2seq/dataset/vocabulary.html +++ /dev/null @@ -1,431 +0,0 @@ - - - - - - - - - - - seq2seq.dataset.vocabulary — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.dataset.vocabulary
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.dataset.vocabulary

-import cPickle as pickle
-
-
-
[docs]class Vocabulary(object): - """ - A Vocabulary stores a set of words belonging to a particular language. Words in the source vocabulary are mapped - to unique integer IDs during encoding. Words in the target vocabulary are recovered from the model's output - during decoding. - - In addition to the words in the actual language, a Vocabulary includes three reserved tokens (and IDs) for the - start-of-sentence and end-of-sentence markers, and for a special 'mask' marker used to handle - rare/unknown words. - - The Vocabulary is sorted in descending order based on frequency. If the number of words seen is greater than - the maximum size of the Vocabulary, the remaining least-frequent words are ignored. - - Args: - size(int): maximum number of words allowed in this vocabulary - """ - def __init__(self, size): - self.MASK_token_name = "MASK" - self.SOS_token_name = "SOS" - self.EOS_token_name = "EOS" - self.MASK_token_id = 0 - self.SOS_token_id = 1 - self.EOS_token_id = 2 - - self._reserved = set([self.MASK_token_name, self.SOS_token_name, self.EOS_token_name]) - self._reserved_token_id = [ - (self.MASK_token_name, self.MASK_token_id), - (self.SOS_token_name, self.SOS_token_id), - (self.EOS_token_name, self.EOS_token_id) - ] - - self._token2index = dict([(tok, idx) for tok, idx in self._reserved_token_id]) - self._index2token = dict([(idx, tok) for tok, idx in self._reserved_token_id]) - - self._token2count = {} - - self._num_tokens = 0 - self._num_reserved = 3 - - self.sorted = False - self.size = size - -
[docs] def trim(self): - """ - Sorts the vocabulary in descending order based on frequency - """ - sorted_vocab_count = sorted(self._token2count.items(), key=lambda x: x[1], reverse=True)[:self.size] - self._token2index = dict([(w, self._num_reserved + idx) for idx, (w, _) in enumerate(sorted_vocab_count)]) - self._index2token = dict([(idx, w) for w, idx in self._token2index.items()]) - for tok, idx in self._reserved_token_id: - self._token2index[tok] = idx - self._index2token[idx] = tok - if self._num_tokens > self.size: - self._num_tokens = self.size - self.sorted = True
- -
[docs] def check_sorted(self): - """ - Sorts the vocabulary (if it is not already sorted). - """ - if not self.sorted: - self.trim()
- -
[docs] def get_index(self, token): - """ - Returns: - int: ID of the given token. - """ - self.check_sorted() - return self._token2index[token]
- -
[docs] def get_token(self, index): - """ - Returns: - str: token with ID equal to the given index. - """ - self.check_sorted() - return self._index2token[index]
- -
[docs] def get_vocab_size(self): - """ - Returns: - int: maximum number of words in the vocabulary. - """ - self.check_sorted() - return self._num_tokens + self._num_reserved
- -
[docs] def add_token(self, token): - """ - Adds an occurrence of a token to the vocabulary, incrementing its observed frequency if the word already exists. - - Args: - token (int): word to add - """ - if token in self._reserved: - return - if token not in self._token2count: - self._token2count[token] = 1 - self._num_tokens += 1 - else: - self._token2count[token] += 1 - self.sorted = False
- -
[docs] def add_sequence(self, sequence): - """ - Adds a sequence of words to the vocabulary. - - Args: - sequence(list(str)): list of words, e.g. representing a sentence. - """ - for tok in sequence: - self.add_token(tok)
- -
[docs] def indices_from_sequence(self, sequence): - """ - Maps a list of words to their token IDs, or else the 'mask' token if the word is rare/unknown. - - Args: - sequence (list(str)): list of words to map - Returns: - list(int): list of mapped IDs - """ - self.check_sorted() - return [self._token2index[tok] - if tok in self._token2index - else self.MASK_token_id - for tok in sequence]
- -
[docs] def sequence_from_indices(self, indices): - """ - Recover a sentence from a list of token IDs. - - Args: - indices (list(int)): list of token IDs. - - Returns: - list(str): recovered sentence, represented as a list of words - """ - seq = [self._index2token[idx] for idx in indices] - return seq
- -
[docs] def save(self, file_name): - """ - Writes this Vocabulary to disk in a pickle file. - - Args: - file_name (str): path to the target pickle file - """ - self.check_sorted() - vocab_size = self.get_vocab_size() - with open(file_name, "w") as f: - for index, token in self._index2token.items(): - if token in self._reserved: - continue - if index < vocab_size -1: - f.write(token + "\n") - else: - f.write(token)
- -
[docs] @classmethod - def load(cls, file_name): - """ - Loads a Vocabulary from a pickle file on disk. - - Args: - file_name (str): path to the pickle file - - Returns: - Vocabulary: loaded Vocabulary - """ - with open(file_name, "r") as f: - tokens = f.readlines() - vocab = Vocabulary(len(tokens)) - for token in tokens: - vocab.add_token(token.strip()) - return vocab
- - def __eq__(self, other): - if not isinstance(other, self.__class__): - return False - self.check_sorted() - other.check_sorted() - - if self._token2count == other._token2count and self._token2index == other._token2index \ - and self._index2token == other._index2token: - return True - else: - return False - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash(self._token2index)
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/evaluator/evaluator.html b/docs/public/_modules/seq2seq/evaluator/evaluator.html deleted file mode 100644 index e00520a..0000000 --- a/docs/public/_modules/seq2seq/evaluator/evaluator.html +++ /dev/null @@ -1,285 +0,0 @@ - - - - - - - - - - - seq2seq.evaluator.evaluator — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.evaluator.evaluator
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.evaluator.evaluator

-from __future__ import print_function
-
-import torch
-
-from seq2seq.loss import NLLLoss
-
-
[docs]class Evaluator(object): - """ Class to evaluate models with given datasets. - Args: - loss (seq2seq.loss, optional): loss for evaluator (default: seq2seq.loss.NLLLoss) - batch_size (int, optional): batch size for evaluator (default: 64) - """ - - def __init__(self, loss=NLLLoss(), batch_size=64): - self.loss = loss - self.batch_size = batch_size - -
[docs] def evaluate(self, model, data): - """ Evaluate a model on given dataset and return performance. - - Args: - model (seq2seq.models): model to evaluate - data (seq2seq.dataset.dataset.Dataset): dataset to evaluate against - - Returns: - loss (float): loss of the given model on the given dataset - """ - model.eval() - loss = self.loss - loss.reset() - - for batch in data.make_batches(self.batch_size): - input_variables = batch[0] - target_variables = batch[1] - - decoder_outputs, decoder_hidden, other = model(input_variables, target_variables, volatile=True) - - # Evaluation - targets = other['inputs'] - lengths = other['length'] - for b in range(len(targets)): - # Batch wise loss - batch_target = targets[b] - batch_len = lengths[b] - # Crop output and target to batch length - batch_output = torch.stack([output[b] for output in decoder_outputs[:batch_len]]) - batch_target = batch_target[:batch_len] - # Evaluate loss - loss.eval_batch(batch_output, batch_target) - - return loss.get_loss()
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/evaluator/predictor.html b/docs/public/_modules/seq2seq/evaluator/predictor.html deleted file mode 100644 index 03f823d..0000000 --- a/docs/public/_modules/seq2seq/evaluator/predictor.html +++ /dev/null @@ -1,272 +0,0 @@ - - - - - - - - - - - seq2seq.evaluator.predictor — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.evaluator.predictor
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.evaluator.predictor

-
[docs]class Predictor(object): - - def __init__(self, model, src_vocab, tgt_vocab): - """ - Predictor class to evaluate for a given model. - Args: - model (seq2seq.models): trained model. This can be loaded from a checkpoint - using `seq2seq.util.checkpoint.load` - src_vocab (seq2seq.dataset.vocabulary.Vocabulary): source sequence vocabulary - tgt_vocab (seq2seq.dataset.vocabulary.Vocabulary): target sequence vocabulary - """ - self.model = model - self.model.eval() - self.src_vocab = src_vocab - self.tgt_vocab = tgt_vocab - -
[docs] def predict(self, src_seq): - """ Make prediction given `src_seq` as input. - - Args: - src_seq (list): list of tokens in source language - - Returns: - tgt_seq (list): list of tokens in target language as predicted - by the pre-trained model - """ - src_id_seq = self.src_vocab.indices_from_sequence(src_seq) - - softmax_list, _, other = self.model([src_id_seq], volatile=True) - length = other['length'][0] - - tgt_id_seq = [] - for i in range(length): - idx = softmax_list[i].max(1)[1].data[0][0] - tgt_id_seq.append(idx) - - tgt_seq = self.tgt_vocab.sequence_from_indices(tgt_id_seq) - return tgt_seq
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/loss/loss.html b/docs/public/_modules/seq2seq/loss/loss.html deleted file mode 100644 index cc2d5f0..0000000 --- a/docs/public/_modules/seq2seq/loss/loss.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - - - - - - - - seq2seq.loss.loss — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- - - - -
-
-
-
- -

Source code for seq2seq.loss.loss

-from __future__ import print_function
-import math
-import torch.nn as nn
-import numpy as np
-
-
[docs]class Loss(object): - """ Base class for encapsulation of the loss functions. - - This class defines interfaces that are commonly used with loss functions - in training and inferencing. For information regarding individual loss - functions, please refer to http://pytorch.org/docs/master/nn.html#loss-functions - - Note: - Do not use this class directly, use one of the sub classes. - - Args: - name (str): name of the loss function used by logging messages. - criterion (torch.nn._Loss): one of PyTorch's loss function. Refer - to http://pytorch.org/docs/master/nn.html#loss-functions for - a list of them. - - Attributes: - name (str): name of the loss function used by logging messages. - criterion (torch.nn._Loss): one of PyTorch's loss function. Refer - to http://pytorch.org/docs/master/nn.html#loss-functions for - a list of them. Implementation depends on individual - sub-classes. - acc_loss (int or torcn.nn.Tensor): variable that stores accumulated loss. - norm_term (float): normalization term that can be used to calculate - the loss of multiple batches. Implementation depends on individual - sub-classes. - """ - - def __init__(self, name, criterion): - self.name = name - self.criterion = criterion - if not issubclass(type(self.criterion), nn.modules.loss._Loss): - raise ValueError("Criterion has to be a subclass of torch.nn._Loss") - # accumulated loss - self.acc_loss = 0 - # normalization term - self.norm_term = 0 - -
[docs] def reset(self): - """ Reset the accumulated loss. """ - self.acc_loss = 0 - self.norm_term = 0
- -
[docs] def get_loss(self): - """ Get the loss. - - This method defines how to calculate the averaged loss given the - accumulated loss and the normalization term. Override to define your - own logic. - - Returns: - loss (float): value of the loss. - """ - raise NotImplementedError
- -
[docs] def eval_batch(self, outputs, target): - """ Evaluate and accumulate loss given outputs and expected results. - - This method is called after each batch with the batch outputs and - the target (expected) results. The loss and normalization term are - accumulated in this method. Override it to define your own accumulation - method. - - Args: - outputs (torch.Tensor): outputs of a batch. - target (torch.Tensor): expected output of a batch. - """ - raise NotImplementedError
- - def cuda(self): - self.criterion.cuda() - - def backward(self): - if type(self.acc_loss) is int: - raise ValueError("No loss to back propagate.") - self.acc_loss.backward()
- -
[docs]class NLLLoss(Loss): - """ Batch averaged negative log-likelihood loss. - - Args: - weight (torch.Tensor, optional): refer to http://pytorch.org/docs/master/nn.html#nllloss - mask (int, optional): index of masked token, i.e. weight[mask] = 0. - size_average (bool, optional): refer to http://pytorch.org/docs/master/nn.html#nllloss - """ - - _NAME = "Avg NLLLoss" - - def __init__(self, weight=None, mask=None, size_average=True): - self.mask = mask - if mask is not None: - if weight is None: - raise ValueError("Must provide weight with a mask.") - weight[mask] = 0 - - super(NLLLoss, self).__init__( - self._NAME, - nn.NLLLoss(weight=weight, size_average=size_average)) - - def get_loss(self): - if type(self.acc_loss) is int: - return 0 - return self.acc_loss.data[0] / self.norm_term - - def eval_batch(self, outputs, target): - self.acc_loss += self.criterion(outputs, target) - self.norm_term += 1
- -
[docs]class Perplexity(NLLLoss): - """ Language model perplexity loss. - - Perplexity is the token averaged likelihood. When the averaging options are the - same, it is the exponential of negative log-likelihood. - - Args: - weight (torch.Tensor, optional): refer to http://pytorch.org/docs/master/nn.html#nllloss - mask (int, optional): index of masked token, i.e. weight[mask] = 0. - """ - - _NAME = "Perplexity" - _MAX_EXP = 100 - - def __init__(self, weight=None, mask=None): - super(Perplexity, self).__init__(weight=weight, mask=mask, size_average=False) - - def eval_batch(self, outputs, target): - self.acc_loss += self.criterion(outputs, target) - if self.mask is None: - self.norm_term += np.prod(target.size()) - else: - self.norm_term += target.data.ne(self.mask).sum() - - def get_loss(self): - nll = super(Perplexity, self).get_loss() - if nll > Perplexity._MAX_EXP: - print("WARNING: Loss exceeded maximum value, capping to e^100") - return math.exp(Perplexity._MAX_EXP) - return math.exp(nll)
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/models/DecoderRNN.html b/docs/public/_modules/seq2seq/models/DecoderRNN.html deleted file mode 100644 index f9a1548..0000000 --- a/docs/public/_modules/seq2seq/models/DecoderRNN.html +++ /dev/null @@ -1,411 +0,0 @@ - - - - - - - - - - - seq2seq.models.DecoderRNN — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.models.DecoderRNN
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.models.DecoderRNN

-import random
-
-import numpy as np
-
-import torch
-if torch.cuda.is_available():
-    import torch.cuda as device
-else:
-    import torch as device
-import torch.nn as nn
-from torch.autograd import Variable
-import torch.nn.functional as F
-
-from attention import Attention
-from baseRNN import BaseRNN
-
-
-
[docs]class DecoderRNN(BaseRNN): - r""" - Provides functionality for decoding in a seq2seq framework, with an option for attention. - - Args: - vocab (Vocabulary): an object of Vocabulary class - max_len (int): a maximum allowed length for the sequence to be processed - hidden_size (int): the number of features in the hidden state `h` - n_layers (int, optional): number of recurrent layers (default: 1) - rnn_cell (str, optional): type of RNN cell (default: gru) - input_dropout_p (float, optional): dropout probability for the input sequence (default: 0) - dropout_p (float, optional): dropout probability for the output sequence (default: 0) - use_attention(bool, optional): flag indication whether to use attention mechanism or not (default: false) - - Attributes: - KEY_ATTN_SCORE (str): key used to indicate attention weights in `ret_dict` - KEY_LENGTH (str): key used to indicate a list representing lengths of output sequences in `ret_dict` - KEY_SEQUENCE (str): key used to indicate a list of sequences in `ret_dict` - KEY_INPUT (str): key used to target outputs in `ret_dict` - - Inputs: inputs, encoder_hidden, encoder_outputs, function, teacher_forcing_ratio - - **inputs** (seq_len, batch, input_size): list of sequences, whose length is the batch size and within which - each sequence is a list of token IDs. It is used for teacher forcing when provided. (default is `None`) - - **encoder_hidden** (batch, seq_len, hidden_size): tensor containing the features in the hidden state `h` of - encoder. Used as the initial hidden state of the decoder. - - **encoder_outputs** (batch, seq_len, hidden_size): tensor with containing the outputs of the encoder. - Used for attention mechanism (default is `None`). - - **function** (torch.nn.Module): A function used to generate symbols from RNN hidden state - (default is `torch.nn.functional.log_softmax`). - - **teacher_forcing_ratio** (float): The probability that teacher forcing will be used. A random number is - drawn uniformly from 0-1 for every decoding token, and if the sample is smaller than the given value, - teacher forcing would be used (default is 0). - - Outputs: decoder_outputs, decoder_hidden, ret_dict - - **decoder_outputs** (batch): batch-length list of tensors with size (max_length, hidden_size) containing the - outputs of the decoder. - - **decoder_hidden** (num_layers * num_directions, batch, hidden_size): tensor containing the last hidden - state of the decoder. - - **ret_dict**: dictionary containing additional information as follows {*KEY_LENGTH* : list of integers - representing lengths of output sequences, *KEY_SEQUENCE* : list of sequences, where each sequence is a list of - predicted token IDs, *KEY_INPUT* : target outputs if provided for decoding, *KEY_ATTN_SCORE* : list of - sequences, where each list is of attention weights }. 
- """ - - KEY_ATTN_SCORE = 'attention_score' - KEY_LENGTH = 'length' - KEY_SEQUENCE = 'sequence' - KEY_INPUT = 'inputs' - - def __init__(self, vocab, max_len, hidden_size, - n_layers=1, rnn_cell='gru', - input_dropout_p=0, dropout_p=0, use_attention=False): - super(DecoderRNN, self).__init__(vocab, max_len, hidden_size, - input_dropout_p, dropout_p, - n_layers, rnn_cell) - - self.output_size = self.vocab.get_vocab_size() - self.dropout_p = dropout_p - self.max_length = max_len - self.use_attention = use_attention - - self.init_input = None - - self.embedding = nn.Embedding(self.output_size, self.hidden_size) - self.dropout = nn.Dropout(self.dropout_p) - if use_attention: - self.attention = Attention(self.hidden_size) - - self.out = nn.Linear(self.hidden_size, self.output_size) - - def init_start_input(self, batch_size): - # GO input for decoder # Re-initialize when batch size changes - if self.init_input is None or self.init_input.size(0) != batch_size: - self.init_input = Variable(device.LongTensor([[self.vocab.SOS_token_id]*batch_size])).view(batch_size, -1) - return self.init_input - - def forward_step(self, input_var, hidden, encoder_outputs, function): - batch_size = input_var.size(0) - output_size = input_var.size(1) - embedded = self.embedding(input_var) - embedded = self.input_dropout(embedded) - - output, hidden = self.rnn(embedded, hidden) - output = self.dropout(output) - - attn = None - if self.use_attention: - output, attn = self.attention(output, encoder_outputs) - - predicted_softmax = function(self.out(output.view(-1, self.hidden_size))).view(batch_size, output_size, -1) - return predicted_softmax, hidden, attn - - def forward_rnn(self, inputs=None, encoder_hidden=None, function=F.log_softmax, - encoder_outputs=None, teacher_forcing_ratio=0): - ret_dict = dict() - if self.use_attention: - if encoder_outputs is None: - raise ValueError("Argument encoder_outputs cannot be None when attention is used.") - ret_dict[DecoderRNN.KEY_ATTN_SCORE] = list() - if inputs is None: - if teacher_forcing_ratio > 0: - raise ValueError("Teacher forcing has to be disabled (set 0) when no inputs is provided.") - if inputs is None and encoder_hidden is None: - batch_size = 1 - else: - if inputs is not None: - batch_size = inputs.size(0) - else: - if self.rnn_cell is nn.LSTM: - batch_size = encoder_hidden[0].size(1) - elif self.rnn_cell is nn.GRU: - batch_size = encoder_hidden.size(1) - - decoder_input = self.init_start_input(batch_size) - decoder_hidden = encoder_hidden - - use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False - - decoder_outputs = [] - sequence_symbols = [] - lengths = np.array([self.max_length] * batch_size) - - def decode(step, step_output, step_attn): - decoder_outputs.append(step_output) - if self.use_attention: - ret_dict[DecoderRNN.KEY_ATTN_SCORE].append(step_attn) - symbols = decoder_outputs[-1].topk(1)[1] - sequence_symbols.append(symbols) - - eos_batches = symbols.data.eq(self.vocab.EOS_token_id) - if eos_batches.dim() > 0: - eos_batches = eos_batches.cpu().view(-1).numpy() - update_idx = ((lengths > di) & eos_batches) != 0 - lengths[update_idx] = len(sequence_symbols) - return symbols - - # Manual unrolling is used to support random teacher forcing. 
- # If teacher_forcing_ratio is True or False instead of a probability, the unrolling can be done in graph - if use_teacher_forcing: - decoder_input = torch.cat([decoder_input, inputs], dim=1) - decoder_output, decoder_hidden, attn = self.forward_step(decoder_input, decoder_hidden, encoder_outputs, - function=function) - - for di in range(self.max_length): - step_output = decoder_output[:, di, :] - step_attn = attn[:, di, :] - decode(di, step_output, step_attn) - else: - for di in range(self.max_length): - decoder_output, decoder_hidden, step_attn = self.forward_step(decoder_input, decoder_hidden, encoder_outputs, - function=function) - step_output = decoder_output.squeeze(1) - symbols = decode(di, step_output, step_attn) - decoder_input = symbols - - ret_dict[DecoderRNN.KEY_SEQUENCE] = sequence_symbols - ret_dict[DecoderRNN.KEY_LENGTH] = lengths.tolist() - ret_dict[DecoderRNN.KEY_INPUT] = inputs - - return decoder_outputs, decoder_hidden, ret_dict
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/models/EncoderRNN.html b/docs/public/_modules/seq2seq/models/EncoderRNN.html deleted file mode 100644 index d2158ee..0000000 --- a/docs/public/_modules/seq2seq/models/EncoderRNN.html +++ /dev/null @@ -1,295 +0,0 @@ - - - - - - - - - - - seq2seq.models.EncoderRNN — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.models.EncoderRNN
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.models.EncoderRNN

-import torch.nn as nn
-from baseRNN import BaseRNN
-
-
-
[docs]class EncoderRNN(BaseRNN): - r""" - Applies a multi-layer RNN to an input sequence. - Args: - vocab (Vocabulary): an object of Vocabulary class - max_len (int): a maximum allowed length for the sequence to be processed - hidden_size (int): the number of features in the hidden state `h` - input_dropout_p (float, optional): dropout probability for the input sequence (default: 0) - dropout_p (float, optional): dropout probability for the output sequence (default: 0) - n_layers (int, optional): number of recurrent layers (default: 1) - rnn_cell (str, optional): type of RNN cell (default: gru) - - Inputs: inputs, volatile - - **inputs**: list of sequences, whose length is the batch size and within which each sequence is a list of token IDs. - - **volatile** (bool, optional): boolean flag specifying whether to preserve gradients, when you are sure you - will not be even calling .backward(). - Outputs: output, hidden - - **output** (batch, seq_len, hidden_size): tensor containing the encoded features of the input sequence - - **hidden** (num_layers * num_directions, batch, hidden_size): tensor containing the features in the hidden state `h` - - Examples:: - - >>> encoder = EncoderRNN(input_vocab, max_seq_length, hidden_size) - >>> output, hidden = encoder(input) - - """ - def __init__(self, vocab, max_len, hidden_size, - input_dropout_p=0, dropout_p=0, - n_layers=1, rnn_cell='gru'): - super(EncoderRNN, self).__init__(vocab, max_len, hidden_size, - input_dropout_p, dropout_p, n_layers, rnn_cell) - - self.embedding = nn.Embedding(self.vocab.get_vocab_size(), hidden_size) - self.lengths = None - - def forward(self, *args, **kwargs): - batch = args[0] - self.lengths = [min(self.max_len, len(seq)) for seq in batch] - return super(EncoderRNN, self).forward(batch, **kwargs) - -
[docs] def forward_rnn(self, input_var): - """ - Applies a multi-layer RNN to an input sequence. - - Args: - input_var (batch, seq_len): tensor containing the features of the input sequence. - - returns: output, hidden - - **output** (batch, seq_len, hidden_size): variable containing the encoded features of the input sequence - - **hidden** (num_layers * num_directions, batch, hidden_size): variable containing the features in the hidden state h - """ - embedded = self.embedding(input_var) - embedded = self.input_dropout(embedded) - embedded = nn.utils.rnn.pack_padded_sequence(embedded, self.lengths, batch_first=True) - output, hidden = self.rnn(embedded) - output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=True) - return output, hidden
-
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/models/TopKDecoder.html b/docs/public/_modules/seq2seq/models/TopKDecoder.html deleted file mode 100644 index 585e6e8..0000000 --- a/docs/public/_modules/seq2seq/models/TopKDecoder.html +++ /dev/null @@ -1,554 +0,0 @@ - - - - - - - - - - - seq2seq.models.TopKDecoder — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.models.TopKDecoder
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.models.TopKDecoder

-import torch
-import torch.nn.functional as F
-from torch.autograd import Variable
-from baseRNN import BaseRNN
-
-
-
[docs]class TopKDecoder(BaseRNN): - r""" - Top-K decoding with beam search. - - Args: - decoder_rnn (DecoderRNN): An object of DecoderRNN used for decoding. - k (int): Size of the beam. - - Inputs: inputs, encoder_hidden, encoder_outputs, function, teacher_forcing_ratio - - **inputs** (seq_len, batch, input_size): list of sequences, whose length is the batch size and within which - each sequence is a list of token IDs. It is used for teacher forcing when provided. (default is `None`) - - **encoder_hidden** (batch, seq_len, hidden_size): tensor containing the features in the hidden state `h` of - encoder. Used as the initial hidden state of the decoder. - - **encoder_outputs** (batch, seq_len, hidden_size): tensor with containing the outputs of the encoder. - Used for attention mechanism (default is `None`). - - **function** (torch.nn.Module): A function used to generate symbols from RNN hidden state - (default is `torch.nn.functional.log_softmax`). - - **teacher_forcing_ratio** (float): The probability that teacher forcing will be used. A random number is - drawn uniformly from 0-1 for every decoding token, and if the sample is smaller than the given value, - teacher forcing would be used (default is 0). - - Outputs: decoder_outputs, decoder_hidden, ret_dict - - **decoder_outputs** (batch): batch-length list of tensors with size (max_length, hidden_size) containing the - outputs of the decoder. - - **decoder_hidden** (num_layers * num_directions, batch, hidden_size): tensor containing the last hidden - state of the decoder. - - **ret_dict**: dictionary containing additional information as follows {*length* : list of integers - representing lengths of output sequences, *sequence* : list of sequences, where each sequence is a list of - predicted token IDs, *inputs* : target outputs if provided for decoding}. - - """ - - def __init__(self, decoder_rnn, k): - super(TopKDecoder, self).__init__(decoder_rnn.lang, decoder_rnn.max_length) - self.rnn = decoder_rnn - self.k = k - self.V = self.rnn.lang.get_vocab_size() - self.SOS = self.rnn.lang.SOS_token_id - self.EOS = self.rnn.lang.EOS_token_id - -
[docs] def forward_rnn(self, inputs=None, encoder_hidden=None, encoder_outputs=None, function=F.log_softmax, - retain_output_probs=True): - """ - Forward rnn for MAX_LENGTH steps. Look at :func:`seq2seq.models.DecoderRNN.DecoderRNN.forward_rnn` for details. - """ - - # TODO: Looks like encoder_hidden is not optional, we need unit tests - # for this class - # Get batch size, assuming h_0 is num_layers*directions x b x hidden_dim - b = encoder_hidden.size(1) - h = encoder_hidden.size(2) - - self.pos_index = Variable(torch.LongTensor(range(b)) * self.k).view(-1, 1) - - # Inflate the initial hidden states to be of size: b*k x h - hidden = self._inflate(encoder_hidden, self.k) - # ... same idea for encoder_outputs and decoder_outputs - if self.rnn.use_attention: - if encoder_outputs is None: - raise ValueError("Argument encoder_outputs cannot be None when attention is used.") - else: - inflated_encoder_outputs = self._inflate(encoder_outputs, self.k) - else: - inflated_encoder_outputs = None - - # Initialize the scores; for the first step, - # ignore the inflated copies to avoid duplicate entries in the top k - sequence_scores = torch.Tensor(b*self.k, 1) - sequence_scores.fill_(-float('Inf')) - sequence_scores.index_fill_(0, torch.LongTensor([i*self.k for i in range(0, b)]), 0.0) - sequence_scores = Variable(sequence_scores) - - # Initialize the input vector - input_var = Variable(torch.transpose(torch.LongTensor([[self.SOS]*b*self.k]), 0, 1)) - - # Store decisions for backtracking - stored_outputs = list() - stored_scores = list() - stored_predecessors = list() - stored_emitted_symbols = list() - stored_hidden = list() - - for _ in range(0, self.rnn.max_length): - - # Run the RNN one step forward - log_softmax_output, hidden, _ = self.rnn.forward_step(input_var, hidden, inflated_encoder_outputs, function=function) - - # If doing local backprop (e.g. 
supervised training), retain the output layer - if retain_output_probs: - stored_outputs.append(log_softmax_output) - - # To get the full sequence scores for the new candidates, add the local scores for t_i to the predecessor scores for t_(i-1) - sequence_scores = self._inflate(sequence_scores, self.V) - sequence_scores += log_softmax_output - scores, candidates = sequence_scores.view(b, -1).topk(self.k, dim=1) - - # Reshape input = (bk, 1) and sequence_scores = (bk, 1) - input_var = (candidates % self.V).view(b * self.k, 1) - sequence_scores = scores.view(b * self.k, 1) - - # Update fields for next timestep - predecessors = (candidates / self.V + self.pos_index.expand_as(candidates)).view(b*self.k, 1) - hidden = hidden.index_select(1, predecessors.squeeze()) - - # Update sequence scores and erase scores for end-of-sentence symbol so that they aren't expanded - stored_scores.append(sequence_scores.clone()) - eos_indices = input_var.data.eq(self.EOS) - if eos_indices.nonzero().dim() > 0: - sequence_scores.data.masked_fill_(eos_indices, -float('inf')) - - # Cache results for backtracking - stored_predecessors.append(predecessors) - stored_emitted_symbols.append(input_var) - stored_hidden.append(hidden) - - # Do backtracking to return the optimal values - output, h_t, h_n, s, l, p = self._backtrack(stored_outputs, stored_hidden, - stored_predecessors, stored_emitted_symbols, stored_scores, b, h) - - # Build return objects - decoder_outputs = [step[:, 0, :] for step in output] - decoder_hidden = h_n[:, :, 0, :] - metadata = {} - metadata['inputs'] = inputs - metadata['output'] = output - metadata['h_t'] = h_t - metadata['score'] = s - metadata['length'] = l - metadata['sequence'] = p - return decoder_outputs, decoder_hidden, metadata
- - def _backtrack(self, nw_output, nw_hidden, predecessors, symbols, scores, b, hidden_size): - """Backtracks over batch to generate optimal k-sequences. - - Args: - nw_output [(batch*k, vocab_size)] * sequence_length: A Tensor of outputs from network - nw_hidden [(num_layers, batch*k, hidden_size)] * sequence_length: A Tensor of hidden states from network - predecessors [(batch*k)] * sequence_length: A Tensor of predecessors - symbols [(batch*k)] * sequence_length: A Tensor of predicted tokens - scores [(batch*k)] * sequence_length: A Tensor containing sequence scores for every token t = [0, ... , seq_len - 1] - b: Size of the batch - hidden_size: Size of the hidden state - - Returns: - output [(batch, k, vocab_size)] * sequence_length: A list of the output probabilities (p_n) - from the last layer of the RNN, for every n = [0, ... , seq_len - 1] - - h_t [(batch, k, hidden_size)] * sequence_length: A list containing the output features (h_n) - from the last layer of the RNN, for every n = [0, ... , seq_len - 1] - - h_n(batch, k, hidden_size): A Tensor containing the last hidden state for all top-k sequences. - - score [batch, k]: A list containing the final scores for all top-k sequences - - length [batch, k]: A list specifying the length of each sequence in the top-k candidates - - p (batch, k, sequence_len): A Tensor containing predicted sequence - """ - - # initialize return variables given different types - output = list() - h_t = list() - p = list() - h_n = torch.zeros(nw_hidden[0].size()) # Placeholder for last hidden state of top-k sequences. - # If a (top-k) sequence ends early in decoding, `h_n` contains - # its hidden state when it sees EOS. Otherwise, `h_n` contains - # the last hidden state of decoding. - l = [[self.rnn.max_length] * self.k for _ in range(b)] # Placeholder for lengths of top-k sequences - # Similar to `h_n` - - # the last step output of the beams are not sorted - # thus they are sorted here - sorted_score, sorted_idx = scores[-1].view(b, self.k).topk(self.k) - # initialize the sequence scores with the sorted last step beam scores - s = sorted_score.clone() - - batch_eos_found = [0] * b # the number of EOS found - # in the backward loop below for each batch - - t = self.rnn.max_length - 1 - # initialize the back pointer with the sorted order of the last step beams. - # add self.pos_index for indexing variable with b*k as the first dimension. - t_predecessors = (sorted_idx + self.pos_index.expand_as(sorted_idx)).view(b * self.k) - while t >= 0: - # Re-order the variables with the back pointer - current_output = nw_output[t].index_select(0, t_predecessors) - current_hidden = nw_hidden[t].index_select(1, t_predecessors) - current_symbol = symbols[t].index_select(0, t_predecessors) - # Re-order the back pointer of the previous step with the back pointer of - # the current step - t_predecessors = predecessors[t].index_select(0, t_predecessors).squeeze() - - # This tricky block handles dropped sequences that see EOS earlier. - # The basic idea is summarized below: - # - # Terms: - # Ended sequences = sequences that see EOS early and dropped - # Survived sequences = sequences in the last step of the beams - # - # Although the ended sequences are dropped during decoding, - # their generated symbols and complete backtracking information are still - # in the backtracking variables. - # For each batch, everytime we see an EOS in the backtracking process, - # 1. 
If there is survived sequences in the return variables, replace - # the one with the lowest survived sequence score with the new ended - # sequences - # 2. Otherwise, replace the ended sequence with the lowest sequence - # score with the new ended sequence - # - eos_indices = symbols[t].data.squeeze(1).eq(self.EOS).nonzero() - if eos_indices.dim() > 0: - for i in range(eos_indices.size(0)-1, -1, -1): - # Indices of the EOS symbol for both variables - # with b*k as the first dimension, and b, k for - # the first two dimensions - idx = eos_indices[i] - b_idx = idx[0] / self.k - # The indices of the replacing position - # according to the replacement strategy noted above - res_k_idx = self.k - (batch_eos_found[b_idx] % self.k) - 1 - batch_eos_found[b_idx] += 1 - res_idx = b_idx * self.k + res_k_idx - - # Replace the old information in return variables - # with the new ended sequence information - t_predecessors[res_idx] = predecessors[t][idx[0]] - current_output[res_idx, :] = nw_output[t][idx[0], :] - current_hidden[:, res_idx, :] = nw_hidden[t][:, idx[0], :] - h_n[:, res_idx, :] = nw_hidden[t][:, idx[0], :].data - current_symbol[res_idx, :] = symbols[t][idx[0]] - s[b_idx, res_k_idx] = scores[t][idx[0]].data[0] - l[b_idx][res_k_idx] = t + 1 - - # record the back tracked results - output.append(current_output) - h_t.append(current_hidden) - p.append(current_symbol) - - t -= 1 - - # Sort and re-order again as the added ended sequences may change - # the order (very unlikely) - s, re_sorted_idx = s.topk(self.k) - for b_idx in range(b): - l[b_idx] = [l[b_idx][k_idx.data[0]] for k_idx in re_sorted_idx[b_idx,:]] - - re_sorted_idx = (re_sorted_idx + self.pos_index.expand_as(re_sorted_idx)).view(b * self.k) - - # Reverse the sequences and re-order at the same time - # It is reversed because the backtracking happens in reverse time order - output = [step.index_select(0, re_sorted_idx).view(b, self.k, -1) for step in reversed(output)] - p = [step.index_select(0, re_sorted_idx).view(b, self.k, -1) for step in reversed(p)] - h_t = [step.index_select(1, re_sorted_idx).view(-1, b, self.k, hidden_size) for step in reversed(h_t)] - h_n = h_n.index_select(1, re_sorted_idx.data).view(-1, b, self.k, hidden_size) - s = s.data - - if self.k == 1: - l = [_l[0] for _l in l] - - return output, h_t, h_n, s, l, p - - def _mask_symbol_scores(self, score, idx, masking_score=-float('inf')): - score[idx] = masking_score - - def _mask(self, tensor, idx, dim=0, masking_score=-float('inf')): - if len(idx.size()) > 0: - indices = idx[:, 0] - tensor.index_fill_(dim, indices, masking_score) - - def _inflate(self, tensor, times): - """ - Given a tensor, 'inflates' it along the given dimension by replicating each slice specified number of times (in-place) - - Args: - tensor: A :class:`Tensor` to inflate - times: number of repetitions - dimension: axis for inflation (default=0) - - Returns: - A :class:`Tensor` - - Examples:: - >> a = torch.LongTensor([[1, 2], [3, 4]]) - >> a - 1 2 - 3 4 - [torch.LongTensor of size 2x2] - >> decoder = TopKDecoder(nn.RNN(10, 20, 2), 3) - >> b = decoder._inflate(a, 1, dimension=1) - >> b - 1 1 2 2 - 3 3 4 4 - [torch.LongTensor of size 2x4] - >> c = decoder._inflate(a, 1, dimension=0) - >> c - 1 2 - 1 2 - 3 4 - 3 4 - [torch.LongTensor of size 4x2] - - """ - tensor_dim = len(tensor.size()) - if tensor_dim is 3: - b = tensor.size(1) - return tensor.repeat(1, 1, times).view(tensor.size(0), b * times, -1) - elif tensor_dim is 2: - return tensor.repeat(1, times) - elif tensor_dim is 1: - b = tensor.size(0) 
- return tensor.repeat(times).view(b, -1) - else: - raise ValueError("Tensor can be of 1D, 2D or 3D only. This one is {}D.".format(tensor_dim))
- -
- -
-
- -
-
- - -
-
- -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/public/_modules/seq2seq/models/attention.html b/docs/public/_modules/seq2seq/models/attention.html deleted file mode 100644 index 1acf7bf..0000000 --- a/docs/public/_modules/seq2seq/models/attention.html +++ /dev/null @@ -1,306 +0,0 @@ - - - - - - - - - - - seq2seq.models.attention — pytorch-seq2seq 0.1.2 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - -
- - - - - - -
-
- - - - - - - - - - - - - - - - -
- -
    - -
  • Docs »
  • - -
  • Module code »
  • - -
  • seq2seq.models.attention
  • - - -
  • - - - -
  • - -
- - -
-
-
-
- -

Source code for seq2seq.models.attention

-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-
[docs]class Attention(nn.Module): - r""" - Applies an attention mechanism on the output features from the decoder. - - .. math:: - \begin{array}{ll} - x = context*output \\ - attn = exp(x_i - max_i x_i) / sum_j exp(x_j - max_i x_i) \\ - output = \tanh(w * (attn * context) + b * output) - \end{array} - - Args: - dim(int): The number of expected features in the output - - Inputs: output, context - - **output** (batch, output_len, dimensions): tensor containing the output features from the decoder. - - **context** (batch, input_len, dimensions): tensor containing features of the encoded input sequence. - - Outputs: output, attn - - **output** (batch, output_len, dimensions): tensor containing the attended output features from the decoder. - - **attn** (batch, output_len, input_len): tensor containing attention weights. - - Attributes: - linear_out (torch.nn.Linear): applies a linear transformation to the incoming data: :math:`y = Ax + b`. - mask (torch.Tensor, optional): applies a :math:`-inf` to the indices specified in the `Tensor`. - - Examples:: - - >>> attention = seq2seq.models.Attention(256) - >>> context = Variable(torch.randn(5, 3, 256)) - >>> output = Variable(torch.randn(5, 5, 256)) - >>> output, attn = attention(output, context) - - """ - def __init__(self, dim): - super(Attention, self).__init__() - self.linear_out = nn.Linear(dim*2, dim) - self.mask = None - -
[docs] def set_mask(self, mask): - """ - Sets indices to be masked - - Args: - mask (torch.Tensor): tensor containing indices to be masked - """ - self.mask = mask
- - def forward(self, output, context): - batch_size = output.size(0) - hidden_size = output.size(2) - input_size = context.size(1) - # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len) - attn = torch.bmm(output, context.transpose(1, 2)) - if self.mask is not None: - attn.data.masked_fill_(self.mask, -float('inf')) - attn = F.softmax(attn.view(-1, input_size)).view(batch_size, -1, input_size) - - # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim) - mix = torch.bmm(attn, context) - - # concat -> (batch, out_len, 2*dim) - combined = torch.cat((mix, output), dim=2) - # output -> (batch, out_len, dim) - output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size) - - return output, attn
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/models/baseRNN.html b/docs/public/_modules/seq2seq/models/baseRNN.html
deleted file mode 100644
index ed42f44..0000000
--- a/docs/public/_modules/seq2seq/models/baseRNN.html
+++ /dev/null
@@ -1,324 +0,0 @@
-seq2seq.models.baseRNN — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.models.baseRNN

-""" A base class for RNN. """
-import torch
-import torch.nn as nn
-
-
-
-class BaseRNN(nn.Module):
-    r"""
-    Applies a multi-layer RNN to an input sequence.
-
-    Note:
-        Do not use this class directly, use one of the sub classes.
-
-    Args:
-        vocab (Vocabulary): object of Vocabulary class
-        max_len (int): maximum allowed length for the sequence to be processed
-        hidden_size (int): number of features in the hidden state `h`
-        input_dropout_p (float): dropout probability for the input sequence
-        dropout_p (float): dropout probability for the output sequence
-        n_layers (int): number of recurrent layers
-        rnn_cell (str): type of RNN cell (e.g. 'LSTM', 'GRU')
-
-    Inputs: ``*args``, ``**kwargs``
-        - ``*args``: variable length argument list.
-        - ``**kwargs``: arbitrary keyword arguments.
-
-    Attributes:
-        SYM_MASK: masking symbol
-        SYM_EOS: end-of-sequence symbol
-    """
-    SYM_MASK = "MASK"
-    SYM_EOS = "EOS"
-
-    def __init__(self, vocab, max_len, hidden_size, input_dropout_p, dropout_p, n_layers, rnn_cell):
-        super(BaseRNN, self).__init__()
-        self.vocab = vocab
-        self.max_len = max_len
-        self.hidden_size = hidden_size
-        self.n_layers = n_layers
-        self.input_dropout_p = input_dropout_p
-        self.input_dropout = nn.Dropout(p=input_dropout_p)
-        if rnn_cell.lower() == 'lstm':
-            self.rnn_cell = nn.LSTM
-        elif rnn_cell.lower() == 'gru':
-            self.rnn_cell = nn.GRU
-        else:
-            raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))
-
-        self.dropout_p = dropout_p
-        self.rnn = self.rnn_cell(hidden_size, hidden_size, n_layers, batch_first=True, dropout=dropout_p)
-
-    def balance(self, batch, volatile):
-        """
-        Add reserved symbols and balance batch input.
-        It first appends the EOS symbol to each sequence and then appends multiple
-        MASK symbols to make the sequences the same length.
-
-        Args:
-            batch: list of sequences, each of which is a list of integers
-            volatile: boolean flag specifying whether to skip gradient bookkeeping; set it when you are sure you will not be calling .backward().
-
-        Returns:
-            torch.autograd.Variable: variable with balanced input data.
-        """
-        max_len = self.max_len
-        outputs = []
-        for seq in batch:
-            seq = seq[:min(len(seq), max_len - 1)]
-            outputs.append(seq + [self.vocab.EOS_token_id] + [self.vocab.MASK_token_id] * (max_len - len(seq) - 1))
-
-        outputs_var = torch.autograd.Variable(torch.LongTensor(outputs), volatile=volatile)
-        if torch.cuda.is_available():
-            outputs_var = outputs_var.cuda()
-
-        return outputs_var
-
-    def forward(self, *args, **kwargs):
-        if 'volatile' in kwargs:
-            volatile = kwargs['volatile']
-            kwargs.pop('volatile', None)
-        else:
-            volatile = False
-        if args:
-            self.balanced_batch = self.balance(args[0], volatile)
-            args = [self.balanced_batch] + list(args[1:])
-        else:
-            if 'inputs' in kwargs and kwargs['inputs'] is not None:
-                self.balanced_batch = self.balance(kwargs['inputs'], volatile)
-                kwargs['inputs'] = self.balanced_batch
-
-        return self.forward_rnn(*args, **kwargs)
-
-    def forward_rnn(self, *args, **kwargs):
-        raise NotImplementedError()
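For illustration, with a hypothetical vocabulary whose EOS_token_id is 2 and MASK_token_id is 0, and max_len=5, balance() pads a ragged batch like this (`encoder` stands for any BaseRNN subclass instance):

    >>> encoder.balance([[4, 5], [6, 7, 8, 9]], volatile=True)
    Variable containing:
     4  5  2  0  0
     6  7  8  9  2
    [torch.LongTensor of size 2x5]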
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/models/seq2seq.html b/docs/public/_modules/seq2seq/models/seq2seq.html
deleted file mode 100644
index ac6ba77..0000000
--- a/docs/public/_modules/seq2seq/models/seq2seq.html
+++ /dev/null
@@ -1,294 +0,0 @@
-seq2seq.models.seq2seq — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.models.seq2seq

-import os
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Seq2seq(nn.Module):
-    """ Standard sequence-to-sequence architecture with configurable encoder
-    and decoder.
-
-    Args:
-        encoder (EncoderRNN): object of EncoderRNN
-        decoder (DecoderRNN): object of DecoderRNN
-        decode_function (func, optional): function to generate symbols from output hidden states (default: F.log_softmax)
-
-    Inputs: input_variable, target_variable, teacher_forcing_ratio, volatile
-        - **input_variable** (list, optional): list of sequences, whose length is the batch size and within which
-          each sequence is a list of token IDs. This information is forwarded to the encoder.
-        - **target_variable** (list, optional): list of sequences, whose length is the batch size and within which
-          each sequence is a list of token IDs. This information is forwarded to the decoder.
-        - **teacher_forcing_ratio** (float, optional): The probability that teacher forcing will be used. A random number
-          is drawn uniformly from 0-1 for every decoding token, and if the sample is smaller than the given value,
-          teacher forcing is used (default is 0).
-        - **volatile** (bool, optional): boolean flag specifying whether to skip gradient bookkeeping; set it when you are
-          sure you will not be calling .backward().
-
-    Outputs: decoder_outputs, decoder_hidden, ret_dict
-        - **decoder_outputs** (batch): batch-length list of tensors with size (max_length, hidden_size) containing the
-          outputs of the decoder.
-        - **decoder_hidden** (num_layers * num_directions, batch, hidden_size): tensor containing the last hidden
-          state of the decoder.
-        - **ret_dict**: dictionary containing additional information as follows {*KEY_LENGTH* : list of integers
-          representing lengths of output sequences, *KEY_SEQUENCE* : list of sequences, where each sequence is a list of
-          predicted token IDs, *KEY_INPUT* : target outputs if provided for decoding, *KEY_ATTN_SCORE* : list of
-          sequences, where each list is of attention weights}.
-    """
-
-    def __init__(self, encoder, decoder, decode_function=F.log_softmax):
-        super(Seq2seq, self).__init__()
-        self.encoder = encoder
-        self.decoder = decoder
-        self.decode_function = decode_function
-
-    def forward(self, input_variable, target_variable=None,
-                teacher_forcing_ratio=0, volatile=False):
-        if target_variable is None:
-            input_variable = sorted(input_variable, key=len, reverse=True)
-        else:
-            sorted_input = sorted(zip(input_variable, target_variable),
-                                  key=lambda x: len(x[0]), reverse=True)
-            input_variable = [p[0] for p in sorted_input]
-            target_variable = [p[1] for p in sorted_input]
-        encoder_outputs, encoder_hidden = self.encoder(input_variable, volatile=volatile)
-        result = self.decoder(inputs=target_variable,
-                              encoder_hidden=encoder_hidden,
-                              encoder_outputs=encoder_outputs,
-                              function=self.decode_function,
-                              teacher_forcing_ratio=teacher_forcing_ratio,
-                              volatile=volatile)
-        return result
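A minimal forward-pass sketch; `encoder` and `decoder` stand for already-constructed EncoderRNN/DecoderRNN instances, and the token IDs are made up:

    model = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        model = model.cuda()
    # Batches are plain lists of token-ID lists; length sorting happens inside forward().
    decoder_outputs, decoder_hidden, ret_dict = model(
        [[4, 8, 15, 16], [23, 42]],   # input_variable
        target_variable=None,         # decode freely
        teacher_forcing_ratio=0)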
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/optim/optim.html b/docs/public/_modules/seq2seq/optim/optim.html
deleted file mode 100644
index 8d56f2f..0000000
--- a/docs/public/_modules/seq2seq/optim/optim.html
+++ /dev/null
@@ -1,313 +0,0 @@
-seq2seq.optim.optim — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.optim.optim

-import torch
-
-
-class Optimizer(object):
-    """ The Optimizer class encapsulates the torch.optim package and provides functionalities
-    for learning rate scheduling and gradient norm clipping.
-
-    Args:
-        optim_class (torch.optim.Optimizer): optimizer class, e.g. torch.optim.SGD
-        max_grad_norm (float, optional): value used for gradient norm clipping,
-            set 0 to disable (default 0)
-        lr_decay (float, optional): value for learning rate decay:
-            lr = lr_decay * lr (default 1)
-        decay_after_epoch (float, optional): learning rate starts to decay after the
-            specified epoch number, set 0 to disable (default 0)
-        **kwargs: arguments for the given optimizer class,
-            refer to http://pytorch.org/docs/optim.html#algorithms for more information
-    """
-
-    _ARG_MAX_GRAD_NORM = 'max_grad_norm'
-    _ARG_DECAY_AFTER = "decay_after_epoch"
-    _ARG_LR_DECAY = "lr_decay"
-
-    def __init__(self, optim_class, **kwargs):
-        self.optim_class = optim_class
-        self.optimizer = None
-        self.parameters = None
-
-        self.max_grad_norm = self._get_remove(kwargs, Optimizer._ARG_MAX_GRAD_NORM, 0)
-        self.lr_decay = self._get_remove(kwargs, Optimizer._ARG_LR_DECAY, 1)
-        self.decay_after_epoch = self._get_remove(kwargs, Optimizer._ARG_DECAY_AFTER, 0)
-        self.optim_args = kwargs
-
-    def _get_remove(self, args, key, default):
-        value = default
-        if key in args:
-            value = args[key]
-            del args[key]
-        return value
-
-    def set_parameters(self, parameters):
-        """ Set the parameters to optimize.
-
-        Args:
-            parameters (iterable): An iterable of torch.nn.Parameter.
-        """
-        self.parameters = parameters
-        self.optimizer = self.optim_class(parameters, **self.optim_args)
-
-    def step(self):
-        """ Performs a single optimization step, including gradient norm clipping if necessary. """
-        if self.max_grad_norm > 0:
-            torch.nn.utils.clip_grad_norm(self.parameters, self.max_grad_norm)
-        self.optimizer.step()
-
-    def update(self, loss, epoch):
-        """ Update the learning rate if the conditions are met. Override this method
-        to implement your own learning rate schedule.
-
-        Args:
-            loss (float): The current loss. It could be the training loss or the development
-                loss, depending on the caller. By default the supervised trainer uses the
-                development loss.
-            epoch (int): The current epoch number.
-        """
-        after_decay_epoch = self.decay_after_epoch != 0 and epoch >= self.decay_after_epoch
-        if after_decay_epoch:
-            self.optimizer.param_groups[0]['lr'] *= self.lr_decay
-
-    def load_state_dict(self, optimizer_dict):
-        """ Wrapper for loading the optimizer state_dict.
-        For further reference please refer to http://pytorch.org/docs/master/optim.html#torch.optim.Optimizer.load_state_dict
-        """
-        self.optimizer.load_state_dict(optimizer_dict)
-
-    def state_dict(self):
-        """ Wrapper for accessing the optimizer state_dict.
-        For further reference please refer to http://pytorch.org/docs/master/optim.html#torch.optim.Optimizer.state_dict
-        """
-        return self.optimizer.state_dict()
-
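Typical wiring of this wrapper (the optimizer class and hyper-parameters below are illustrative, not from this diff):

    optimizer = Optimizer(torch.optim.Adam, lr=0.001, max_grad_norm=5,
                          lr_decay=0.5, decay_after_epoch=10)
    optimizer.set_parameters(model.parameters())
    # inside the training loop:
    #   loss.backward(); optimizer.step()      # step() clips, then updates the weights
    # after evaluating on dev data each epoch:
    #   optimizer.update(dev_loss, epoch)      # halves the lr from epoch 10 onwards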
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/trainer/supervised_trainer.html b/docs/public/_modules/seq2seq/trainer/supervised_trainer.html
deleted file mode 100644
index 82fac57..0000000
--- a/docs/public/_modules/seq2seq/trainer/supervised_trainer.html
+++ /dev/null
@@ -1,397 +0,0 @@
-seq2seq.trainer.supervised_trainer — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.trainer.supervised_trainer

-import os
-import random
-import logging
-
-import torch
-from torch import optim
-
-from seq2seq.evaluator import Evaluator
-from seq2seq.loss import NLLLoss
-from seq2seq.optim import Optimizer
-from seq2seq.util.custom_time import *
-from seq2seq.util.checkpoint import Checkpoint
-
-
-class SupervisedTrainer(object):
-    """ The SupervisedTrainer class helps in setting up a training framework in a
-    supervised setting.
-
-    Args:
-        expt_dir (str, optional): experiment directory to store details of the experiment;
-            by default it makes a folder in the current directory to store the details (default: `experiment`).
-        loss (seq2seq.loss.loss.Loss, optional): loss for training (default: seq2seq.loss.NLLLoss)
-        batch_size (int, optional): batch size for the experiment (default: 64)
-        checkpoint_every (int, optional): number of steps to checkpoint after (default: 100)
-        optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
-            (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
-    """
-    def __init__(self, expt_dir='experiment', loss=NLLLoss(), batch_size=64,
-                 random_seed=None,
-                 checkpoint_every=100, print_every=100,
-                 optimizer=Optimizer(optim.Adam, max_grad_norm=5)):
-        self._trainer = "Simple Trainer"
-        self.random_seed = random_seed
-        if random_seed is not None:
-            random.seed(random_seed)
-            torch.manual_seed(random_seed)
-        self.loss = loss
-        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
-        self.optimizer = optimizer
-        self.checkpoint_every = checkpoint_every
-        self.print_every = print_every
-
-        if not os.path.isabs(expt_dir):
-            expt_dir = os.path.join(os.getcwd(), expt_dir)
-        self.expt_dir = expt_dir
-        if not os.path.exists(self.expt_dir):
-            os.makedirs(self.expt_dir)
-        self.batch_size = batch_size
-        self.input_vocab_file = os.path.join(self.expt_dir, 'input_vocab')
-        self.output_vocab_file = os.path.join(self.expt_dir, 'output_vocab')
-
-        self.logger = logging.getLogger(__name__)
-
-    def _train_batch(self, input_variable, target_variable, model, teacher_forcing_ratio):
-        loss = self.loss
-        # Forward propagation
-        decoder_outputs, decoder_hidden, other = model(input_variable, target_variable,
-                                                       teacher_forcing_ratio=teacher_forcing_ratio)
-        # Get loss
-        loss.reset()
-        targets = other['inputs']
-        lengths = other['length']
-        for batch in range(len(targets)):
-            # Batch-wise loss
-            batch_target = targets[batch]
-            batch_len = lengths[batch]
-            # Crop output and target to batch length
-            batch_output = torch.stack([output[batch] for output in decoder_outputs[:batch_len]])
-            batch_target = batch_target[:batch_len]
-            # Evaluate loss
-            loss.eval_batch(batch_output, batch_target)
-        # Backward propagation
-        model.zero_grad()
-        loss.backward()
-        self.optimizer.step()
-
-        return loss.get_loss()
-
-    def _train_epoches(self, data, model, n_epochs, batch_size, resume, dev_data=None, teacher_forcing_ratio=0):
-        start = time.time()
-        print_loss_total = 0  # Reset every print_every
-        steps_per_epoch = data.num_batches(batch_size)
-        total_steps = steps_per_epoch * n_epochs
-
-        # If training is set to resume
-        if resume:
-            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
-            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
-            model = resume_checkpoint.model
-            self.optimizer.set_parameters(model.parameters())
-            self.optimizer.load_state_dict(resume_checkpoint.optimizer_state_dict)
-            start_epoch = resume_checkpoint.epoch
-            step = resume_checkpoint.step
-        else:
-            start_epoch = 1
-            step = 0
-            self.optimizer.set_parameters(model.parameters())
-
-        for epoch in range(start_epoch, n_epochs + 1):
-            data.shuffle(self.random_seed)
-
-            batch_generator = data.make_batches(batch_size)
-
-            # Consume batches already seen when resuming mid-epoch
-            for _ in range((epoch - 1) * steps_per_epoch, step):
-                next(batch_generator)
-
-            model.train(True)
-            for batch in batch_generator:
-                step += 1
-
-                input_variables = batch[0]
-                target_variables = batch[1]
-
-                loss = self._train_batch(input_variables, target_variables, model, teacher_forcing_ratio)
-
-                # Record average loss
-                print_loss_total += loss
-
-                if step % self.print_every == 0:
-                    print_loss_avg = print_loss_total / self.print_every
-                    print_loss_total = 0
-                    log_msg = 'Time elapsed: %s, Progress: %d%%, Train %s: %.4f' % (
-                        pretty_interval(start),
-                        float(step) / total_steps * 100,
-                        self.loss.name,
-                        print_loss_avg)
-                    self.logger.info(log_msg)
-
-                # Checkpoint
-                if step % self.checkpoint_every == 0 or step == total_steps:
-                    Checkpoint(model=model,
-                               optimizer_state_dict=self.optimizer.state_dict(),
-                               epoch=epoch, step=step,
-                               input_vocab=data.input_vocab,
-                               output_vocab=data.output_vocab).save(self.expt_dir)
-
-            log_msg = "Finished epoch {0}".format(epoch)
-            if dev_data is not None:
-                dev_loss = self.evaluator.evaluate(model, dev_data)
-                self.optimizer.update(dev_loss, epoch)
-                log_msg += ", Dev %s: %.4f" % (self.loss.name, dev_loss)
-                model.train(mode=True)
-            self.logger.info(log_msg)
-
-    def train(self, model, data, num_epochs=5, resume=False, dev_data=None, teacher_forcing_ratio=0):
-        """ Run training for a given model.
-
-        Args:
-            model (seq2seq.models): model to run training on; if `resume=True`, it will be
-                overwritten by the model loaded from the latest checkpoint.
-            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
-            num_epochs (int, optional): number of epochs to run (default 5)
-            resume (bool, optional): resume training from the latest checkpoint (default False)
-            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
-            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
-        """
-        # Make checkpoint directories
-        data.input_vocab.save(self.input_vocab_file)
-        data.output_vocab.save(self.output_vocab_file)
-
-        self._train_epoches(data, model, num_epochs, self.batch_size,
-                            resume=resume, dev_data=dev_data, teacher_forcing_ratio=teacher_forcing_ratio)
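End to end, a training run might look like the sketch below; the model, the train_data/dev_data Dataset objects, and the numbers are assumed for illustration:

    trainer = SupervisedTrainer(expt_dir='./experiment',
                                loss=NLLLoss(),
                                batch_size=32,
                                checkpoint_every=500,
                                print_every=100)
    trainer.train(model, train_data,
                  num_epochs=10,
                  dev_data=dev_data,
                  teacher_forcing_ratio=0.5,
                  resume=False)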
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/util/checkpoint.html b/docs/public/_modules/seq2seq/util/checkpoint.html
deleted file mode 100644
index cf3bf2b..0000000
--- a/docs/public/_modules/seq2seq/util/checkpoint.html
+++ /dev/null
@@ -1,353 +0,0 @@
-seq2seq.util.checkpoint — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.util.checkpoint

-import os
-import time
-import shutil
-
-import torch
-
-from seq2seq.dataset.vocabulary import Vocabulary
-
-
-
-class Checkpoint(object):
-    """
-    The Checkpoint class manages the saving and loading of a model during training. It allows training to be suspended
-    and resumed at a later time (e.g. when running on a cluster using sequential jobs).
-
-    To make a checkpoint, initialize a Checkpoint object with the following args; then call that object's save() method
-    to write parameters to disk.
-
-    Args:
-        model (seq2seq): seq2seq model being trained
-        optimizer_state_dict (dict): stores the state of the optimizer
-        epoch (int): current epoch (an epoch is a loop through the full training data)
-        step (int): number of examples seen within the current epoch
-        input_vocab (Vocabulary): vocabulary for the input language
-        output_vocab (Vocabulary): vocabulary for the output language
-
-    Attributes:
-        CHECKPOINT_DIR_NAME (str): name of the checkpoint directory
-        TRAINER_STATE_NAME (str): name of the file storing trainer states
-        MODEL_NAME (str): name of the file storing the model
-        INPUT_VOCAB_FILE (str): name of the input vocab file
-        OUTPUT_VOCAB_FILE (str): name of the output vocab file
-    """
-
-    CHECKPOINT_DIR_NAME = 'checkpoints'
-    TRAINER_STATE_NAME = 'trainer_states.pt'
-    MODEL_NAME = 'model.pt'
-    INPUT_VOCAB_FILE = 'input_vocab.pt'
-    OUTPUT_VOCAB_FILE = 'output_vocab.pt'
-
-    def __init__(self, model, optimizer_state_dict, epoch, step, input_vocab, output_vocab, path=None):
-        self.model = model
-        self.optimizer_state_dict = optimizer_state_dict
-        self.input_vocab = input_vocab
-        self.output_vocab = output_vocab
-        self.epoch = epoch
-        self.step = step
-        self._path = path
-
-    @property
-    def path(self):
-        if self._path is None:
-            raise LookupError("The checkpoint has not been saved.")
-        return self._path
-
-    def save(self, experiment_dir):
-        """
-        Saves the current model and related training parameters into a subdirectory of the checkpoint directory.
-        The name of the subdirectory is the current local time in Y_M_D_H_M_S format.
-
-        Args:
-            experiment_dir (str): path to the experiment root directory
-        Returns:
-            str: path to the saved checkpoint subdirectory
-        """
-        date_time = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())
-
-        self._path = os.path.join(experiment_dir, self.CHECKPOINT_DIR_NAME, date_time)
-        path = self._path
-
-        if os.path.exists(path):
-            shutil.rmtree(path)
-        os.makedirs(path)
-        torch.save({'epoch': self.epoch,
-                    'step': self.step,
-                    'optimizer': self.optimizer_state_dict},
-                   os.path.join(path, self.TRAINER_STATE_NAME))
-        torch.save(self.model, os.path.join(path, self.MODEL_NAME))
-
-        if not os.path.isfile(os.path.join(path, self.INPUT_VOCAB_FILE)):
-            self.input_vocab.save(os.path.join(path, self.INPUT_VOCAB_FILE))
-        if not os.path.isfile(os.path.join(path, self.OUTPUT_VOCAB_FILE)):
-            self.output_vocab.save(os.path.join(path, self.OUTPUT_VOCAB_FILE))
-
-        return path
-
-    @classmethod
-    def load(cls, path):
-        """
-        Loads a Checkpoint object that was previously saved to disk.
-
-        Args:
-            path (str): path to the checkpoint subdirectory
-        Returns:
-            checkpoint (Checkpoint): checkpoint object with fields copied from those stored on disk
-        """
-        print("Loading checkpoint from {}".format(path))
-        resume_checkpoint = torch.load(os.path.join(path, cls.TRAINER_STATE_NAME))
-        model = torch.load(os.path.join(path, cls.MODEL_NAME))
-        input_vocab = Vocabulary.load(os.path.join(path, cls.INPUT_VOCAB_FILE))
-        output_vocab = Vocabulary.load(os.path.join(path, cls.OUTPUT_VOCAB_FILE))
-        return Checkpoint(model=model, input_vocab=input_vocab,
-                          output_vocab=output_vocab,
-                          optimizer_state_dict=resume_checkpoint['optimizer'],
-                          epoch=resume_checkpoint['epoch'],
-                          step=resume_checkpoint['step'],
-                          path=path)
-
-    @classmethod
-    def get_latest_checkpoint(cls, experiment_path):
-        """
-        Given the path to an experiment directory, returns the path to the last saved checkpoint's subdirectory.
-
-        Precondition: at least one checkpoint has been made (i.e., the latest checkpoint subdirectory exists).
-
-        Args:
-            experiment_path (str): path to the experiment directory
-        Returns:
-            str: path to the last saved checkpoint's subdirectory
-        """
-        checkpoints_path = os.path.join(experiment_path, cls.CHECKPOINT_DIR_NAME)
-        all_times = sorted(os.listdir(checkpoints_path), reverse=True)
-        return os.path.join(checkpoints_path, all_times[0])
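Resuming from the most recent checkpoint of an experiment directory then looks like this (the directory name is illustrative):

    latest = Checkpoint.get_latest_checkpoint('./experiment')
    checkpoint = Checkpoint.load(latest)
    model = checkpoint.model              # full model object, ready to use
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab
    # optimizer state and progress counters travel with it:
    # checkpoint.optimizer_state_dict, checkpoint.epoch, checkpoint.step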
\ No newline at end of file
diff --git a/docs/public/_modules/seq2seq/util/custom_time.html b/docs/public/_modules/seq2seq/util/custom_time.html
deleted file mode 100644
index fe0c088..0000000
--- a/docs/public/_modules/seq2seq/util/custom_time.html
+++ /dev/null
@@ -1,280 +0,0 @@
-seq2seq.util.custom_time — pytorch-seq2seq 0.1.2 documentation
Source code for seq2seq.util.custom_time

-import time
-
-
-
-def pretty_interval(start_time):
-    """
-    Given a previous point in time (e.g. the start of a process, measured by calling time.time()), calculates
-    the time elapsed since that point, divides the elapsed time into days/hours/minutes/seconds, and returns that summary.
-
-    Args:
-        start_time (float): the start of the interval of time, obtained by calling time.time()
-    Returns:
-        str: elapsed time in '##d ##h ##m ##s' format
-    """
-    return pretty_time(time.time() - start_time)
-
-
-def pretty_time(timespan_in_seconds):
-    """
-    Given a length of time (measured in seconds), divides that timespan into
-    days/hours/minutes/seconds and returns that summary.
-
-    Args:
-        timespan_in_seconds (float): the number of seconds in a span of time
-
-    Returns:
-        str: timespan in '##d ##h ##m ##s' format
-
-    Examples::
-
-        >>> print(seq2seq.util.custom_time.pretty_time(426753))
-        4d 22h 32m 33s
-    """
-    seconds = abs(int(timespan_in_seconds))
-    msg = []
-    days, seconds = divmod(seconds, 86400)
-    if days > 0:
-        msg.append("%dd" % days)
-    hours, seconds = divmod(seconds, 3600)
-    if hours > 0:
-        msg.append("%dh" % hours)
-    minutes, seconds = divmod(seconds, 60)
-    if minutes > 0:
-        msg.append("%dm" % minutes)
-    msg.append("%ds" % seconds)
-    return " ".join(msg)
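A couple of further illustrative values (zero-valued leading units are simply dropped):

    >>> pretty_time(3725)
    '1h 2m 5s'
    >>> pretty_time(42)
    '42s'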
\ No newline at end of file
diff --git a/docs/public/dataset.html b/docs/public/dataset.html
index d53741c..e05c869 100644
--- a/docs/public/dataset.html
+++ b/docs/public/dataset.html
@@ -8,7 +8,7 @@
-Dataset — pytorch-seq2seq 0.1.5 documentation
+Dataset — pytorch-seq2seq 0.1.6 documentation
@@ -35,7 +35,7 @@
@@ -66,7 +66,7 @@
- 0.1.5
+ 0.1.6
@@ -260,7 +260,7 @@

     var DOCUMENTATION_OPTIONS = {
       URL_ROOT:'./',
-      VERSION:'0.1.5',
+      VERSION:'0.1.6',
       COLLAPSE_INDEX:false,
       FILE_SUFFIX:'.html',
       HAS_SOURCE: true,
diff --git a/docs/public/evaluator.html b/docs/public/evaluator.html
index 6a97d4a..dd947a3 100644
--- a/docs/public/evaluator.html
+++ b/docs/public/evaluator.html
@@ -8,7 +8,7 @@
-Evaluator — pytorch-seq2seq 0.1.5 documentation
+Evaluator — pytorch-seq2seq 0.1.6 documentation
@@ -35,7 +35,7 @@
@@ -66,7 +66,7 @@
- 0.1.5
+ 0.1.6
@@ -184,7 +184,7 @@

 Parameters:
@@ -207,7 +207,7 @@

 Returns:

loss of the given model on the given dataset

-Return type:

loss (float)

+Return type:

loss (float)

@@ -223,6 +223,11 @@

 class seq2seq.evaluator.predictor.Predictor(model, src_vocab, tgt_vocab)
+
+get_decoder_features(src_seq)
+
+ +
predict(src_seq)

Make prediction given src_seq as input.

@@ -230,12 +235,42 @@

-Parameters:  src_seq (list) – list of tokens in source language
+Parameters:  src_seq (list) – list of tokens in source language
 Returns:     list of tokens in target language as predicted by the pre-trained model
-Return type: tgt_seq (list)
+Return type: tgt_seq (list)

+ +
+
+predict_n(src_seq, n=1)
+
+    Make 'n' predictions given src_seq as input.
+
+    Parameters:
+        * src_seq (list) – list of tokens in source language
+        * n (int) – number of predicted seqs to return. If None, it will return just one seq.
+
+    Returns:     list of tokens in target language as predicted by the pre-trained model
+    Return type: tgt_seq (list)
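Put together, prediction from a saved experiment might look like this sketch; the experiment path and tokens are illustrative, and predict_n with n > 1 presumably requires a beam-search decoder such as TopKDecoder:

    checkpoint = Checkpoint.load(Checkpoint.get_latest_checkpoint('./experiment'))
    predictor = Predictor(checkpoint.model, checkpoint.input_vocab, checkpoint.output_vocab)
    predictor.predict(['how', 'are', 'you'])           # single best target sequence
    predictor.predict_n(['how', 'are', 'you'], n=3)    # list of 3 candidate sequences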

+
@@ -290,7 +325,7 @@

evaluator var DOCUMENTATION_OPTIONS = { URL_ROOT:'./', - VERSION:'0.1.5', + VERSION:'0.1.6', COLLAPSE_INDEX:false, FILE_SUFFIX:'.html', HAS_SOURCE: true, diff --git a/docs/public/genindex.html b/docs/public/genindex.html index fdabc4f..6b59af9 100644 --- a/docs/public/genindex.html +++ b/docs/public/genindex.html @@ -9,7 +9,7 @@ - Index — pytorch-seq2seq 0.1.5 documentation + Index — pytorch-seq2seq 0.1.6 documentation @@ -36,7 +36,7 @@ - + @@ -65,7 +65,7 @@
- 0.1.5
+ 0.1.6
@@ -257,10 +257,12 @@

F

G

@@ -324,6 +326,8 @@

P

  • predict() (seq2seq.evaluator.predictor.Predictor method) +
  • +
  • predict_n() (seq2seq.evaluator.predictor.Predictor method)
  • Predictor (class in seq2seq.evaluator.predictor)
  • @@ -450,7 +454,7 @@

    U

    @@ -64,7 +64,7 @@
- 0.1.5
+ 0.1.6
    @@ -208,7 +208,7 @@