Commit

Merge internal changes (#283)
Summary:
Pull Request resolved: pytorch/translate#283

Pull Request resolved: facebookresearch/fairseq#428

Differential Revision: D13564190

Pulled By: myleott

fbshipit-source-id: 3b62282d7069c288f5bdd1dd2c120788cee4abb5
myleott authored and yzpang committed Feb 19, 2021
1 parent bebdd46 commit c90abbc
Showing 28 changed files with 248 additions and 239 deletions.
45 changes: 0 additions & 45 deletions distributed_train.py

This file was deleted.

18 changes: 18 additions & 0 deletions docs/criterions.rst
@@ -6,8 +6,26 @@
Criterions
==========

Criterions compute the loss function given the model and batch, roughly::

    loss = criterion(model, batch)

.. automodule:: fairseq.criterions
:members:

.. autoclass:: fairseq.criterions.FairseqCriterion
:members:
:undoc-members:

.. autoclass:: fairseq.criterions.adaptive_loss.AdaptiveLoss
:members:
:undoc-members:
.. autoclass:: fairseq.criterions.composite_loss.CompositeLoss
:members:
:undoc-members:
.. autoclass:: fairseq.criterions.cross_entropy.CrossEntropyCriterion
:members:
:undoc-members:
.. autoclass:: fairseq.criterions.label_smoothed_cross_entropy.LabelSmoothedCrossEntropyCriterion
:members:
:undoc-members:
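
The ``loss = criterion(model, batch)`` contract above can be illustrated with a small self-contained sketch. This is plain PyTorch with a hypothetical criterion class and batch layout, not fairseq's actual :class:`FairseqCriterion` API::

    import torch
    import torch.nn.functional as F

    class ToyCrossEntropyCriterion:
        """Minimal criterion: run the model on a batch and return a scalar loss."""

        def __call__(self, model, batch):
            # batch is assumed to be a dict with 'input' and 'target' tensors
            logits = model(batch['input'])
            return F.cross_entropy(logits, batch['target'])

    model = torch.nn.Linear(16, 4)
    criterion = ToyCrossEntropyCriterion()
    batch = {'input': torch.randn(8, 16), 'target': torch.randint(0, 4, (8,))}
    loss = criterion(model, batch)
    loss.backward()
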
16 changes: 16 additions & 0 deletions docs/data.rst
@@ -21,6 +21,20 @@ mini-batches.
.. autoclass:: fairseq.data.MonolingualDataset
:members:

**Helper Datasets**

These datasets wrap other :class:`fairseq.data.FairseqDataset` instances and
provide additional functionality:

.. autoclass:: fairseq.data.BacktranslationDataset
:members:
.. autoclass:: fairseq.data.ConcatDataset
:members:
.. autoclass:: fairseq.data.RoundRobinZipDatasets
:members:
.. autoclass:: fairseq.data.TransformEosDataset
:members:
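
The helper datasets above all follow a wrapping pattern. A rough sketch of that pattern in plain PyTorch (the class below is hypothetical, not one of the datasets listed)::

    import torch
    from torch.utils.data import Dataset

    class PrependTokenDataset(Dataset):
        """Hypothetical wrapper: prepend a token id to every item of a base dataset."""

        def __init__(self, base_dataset, token_id):
            self.base = base_dataset
            self.token_id = token_id

        def __getitem__(self, index):
            item = self.base[index]  # expected to be a 1-D LongTensor of token ids
            return torch.cat([item.new_full((1,), self.token_id), item])

        def __len__(self):
            return len(self.base)
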


Dictionary
----------
@@ -32,6 +46,8 @@ Dictionary
Iterators
---------

.. autoclass:: fairseq.data.BufferedIterator
:members:
.. autoclass:: fairseq.data.CountingIterator
:members:
.. autoclass:: fairseq.data.EpochBatchIterator
17 changes: 8 additions & 9 deletions docs/getting_started.rst
@@ -27,21 +27,20 @@ interactively. Here, we use a beam size of 5:
> MODEL_DIR=wmt14.en-fr.fconv-py
> python interactive.py \
--path $MODEL_DIR/model.pt $MODEL_DIR \
--beam 5
--beam 5 --source-lang en --target-lang fr
| loading model(s) from wmt14.en-fr.fconv-py/model.pt
| [en] dictionary: 44206 types
| [fr] dictionary: 44463 types
| Type the input sentence and press return:
> Why is it rare to discover new marine mam@@ mal species ?
O Why is it rare to discover new marine mam@@ mal species ?
H -0.06429661810398102 Pourquoi est-il rare de découvrir de nouvelles espèces de mammifères marins ?
A 0 1 3 3 5 6 6 8 8 8 7 11 12
This generation script produces four types of outputs: a line prefixed
with *S* shows the supplied source sentence after applying the
vocabulary; *O* is a copy of the original source sentence; *H* is the
hypothesis along with an average log-likelihood; and *A* is the
attention maxima for each word in the hypothesis, including the
H -0.1525060087442398 Pourquoi est @-@ il rare de découvrir de nouvelles espèces de mammifères marins ?
P -0.2221 -0.3122 -0.1289 -0.2673 -0.1711 -0.1930 -0.1101 -0.1660 -0.1003 -0.0740 -0.1101 -0.0814 -0.1238 -0.0985 -0.1288
This generation script produces three types of outputs: a line prefixed
with *O* is a copy of the original source sentence; *H* is the
hypothesis along with an average log-likelihood; and *P* is the
positional score per token position, including the
end-of-sentence marker which is omitted from the text.
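
If you post-process this output programmatically, a rough parsing sketch follows; it assumes whitespace-separated fields as rendered above (the actual script may separate fields with tabs)::

    def parse_hypotheses(output_lines):
        """Pair each H line (score + hypothesis) with the following P line (positional scores)."""
        results, current = [], None
        for line in output_lines:
            fields = line.split()
            if not fields:
                continue
            if fields[0] == 'H':
                current = {'score': float(fields[1]), 'tokens': fields[2:]}
            elif fields[0] == 'P' and current is not None:
                current['positional_scores'] = [float(x) for x in fields[1:]]
                results.append(current)
                current = None
        return results
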

See the `README <https://github.com/pytorch/fairseq#pre-trained-models>`__ for a
24 changes: 23 additions & 1 deletion docs/lr_scheduler.rst
@@ -6,7 +6,29 @@
Learning Rate Schedulers
========================

TODO
Learning Rate Schedulers update the learning rate over the course of training.
Learning rates can be updated after each update via :func:`step_update` or at
epoch boundaries via :func:`step`.

.. automodule:: fairseq.optim.lr_scheduler
:members:

.. autoclass:: fairseq.optim.lr_scheduler.FairseqLRScheduler
:members:
:undoc-members:

.. autoclass:: fairseq.optim.lr_scheduler.cosine_lr_scheduler.CosineSchedule
:members:
:undoc-members:
.. autoclass:: fairseq.optim.lr_scheduler.fixed_schedule.FixedSchedule
:members:
:undoc-members:
.. autoclass:: fairseq.optim.lr_scheduler.inverse_square_root_schedule.InverseSquareRootSchedule
:members:
:undoc-members:
.. autoclass:: fairseq.optim.lr_scheduler.reduce_lr_on_plateau.ReduceLROnPlateau
:members:
:undoc-members:
.. autoclass:: fairseq.optim.lr_scheduler.reduce_angular_lr_scheduler.TriangularSchedule
:members:
:undoc-members:
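
As a concrete illustration of the update-level scheduling described above, here is a rough standalone sketch of an inverse-square-root policy similar in spirit to ``InverseSquareRootSchedule`` (the parameter names and defaults are illustrative, not fairseq's exact implementation)::

    def inverse_sqrt_lr(num_updates, warmup_updates=4000, warmup_init_lr=1e-7, peak_lr=5e-4):
        """Linear warmup to peak_lr, then decay proportional to 1/sqrt(num_updates)."""
        if num_updates < warmup_updates:
            # linear warmup from warmup_init_lr up to peak_lr
            return warmup_init_lr + (peak_lr - warmup_init_lr) * num_updates / warmup_updates
        # equals peak_lr at the end of warmup, then shrinks as updates grow
        return peak_lr * (warmup_updates ** 0.5) * (num_updates ** -0.5)
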
4 changes: 2 additions & 2 deletions docs/modules.rst
@@ -1,8 +1,8 @@
Modules
=======

Fairseq provides several stand-alone :class:`torch.nn.Module` s that may be
helpful when implementing a new :class:`FairseqModel`.
Fairseq provides several stand-alone :class:`torch.nn.Module` classes that may
be helpful when implementing a new :class:`~fairseq.models.FairseqModel`.

.. automodule:: fairseq.modules
:members:
22 changes: 22 additions & 0 deletions docs/optim.rst
@@ -6,5 +6,27 @@
Optimizers
==========

Optimizers update the Model parameters based on the gradients.

.. automodule:: fairseq.optim
:members:

.. autoclass:: fairseq.optim.FairseqOptimizer
:members:
:undoc-members:

.. autoclass:: fairseq.optim.adagrad.Adagrad
:members:
:undoc-members:
.. autoclass:: fairseq.optim.adam.FairseqAdam
:members:
:undoc-members:
.. autoclass:: fairseq.optim.fp16_optimizer.FP16Optimizer
:members:
:undoc-members:
.. autoclass:: fairseq.optim.nag.FairseqNAG
:members:
:undoc-members:
.. autoclass:: fairseq.optim.sgd.SGD
:members:
:undoc-members:
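
The gradient-based update described above corresponds to the familiar backward/clip/step sequence; a minimal sketch in plain PyTorch (using ``torch.optim`` directly rather than the fairseq wrappers listed)::

    import torch
    import torch.nn.functional as F

    model = torch.nn.Linear(16, 4)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    x, y = torch.randn(8, 16), torch.randint(0, 4, (8,))
    loss = F.cross_entropy(model(x), y)

    optimizer.zero_grad()                                     # clear stale gradients
    loss.backward()                                           # accumulate new gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)   # optional clipping
    optimizer.step()                                          # apply the parameter update
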
10 changes: 8 additions & 2 deletions docs/overview.rst
@@ -22,12 +22,18 @@ fairseq implements the following high-level training flow::
    for epoch in range(num_epochs):
        itr = task.get_batch_iterator(task.dataset('train'))
        for num_updates, batch in enumerate(itr):
            loss = criterion(model, batch)
            optimizer.backward(loss)
            task.train_step(batch, model, criterion, optimizer)
            average_and_clip_gradients()
            optimizer.step()
            lr_scheduler.step_update(num_updates)
        lr_scheduler.step(epoch)

where the default implementation for ``task.train_step`` is roughly::

    def train_step(self, batch, model, criterion, optimizer):
        loss = criterion(model, batch)
        optimizer.backward(loss)
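
To make the flow concrete, here is a runnable miniature of the loop above using plain-PyTorch stand-ins for the task, criterion, optimizer, and scheduler (all names below are hypothetical placeholders, not fairseq objects)::

    import torch
    import torch.nn.functional as F

    model = torch.nn.Linear(16, 4)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    batches = [(torch.randn(8, 16), torch.randint(0, 4, (8,))) for _ in range(10)]

    def train_step(batch, model, optimizer):
        x, y = batch
        loss = F.cross_entropy(model(x), y)
        optimizer.zero_grad()
        loss.backward()
        return loss

    for epoch in range(2):
        for num_updates, batch in enumerate(batches):
            train_step(batch, model, optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)  # average_and_clip_gradients()
            optimizer.step()
            # an update-level scheduler would call step_update(num_updates) here
        # an epoch-level scheduler would call step(epoch) here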

**Registering new plug-ins**

New plug-ins are *registered* through a set of ``@register`` function
5 changes: 2 additions & 3 deletions docs/tutorial_classifying_names.rst
@@ -353,17 +353,16 @@ The model files should appear in the :file:`checkpoints/` directory.
-------------------------------

Finally we can write a short script to evaluate our model on new inputs. Create
a new file named :file:`eval_classify.py` with the following contents::
a new file named :file:`eval_classifier.py` with the following contents::

from fairseq import data, options, tasks, utils
from fairseq.tokenizer import Tokenizer

# Parse command-line arguments for generation
parser = options.get_generation_parser()
parser = options.get_generation_parser(default_task='simple_classification')
args = options.parse_args_and_arch(parser)

# Setup task
args.task = 'simple_classification'
task = tasks.setup_task(args)

# Load model
7 changes: 5 additions & 2 deletions eval_lm.py
@@ -55,7 +55,9 @@ def main(parsed_args):

# Load ensemble
print('| loading model(s) from {}'.format(parsed_args.path))
models, args = utils.load_ensemble_for_inference(parsed_args.path.split(':'), task, model_arg_overrides=eval(parsed_args.model_overrides))
models, args = utils.load_ensemble_for_inference(
    parsed_args.path.split(':'), task, model_arg_overrides=eval(parsed_args.model_overrides),
)

for arg in vars(parsed_args).keys():
    if arg not in {'self_target', 'future_target', 'past_target', 'tokens_per_sample', 'output_size_dictionary'}:
@@ -83,9 +85,10 @@ def main(parsed_args):
    max_positions=utils.resolve_max_positions(*[
        model.max_positions() for model in models
    ]),
    ignore_invalid_inputs=True,
    num_shards=args.num_shards,
    shard_id=args.shard_id,
    ignore_invalid_inputs=True,
    num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)

gen_timer = StopwatchMeter()
4 changes: 2 additions & 2 deletions fairseq/data/fairseq_dataset.py
@@ -6,8 +6,6 @@
import numpy as np
import torch.utils.data

from fairseq.data import data_utils


class EpochListening:
"""Mixin for receiving updates whenever the epoch increments."""
@@ -75,7 +73,9 @@ def __iter__(self):

@property
def supports_prefetch(self):
    """Whether this dataset supports prefetching."""
    return False

def prefetch(self, indices):
    """Prefetch the data required for this epoch."""
    raise NotImplementedError
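
A rough sketch of how loading code might use this hook (hypothetical consumer code, not fairseq's actual loader)::

    def load_epoch(dataset, indices):
        # only call prefetch() when the dataset advertises support for it
        if getattr(dataset, 'supports_prefetch', False):
            dataset.prefetch(indices)
        return [dataset[i] for i in indices]
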
10 changes: 2 additions & 8 deletions fairseq/data/indexed_dataset.py
@@ -111,7 +111,7 @@ class IndexedDataset(FairseqDataset):
"""Loader for TorchNet IndexedDataset"""
_HDR_MAGIC = b'TNTIDX\x00\x00'

def __init__(self, path, fix_lua_indexing=False, read_data=True):
def __init__(self, path, fix_lua_indexing=False):
super().__init__()
self.path = path
self.fix_lua_indexing = fix_lua_indexing
@@ -150,7 +150,7 @@ def __getitem__(self, i):
    if not self.data_file:
        self.read_data(self.path)
    self.check_index(i)
    tensor_size = self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]]
    tensor_size = int(self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]])
    a = np.empty(tensor_size, dtype=self.dtype)
    self.data_file.seek(self.data_offsets[i] * self.element_size)
    self.data_file.readinto(a)
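
The seek-then-``readinto`` pattern used by ``__getitem__`` can be shown standalone; this sketch assumes a hypothetical flat binary file of fixed-width elements, not the actual ``IndexedDataset`` on-disk layout::

    import numpy as np

    def read_block(path, offset, length, dtype=np.int64):
        """Read `length` elements starting at element `offset` from a flat binary file."""
        a = np.empty(length, dtype=dtype)
        with open(path, 'rb') as f:
            f.seek(offset * a.itemsize)  # element offset -> byte offset
            f.readinto(a)                # fill the preallocated buffer in place
        return a
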
@@ -168,12 +168,6 @@ def num_tokens(self, index):
def size(self, index):
    return self.sizes[index]

def read_into(self, start, dst):
    self.data_file.seek(start * self.element_size)
    self.data_file.readinto(dst)
    if self.fix_lua_indexing:
        dst -= 1  # subtract 1 for 0-based indexing

@staticmethod
def exists(path):
    return (
34 changes: 0 additions & 34 deletions fairseq/data/iterators.py
@@ -347,40 +347,6 @@ def __next__(self):
    return chunk


class BufferedIterator(object):
    """Wrapper around an iterable that prefetches items into a buffer.

    Args:
        iterable (iterable): iterable to wrap
        buffer_size (int): number of items to prefetch and buffer
    """

    def __init__(self, iterable, buffer_size):
        self.iterable = iterable

        self.q = queue.Queue(maxsize=buffer_size)
        self.thread = threading.Thread(target=self._load_q, daemon=True)
        self.thread.start()

    def __len__(self):
        return len(self.iterable)

    def __iter__(self):
        return self

    def __next__(self):
        x = self.q.get()
        if x is None:
            self.thread.join()
            raise StopIteration
        return x[0]

    def _load_q(self):
        for x in self.iterable:
            self.q.put([x])  # wrap in list so that it's never None
        self.q.put(None)


class GroupedIterator(object):
"""Wrapper around an iterable that returns groups (chunks) of items.
5 changes: 2 additions & 3 deletions fairseq/data/monolingual_dataset.py
@@ -7,7 +7,6 @@
import torch

from . import data_utils, FairseqDataset
from typing import List


def collate(samples, pad_idx, eos_idx):
@@ -76,8 +75,8 @@ def __init__(self, dataset, sizes, src_vocab, tgt_vocab, add_eos_for_other_targe
    targets = None
    self.targets = targets

    assert targets is None or all(
        t in {'self', 'future', 'past'} for t in targets), "targets must be none or one of 'self', 'future', 'past'"
    assert targets is None or all(t in {'self', 'future', 'past'} for t in targets), \
        "targets must be none or one of 'self', 'future', 'past'"
    if targets is not None and len(targets) == 0:
        targets = None
    self.targets = targets
(Remaining changed files not shown.)
