Merge pull request #259 from rsokl/1.7-fixes
Fix constant=True for nll loss. Fix docstring sections
rsokl authored Jul 11, 2020
2 parents e99646f + f45fae0 commit 2758263
Showing 9 changed files with 91 additions and 26 deletions.
src/mygrad/nnet/activations/glu.py (7 changes: 4 additions & 3 deletions)
@@ -1,6 +1,7 @@
from numpy import ndarray

from mygrad import multiply, Tensor
from mygrad import Tensor, multiply

from .sigmoid import sigmoid


@@ -24,8 +25,8 @@ def glu(x, axis=-1, constant=False):
mygrad.Tensor
The result of applying the Gated Linear Unit elementwise to the input.
Extended Description
--------------------
Notes
-----
The Gated Linear Unit was proposed in the paper
"Language Modeling with Gated Convolutional Networks"
Yann Dauphin, Angela Fan, Michael Auli, David Grangier
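Context on the heading swap above (an editorial aside, not text from the commit): numpydoc has no "Extended Description" section; extended prose is expected to follow the summary unlabeled, so an underlined "Extended Description" heading can trip up Sphinx/numpydoc rendering and validation. "Notes" is a standard numpydoc section, which is why the same substitution is applied here and in the four initializer docstrings that follow.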
src/mygrad/nnet/initializers/glorot_normal.py (4 changes: 2 additions & 2 deletions)
@@ -27,8 +27,8 @@ def glorot_normal(*shape, gain=1, dtype=np.float32, constant=False):
mygrad.Tensor, shape=`shape`
A Tensor, with values initialized according to the glorot normal initialization.
Extended Description
--------------------
Notes
-----
Glorot and Bengio put forward this initialization in the paper
"Understanding the Difficulty of Training Deep Feedforward Neural Networks"
http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
src/mygrad/nnet/initializers/glorot_uniform.py (4 changes: 2 additions & 2 deletions)
@@ -27,8 +27,8 @@ def glorot_uniform(*shape, gain=1, dtype=np.float32, constant=False):
mygrad.Tensor, shape=`shape`
A Tensor, with values initialized according to the glorot uniform initialization.
Extended Description
--------------------
Notes
-----
Glorot and Bengio put forward this initialization in the paper
"Understanding the Difficulty of Training Deep Feedforward Neural Networks"
http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
src/mygrad/nnet/initializers/he_normal.py (4 changes: 2 additions & 2 deletions)
@@ -27,8 +27,8 @@ def he_normal(*shape, gain=1, dtype=np.float32, constant=False):
mygrad.Tensor, shape=``shape``
A Tensor, with values initialized according to the He normal initialization.
Extended Description
--------------------
Notes
-----
He, Zhang, Ren, and Sun put forward this initialization in the paper
"Delving Deep into Rectifiers: Surpassing Human-Level Performance
on ImageNet Classification"
src/mygrad/nnet/initializers/he_uniform.py (4 changes: 2 additions & 2 deletions)
@@ -27,8 +27,8 @@ def he_uniform(*shape, gain=1, dtype=np.float32, constant=False):
mygrad.Tensor, shape=``shape``
A Tensor, with values initialized according to the He uniform initialization.
Extended Description
--------------------
Notes
-----
He, Zhang, Ren, and Sun put forward this initialization in the paper
"Delving Deep into Rectifiers: Surpassing Human-Level Performance
on ImageNet Classification"
src/mygrad/nnet/losses/__init__.py (1 change: 1 addition & 0 deletions)
@@ -8,6 +8,7 @@
"focal_loss",
"margin_ranking_loss",
"multiclass_hinge",
"negative_log_likelihood",
"softmax_crossentropy",
"softmax_focal_loss",
]
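A small usage sketch (hypothetical, not part of the commit) of what the new __all__ entry changes: negative_log_likelihood now shows up in the package's public listing and in wildcard imports; the explicit import used by the tests already worked before this line.

# hypothetical check; assumes only the __all__ entry added above
from mygrad.nnet import losses

assert "negative_log_likelihood" in losses.__all__
from mygrad.nnet.losses import negative_log_likelihood  # unchanged by this entry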
src/mygrad/nnet/losses/negative_log_likelihood.py (3 changes: 2 additions & 1 deletion)
@@ -1,6 +1,7 @@
import numpy as np

from mygrad import Tensor, mean

from ._utils import check_loss_inputs


@@ -74,4 +75,4 @@ def negative_log_likelihood(x, y_true, *, weights=None, constant=False):

label_locs = (range(len(y_true)), y_true)
factors = weights[y_true]
return -mean(x[label_locs] * factors)
return -mean(x[label_locs] * factors, constant=constant)
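For illustration (this snippet is not part of the commit), the one-line change above forwards the caller's constant flag to the final mean, so constant=True now actually yields a constant output. A minimal sketch, assuming MyGrad's usual constant semantics and made-up values:

import numpy as np
import mygrad as mg
from mygrad.nnet.losses import negative_log_likelihood

# hypothetical log-probabilities for two samples over three classes
log_probs = mg.Tensor(np.log([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]))
labels = np.array([0, 1])

loss = negative_log_likelihood(log_probs, labels, constant=True)
print(loss.constant)  # True; before this fix the flag was dropped and this printed False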
tests/nnet/losses/test_negative_log_likelihood.py (83 changes: 72 additions & 11 deletions)
@@ -9,25 +9,88 @@
import mygrad as mg
from mygrad.nnet.losses import negative_log_likelihood, softmax_crossentropy
from mygrad.tensor_base import Tensor
from tests.wrappers.uber import backprop_test_factory, fwdprop_test_factory


@pytest.mark.parametrize(
("data", "labels", "weights"),
[
(np.ones((2,), dtype=float), np.zeros((2,), dtype=int), None), # 1D data
(np.ones((2, 1), dtype=float), np.zeros((2,), dtype=float), None), # non-int labels
(np.ones((2, 1), dtype=float), np.zeros((2, 1), dtype=int), None), # bad label-ndim
(np.ones((2, 1), dtype=float), np.zeros((3,), dtype=int), None), # bad label-shape
(np.ones((2, 2), dtype=float), np.zeros((2,), dtype=int), np.ones((1,))) # bad weight shape
(
np.ones((2, 1), dtype=float),
np.zeros((2,), dtype=float),
None,
), # non-int labels
(
np.ones((2, 1), dtype=float),
np.zeros((2, 1), dtype=int),
None,
), # bad label-ndim
(
np.ones((2, 1), dtype=float),
np.zeros((3,), dtype=int),
None,
), # bad label-shape
(
np.ones((2, 2), dtype=float),
np.zeros((2,), dtype=int),
np.ones((1,)),
), # bad weight shape
],
)
def test_input_validation(data, labels, weights):
with raises((ValueError, TypeError)):
negative_log_likelihood(data, labels, weights=weights)


def numpy_negative_log_likelihood(x, y_true, weights=None):
if weights is None:
weights = np.ones(x.shape[1])
label_locs = (range(len(y_true)), y_true)
factors = weights[y_true]
return -np.mean(x[label_locs] * factors)


def get_nll_args(*arrs):
(s,) = arrs
y_true = hnp.arrays(
shape=(s.shape[0],),
dtype=hnp.integer_dtypes(),
elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
)
weights = st.none() | hnp.arrays(
shape=(s.shape[1],), dtype=float, elements=st.floats(1e-8, 100),
)
return st.fixed_dictionaries(dict(y_true=y_true, weights=weights))


@fwdprop_test_factory(
num_arrays=1,
index_to_arr_shapes={0: hnp.array_shapes(min_dims=2, max_dims=2)},
mygrad_func=negative_log_likelihood,
true_func=numpy_negative_log_likelihood,
kwargs=get_nll_args,
)
def test_nll_fwd():
pass


@backprop_test_factory(
num_arrays=1,
index_to_arr_shapes={0: hnp.array_shapes(min_dims=2, max_dims=2)},
mygrad_func=negative_log_likelihood,
true_func=numpy_negative_log_likelihood,
kwargs=get_nll_args,
vary_each_element=True,
)
def test_nll_bkwd():
pass


@given(data=st.data(), labels_as_tensor=st.booleans())
def test_negative_log_likelihood(data: st.DataObject, labels_as_tensor: bool):
def test_negative_log_likelihood_vs_softmax_cross_entropy(
data: st.DataObject, labels_as_tensor: bool
):
s = data.draw(
hnp.arrays(
shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
@@ -55,7 +118,9 @@ def test_negative_log_likelihood(data: st.DataObject, labels_as_tensor: bool):


@given(data=st.data(), labels_as_tensor=st.booleans())
def test_weighted_negative_log_likelihood(data: st.DataObject, labels_as_tensor: bool):
def test_weighted_negative_log_likelihood_vs_softmax_cross_entropy(
data: st.DataObject, labels_as_tensor: bool
):
s = data.draw(
hnp.arrays(
shape=hnp.array_shapes(min_side=1, max_side=10, min_dims=2, max_dims=2),
@@ -71,11 +136,7 @@ def test_weighted_negative_log_likelihood(data: st.DataObject, labels_as_tensor:
).map(Tensor if labels_as_tensor else lambda x: x)
)
weights = data.draw(
hnp.arrays(
shape=(s.shape[1],),
dtype=float,
elements=st.floats(1e-8, 100),
)
hnp.arrays(shape=(s.shape[1],), dtype=float, elements=st.floats(1e-8, 100),)
)
scores = Tensor(s)
weights = Tensor(weights)
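A brief note on the new tests (editorial): numpy_negative_log_likelihood is a pure-NumPy reference, and get_nll_args supplies Hypothesis-drawn labels and optional weights; the fwdprop factory compares MyGrad's output against that reference, while the backprop factory runs the corresponding gradient comparison. Roughly, and only as a sketch with made-up inputs, the forward check amounts to:

# illustrative only; the real checks are generated by the factories in tests/wrappers/uber.py
import numpy as np
from mygrad.nnet.losses import negative_log_likelihood

x = np.log(np.random.rand(4, 3))  # hypothetical log-scores, shape (N, C)
y = np.array([0, 2, 1, 0])        # hypothetical integer labels in [0, C)
assert np.isclose(
    negative_log_likelihood(x, y).data,  # MyGrad result as a NumPy scalar
    -np.mean(x[range(len(y)), y]),       # the NumPy reference from the diff above (weights=None)
)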
tests/wrappers/uber.py (7 changes: 4 additions & 3 deletions)
@@ -308,9 +308,10 @@ def wrapper(shapes: hnp.BroadcastableShapes, constant, data: st.DataObject):
assert isinstance(
o, Tensor
), f"`mygrad_func` returned type {type(o)}, should return `mygrad.Tensor`"
assert o.constant is constant or bool(
sum(tensor_constants)
), f"`mygrad_func` returned tensor.constant={o.constant}, should be constant={ bool(sum(tensor_constants))}"
assert o.constant is constant or bool(sum(tensor_constants)), (
f"`mygrad_func` returned tensor.constant={o.constant}, "
f"should be constant={constant or bool(sum(tensor_constants))}"
)

assert_allclose(
actual=tensor_out,
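One clarification on the change above (editorial): the assertion's condition is untouched; only the failure message is fixed so that it reports the value actually expected, constant or bool(sum(tensor_constants)), instead of bool(sum(tensor_constants)) alone.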
