Merge changes for version 0.8.0
Authored and committed by Irina Nicolae on Apr 30, 2019
2 parents c40e632 + 0e0b7bf commit 403e67c
Showing 57 changed files with 2,003 additions and 780 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -104,3 +104,6 @@ demo/pics/*
*.npz
*.ipynb
.DS_Store

# Exceptions for notebooks/
!notebooks/*.ipynb
2 changes: 2 additions & 0 deletions README.md
@@ -21,6 +21,8 @@ The library contains implementations of the following **evasion attacks**:
* Spatial transformations attack ([Engstrom et al., 2017](https://arxiv.org/abs/1712.02779))
* Query-efficient black-box attack ([Ilyas et al., 2017](https://arxiv.org/abs/1712.07113))
* Zeroth-order optimization attack ([Chen et al., 2017](https://arxiv.org/abs/1708.03999))
* Decision-based attack ([Brendel et al., 2018](https://arxiv.org/abs/1712.04248))
* Adversarial patch ([Brown et al., 2017](https://arxiv.org/abs/1712.09665))

The following **defence** methods are also supported:
* Feature squeezing ([Xu et al., 2017](http://arxiv.org/abs/1704.01155))
8 changes: 4 additions & 4 deletions art/attacks/__init__.py
@@ -1,18 +1,18 @@
"""
Module providing adversarial attacks under a common interface.
"""
from art.attacks.adversarial_patch import AdversarialPatch
from art.attacks.attack import Attack
from art.attacks.boundary import BoundaryAttack
from art.attacks.carlini import CarliniL2Method, CarliniLInfMethod
from art.attacks.deepfool import DeepFool
from art.attacks.elastic_net import ElasticNet
from art.attacks.fast_gradient import FastGradientMethod
from art.attacks.iterative_method import BasicIterativeMethod
from art.attacks.newtonfool import NewtonFool
from art.attacks.projected_gradient_descent import ProjectedGradientDescent
from art.attacks.saliency_map import SaliencyMapMethod
from art.attacks.spatial_transformation import SpatialTransformation
from art.attacks.universal_perturbation import UniversalPerturbation
from art.attacks.virtual_adversarial import VirtualAdversarialMethod
from art.attacks.elastic_net import ElasticNet
from art.attacks.spatial_transformation import SpatialTransformation
from art.attacks.boundary import Boundary
from art.attacks.zoo import ZooAttack
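
Note that the old Boundary name is no longer exported; together with the new AdversarialPatch entry, the public import surface after this change looks as follows (a small illustrative snippet, not part of the diff):

# The old class name is gone from the package namespace; code written against the
# previous release needs to switch from Boundary to BoundaryAttack.
# from art.attacks import Boundary        # no longer available after this change
from art.attacks import AdversarialPatch, BoundaryAttack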

351 changes: 351 additions & 0 deletions art/attacks/adversarial_patch.py

Large diffs are not rendered by default.
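
Since the new module's diff is not rendered, here is a rough usage sketch pieced together from the unit test added below: the constructor, the generate() call, and the parameter names all come from that test, while load_mnist and get_classifier_kr are the existing art.utils helpers it uses. The second value returned by generate() is not shown on this page, so it is left unnamed.

# Usage sketch for the new AdversarialPatch attack (reconstructed from
# adversarial_patch_unittest.py below, not from the unrendered module itself).
from art.attacks import AdversarialPatch
from art.utils import load_mnist, get_classifier_kr

(x_train, _), (_, _), _, _ = load_mnist()
classifier, _ = get_classifier_kr()

attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
                 "learning_rate": 5.0, "number_of_steps": 5,
                 "patch_shape": (28, 28, 1), "batch_size": 10}
attack = AdversarialPatch(classifier)
patch, _ = attack.generate(x_train[:10], **attack_params)  # patch has shape patch_shape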

124 changes: 124 additions & 0 deletions art/attacks/adversarial_patch_unittest.py
@@ -0,0 +1,124 @@
# MIT License
#
# Copyright (C) IBM Corporation 2018
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import unittest

import keras.backend as k
import numpy as np
import tensorflow as tf

from art.attacks import AdversarialPatch
from art.utils import load_mnist, master_seed, get_classifier_tf, get_classifier_kr, get_classifier_pt

logger = logging.getLogger('testLogger')

BATCH_SIZE = 10
NB_TRAIN = 10
NB_TEST = 10


class TestAdversarialPatch(unittest.TestCase):
"""
    A unittest class for testing the Adversarial Patch attack.
"""

@classmethod
def setUpClass(cls):
# Get MNIST
(x_train, y_train), (x_test, y_test), _, _ = load_mnist()
x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
cls.mnist = (x_train, y_train), (x_test, y_test)

def setUp(self):
# Set master seed
master_seed(1234)

def test_tfclassifier(self):
"""
First test with the TFClassifier.
:return:
"""
# Build TFClassifier
tfc, sess = get_classifier_tf()

# Get MNIST
(x_train, _), (_, _) = self.mnist

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (28, 28, 1), "batch_size": 10}
attack_ap = AdversarialPatch(tfc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[8, 8, 0] - (-3.1106631027725005)) < 0.01)
        self.assertTrue(abs(patch_adv[14, 14, 0] - 18.954278294246386) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 794.2447019737851) < 0.01)

sess.close()
tf.reset_default_graph()

def test_krclassifier(self):
"""
Second test with the KerasClassifier.
:return:
"""
# Build KerasClassifier
krc, _ = get_classifier_kr()

# Get MNIST
(x_train, _), (_, _) = self.mnist

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (28, 28, 1), "batch_size": 10}
attack_ap = AdversarialPatch(krc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[8, 8, 0] - (-3.2501425017774923)) < 0.01)
        self.assertTrue(abs(patch_adv[14, 14, 0] - 20.48400094881169) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 1764.7681744376168) < 0.01)

k.clear_session()

def test_ptclassifier(self):
"""
Third test with the PyTorchClassifier.
:return:
"""
# Build PyTorchClassifier
ptc = get_classifier_pt()

# Get MNIST
(x_train, _), (_, _) = self.mnist
x_train = np.swapaxes(x_train, 1, 3)

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (1, 28, 28), "batch_size": 10}
attack_ap = AdversarialPatch(ptc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[0, 8, 8] - (-3.1423605902784875)) < 0.01)
        self.assertTrue(abs(patch_adv[0, 14, 14] - 19.790434152473054) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 383.5670772794207) < 0.01)

if __name__ == '__main__':
unittest.main()
39 changes: 16 additions & 23 deletions art/attacks/boundary.py
@@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)


class Boundary(Attack):
class BoundaryAttack(Attack):
"""
Implementation of the boundary attack from Wieland Brendel et al. (2018).
Paper link: https://arxiv.org/abs/1712.04248
@@ -37,7 +37,7 @@ class Boundary(Attack):
def __init__(self, classifier, targeted=True, delta=0.01, epsilon=0.01, step_adapt=0.9, max_iter=100,
sample_size=20, init_size=100):
"""
Create a Boundary attack instance.
Create a boundary attack instance.
:param classifier: A trained model.
:type classifier: :class:`.Classifier`
@@ -56,7 +56,7 @@ def __init__(self, classifier, targeted=True, delta=0.01, epsilon=0.01, step_ada
:param init_size: Maximum number of trials for initial generation of adversarial examples.
:type init_size: `int`
"""
super(Boundary, self).__init__(classifier=classifier)
super(BoundaryAttack, self).__init__(classifier=classifier)
params = {'targeted': targeted,
'delta': delta,
'epsilon': epsilon,
@@ -116,41 +116,39 @@ def generate(self, x, **kwargs):

x_adv[ind] = x_

preds_adv = np.argmax(self.classifier.predict(x_adv), axis=1)
logger.info('Success rate of Boundary attack: %.2f%%', (np.sum(preds != preds_adv) / x.shape[0]))
logger.info('Success rate of Boundary attack: %.2f%%',
(np.sum(preds != np.argmax(self.classifier.predict(x_adv), axis=1)) / x.shape[0]))

return x_adv

def _perturb(self, x, y, y_p):
"""
Internal attack function for 1 example.
Internal attack function for one example.
:param x: An array with 1 original input to be attacked.
:param x: An array with one original input to be attacked.
:type x: `np.ndarray`
:param y: If `self.targeted` is true, then `y` represents the target label.
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:return: an adversarial example.
"""
clip_min, clip_max = self.classifier.clip_values

# First, create an initial adversarial sample
initial_sample = self._init_sample(x, y, y_p, clip_min, clip_max)
initial_sample = self._init_sample(x, y, y_p)

# If an initial adversarial example is not found, then return the original image
if initial_sample is None:
return x

# If an initial adversarial example found, then go with boundary attack
if self.targeted:
x_adv = self._attack(initial_sample, x, y, self.delta, self.epsilon, clip_min, clip_max)
x_adv = self._attack(initial_sample, x, y, self.delta, self.epsilon)
else:
x_adv = self._attack(initial_sample, x, y_p, self.delta, self.epsilon, clip_min, clip_max)
x_adv = self._attack(initial_sample, x, y_p, self.delta, self.epsilon)

return x_adv

def _attack(self, initial_sample, original_sample, target, initial_delta, initial_epsilon, clip_min, clip_max):
def _attack(self, initial_sample, original_sample, target, initial_delta, initial_epsilon):
"""
Main function for the boundary attack.
@@ -165,16 +163,14 @@ def _attack(self, initial_sample, original_sample, target, initial_delta, initia
:type initial_delta: `float`
:param initial_epsilon: Initial step size for the step towards the target.
:type initial_epsilon: `float`
:param clip_min: minimum value of x.
:type clip_min: `float`
:param clip_max: maximum value of x.
:type clip_max: `float`
:return: an adversarial example.
:rtype: `np.ndarray`
"""
# Get initialization for some variables
x_adv = initial_sample
delta = initial_delta
epsilon = initial_epsilon
clip_min, clip_max = self.classifier.clip_values

# Main loop to wander around the boundary
for _ in range(self.max_iter):
@@ -276,7 +272,7 @@ def _orthogonal_perturb(self, delta, current_sample, original_sample):

return perturb

def _init_sample(self, x, y, y_p, clip_min, clip_max):
def _init_sample(self, x, y, y_p):
"""
Find initial adversarial example for the attack.
Expand All @@ -286,12 +282,9 @@ def _init_sample(self, x, y, y_p, clip_min, clip_max):
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:param clip_min: minimum value of x.
:type clip_min: `float`
:param clip_max: maximum value of x.
:type clip_max: `float`
:return: an adversarial example.
"""
clip_min, clip_max = self.classifier.clip_values
nprd = np.random.RandomState()
initial_sample = None

@@ -348,7 +341,7 @@ def set_params(self, **kwargs):
:type init_size: `int`
"""
# Save attack-specific parameters
super(Boundary, self).set_params(**kwargs)
super(BoundaryAttack, self).set_params(**kwargs)

if not isinstance(self.max_iter, (int, np.int)) or self.max_iter <= 0:
raise ValueError("The number of iterations must be a positive integer.")
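
As a usage reference for the renamed class, here is a minimal call adapted from the updated unit tests below; note that the clip range is now read from classifier.clip_values inside the attack instead of being threaded through the internal helpers.

# BoundaryAttack usage, adapted from boundary_unittest.py below.
from art.attacks import BoundaryAttack
from art.utils import load_mnist, get_classifier_tf, random_targets

(_, _), (x_test, y_test), _, _ = load_mnist()
classifier, sess = get_classifier_tf()

# Untargeted: push each sample just across the decision boundary of its predicted class.
attack = BoundaryAttack(classifier=classifier, targeted=False, max_iter=20)
x_test_adv = attack.generate(x_test[:10])

# Targeted: pass target labels through the `y` keyword.
attack_t = BoundaryAttack(classifier=classifier, targeted=True, max_iter=20)
x_test_adv_t = attack_t.generate(x_test[:10], y=random_targets(y_test[:10], classifier.nb_classes))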
14 changes: 7 additions & 7 deletions art/attacks/boundary_unittest.py
@@ -24,7 +24,7 @@
import numpy as np
import tensorflow as tf

from art.attacks import Boundary
from art.attacks import BoundaryAttack
from art.utils import load_mnist, random_targets, master_seed, get_classifier_tf, get_classifier_kr, get_classifier_pt

logger = logging.getLogger('testLogger')
@@ -61,7 +61,7 @@ def test_tfclassifier(self):
(_, _), (x_test, y_test) = self.mnist

# First targeted attack
boundary = Boundary(classifier=tfc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, tfc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -74,7 +74,7 @@ def test_tfclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=tfc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
@@ -101,7 +101,7 @@ def test_krclassifier(self):
(_, _), (x_test, y_test) = self.mnist

# First targeted attack
boundary = Boundary(classifier=krc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, krc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -114,7 +114,7 @@ def test_krclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=krc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
@@ -141,7 +141,7 @@ def test_ptclassifier(self):
x_test = np.swapaxes(x_test, 1, 3)

# First targeted attack
boundary = Boundary(classifier=ptc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, ptc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -154,7 +154,7 @@ def test_ptclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=ptc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
20 changes: 5 additions & 15 deletions art/attacks/carlini.py
@@ -23,7 +23,7 @@

from art import NUMPY_DTYPE
from art.attacks.attack import Attack
from art.utils import get_labels_np_array, tanh_to_original, original_to_tanh
from art.utils import compute_success, get_labels_np_array, tanh_to_original, original_to_tanh

logger = logging.getLogger(__name__)

@@ -371,13 +371,8 @@ def generate(self, x, **kwargs):

x_adv[batch_index_1:batch_index_2] = best_x_adv_batch

adv_preds = np.argmax(self.classifier.predict(x_adv), axis=1)
if self.targeted:
rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
else:
preds = np.argmax(self.classifier.predict(x), axis=1)
rate = np.sum(adv_preds != preds) / x_adv.shape[0]
logger.info('Success rate of C&W attack: %.2f%%', 100*rate)
logger.info('Success rate of C&W L_2 attack: %.2f%%',
100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

return x_adv

@@ -699,13 +694,8 @@ def generate(self, x, **kwargs):
x_adv_batch[~attack_success] = x_batch[~attack_success]
x_adv[batch_index_1:batch_index_2] = x_adv_batch

adv_preds = np.argmax(self.classifier.predict(x_adv), axis=1)
if self.targeted:
rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
else:
preds = np.argmax(self.classifier.predict(x), axis=1)
rate = np.sum(adv_preds != preds) / x_adv.shape[0]
logger.info('Success rate of C&W attack: %.2f%%', 100 * rate)
logger.info('Success rate of C&W L_inf attack: %.2f%%',
100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

return x_adv
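
The success-rate computation that used to be inlined in both C&W variants is now delegated to the new compute_success helper from art.utils. A minimal sketch of what that helper presumably does, reconstructed from the removed code above (the real implementation in art.utils may differ in details such as label handling):

import numpy as np

def compute_success(classifier, x, y, x_adv, targeted=False):
    """Sketch of the success-rate helper, based on the inlined code it replaces."""
    adv_preds = np.argmax(classifier.predict(x_adv), axis=1)
    if targeted:
        # A targeted attack succeeds when the adversarial prediction hits the target label.
        rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
    else:
        # An untargeted attack succeeds when the prediction moves away from the clean one.
        preds = np.argmax(classifier.predict(x), axis=1)
        rate = np.sum(adv_preds != preds) / x_adv.shape[0]
    return rate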
