Merge changes for version 0.8.0
Authored and committed by Irina Nicolae on Apr 30, 2019
2 parents c40e632 + 0e0b7bf commit 403e67c
Showing 57 changed files with 2,003 additions and 780 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -104,3 +104,6 @@ demo/pics/*
*.npz
*.ipynb
.DS_Store

# Exceptions for notebooks/
!notebooks/*.ipynb
2 changes: 2 additions & 0 deletions README.md
@@ -21,6 +21,8 @@ The library contains implementations of the following **evasion attacks**:
* Spatial transformations attack ([Engstrom et al., 2017](https://arxiv.org/abs/1712.02779))
* Query-efficient black-box attack ([Ilyas et al., 2017](https://arxiv.org/abs/1712.07113))
* Zeroth-order optimization attack ([Chen et al., 2017](https://arxiv.org/abs/1708.03999))
* Decision-based attack ([Brendel et al., 2018](https://arxiv.org/abs/1712.04248))
* Adversarial patch ([Brown et al., 2017](https://arxiv.org/abs/1712.09665))

The following **defence** methods are also supported:
* Feature squeezing ([Xu et al., 2017](http://arxiv.org/abs/1704.01155))
8 changes: 4 additions & 4 deletions art/attacks/__init__.py
@@ -1,18 +1,18 @@
"""
Module providing adversarial attacks under a common interface.
"""
from art.attacks.adversarial_patch import AdversarialPatch
from art.attacks.attack import Attack
from art.attacks.boundary import BoundaryAttack
from art.attacks.carlini import CarliniL2Method, CarliniLInfMethod
from art.attacks.deepfool import DeepFool
from art.attacks.elastic_net import ElasticNet
from art.attacks.fast_gradient import FastGradientMethod
from art.attacks.iterative_method import BasicIterativeMethod
from art.attacks.newtonfool import NewtonFool
from art.attacks.projected_gradient_descent import ProjectedGradientDescent
from art.attacks.saliency_map import SaliencyMapMethod
from art.attacks.spatial_transformation import SpatialTransformation
from art.attacks.universal_perturbation import UniversalPerturbation
from art.attacks.virtual_adversarial import VirtualAdversarialMethod
from art.attacks.elastic_net import ElasticNet
from art.attacks.spatial_transformation import SpatialTransformation
from art.attacks.boundary import Boundary
from art.attacks.zoo import ZooAttack
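
Note that the old Boundary name is no longer exported; together with the new AdversarialPatch entry, the public import surface after this change looks as follows (a small illustrative snippet, not part of the diff):

# The old class name is gone from the package namespace; code written against the
# previous release needs to switch from Boundary to BoundaryAttack.
# from art.attacks import Boundary        # no longer available after this change
from art.attacks import AdversarialPatch, BoundaryAttack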

351 changes: 351 additions & 0 deletions art/attacks/adversarial_patch.py

Large diffs are not rendered by default.
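
Since the new module's diff is not rendered, here is a rough usage sketch pieced together from the unit test added below: the constructor, the generate() call, and the parameter names all come from that test, while load_mnist and get_classifier_kr are the existing art.utils helpers it uses. The second value returned by generate() is not shown on this page, so it is left unnamed.

# Usage sketch for the new AdversarialPatch attack (reconstructed from
# adversarial_patch_unittest.py below, not from the unrendered module itself).
from art.attacks import AdversarialPatch
from art.utils import load_mnist, get_classifier_kr

(x_train, _), (_, _), _, _ = load_mnist()
classifier, _ = get_classifier_kr()

attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
                 "learning_rate": 5.0, "number_of_steps": 5,
                 "patch_shape": (28, 28, 1), "batch_size": 10}
attack = AdversarialPatch(classifier)
patch, _ = attack.generate(x_train[:10], **attack_params)  # patch has shape patch_shape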

124 changes: 124 additions & 0 deletions art/attacks/adversarial_patch_unittest.py
@@ -0,0 +1,124 @@
# MIT License
#
# Copyright (C) IBM Corporation 2018
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import unittest

import keras.backend as k
import numpy as np
import tensorflow as tf

from art.attacks import AdversarialPatch
from art.utils import load_mnist, master_seed, get_classifier_tf, get_classifier_kr, get_classifier_pt

logger = logging.getLogger('testLogger')

BATCH_SIZE = 10
NB_TRAIN = 10
NB_TEST = 10


class TestAdversarialPatch(unittest.TestCase):
"""
    A unittest class for testing the Adversarial Patch attack.
"""

@classmethod
def setUpClass(cls):
# Get MNIST
(x_train, y_train), (x_test, y_test), _, _ = load_mnist()
x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
cls.mnist = (x_train, y_train), (x_test, y_test)

def setUp(self):
# Set master seed
master_seed(1234)

def test_tfclassifier(self):
"""
First test with the TFClassifier.
:return:
"""
# Build TFClassifier
tfc, sess = get_classifier_tf()

# Get MNIST
(x_train, _), (_, _) = self.mnist

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (28, 28, 1), "batch_size": 10}
attack_ap = AdversarialPatch(tfc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[8, 8, 0] - (-3.1106631027725005)) < 0.01)
        self.assertTrue(abs(patch_adv[14, 14, 0] - 18.954278294246386) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 794.2447019737851) < 0.01)

sess.close()
tf.reset_default_graph()

def test_krclassifier(self):
"""
Second test with the KerasClassifier.
:return:
"""
# Build KerasClassifier
krc, _ = get_classifier_kr()

# Get MNIST
(x_train, _), (_, _) = self.mnist

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (28, 28, 1), "batch_size": 10}
attack_ap = AdversarialPatch(krc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[8, 8, 0] - (-3.2501425017774923)) < 0.01)
        self.assertTrue(abs(patch_adv[14, 14, 0] - 20.48400094881169) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 1764.7681744376168) < 0.01)

k.clear_session()

def test_ptclassifier(self):
"""
Third test with the PyTorchClassifier.
:return:
"""
# Build PyTorchClassifier
ptc = get_classifier_pt()

# Get MNIST
(x_train, _), (_, _) = self.mnist
x_train = np.swapaxes(x_train, 1, 3)

# Attack
attack_params = {"rotation_max": 22.5, "scale_min": 0.1, "scale_max": 1.0,
"learning_rate": 5.0, "number_of_steps": 5, "patch_shape": (1, 28, 28), "batch_size": 10}
attack_ap = AdversarialPatch(ptc)
patch_adv, _ = attack_ap.generate(x_train, **attack_params)

        self.assertTrue(abs(patch_adv[0, 8, 8] - (-3.1423605902784875)) < 0.01)
        self.assertTrue(abs(patch_adv[0, 14, 14] - 19.790434152473054) < 0.01)
        self.assertTrue(abs(np.sum(patch_adv) - 383.5670772794207) < 0.01)

if __name__ == '__main__':
unittest.main()
39 changes: 16 additions & 23 deletions art/attacks/boundary.py
@@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)


class Boundary(Attack):
class BoundaryAttack(Attack):
"""
Implementation of the boundary attack from Wieland Brendel et al. (2018).
Paper link: https://arxiv.org/abs/1712.04248
@@ -37,7 +37,7 @@ class Boundary(Attack):
def __init__(self, classifier, targeted=True, delta=0.01, epsilon=0.01, step_adapt=0.9, max_iter=100,
sample_size=20, init_size=100):
"""
Create a Boundary attack instance.
Create a boundary attack instance.
:param classifier: A trained model.
:type classifier: :class:`.Classifier`
@@ -56,7 +56,7 @@ def __init__(self, classifier, targeted=True, delta=0.01, epsilon=0.01, step_ada
:param init_size: Maximum number of trials for initial generation of adversarial examples.
:type init_size: `int`
"""
super(Boundary, self).__init__(classifier=classifier)
super(BoundaryAttack, self).__init__(classifier=classifier)
params = {'targeted': targeted,
'delta': delta,
'epsilon': epsilon,
@@ -116,41 +116,39 @@ def generate(self, x, **kwargs):

x_adv[ind] = x_

preds_adv = np.argmax(self.classifier.predict(x_adv), axis=1)
logger.info('Success rate of Boundary attack: %.2f%%', (np.sum(preds != preds_adv) / x.shape[0]))
logger.info('Success rate of Boundary attack: %.2f%%',
(np.sum(preds != np.argmax(self.classifier.predict(x_adv), axis=1)) / x.shape[0]))

return x_adv

def _perturb(self, x, y, y_p):
"""
Internal attack function for 1 example.
Internal attack function for one example.
:param x: An array with 1 original input to be attacked.
:param x: An array with one original input to be attacked.
:type x: `np.ndarray`
:param y: If `self.targeted` is true, then `y` represents the target label.
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:return: an adversarial example.
"""
clip_min, clip_max = self.classifier.clip_values

# First, create an initial adversarial sample
initial_sample = self._init_sample(x, y, y_p, clip_min, clip_max)
initial_sample = self._init_sample(x, y, y_p)

# If an initial adversarial example is not found, then return the original image
if initial_sample is None:
return x

# If an initial adversarial example found, then go with boundary attack
if self.targeted:
x_adv = self._attack(initial_sample, x, y, self.delta, self.epsilon, clip_min, clip_max)
x_adv = self._attack(initial_sample, x, y, self.delta, self.epsilon)
else:
x_adv = self._attack(initial_sample, x, y_p, self.delta, self.epsilon, clip_min, clip_max)
x_adv = self._attack(initial_sample, x, y_p, self.delta, self.epsilon)

return x_adv

def _attack(self, initial_sample, original_sample, target, initial_delta, initial_epsilon, clip_min, clip_max):
def _attack(self, initial_sample, original_sample, target, initial_delta, initial_epsilon):
"""
Main function for the boundary attack.
@@ -165,16 +163,14 @@ def _attack(self, initial_sample, original_sample, target, initial_delta, initia
:type initial_delta: `float`
:param initial_epsilon: Initial step size for the step towards the target.
:type initial_epsilon: `float`
:param clip_min: minimum value of x.
:type clip_min: `float`
:param clip_max: maximum value of x.
:type clip_max: `float`
:return: an adversarial example.
:rtype: `np.ndarray`
"""
# Get initialization for some variables
x_adv = initial_sample
delta = initial_delta
epsilon = initial_epsilon
clip_min, clip_max = self.classifier.clip_values

# Main loop to wander around the boundary
for _ in range(self.max_iter):
@@ -276,7 +272,7 @@ def _orthogonal_perturb(self, delta, current_sample, original_sample):

return perturb

def _init_sample(self, x, y, y_p, clip_min, clip_max):
def _init_sample(self, x, y, y_p):
"""
Find initial adversarial example for the attack.
Expand All @@ -286,12 +282,9 @@ def _init_sample(self, x, y, y_p, clip_min, clip_max):
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:param clip_min: minimum value of x.
:type clip_min: `float`
:param clip_max: maximum value of x.
:type clip_max: `float`
:return: an adversarial example.
"""
clip_min, clip_max = self.classifier.clip_values
nprd = np.random.RandomState()
initial_sample = None

@@ -348,7 +341,7 @@ def set_params(self, **kwargs):
:type init_size: `int`
"""
# Save attack-specific parameters
super(Boundary, self).set_params(**kwargs)
super(BoundaryAttack, self).set_params(**kwargs)

if not isinstance(self.max_iter, (int, np.int)) or self.max_iter <= 0:
raise ValueError("The number of iterations must be a positive integer.")
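
As a usage reference for the renamed class, here is a minimal call adapted from the updated unit tests below; note that the clip range is now read from classifier.clip_values inside the attack instead of being threaded through the internal helpers.

# BoundaryAttack usage, adapted from boundary_unittest.py below.
from art.attacks import BoundaryAttack
from art.utils import load_mnist, get_classifier_tf, random_targets

(_, _), (x_test, y_test), _, _ = load_mnist()
classifier, sess = get_classifier_tf()

# Untargeted: push each sample just across the decision boundary of its predicted class.
attack = BoundaryAttack(classifier=classifier, targeted=False, max_iter=20)
x_test_adv = attack.generate(x_test[:10])

# Targeted: pass target labels through the `y` keyword.
attack_t = BoundaryAttack(classifier=classifier, targeted=True, max_iter=20)
x_test_adv_t = attack_t.generate(x_test[:10], y=random_targets(y_test[:10], classifier.nb_classes))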
14 changes: 7 additions & 7 deletions art/attacks/boundary_unittest.py
@@ -24,7 +24,7 @@
import numpy as np
import tensorflow as tf

from art.attacks import Boundary
from art.attacks import BoundaryAttack
from art.utils import load_mnist, random_targets, master_seed, get_classifier_tf, get_classifier_kr, get_classifier_pt

logger = logging.getLogger('testLogger')
@@ -61,7 +61,7 @@ def test_tfclassifier(self):
(_, _), (x_test, y_test) = self.mnist

# First targeted attack
boundary = Boundary(classifier=tfc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, tfc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -74,7 +74,7 @@ def test_tfclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=tfc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
@@ -101,7 +101,7 @@ def test_krclassifier(self):
(_, _), (x_test, y_test) = self.mnist

# First targeted attack
boundary = Boundary(classifier=krc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, krc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -114,7 +114,7 @@ def test_krclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=krc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
@@ -141,7 +141,7 @@ def test_ptclassifier(self):
x_test = np.swapaxes(x_test, 1, 3)

# First targeted attack
boundary = Boundary(classifier=ptc, targeted=True, max_iter=20)
boundary = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
params = {'y': random_targets(y_test, ptc.nb_classes)}
x_test_adv = boundary.generate(x_test, **params)

@@ -154,7 +154,7 @@ def test_ptclassifier(self):
self.assertTrue((target == y_pred_adv).any())

# Second untargeted attack
boundary = Boundary(classifier=ptc, targeted=False, max_iter=20)
boundary = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
x_test_adv = boundary.generate(x_test)

self.assertFalse((x_test == x_test_adv).all())
20 changes: 5 additions & 15 deletions art/attacks/carlini.py
@@ -23,7 +23,7 @@

from art import NUMPY_DTYPE
from art.attacks.attack import Attack
from art.utils import get_labels_np_array, tanh_to_original, original_to_tanh
from art.utils import compute_success, get_labels_np_array, tanh_to_original, original_to_tanh

logger = logging.getLogger(__name__)

@@ -371,13 +371,8 @@ def generate(self, x, **kwargs):

x_adv[batch_index_1:batch_index_2] = best_x_adv_batch

adv_preds = np.argmax(self.classifier.predict(x_adv), axis=1)
if self.targeted:
rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
else:
preds = np.argmax(self.classifier.predict(x), axis=1)
rate = np.sum(adv_preds != preds) / x_adv.shape[0]
logger.info('Success rate of C&W attack: %.2f%%', 100*rate)
logger.info('Success rate of C&W L_2 attack: %.2f%%',
100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

return x_adv

@@ -699,13 +694,8 @@ def generate(self, x, **kwargs):
x_adv_batch[~attack_success] = x_batch[~attack_success]
x_adv[batch_index_1:batch_index_2] = x_adv_batch

adv_preds = np.argmax(self.classifier.predict(x_adv), axis=1)
if self.targeted:
rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
else:
preds = np.argmax(self.classifier.predict(x), axis=1)
rate = np.sum(adv_preds != preds) / x_adv.shape[0]
logger.info('Success rate of C&W attack: %.2f%%', 100 * rate)
logger.info('Success rate of C&W L_inf attack: %.2f%%',
100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

return x_adv
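
The success-rate computation that used to be inlined in both C&W variants is now delegated to the new compute_success helper from art.utils. A minimal sketch of what that helper presumably does, reconstructed from the removed code above (the real implementation in art.utils may differ in details such as label handling):

import numpy as np

def compute_success(classifier, x, y, x_adv, targeted=False):
    """Sketch of the success-rate helper, based on the inlined code it replaces."""
    adv_preds = np.argmax(classifier.predict(x_adv), axis=1)
    if targeted:
        # A targeted attack succeeds when the adversarial prediction hits the target label.
        rate = np.sum(adv_preds == np.argmax(y, axis=1)) / x_adv.shape[0]
    else:
        # An untargeted attack succeeds when the prediction moves away from the clean one.
        preds = np.argmax(classifier.predict(x), axis=1)
        rate = np.sum(adv_preds != preds) / x_adv.shape[0]
    return rate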
