Source code for aijack.attack.poison.poison_attack

import copy

import numpy as np
import sklearn

from ..base_attack import BaseAttacker

class Poison_attack_sklearn(BaseAttacker):
    """Poisoning attack against an sklearn binary SVM classifier.

    Implements the gradient-based poisoning attack described in Biggio,
    Nelson and Laskov, "Poisoning Attacks against Support Vector Machines"
    (ICML 2012, arXiv:1206.6389): a single attack point is moved step by
    step in the direction expected to degrade the validation performance of
    the model retrained on the poisoned training set.

    Args:
        target_model: sklearn classifier. Only ``sklearn.svm.SVC`` with
            ``kernel="linear"`` is supported.
        X_train: training data for target_model
        y_train: training label for target_model
        t: step size

    Attributes:
        target_model: sklearn classifier
        X_train: training data for target_model
        y_train: training label for target_model
        t: step size
        kernel: callable ``(xa, xb) -> Gram matrix`` selected from the
            kernel type of the target model
        delta_kernel: callable ``(xi, xc) -> array`` used as the kernel
            derivative term when building the gradient

    Raises:
        ValueError: if the classifier or its kernel type is not supported.
    """

    def __init__(self, target_model, X_train, y_train, t=0.5):
        super().__init__(target_model)
        self.X_train = X_train
        self.y_train = y_train
        self.t = t

        self.kernel = None
        self.delta_kernel = None

        # Fail fast at construction time if the model cannot be attacked.
        self._detect_type_of_classifier()

    def _detect_type_of_classifier(self):
        """Detect the type of classifier and prepare the matching kernels.

        Returns:
            True if no error occurs.

        Raises:
            ValueError: if the classifier or its kernel type is not
                supported.
        """
        if not isinstance(self.target_model, sklearn.svm.SVC):
            target_type = type(self.target_model)
            raise ValueError(f"target type {target_type} is not supported")

        kernel_type = self.target_model.get_params()["kernel"]
        if kernel_type != "linear":
            raise ValueError(f"kernel type {kernel_type} is not supported")

        # Linear kernel: Gram matrix between the rows of xa and the rows of xb.
        self.kernel = lambda xa, xb: xa.dot(xb.T)
        # Kernel derivative term as used by this implementation; xc is unused.
        self.delta_kernel = lambda xi, xc: self.t * xi
        return True

    def _delta_q(self, xi, xc, yi, yc):
        """Calculate the derivative of Q with respect to the attack point.

        Q = yy.T * K denotes the label-annotated version of the kernel
        matrix K.

        Args:
            xi: data points, one row per sample
            xc: current attack point
            yi: labels of ``xi``
            yc: label of the attack point

        Returns:
            dq: array with the same shape as ``xi`` in which row ``i`` is
                ``yi[i] * yc * delta_kernel(xi, xc)[i]``.
        """
        # A column of label products broadcasts over every feature column.
        yy = (np.asarray(yi) * yc).reshape(-1, 1)
        dq = yy * self.delta_kernel(xi, xc)
        return dq

    def attack(self, xc, yc, X_valid, y_valid, num_iterations=200):
        """Create an adversarial example for poison attack.

        Args:
            xc: initial attack point (1-D array). It is copied, so the
                caller's array is left untouched.
            yc: true label of initial attack point. The sign is flipped
                before use, so labels are assumed to be -1 / +1.
            X_valid: validation data for target_model
            y_valid: validation label for target_model
            num_iterations: maximum number of update steps (default = 200)

        Returns:
            xc: created adversarial example
            log: validation score of the retrained model at each iteration
        """
        # flip the class label
        yc = -yc
        # Work on a float copy: protects the caller's array and allows
        # real-valued updates even when an integer array is passed in.
        xc = np.array(xc, dtype=float)

        log = []
        X_train_poisoned = copy.copy(self.X_train)
        y_train_poisoned = copy.copy(self.y_train)

        for _ in range(num_iterations):
            # Fresh, unfitted estimator with the same hyper-parameters.
            target_model_ = copy.copy(self.target_model)
            target_model_.__init__()
            target_model_.set_params(**self.target_model.get_params())

            # add poisoned data
            y_poisoned = np.concatenate([y_train_poisoned, [yc]])
            target_model_.fit(
                np.concatenate([X_train_poisoned, xc.reshape(1, -1)]),
                y_poisoned,
            )
            log.append(target_model_.score(X_valid, y_valid))

            # Support vectors of the retrained model and their labels.
            xs = target_model_.support_vectors_
            ys = y_poisoned[target_model_.support_]

            # Label-annotated kernel between validation points and support
            # vectors.
            Qks = y_valid.reshape(-1, 1).dot(ys.reshape(-1, 1).T) * self.kernel(
                X_valid, xs
            )
            # NOTE(review): the paper inverts the label-annotated Q_ss; here
            # the plain Gram matrix is inverted, and it is singular when
            # there are more support vectors than features -- confirm.
            Qss_inv = np.linalg.inv(self.kernel(xs, xs))

            # Quantities from the paper:
            #   v    = Qss^-1 ys
            #   zeta = ys^T Qss^-1 ys
            #   Mk   = -(1/zeta) * (Qks (zeta Qss^-1 - v v^T) + yk v^T)
            v = Qss_inv.dot(ys)
            zeta = ys.dot(v)
            Mk = (-1 / zeta) * (
                Qks.dot(zeta * Qss_inv - np.outer(v, v))
                + y_valid.reshape(-1, 1).dot(v.reshape(1, -1))
            )

            delta_Qsc = self._delta_q(xs, xc.reshape(1, -1), ys, yc)
            delta_Qkc = self._delta_q(X_valid, xc.reshape(1, -1), y_valid, yc)

            # NOTE(review): the paper weights the gradient by the dual
            # variable of the attack point; this code uses the decision
            # function value at xc instead -- confirm this is intended.
            alpha = target_model_.decision_function([xc])

            # the desired gradient used for optimizing our attack:
            delta_L = np.sum(((Mk.dot(delta_Qsc) + delta_Qkc) * alpha), axis=0)

            norm = np.sqrt(np.sum(delta_L**2))
            if norm == 0:
                # No ascent direction; normalising would turn xc into NaN.
                break

            # u is a norm-1 vector representing the attack direction,
            u = delta_L / norm

            # the attack point
            xc = xc + self.t * u

        return xc, log