Membership Inference

Contents

4.1. Membership Inference#

from sklearn.metrics import roc_auc_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from aijack.attack.membership import ShadowMembershipInferenceAttack
from aijack.utils.utils import TorchClassifier, NumpyDataset

# Fix the NumPy and PyTorch seeds so that the random draws below are repeatable.
np.random.seed(42)
torch.manual_seed(42)

# Load the CSV without a header row: column 0 is read as the label and
# columns 1..784 as the features (784 = 28 * 28).
df = pd.read_csv("/content/sample_data/mnist_train_small.csv", header=None)

# Features are cast to float64 and divided by 255
# (presumably 8-bit pixel intensities, giving values in [0, 1] -- confirm).
X = df[range(1, 785)].values.astype("float64") / 255
y = df[0].values

# We use the train dataset to train the victim model. The attacker uses the
# shadow dataset to prepare the membership inference attack. The test dataset
# is used to evaluate the result of the attack.
# First hold out one third of the data as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1 / 3, random_state=42
)
# ... then split the remainder in half, so train, shadow and test each hold
# roughly one third of the samples.
X_train, X_shadow, y_train, y_shadow = train_test_split(
    X_train, y_train, test_size=1 / 2, random_state=42
)
# We simulate the situation where the distribution of the training dataset
# differs from that of the test dataset by blending every test sample 50/50
# with Gaussian noise.
# NOTE(review): only X_test is perturbed here; X_shadow is left unchanged even
# though the original comment also mentioned the shadow dataset -- confirm intent.
X_test = 0.5 * X_test + 0.5 * np.random.normal(size=(X_test.shape))

4.1.1. sklearn#

# Train the victim

# probability=True is required because the attack below calls
# clf.predict_proba on this model.
clf = SVC(probability=True)
clf.fit(X_train, y_train)
# Bare expression: the notebook displays the (train, test) scores as a tuple.
clf.score(X_train, y_train), clf.score(X_test, y_test)

(0.9848484848484849, 0.11054447277636119)

# Train the attacker

# Two shadow models and ten attack models, every one an SVC with probability
# estimates enabled.
# NOTE(review): ten attack models presumably means one per class label --
# verify against ShadowMembershipInferenceAttack.
shadow_models = [SVC(probability=True) for _ in range(2)]
attack_models = [SVC(probability=True) for _ in range(10)]

attacker = ShadowMembershipInferenceAttack(clf, shadow_models, attack_models)
attacker.fit(X_shadow, y_shadow)

# Get the attack result of membership inference
# The attacker is given the victim's predicted probabilities together with the
# labels: training samples first, then the held-out test samples.
proba_members = clf.predict_proba(X_train)
in_result = attacker.predict(proba_members, y_train)
proba_non_members = clf.predict_proba(X_test)
out_result = attacker.predict(proba_non_members, y_test)

# Ground truth: 1 for samples the victim was trained on, 0 for the test samples.
in_label = np.ones(in_result.shape[0])
out_label = np.zeros(out_result.shape[0])

membership_true = np.concatenate([in_label, out_label])
membership_pred = np.concatenate([in_result, out_result])

# Bare expression: the notebook displays the attack accuracy.
accuracy_score(membership_true, membership_pred)

0.9738243456086402

4.1.2. PyTorch#

class LM(nn.Module):
    """Linear model: a single fully connected layer that outputs raw logits.

    Args:
        in_features: Size of each flattened input sample. Defaults to
            ``28 * 28``, the feature count used in this notebook.
        out_features: Number of output logits. Defaults to ``10``.
    """

    def __init__(self, in_features: int = 28 * 28, out_features: int = 10) -> None:
        super().__init__()
        self.lin1 = nn.Linear(in_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the un-normalised logits."""
        return self.lin1(x)

# Train the victim
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

criterion = nn.CrossEntropyLoss()
# Cast the parameters to double precision: the feature arrays were created
# with astype("float64").
net = LM().to(torch.double).to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)
# You need to wrap the torch module with TorchClassifier
# (the wrapper is used below through fit / score / predict_proba).
# Note that this rebinds `clf`, which previously referred to the SVC victim.
clf = TorchClassifier(
    net, criterion, optimizer, batch_size=64, epoch=100, device=device
)

clf.fit(X_train, y_train)
# Bare expression: the notebook displays the (train, test) scores as a tuple.
clf.score(X_train, y_train), clf.score(X_test, y_test)

(0.974947494749475, 0.3077846107694615)

# Train the attacker


def create_clf():
    """Build a new, not-yet-fitted ``TorchClassifier`` around a fresh ``LM``.

    Every call creates its own double-precision network on ``device`` and its
    own Adam optimizer (lr=0.001), so the returned classifiers do not share
    parameters. The module-level ``criterion`` and ``device`` are reused.
    """
    shadow_net = LM().to(torch.double).to(device)
    return TorchClassifier(
        shadow_net,
        criterion,
        optim.Adam(shadow_net.parameters(), lr=0.001),
        batch_size=64,
        epoch=100,
        device=device,
    )


# Two freshly built torch shadow classifiers; the ten attack models are SVCs
# with probability estimates enabled.
# NOTE(review): ten attack models presumably means one per class label --
# verify against ShadowMembershipInferenceAttack.
shadow_models = [create_clf() for _ in range(2)]
attack_models = [SVC(probability=True) for _ in range(10)]

attacker = ShadowMembershipInferenceAttack(clf, shadow_models, attack_models)
attacker.fit(X_shadow, y_shadow)

# Get the attack result of membership inference
# The attacker is given the victim's predicted probabilities together with the
# labels: training samples first, then the held-out test samples.
proba_members = clf.predict_proba(X_train)
in_result = attacker.predict(proba_members, y_train)
proba_non_members = clf.predict_proba(X_test)
out_result = attacker.predict(proba_non_members, y_test)

# Ground truth: 1 for samples the victim was trained on, 0 for the test samples.
in_label = np.ones(in_result.shape[0])
out_label = np.zeros(out_result.shape[0])

membership_true = np.concatenate([in_label, out_label])
membership_pred = np.concatenate([in_result, out_result])

# Bare expression: the notebook displays the attack accuracy.
accuracy_score(membership_true, membership_pred)

0.6735168379209481