3.1. Split Learning and Label Leakage#
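In split learning, a neural network is partitioned between two parties: the party holding the raw features runs the bottom half of the model (here FirstNet) and sends only the cut-layer activations to the party holding the labels, which runs the top half (here SecondNet) and computes the loss. The norm-based label leakage attack exploits the fact that, for heavily imbalanced binary classification such as fraud detection, the L2 norm of the gradient returned through the cut layer tends to be much larger for positive examples, so the feature-holding party can infer the hidden labels from the gradient norms alone. This tutorial reproduces the attack with AIJack on the Kaggle credit-card fraud dataset.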
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from aijack.attack.labelleakage import NormAttackSplitNNManager
from aijack.collaborative.splitnn import SplitNNAPI, SplitNNClient
from aijack.utils import NumpyDataset
class FirstNet(nn.Module):
    def __init__(self, train_features):
        super(FirstNet, self).__init__()
        self.L1 = nn.Linear(train_features.shape[-1], hidden_dim)

    def forward(self, x):
        x = self.L1(x)
        x = nn.functional.relu(x)
        return x
class SecondNet(nn.Module):
    def __init__(self):
        super(SecondNet, self).__init__()
        self.L2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = self.L2(x)
        x = torch.sigmoid(x)
        return x
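Outside of the AIJack wrappers, the two halves simply compose at the cut layer: FirstNet maps the 29 pre-processed features to a hidden_dim-dimensional activation, and SecondNet turns that activation into a fraud probability. A minimal stand-alone sketch with dummy data (the feature count and hidden_dim = 16 are taken from the next section):

# Sketch only: compose the two halves at the cut layer on a dummy mini-batch.
hidden_dim = 16                       # same value as set in the next section
bottom = FirstNet(np.zeros((1, 29)))  # 29 features remain after the pre-processing below
top = SecondNet()

x_dummy = torch.randn(5, 29)          # hypothetical mini-batch of client features
cut_activations = bottom(x_dummy)     # client-side forward pass, sent to the label party
predictions = top(cut_activations)    # label party's forward pass on the received activations
print(predictions.shape)              # torch.Size([5, 1])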
3.1.1. Parameters and Pre-processing#
batch_size = 5
hidden_dim = 16
num_communication = 2
torch.manual_seed(10)
raw_df = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv"
)
raw_df_neg = raw_df[raw_df["Class"] == 0]
raw_df_pos = raw_df[raw_df["Class"] == 1]
down_df_neg = raw_df_neg # .sample(40000)
down_df = pd.concat([down_df_neg, raw_df_pos])
neg, pos = np.bincount(down_df["Class"])
total = neg + pos
print(
    "Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n".format(
        total, pos, 100 * pos / total
    )
)
cleaned_df = down_df.copy()
# You don't want the `Time` column.
cleaned_df.pop("Time")
# The `Amount` column covers a huge range. Convert to log-space.
eps = 0.001 # 0 => 0.1¢
cleaned_df["Log Ammount"] = np.log(cleaned_df.pop("Amount") + eps)
# Use a utility from sklearn to split and shuffle our dataset.
train_df, test_df = train_test_split(cleaned_df, test_size=0.2)
# Form np arrays of labels and features.
train_labels = np.array(train_df.pop("Class"))
train_features = np.array(train_df)
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
train_features = np.clip(train_features, -5, 5)
train_dataset = NumpyDataset(
    train_features, train_labels.astype(np.float64).reshape(-1, 1)
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
Examples:
Total: 284807
Positive: 492 (0.17% of total)
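The attack below is evaluated on the training loader only. If you also want a held-out loader, test_df from the split above can be prepared the same way; a sketch that reuses the fitted scaler:

# Sketch: mirror the training pre-processing for the held-out split.
test_labels = np.array(test_df.pop("Class"))
test_features = scaler.transform(np.array(test_df))  # reuse the scaler fitted on the training set
test_features = np.clip(test_features, -5, 5)
test_dataset = NumpyDataset(
    test_features, test_labels.astype(np.float64).reshape(-1, 1)
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)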
3.1.2. Split Learning#
model_1 = FirstNet(train_features)
model_2 = SecondNet()
model_1.double()
model_2.double()
opt_1 = optim.Adam(model_1.parameters(), lr=1e-3)
opt_2 = optim.Adam(model_2.parameters(), lr=1e-3)
optimizers = [opt_1, opt_2]
client_1 = SplitNNClient(model_1, user_id=0)
client_2 = SplitNNClient(model_2, user_id=1)
clients = [client_1, client_2]
criterion = nn.BCELoss()
manager = NormAttackSplitNNManager(criterion, device="cpu")
NormAttackSplitNNAPI = manager.attach(SplitNNAPI)
normattacksplitnn = NormAttackSplitNNAPI(
    clients, optimizers, train_loader, criterion, num_communication
)
normattacksplitnn.run()
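SplitNNAPI (here extended by the attack manager) takes care of the forward pass to the cut layer, the exchange of activations and gradients, and the two optimizer steps. Conceptually, one round of training looks roughly like the following pure-PyTorch sketch; it is illustrative only, not AIJack's internal implementation:

# Illustrative sketch of a single split-learning optimization step.
for x, y in train_loader:
    opt_1.zero_grad()
    opt_2.zero_grad()
    intermediate = model_1(x)        # client: forward to the cut layer
    outputs = model_2(intermediate)  # label party: forward through the top model
    loss = criterion(outputs, y)     # label party holds the labels and computes the loss
    loss.backward()                  # gradients flow back through the cut layer to the client
    opt_2.step()
    opt_1.step()
    break                            # sketch: stop after one mini-batch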
3.1.3. Norm-based Label Leakage Attack#
train_leak_auc = normattacksplitnn.attack(train_loader)
print("Leau AUC is ", train_leak_auc)
Leau AUC is 0.9975959580734046
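The leak AUC reported above measures how well the per-example L2 norm of the gradient arriving at the cut layer separates positive from negative labels; a value close to 1.0 means the gradient norms reveal the labels almost perfectly. The same statistic can be computed by hand, which makes the mechanism explicit (a sketch of the idea, not AIJack's internal implementation):

# Sketch: recompute the norm-attack statistic with plain PyTorch and scikit-learn.
from sklearn.metrics import roc_auc_score

grad_norms, labels = [], []
for x, y in train_loader:
    intermediate = model_1(x)
    intermediate.retain_grad()  # keep the gradient at the cut layer
    loss = criterion(model_2(intermediate), y)
    loss.backward()
    grad_norms.append(intermediate.grad.norm(dim=1))  # per-example norm of the leaked gradient
    labels.append(y.flatten())

grad_norms = torch.cat(grad_norms).detach().numpy()
labels = torch.cat(labels).numpy()
print("manual leak AUC:", roc_auc_score(labels, grad_norms))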