3.1. Split Learning and Label Leakage#
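In split learning, a neural network is partitioned between two parties: the party holding the raw features runs the bottom half of the model (here FirstNet) and sends only the cut-layer activations to the party holding the labels, which runs the top half (here SecondNet) and computes the loss. The norm-based label leakage attack exploits the fact that, for heavily imbalanced binary classification such as fraud detection, the L2 norm of the gradient returned through the cut layer tends to be much larger for positive examples, so the feature-holding party can infer the hidden labels from the gradient norms alone. This tutorial reproduces the attack with AIJack on the Kaggle credit-card fraud dataset.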
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from aijack.attack.labelleakage import NormAttackSplitNNManager
from aijack.collaborative.splitnn import SplitNNAPI, SplitNNClient
from aijack.utils import NumpyDataset
class FirstNet(nn.Module):
    def __init__(self, train_features):
        super(FirstNet, self).__init__()
        self.L1 = nn.Linear(train_features.shape[-1], hidden_dim)

    def forward(self, x):
        x = self.L1(x)
        x = nn.functional.relu(x)
        return x
class SecondNet(nn.Module):
    def __init__(self):
        super(SecondNet, self).__init__()
        self.L2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = self.L2(x)
        x = torch.sigmoid(x)
        return x
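Outside of the AIJack wrappers, the two halves simply compose at the cut layer: FirstNet maps the 29 pre-processed features to a hidden_dim-dimensional activation, and SecondNet turns that activation into a fraud probability. A minimal stand-alone sketch with dummy data (the feature count and hidden_dim = 16 are taken from the next section):

# Sketch only: compose the two halves at the cut layer on a dummy mini-batch.
hidden_dim = 16                       # same value as set in the next section
bottom = FirstNet(np.zeros((1, 29)))  # 29 features remain after the pre-processing below
top = SecondNet()

x_dummy = torch.randn(5, 29)          # hypothetical mini-batch of client features
cut_activations = bottom(x_dummy)     # client-side forward pass, sent to the label party
predictions = top(cut_activations)    # label party's forward pass on the received activations
print(predictions.shape)              # torch.Size([5, 1])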
3.1.1. Parameters and Pre-processing#
batch_size = 5
hidden_dim = 16
num_communication = 2
torch.manual_seed(10)
raw_df = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv"
)
raw_df_neg = raw_df[raw_df["Class"] == 0]
raw_df_pos = raw_df[raw_df["Class"] == 1]
down_df_neg = raw_df_neg # .sample(40000)
down_df = pd.concat([down_df_neg, raw_df_pos])
neg, pos = np.bincount(down_df["Class"])
total = neg + pos
print(
    "Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n".format(
        total, pos, 100 * pos / total
    )
)
cleaned_df = down_df.copy()
# You don't want the `Time` column.
cleaned_df.pop("Time")
# The `Amount` column covers a huge range. Convert to log-space.
eps = 0.001 # 0 => 0.1¢
cleaned_df["Log Ammount"] = np.log(cleaned_df.pop("Amount") + eps)
# Use a utility from sklearn to split and shuffle our dataset.
train_df, test_df = train_test_split(cleaned_df, test_size=0.2)
# Form np arrays of labels and features.
train_labels = np.array(train_df.pop("Class"))
train_features = np.array(train_df)
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
train_features = np.clip(train_features, -5, 5)
train_dataset = NumpyDataset(
    train_features, train_labels.astype(np.float64).reshape(-1, 1)
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
Examples:
Total: 284807
Positive: 492 (0.17% of total)
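The attack below is evaluated on the training loader only. If you also want a held-out loader, test_df from the split above can be prepared the same way; a sketch that reuses the fitted scaler:

# Sketch: mirror the training pre-processing for the held-out split.
test_labels = np.array(test_df.pop("Class"))
test_features = scaler.transform(np.array(test_df))  # reuse the scaler fitted on the training set
test_features = np.clip(test_features, -5, 5)
test_dataset = NumpyDataset(
    test_features, test_labels.astype(np.float64).reshape(-1, 1)
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)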
3.1.2. Split Learning#
model_1 = FirstNet(train_features)
model_2 = SecondNet()
model_1.double()
model_2.double()
opt_1 = optim.Adam(model_1.parameters(), lr=1e-3)
opt_2 = optim.Adam(model_2.parameters(), lr=1e-3)
optimizers = [opt_1, opt_2]
client_1 = SplitNNClient(model_1, user_id=0)
client_2 = SplitNNClient(model_2, user_id=1)
clients = [client_1, client_2]
criterion = nn.BCELoss()
manager = NormAttackSplitNNManager(criterion, device="cpu")
NormAttackSplitNNAPI = manager.attach(SplitNNAPI)
normattacksplitnn = NormAttackSplitNNAPI(
    clients, optimizers, train_loader, criterion, num_communication
)
normattacksplitnn.run()
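SplitNNAPI (here extended by the attack manager) takes care of the forward pass to the cut layer, the exchange of activations and gradients, and the two optimizer steps. Conceptually, one round of training looks roughly like the following pure-PyTorch sketch; it is illustrative only, not AIJack's internal implementation:

# Illustrative sketch of a single split-learning optimization step.
for x, y in train_loader:
    opt_1.zero_grad()
    opt_2.zero_grad()
    intermediate = model_1(x)        # client: forward to the cut layer
    outputs = model_2(intermediate)  # label party: forward through the top model
    loss = criterion(outputs, y)     # label party holds the labels and computes the loss
    loss.backward()                  # gradients flow back through the cut layer to the client
    opt_2.step()
    opt_1.step()
    break                            # sketch: stop after one mini-batch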
3.1.3. Norm-based Label Leakage Attack#
train_leak_auc = normattacksplitnn.attack(train_loader)
print("Leau AUC is ", train_leak_auc)
Leau AUC is 0.9975959580734046
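The leak AUC reported above measures how well the per-example L2 norm of the gradient arriving at the cut layer separates positive from negative labels; a value close to 1.0 means the gradient norms reveal the labels almost perfectly. The same statistic can be computed by hand, which makes the mechanism explicit (a sketch of the idea, not AIJack's internal implementation):

# Sketch: recompute the norm-attack statistic with plain PyTorch and scikit-learn.
from sklearn.metrics import roc_auc_score

grad_norms, labels = [], []
for x, y in train_loader:
    intermediate = model_1(x)
    intermediate.retain_grad()  # keep the gradient at the cut layer
    loss = criterion(model_2(intermediate), y)
    loss.backward()
    grad_norms.append(intermediate.grad.norm(dim=1))  # per-example norm of the leaked gradient
    labels.append(y.flatten())

grad_norms = torch.cat(grad_norms).detach().numpy()
labels = torch.cat(labels).numpy()
print("manual leak AUC:", roc_auc_score(labels, grad_norms))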