1.5. SecureBoost: Vertically Federated XGBoost with Paillier Encryption

original paper: https://arxiv.org/abs/1901.08755

SecureBoost lets multiple parties, each holding different features of the same samples, jointly train the same model that standard XGBoost would produce, without disclosing their local datasets to one another.

from aijack.defense.paillier import PaillierKeyGenerator
from aijack.collaborative.tree import (
    SecureBoostClassifierAPI,
    SecureBoostClient,
    XGBoostClassifierAPI,
    XGBoostClient,
)

1.5.1. Paillier Encryption

# Build a Paillier key generator. The argument 512 is presumably a size in
# bits — confirm against the AIJack documentation.
keygenerator = PaillierKeyGenerator(512)
# pk is the public key (used with encrypt below); sk is the secret key (used
# with decrypt2int / decrypt2float below).
pk, sk = keygenerator.generate_keypair()

# Show the public key's values. In the recorded output that follows, the second
# value is exactly the first plus one, which is consistent with the common
# Paillier choice g = n + 1.
pk.get_publickeyvalues()
('75242327359052187572169010861692481547454348330868546066717438408853157480428647976190531420312698828694087675121469049411809985056891481101471011077945157641578304180814676083157925906917435382632123247869197335370689139712763870678635308347455305720914931243235831753850053494946430764740457718377179707311',
 '75242327359052187572169010861692481547454348330868546066717438408853157480428647976190531420312698828694087675121469049411809985056891481101471011077945157641578304180814676083157925906917435382632123247869197335370689139712763870678635308347455305720914931243235831753850053494946430764740457718377179707312')
# Demonstrates arithmetic on encrypted values. Note that every print shows the
# *decrypted* plaintext, even though the label is the ciphertext's variable name.

# Encrypt an integer with the public key and read it back with the secret key.
ct_1 = pk.encrypt(13)
print("ct_1 = ", sk.decrypt2int(ct_1))

# Ciphertext times a plaintext integer: the decrypted result is 13 * 2 = 26.
ct_2 = ct_1 * 2
print("ct_2 = ", sk.decrypt2int(ct_2))

# Ciphertext plus a plaintext float; the result is read back with decrypt2float.
# The recorded value 18.600000381469727 (rather than 18.6) looks like
# single-precision rounding — confirm against the AIJack implementation.
ct_3 = ct_1 + 5.6
print("ct_3 = ", sk.decrypt2float(ct_3))

# Ciphertext plus ciphertext: the decrypted result is 13 + 18.6 = 31.6.
print("ct_1 + ct_3 = ", sk.decrypt2float(ct_1 + ct_3))
ct_1 =  13
ct_2 =  26
ct_3 =  18.600000381469727
ct_1 + ct_3 =  31.600000381469727

1.5.2. SecureBoost

# Hyperparameters passed positionally to both XGBoostClassifierAPI and
# SecureBoostClassifierAPI further down, so both models share these settings.
# The names follow the usual XGBoost conventions; confirm exact semantics in
# the AIJack documentation.

# Tree growth limits.
depth = 3
min_leaf = 1
min_child_weight = float("-inf")  # negative infinity; presumably disables this constraint — confirm

# Boosting schedule.
boosting_rounds = 2
learning_rate = 0.4

# Split scoring and column sampling.
lam = 1.0
gamma = 0.0
eps = 1.0
subsample_cols = 1.0

# Dummy data: eight samples, two features, binary labels.

# x3 holds the full (joined) feature rows; predict_proba is called on it later.
x3 = [[12, 1], [32, 1], [15, 0], [24, 0], [20, 1], [25, 1], [17, 0], [16, 1]]

# Vertical split of x3: each party gets one feature column, stored as one
# single-element row per sample.
x1 = [[first] for first, _ in x3]
x2 = [[second] for _, second in x3]

# Labels passed to fit() together with the parties.
y = [1, 0, 1, 0, 1, 1, 0, 1]
# Plain (unencrypted) XGBoost baseline: one client per party, each holding one
# feature column of the dummy data.
# Positional arguments as far as this page shows: the party's features, 2
# (presumably the number of classes), the feature ids this party holds ([0] vs
# [1], presumably), the party id (0 vs 1, presumably), min_leaf, subsample_cols.
# NOTE(review): the trailing 256, False, 0 are not explained on this page —
# confirm against the XGBoostClient signature.
p1 = XGBoostClient(x1, 2, [0], 0, min_leaf, subsample_cols, 256, False, 0)
p2 = XGBoostClient(x2, 2, [1], 1, min_leaf, subsample_cols, 256, False, 0)
parties = [p1, p2]

clf = XGBoostClassifierAPI(
    2,  # presumably the number of classes (y is binary; predict_proba yields two columns) — confirm
    subsample_cols,
    min_child_weight,
    depth,
    min_leaf,
    learning_rate,
    boosting_rounds,
    lam,
    gamma,
    eps,
    -1,  # NOTE(review): the only argument that differs from the SecureBoostClassifierAPI call (0 there); presumably the active party id — confirm
    0,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    1.0,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    1,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    True,  # NOTE(review): meaning not shown on this page — confirm against the API signature
)
# Train across the parties with the shared labels, then score the joined
# feature rows; the recorded output has two probabilities per row of x3.
clf.fit(parties, y)
clf.predict_proba(x3)
[[0.20040041208267212, 0.7995995879173279],
 [0.3700332045555115, 0.6299667954444885],
 [0.20040041208267212, 0.7995995879173279],
 [0.44300776720046997, 0.55699223279953],
 [0.3700332045555115, 0.6299667954444885],
 [0.3700332045555115, 0.6299667954444885],
 [0.44300776720046997, 0.55699223279953],
 [0.20040041208267212, 0.7995995879173279]]
# SecureBoost run: same data and hyperparameters as the plain XGBoost baseline,
# but with Paillier keys distributed to the parties.
# A fresh key pair is generated here, rebinding keygenerator, pk and sk from
# the Paillier demo earlier on the page.
keygenerator = PaillierKeyGenerator(512)
pk, sk = keygenerator.generate_keypair()

# Same positional arguments as the XGBoostClient calls in the baseline.
# NOTE(review): the trailing 256, False, 0 are not explained on this page —
# confirm against the SecureBoostClient signature.
sp1 = SecureBoostClient(x1, 2, [0], 0, min_leaf, subsample_cols, 256, False, 0)
sp2 = SecureBoostClient(x2, 2, [1], 1, min_leaf, subsample_cols, 256, False, 0)
sparties = [sp1, sp2]

# Every party receives the public key; only sparties[0] receives the secret
# key. Presumably party 0 is the active (label-holding) party, matching the 0
# passed as the 11th classifier argument below — confirm.
sparties[0].set_publickey(pk)
sparties[1].set_publickey(pk)
sparties[0].set_secretkey(sk)

sclf = SecureBoostClassifierAPI(
    2,  # presumably the number of classes (y is binary; predict_proba yields two columns) — confirm
    subsample_cols,
    min_child_weight,
    depth,
    min_leaf,
    learning_rate,
    boosting_rounds,
    lam,
    gamma,
    eps,
    0,  # NOTE(review): the only argument that differs from the XGBoostClassifierAPI call (-1 there); presumably the active party id — confirm
    0,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    1.0,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    1,  # NOTE(review): meaning not shown on this page — confirm against the API signature
    True,  # NOTE(review): meaning not shown on this page — confirm against the API signature
)
# Train and score exactly as in the baseline. The recorded output that follows
# is identical to the plain XGBoost output earlier on the page.
sclf.fit(sparties, y)
sclf.predict_proba(x3)
[[0.20040041208267212, 0.7995995879173279],
 [0.3700332045555115, 0.6299667954444885],
 [0.20040041208267212, 0.7995995879173279],
 [0.44300776720046997, 0.55699223279953],
 [0.3700332045555115, 0.6299667954444885],
 [0.3700332045555115, 0.6299667954444885],
 [0.44300776720046997, 0.55699223279953],
 [0.20040041208267212, 0.7995995879173279]]