1.5. SecureBoost: Vertically Federated XGBoost with Paillier Encryption
Original paper: https://arxiv.org/abs/1901.08755
SecureBoost allows multiple parties to train the same model as normal XGBoost without disclosing their local datasets.
from aijack.defense.paillier import PaillierKeyGenerator
from aijack.collaborative.tree import (
SecureBoostClassifierAPI,
SecureBoostClient,
XGBoostClassifierAPI,
XGBoostClient,
)
1.5.1. Paillier Encryption
# Create a Paillier key generator.
# NOTE(review): 512 is presumably the key size in bits -- confirm against
# PaillierKeyGenerator.
keygenerator = PaillierKeyGenerator(512)
keypair = keygenerator.generate_keypair()
pk, sk = keypair  # public key, secret key
# Show the values that make up the public key.
pk.get_publickeyvalues()
('75242327359052187572169010861692481547454348330868546066717438408853157480428647976190531420312698828694087675121469049411809985056891481101471011077945157641578304180814676083157925906917435382632123247869197335370689139712763870678635308347455305720914931243235831753850053494946430764740457718377179707311',
'75242327359052187572169010861692481547454348330868546066717438408853157480428647976190531420312698828694087675121469049411809985056891481101471011077945157641578304180814676083157925906917435382632123247869197335370689139712763870678635308347455305720914931243235831753850053494946430764740457718377179707312')
# Encrypt an integer with the public key; decrypting with the secret key
# recovers it.
ct_1 = pk.encrypt(13)
plain_1 = sk.decrypt2int(ct_1)
print("ct_1 = ", plain_1)

# Multiply a ciphertext by a plaintext integer.
ct_2 = ct_1 * 2
plain_2 = sk.decrypt2int(ct_2)
print("ct_2 = ", plain_2)

# Add a plaintext float to a ciphertext; the result is decrypted as a float.
ct_3 = ct_1 + 5.6
plain_3 = sk.decrypt2float(ct_3)
print("ct_3 = ", plain_3)

# Add two ciphertexts together.
ct_sum = ct_1 + ct_3
print("ct_1 + ct_3 = ", sk.decrypt2float(ct_sum))
ct_1 = 13
ct_2 = 26
ct_3 = 18.600000381469727
ct_1 + ct_3 = 31.600000381469727
1.5.2. SecureBoost
# Hyperparameters reused by both the XGBoost and the SecureBoost example.
min_leaf, depth, boosting_rounds = 1, 3, 2
learning_rate = 0.4
lam, gamma, eps = 1.0, 0.0, 1.0
min_child_weight = float("-inf")
subsample_cols = 1.0

# Dummy data: x1 and x2 each hold a single feature column (one value per
# row), y holds the labels.
x1 = [[value] for value in (12, 32, 15, 24, 20, 25, 17, 16)]
x2 = [[value] for value in (1, 1, 0, 0, 1, 1, 0, 1)]
y = [1, 0, 1, 0, 1, 1, 0, 1]
# x3 is the row-wise concatenation of x1 and x2, used for prediction.
x3 = [row_1 + row_2 for row_1, row_2 in zip(x1, x2)]

# Trailing constructor arguments that are identical for every client.
client_options = (min_leaf, subsample_cols, 256, False, 0)
# NOTE(review): [0]/[1] and 0/1 are presumably each party's feature indices
# and party id -- confirm against the XGBoostClient signature.
p1 = XGBoostClient(x1, 2, [0], 0, *client_options)
p2 = XGBoostClient(x2, 2, [1], 1, *client_options)
parties = [p1, p2]

clf = XGBoostClassifierAPI(
    2,  # same value as the second argument of each client
    subsample_cols,
    min_child_weight,
    depth,
    min_leaf,
    learning_rate,
    boosting_rounds,
    lam,
    gamma,
    eps,
    -1,  # NOTE(review): the SecureBoost example passes 0 here -- confirm meaning
    0,
    1.0,
    1,
    True,
)
clf.fit(parties, y)
clf.predict_proba(x3)
[[0.20040041208267212, 0.7995995879173279],
[0.3700332045555115, 0.6299667954444885],
[0.20040041208267212, 0.7995995879173279],
[0.44300776720046997, 0.55699223279953],
[0.3700332045555115, 0.6299667954444885],
[0.3700332045555115, 0.6299667954444885],
[0.44300776720046997, 0.55699223279953],
[0.20040041208267212, 0.7995995879173279]]
# Fresh Paillier key pair for the SecureBoost run.
keygenerator = PaillierKeyGenerator(512)
pk, sk = keygenerator.generate_keypair()

# Trailing constructor arguments that are identical for every client.
secure_client_options = (min_leaf, subsample_cols, 256, False, 0)
sp1 = SecureBoostClient(x1, 2, [0], 0, *secure_client_options)
sp2 = SecureBoostClient(x2, 2, [1], 1, *secure_client_options)
sparties = [sp1, sp2]

# Every party receives the public key ...
for party in sparties:
    party.set_publickey(pk)
# ... but only the first party receives the secret key.
sparties[0].set_secretkey(sk)

sclf = SecureBoostClassifierAPI(
    2,  # same value as the second argument of each client
    subsample_cols,
    min_child_weight,
    depth,
    min_leaf,
    learning_rate,
    boosting_rounds,
    lam,
    gamma,
    eps,
    0,  # NOTE(review): presumably the id of the party holding the secret key -- confirm
    0,
    1.0,
    1,
    True,
)
sclf.fit(sparties, y)
sclf.predict_proba(x3)
[[0.20040041208267212, 0.7995995879173279],
[0.3700332045555115, 0.6299667954444885],
[0.20040041208267212, 0.7995995879173279],
[0.44300776720046997, 0.55699223279953],
[0.3700332045555115, 0.6299667954444885],
[0.3700332045555115, 0.6299667954444885],
[0.44300776720046997, 0.55699223279953],
[0.20040041208267212, 0.7995995879173279]]