# estimate.py
# Forked from Networks-Learning/prediction-powered-ranking.
import numpy as np
def sample_avg(k, Dn):
    """
    Compute the empirical (sample average) win rate of every model in Dn.

    For each model, the wins it collected as model_a (``w``) and as model_b
    (``1 - w``) are added up and divided by the number of comparisons the
    model took part in.

    Parameters
    ----------
    k : int
        number of models
    Dn : dictionary
        Same structure as returned by data_process.summarized()
        Keys: 'winner_predicted', numpy.ndarray with the outcome of each pairwise comparison (1:model_a won, 0:model_b won, 0.5:tie);
                                  it is combined with an (n, 1) vector of ones, so an (n, 1) column is expected
              'model_a_matrix', matrix whose columns are one-hot vectors (one per sample) marking which model was model_a
              'model_b_matrix', matrix whose columns are one-hot vectors (one per sample) marking which model was model_b

    Returns
    -------
    a : numpy.ndarray
        sample average win probability of each model over the samples in Dn
        (one row per model when 'winner_predicted' is an (n, 1) column)
    """
    first, second = Dn['model_a_matrix'], Dn['model_b_matrix']
    outcome = Dn['winner_predicted']
    num_samples = np.shape(first)[1]
    unit = np.ones((num_samples, 1))

    # Number of comparisons each model appears in (as model_a or model_b),
    # placed on the diagonal of a k x k matrix.
    appearances = (first + second) @ unit
    scale = (np.ones((k, 1)) @ appearances.T) * np.identity(k)

    # Turn the diagonal counts into 1/count so the product below averages.
    for j in range(k):
        scale[j][j] = 1 / scale[j][j]

    # Wins earned in the model_a slot plus wins earned in the model_b slot.
    wins = (first @ outcome) + (second @ (unit - outcome))
    return scale @ wins
def _inverse_count_diagonal(k, model_a, model_b, error_message):
    """Return the k x k diagonal matrix of 1/(comparisons per model), raising AssertionError(error_message) on a zero count."""
    num_samples = np.shape(model_a)[1]
    counts = np.matmul(model_a + model_b, np.ones((num_samples, 1)))
    diag = np.matmul(np.ones((k, 1)), np.transpose(counts)) * np.identity(k)
    for i in range(k):
        # Explicit raise (not `assert`) so the check survives `python -O`;
        # AssertionError is kept so existing callers see the same exception type.
        if not diag[i][i] > 0:
            raise AssertionError(error_message)
        diag[i][i] = 1 / diag[i][i]
    return diag


def estimate(k, Dn, DN):
    """
    Calculates prediction-powered estimate and sample covariance of win probability for each model

    The estimate is ``a - b`` where ``a`` is the per-model average of the LLM
    outcomes on DN and ``b`` is the per-model average difference between LLM
    and human outcomes on Dn.

    Parameters
    ----------
    k : int
        number of models
    Dn : dictionary
        Summarized dataset of human and llm pairwise comparisons
        Same structure as returned by data_process.summarized() including human winner
        Keys: 'winner_predicted', numpy.ndarray indicating the winner of each pairwise comparison (1:model_a won, 0:model_b won, 0.5:tie)
              'model_a_matrix', numpy.ndarray where each column is a one-hot vector corresponding to each sample indicating which model was model_a
              'model_b_matrix', numpy.ndarray where each column is a one-hot vector corresponding to each sample indicating which model was model_b
              'winner_human', numpy.ndarray indicating the winner of each pairwise comparison in human_dataset (1:model_a won, 0:model_b won, 0.5:tie)
    DN : dictionary
        Summarized dataset of llm pairwise comparisons
        Same structure as returned by data_process.summarized()
        Keys: 'winner_predicted', numpy.ndarray indicating the winner of each pairwise comparison (1:model_a won, 0:model_b won, 0.5:tie)
              'model_a_matrix', numpy.ndarray where each column is a one-hot vector corresponding to each sample indicating which model was model_a
              'model_b_matrix', numpy.ndarray where each column is a one-hot vector corresponding to each sample indicating which model was model_b

    The winner arrays are combined with (n, 1) / (N, 1) vectors of ones, so
    they are expected to be column vectors of matching length.

    Returns
    -------
    thetahat : numpy.ndarray
        prediction-powered estimate of win probability of each model from samples in datasets Dn and DN
    Sigma : numpy.ndarray, size k times k
        sample covariance of thetahat

    Raises
    ------
    AssertionError
        If some model has no comparison in DN or in Dn, or if Dn has no
        'winner_human' entry.
    """
    n = np.shape(Dn['model_a_matrix'])[1]
    N = np.shape(DN['model_a_matrix'])[1]

    # a: per-model average of the LLM-predicted outcomes on the large set DN.
    M1N, M2N = DN['model_a_matrix'], DN['model_b_matrix']
    wfN = DN['winner_predicted']
    inv_counts_N = _inverse_count_diagonal(
        k, M1N, M2N,
        'There must be at least one comparison for each model in dataset DN (llm data)',
    )
    a = np.matmul(inv_counts_N, np.matmul(M1N, wfN) + np.matmul(M2N, np.ones((N, 1)) - wfN))

    # b: per-model average of (LLM outcome - human outcome) on the small set Dn.
    M1n, M2n = Dn['model_a_matrix'], Dn['model_b_matrix']
    if 'winner_human' not in Dn:
        raise AssertionError('dataset Dn must include "winner_human"')
    wn, wfn = Dn['winner_human'], Dn['winner_predicted']
    inv_counts_n = _inverse_count_diagonal(
        k, M1n, M2n,
        'There must be at least one comparison for each model in dataset Dn (human data)',
    )
    b = np.matmul(inv_counts_n, np.matmul(M1n, wfn - wn) + np.matmul(M2n, wn - wfn))

    # A and B hold, per sample (column), the centred residual of that sample
    # in the rows of the two models that took part in it; other rows are zero.
    A = np.matmul(np.ones((k, 1)), np.transpose(wfN - np.matmul(np.transpose(M1N), a))) * M1N
    A += np.matmul(np.ones((k, 1)), np.transpose(np.ones((N, 1)) - wfN - np.matmul(np.transpose(M2N), a))) * M2N
    B = np.matmul(np.ones((k, 1)), np.transpose(wfn - wn - np.matmul(np.transpose(M1n), b))) * M1n
    B += np.matmul(np.ones((k, 1)), np.transpose(wn - wfn - np.matmul(np.transpose(M2n), b))) * M2n

    thetahat = a - b
    # NOTE(review): the covariance is scaled by the total sample sizes N and n,
    # whereas a and b are averaged over per-model comparison counts -- confirm
    # this matches the intended estimator.
    Sigma = (np.matmul(A, np.transpose(A))) / (N**2) + (np.matmul(B, np.transpose(B))) / (n**2)
    return thetahat, Sigma