-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathuTestv1.R
62 lines (56 loc) · 2.61 KB
/
uTestv1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#' Run Mann-Whitney U tests between row blocks and save them as a CSV
#'
#' `results` is scanned as consecutive blocks of `block_size` rows. Each block
#' is compared, via `wilcox.test()`, with the block that lies `shift` blocks
#' further down. A pair is only tested when the first rows of both blocks
#' carry the same `model` and `unfairness_metric` values; the reported
#' weights are likewise read from the first row of each block.
#' NOTE(review): this presumes every block holds a single
#' (model, metric, unfairness weight) combination -- confirm against the
#' script that produces the results files.
#'
#' @param results Data frame with the columns `model`, `unfairness_metric`,
#'   `unfairness_weight` and the column named by `column`.
#' @param name Label embedded in the output file name.
#' @param shift Number of blocks between the two blocks being compared.
#' @param column Name of the column whose values are tested.
#' @param n_rows Upper bound on how many leading rows of `results` are
#'   scanned. Defaults to 9000, the value previously hard-coded; it is capped
#'   at `nrow(results)` so shorter inputs no longer index past the end.
#' @param block_size Number of rows per block (previously hard-coded as 100).
#' @param out_dir Directory the CSV is written to. It must already exist.
#' @return Invisibly, the data frame that was written: one row per tested
#'   pair with the columns `model`, `unfairness_metric`,
#'   `unfairness_weight_one`, `unfairness_weight_two`, `statistic` and
#'   `p.value`. It has zero rows when no pair qualified.
doUTest <- function(results, name, shift, column,
                    n_rows = 9000, block_size = 100,
                    out_dir = "/Users/clarabelitz/Documents/git/illinois/fairfs") {
  usable_rows <- min(n_rows, nrow(results))
  offset <- block_size * shift

  # Last start row for which the second block still fits entirely inside the
  # usable rows. With 9000 rows and blocks of 100 this yields the same start
  # rows as the former seq(1, 9000 - 100 * shift, 100).
  last_start <- usable_rows - offset - block_size + 1
  starts <- if (last_start >= 1) {
    seq(1, last_start, by = block_size)
  } else {
    numeric(0)
  }

  # Tests one pair of blocks; returns a one-row data frame, or NULL when the
  # two blocks do not belong to the same model and metric.
  compare_blocks <- function(i_one) {
    i_two <- i_one + block_size - 1
    i_three <- i_one + offset
    i_four <- i_three + block_size - 1

    model_first <- results[i_one, "model"]
    metric_first <- results[i_one, "unfairness_metric"]
    same_group <- model_first == results[i_three, "model"] &&
      metric_first == results[i_three, "unfairness_metric"]
    # isTRUE() makes a missing model/metric skip the pair instead of
    # aborting the whole run inside `if`.
    if (!isTRUE(same_group)) {
      return(NULL)
    }

    test_results <- wilcox.test(
      results[i_one:i_two, column],
      results[i_three:i_four, column]
    )
    # `p.value` is the name data.frame() used to derive from "p-value", so
    # the CSV header is unchanged.
    data.frame(
      model = model_first,
      unfairness_metric = metric_first,
      unfairness_weight_one = results[i_one, "unfairness_weight"],
      unfairness_weight_two = results[i_three, "unfairness_weight"],
      statistic = unname(test_results$statistic),
      p.value = test_results$p.value
    )
  }

  comparisons <- Filter(Negate(is.null), lapply(starts, compare_blocks))

  if (length(comparisons) > 0) {
    all_results <- do.call(rbind, comparisons)
  } else {
    # Keep the header row in the CSV even when nothing could be compared.
    all_results <- data.frame(
      model = character(0),
      unfairness_metric = character(0),
      unfairness_weight_one = numeric(0),
      unfairness_weight_two = numeric(0),
      statistic = numeric(0),
      p.value = numeric(0)
    )
  }

  out_file <- file.path(
    out_dir,
    paste0("utest_", shift, "_protected_feature_", name, ".csv")
  )
  write.csv(all_results, out_file, row.names = FALSE)
  invisible(all_results)
}
# Load every FairFS results file from the working directory.
sim_data <- read.csv("fairfs_results_simulated_data.csv")
sp_math_data <- read.csv("fairfs_results_uci_student_performance_math.csv")
sp_port_data <- read.csv("fairfs_results_uci_student_performance_portuguese.csv")
sa_data <- read.csv("fairfs_results_uci_student_academics.csv")
adult_data <- read.csv("fairfs_results_uci_adult.csv")

# Run the U tests for shifts 1 through 4 (inclusive). Change the data set,
# label and column below to analyse something else; the column may be
# 'unfairness' or 'protected_column_selected_prop'.
for (n in seq_len(4)) {
  doUTest(
    results = adult_data,
    name = "adult_data",
    shift = n,
    column = "protected_column_selected_prop"
  )
}