dvreed77
2/19/2013 - 5:18 AM

The effect of using uniform priors with unbalanced classes.

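Compares the balanced classification accuracy of a linear discriminant classifier using uniform (default) priors against the same classifier using empirical priors, on two unbalanced classes.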

% Unbalanced class sizes: N1 samples carry label 0, N2 samples carry label 1.
N1 = 3500;
N2 = 1500;

% One-dimensional features: label-0 samples are standard normal draws,
% label-1 samples are standard normal draws shifted by +1.
data = vertcat(randn(N1, 1), 1 + randn(N2, 1));
labels = vertcat(zeros(N1, 1), ones(N2, 1));

% Build 200 random holdout partitions, each holding out 20% of the
% samples for testing.
for ii = 1:200
    cv(ii) = cvpartition(labels, 'Holdout', 0.2);
end

% Balanced accuracy of classify() with its default options, evaluated on
% every holdout partition stored in cv. No prior is passed here, so
% classify falls back to its default prior (equal class priors according
% to the classify documentation).
%
% cv is an array of single-holdout partitions, so there is one score per
% array element: preallocate with numel(cv), not cv.NumTestSets (which is
% a per-partition property).
b_pcc = zeros(1, numel(cv));
for ii = 1:numel(cv)
    train_idx = cv(ii).training;
    test_idx = cv(ii).test;

    % Fit on the training samples and predict labels for the test samples.
    y_pred = classify(data(test_idx, :), data(train_idx, :), labels(train_idx));
    y_true = labels(test_idx);

    % Confusion-matrix counts, treating label 1 as the positive class.
    TP = sum(y_pred == 1 & y_true == 1);
    TN = sum(y_pred == 0 & y_true == 0);
    FP = sum(y_pred == 1 & y_true == 0);
    FN = sum(y_pred == 0 & y_true == 1);

    % Balanced accuracy: mean of the true-positive and true-negative rates.
    b_pcc(ii) = 0.5 * (TP/(TP + FN) + TN/(TN + FP));
end

% Display the mean and spread of the score across partitions.
mean(b_pcc)
std(b_pcc)

% Balanced accuracy of a linear discriminant classifier with empirical
% class priors ('linear', 'empirical'), evaluated on every holdout
% partition stored in cv.
%
% cv is an array of single-holdout partitions, so there is one score per
% array element: preallocate with numel(cv), not cv.NumTestSets (which is
% a per-partition property).
b_pcc = zeros(1, numel(cv));
for ii = 1:numel(cv)
    train_idx = cv(ii).training;
    test_idx = cv(ii).test;

    % Fit on the training samples and predict labels for the test samples.
    y_pred = classify(data(test_idx, :), data(train_idx, :), labels(train_idx), 'linear', 'empirical');
    y_true = labels(test_idx);

    % Confusion-matrix counts, treating label 1 as the positive class.
    TP = sum(y_pred == 1 & y_true == 1);
    TN = sum(y_pred == 0 & y_true == 0);
    FP = sum(y_pred == 1 & y_true == 0);
    FN = sum(y_pred == 0 & y_true == 1);

    % Balanced accuracy: mean of the true-positive and true-negative rates.
    b_pcc(ii) = 0.5 * (TP/(TP + FN) + TN/(TN + FP));
end

% Display the mean and spread of the score across partitions.
mean(b_pcc)
std(b_pcc)