001    package calhoun.analysis.crf.statistics;
002    
003    import java.io.Serializable;
004    import java.text.DecimalFormat;
005    import java.text.NumberFormat;
006    
007    import calhoun.util.Assert;
008    
009    public class PredictedActualBinaryContingencyTable implements Serializable {
010            private static final long serialVersionUID = 557844841355570852L;
011            private int tp,fp,fn,tn;
012            private boolean frozen;
013            
014            ///////////////////////////////////////////
015            // Constructors and modifiers are below:
016            
017            public PredictedActualBinaryContingencyTable() {
018                    this.tp = 0;
019                    this.fp = 0;
020                    this.fn = 0;
021                    this.tn = 0;
022                    frozen = false;
023            }       
024    
025            public void set(int tp,int fp, int fn, int tn) {
026                    Assert.a(!frozen);
027                    this.tp = tp;
028                    this.fp = fp;
029                    this.fn = fn;
030                    this.tn = tn;
031                    frozen = true;
032            }
033            
034            public void set(int tp,int fp, int fn) {
035                    Assert.a(!frozen);
036                    this.tp = tp;
037                    this.fp = fp;
038                    this.fn = fn;
039                    this.tn = -1;
040                    frozen = true;
041            }
042            
043            public void forgetTN() {
044                    Assert.a(!frozen);
045                    Assert.a(tn ==0);
046                    tn = -1;
047            }
048    
049            public void incrementTP() {
050                    Assert.a(!frozen);
051                    tp++;
052            }
053            
054            public void incrementFP(){
055                    Assert.a(!frozen);
056                    fp++;
057            }
058            
059            public void incrementFN(){
060                    Assert.a(!frozen);
061                    fn++;
062            }
063            
064            public void incrementTN(){
065                    Assert.a(!frozen);
066                    Assert.a(tn>=0);
067                    tn++;
068            }
069            
070            public void increment(boolean predicted, boolean actual) {
071                    Assert.a(!frozen);
072                    if ((predicted) && (actual)) { tp++; }
073                    if ((predicted) && (!actual)) { fp++; }
074                    if ((!predicted) && (actual)) { fn++; }
075                    if ((!predicted)&&(!actual)) {
076                            Assert.a(tn>=0);
077                            tn++;
078                    }
079            }
080            
081            public void freeze() {
082                    frozen = true;
083            }
084                    
085            /////////////////////////////////////////////
086            // Statistical measures
087            
088            public int getTP() {
089                    Assert.a(frozen);
090                    return tp;
091            }
092            
093            public int getFP() {
094                    Assert.a(frozen);
095                    return fp;
096            }
097            
098            public int getFN() {
099                    Assert.a(frozen);
100                    return fn;
101            }
102            
103            public int getTN() {
104                    Assert.a(frozen);
105                    Assert.a(tn>=0);
106                    return tn;
107            }
108            
109            /** Actual Positives */
110            public int ap(){
111                    Assert.a(frozen);
112                    return (tp + fn);
113            }
114    
115            /** Actual Negatives */
116            public int an(){
117                    Assert.a(frozen);
118                    Assert.a(tn>=0);
119                    return (fp + tn);
120            }
121            
122            /** Predicted Positives */
123            public int pp(){
124                    Assert.a(frozen);
125                    return (tp + fp);
126            }
127            
128            /** Predicted Negatives */
129            public int pn(){
130                    Assert.a(frozen);
131                    Assert.a(tn>=0);
132                    return (tn+fn);
133            }
134            
135            private boolean splitMargins() {
136                    if(tn>=0) { 
137                            if (tp+fn<=0) { return false; }
138                            if (tn+fp<=0) { return false; }
139                            if (tp+fp<=0) { return false; }
140                            if (tn+fn<=0) { return false; }
141                    } else {
142                            if (tp+fn<=0) { return false; }                      
143                            if (tp+fp<=0) { return false; }
144                    }
145                    return true;
146            }
147            
148            /** This is the Pearson correlation of two 0-1 random variables X=prediction and Y=reality 
149             *  CC = Cov(X,Y)/(Stddev(X)*Stddev(Y))
150             *  If either RV has zero variance, the CC is underfined: assertion faliure
151             *  If this contingency table is not tracking TN, then CC is undefined: assertion failure
152             */
153            public double correlationCoefficient() {        
154                    Assert.a(frozen);
155                    Assert.a(splitMargins());
156                    double num = (tp*tn)-(fn*fp);
157                    double den2 = (double) (tp+fn)*(tn+fp)*(tp+fp)*(tn+fn);
158                    double cc = num/Math.sqrt(den2);
159                    return cc;
160            }
161            
162            
163            /** This is the average conditional probability.  Only defined if TN is being tracked, and
164             *  if all four marginal values are positive.
165             */
166            public double averageConditionalProbability(){
167                    Assert.a(frozen);
168                    Assert.a(splitMargins());
169                    double acp = 0;
170                    acp += (double) tp/(tp+fn);
171                    acp += (double) tp/(tp+fp);
172                    acp += (double) tn/(tn+fp);
173                    acp += (double) tn/(tn+fn);
174                    acp /= 4.0;
175                    return acp;
176            }
177            
178            /** This is the approximate correlation.  Only defined if TN is being tracked, and
179             *  if all four marginal values are positive.  Equal to
180             *  2*(averageConditionalProbability - 0.5)
181             */     
182            public double approximateCorrelation(){
183                    Assert.a(frozen);
184                    return 2*(averageConditionalProbability() - 0.5);
185            }
186            
187            /** This is the average of sensitivity and specifity.  This is equal to the limit of ACP as TN->infinity
188             *  This is defined even if we are not tracking TN.
189             */
190            public double averageSensitivitySpecificity(){
191                    Assert.a(frozen);
192                    return 0.5*( sensitivity() + specificity() );
193            }
194            
195            /** Sensitivity is TP/(TP+FN) is the fraction of actual events that are predicted.
196             *  If TP+FN is zero then sensitivity is undefined, resulting in assertion failure
197             *  Sensitivity is defined even if not tracking TN.
198             */
199            public double sensitivity() {
200                    Assert.a(frozen);
201                    Assert.a(tp+fn > 0);
202                    return (double) tp/(tp+fn);
203            }
204            
205            /** Specificity is the TP/(TP+FP) is the fraction of predicted events that are real.
206             *  If TP+FP=0 then specificity is undefined and result in assertion failure
207             *  Specificity is defined even if not tracking TN  
208             */
209            public double specificity() {
210                    Assert.a(frozen);
211                    Assert.a(tp+fp > 0);
212                    return (double) tp/(tp+fp);
213            }
214            
215            
216            ////////////////////////////////////////////////////
217            // Printed summaries
218            
219            public String summarize() {
220                    String ret = "";
221                    NumberFormat d3 = new DecimalFormat();
222                    d3.setMinimumFractionDigits(3);
223                    d3.setMaximumFractionDigits(3);
224                    
225                    if (tn>=0) {
226                            ret += "( TP=" + tp + ", FP=" + fp + ", FN=" + fn + ", TN=" + tn + " ) ";
227                            ret += "( AP=" + ap() + ", AN=" + an() + ", PP=" + pp() + ", PN=" + pn() + " ) ";
228                            if (splitMargins()) {
229                                    ret += "( CC=" + d3.format(correlationCoefficient()) + " ) ";
230                                    ret += "( ACP=" + d3.format(averageConditionalProbability()) + " ) ";
231                                    ret += "( AC=" + d3.format(approximateCorrelation()) + " ) ";
232                                    ret += "( sens=" + d3.format(sensitivity()) + ", spec=" + d3.format(specificity()) + ", avSS=" + d3.format(averageSensitivitySpecificity()) + " ) ";
233                            } else {
234                                    ret += "( margins not split ) ";
235                            }
236                    } else {
237                            ret += "( TP=" + tp + ", FP=" + fp + ", FN=" + fn + " ) ";                      
238                            ret += "( AP=" + ap() + ", PP=" + pp() + " )";
239                            if (splitMargins() ) {
240                                    ret += "( sens=" + d3.format(sensitivity()) + ", spec=" + d3.format(specificity()) + ", avSS=" + d3.format(averageSensitivitySpecificity()) + " ) ";
241                            } else {
242                                    ret += "( margins not split ) ";
243                            } 
244                    }
245                    
246                    return ret;
247            }
248    
249            
250    }