001 package calhoun.analysis.crf.statistics;
002
003 import java.io.Serializable;
004 import java.text.DecimalFormat;
005 import java.text.NumberFormat;
006
007 import calhoun.util.Assert;
008
009 public class PredictedActualBinaryContingencyTable implements Serializable {
010 private static final long serialVersionUID = 557844841355570852L;
011 private int tp,fp,fn,tn;
012 private boolean frozen;
013
014 ///////////////////////////////////////////
015 // Constructors and modifiers are below:
016
017 public PredictedActualBinaryContingencyTable() {
018 this.tp = 0;
019 this.fp = 0;
020 this.fn = 0;
021 this.tn = 0;
022 frozen = false;
023 }
024
025 public void set(int tp,int fp, int fn, int tn) {
026 Assert.a(!frozen);
027 this.tp = tp;
028 this.fp = fp;
029 this.fn = fn;
030 this.tn = tn;
031 frozen = true;
032 }
033
034 public void set(int tp,int fp, int fn) {
035 Assert.a(!frozen);
036 this.tp = tp;
037 this.fp = fp;
038 this.fn = fn;
039 this.tn = -1;
040 frozen = true;
041 }
042
043 public void forgetTN() {
044 Assert.a(!frozen);
045 Assert.a(tn ==0);
046 tn = -1;
047 }
048
049 public void incrementTP() {
050 Assert.a(!frozen);
051 tp++;
052 }
053
054 public void incrementFP(){
055 Assert.a(!frozen);
056 fp++;
057 }
058
059 public void incrementFN(){
060 Assert.a(!frozen);
061 fn++;
062 }
063
064 public void incrementTN(){
065 Assert.a(!frozen);
066 Assert.a(tn>=0);
067 tn++;
068 }
069
070 public void increment(boolean predicted, boolean actual) {
071 Assert.a(!frozen);
072 if ((predicted) && (actual)) { tp++; }
073 if ((predicted) && (!actual)) { fp++; }
074 if ((!predicted) && (actual)) { fn++; }
075 if ((!predicted)&&(!actual)) {
076 Assert.a(tn>=0);
077 tn++;
078 }
079 }
080
081 public void freeze() {
082 frozen = true;
083 }
084
085 /////////////////////////////////////////////
086 // Statistical measures
087
088 public int getTP() {
089 Assert.a(frozen);
090 return tp;
091 }
092
093 public int getFP() {
094 Assert.a(frozen);
095 return fp;
096 }
097
098 public int getFN() {
099 Assert.a(frozen);
100 return fn;
101 }
102
103 public int getTN() {
104 Assert.a(frozen);
105 Assert.a(tn>=0);
106 return tn;
107 }
108
109 /** Actual Positives */
110 public int ap(){
111 Assert.a(frozen);
112 return (tp + fn);
113 }
114
115 /** Actual Negatives */
116 public int an(){
117 Assert.a(frozen);
118 Assert.a(tn>=0);
119 return (fp + tn);
120 }
121
122 /** Predicted Positives */
123 public int pp(){
124 Assert.a(frozen);
125 return (tp + fp);
126 }
127
128 /** Predicted Negatives */
129 public int pn(){
130 Assert.a(frozen);
131 Assert.a(tn>=0);
132 return (tn+fn);
133 }
134
135 private boolean splitMargins() {
136 if(tn>=0) {
137 if (tp+fn<=0) { return false; }
138 if (tn+fp<=0) { return false; }
139 if (tp+fp<=0) { return false; }
140 if (tn+fn<=0) { return false; }
141 } else {
142 if (tp+fn<=0) { return false; }
143 if (tp+fp<=0) { return false; }
144 }
145 return true;
146 }
147
148 /** This is the Pearson correlation of two 0-1 random variables X=prediction and Y=reality
149 * CC = Cov(X,Y)/(Stddev(X)*Stddev(Y))
150 * If either RV has zero variance, the CC is underfined: assertion faliure
151 * If this contingency table is not tracking TN, then CC is undefined: assertion failure
152 */
153 public double correlationCoefficient() {
154 Assert.a(frozen);
155 Assert.a(splitMargins());
156 double num = (tp*tn)-(fn*fp);
157 double den2 = (double) (tp+fn)*(tn+fp)*(tp+fp)*(tn+fn);
158 double cc = num/Math.sqrt(den2);
159 return cc;
160 }
161
162
163 /** This is the average conditional probability. Only defined if TN is being tracked, and
164 * if all four marginal values are positive.
165 */
166 public double averageConditionalProbability(){
167 Assert.a(frozen);
168 Assert.a(splitMargins());
169 double acp = 0;
170 acp += (double) tp/(tp+fn);
171 acp += (double) tp/(tp+fp);
172 acp += (double) tn/(tn+fp);
173 acp += (double) tn/(tn+fn);
174 acp /= 4.0;
175 return acp;
176 }
177
178 /** This is the approximate correlation. Only defined if TN is being tracked, and
179 * if all four marginal values are positive. Equal to
180 * 2*(averageConditionalProbability - 0.5)
181 */
182 public double approximateCorrelation(){
183 Assert.a(frozen);
184 return 2*(averageConditionalProbability() - 0.5);
185 }
186
187 /** This is the average of sensitivity and specifity. This is equal to the limit of ACP as TN->infinity
188 * This is defined even if we are not tracking TN.
189 */
190 public double averageSensitivitySpecificity(){
191 Assert.a(frozen);
192 return 0.5*( sensitivity() + specificity() );
193 }
194
195 /** Sensitivity is TP/(TP+FN) is the fraction of actual events that are predicted.
196 * If TP+FN is zero then sensitivity is undefined, resulting in assertion failure
197 * Sensitivity is defined even if not tracking TN.
198 */
199 public double sensitivity() {
200 Assert.a(frozen);
201 Assert.a(tp+fn > 0);
202 return (double) tp/(tp+fn);
203 }
204
205 /** Specificity is the TP/(TP+FP) is the fraction of predicted events that are real.
206 * If TP+FP=0 then specificity is undefined and result in assertion failure
207 * Specificity is defined even if not tracking TN
208 */
209 public double specificity() {
210 Assert.a(frozen);
211 Assert.a(tp+fp > 0);
212 return (double) tp/(tp+fp);
213 }
214
215
216 ////////////////////////////////////////////////////
217 // Printed summaries
218
219 public String summarize() {
220 String ret = "";
221 NumberFormat d3 = new DecimalFormat();
222 d3.setMinimumFractionDigits(3);
223 d3.setMaximumFractionDigits(3);
224
225 if (tn>=0) {
226 ret += "( TP=" + tp + ", FP=" + fp + ", FN=" + fn + ", TN=" + tn + " ) ";
227 ret += "( AP=" + ap() + ", AN=" + an() + ", PP=" + pp() + ", PN=" + pn() + " ) ";
228 if (splitMargins()) {
229 ret += "( CC=" + d3.format(correlationCoefficient()) + " ) ";
230 ret += "( ACP=" + d3.format(averageConditionalProbability()) + " ) ";
231 ret += "( AC=" + d3.format(approximateCorrelation()) + " ) ";
232 ret += "( sens=" + d3.format(sensitivity()) + ", spec=" + d3.format(specificity()) + ", avSS=" + d3.format(averageSensitivitySpecificity()) + " ) ";
233 } else {
234 ret += "( margins not split ) ";
235 }
236 } else {
237 ret += "( TP=" + tp + ", FP=" + fp + ", FN=" + fn + " ) ";
238 ret += "( AP=" + ap() + ", PP=" + pp() + " )";
239 if (splitMargins() ) {
240 ret += "( sens=" + d3.format(sensitivity()) + ", spec=" + d3.format(specificity()) + ", avSS=" + d3.format(averageSensitivitySpecificity()) + " ) ";
241 } else {
242 ret += "( margins not split ) ";
243 }
244 }
245
246 return ret;
247 }
248
249
250 }