using System; using System.IO; using System.Collections; using System.Text.RegularExpressions; namespace Regression { class Regression { TextWriter twCpAll = new StreamWriter("CpTableAll.txt"); TextWriter twCI = new StreamWriter("CIndices.txt"); ArrayList data = new ArrayList(); ArrayList Y = new ArrayList(); double variance = 0.0; #region ReadData public void ReadData(string FileName) { StreamReader reader = File.OpenText(FileName); string input = null; while ((input = reader.ReadLine()) != null) { ArrayList line = new ArrayList(); Regex r = new Regex(","); foreach (string ss in r.Split(input)) { if(!ss.Equals("")) line.Add(ss); } data.Add(line); } Console.WriteLine("data Count: " + data.Count); } #endregion #region Find Variance public void FindVariance() { //Find Variance double k = 9; //including intercept double n = data.Count; ArrayList X = new ArrayList(); X = FullParameters(); double sum = 0.0; for(int i = 0; i < data.Count; i++) { ArrayList row = (ArrayList)data[i]; double y = double.Parse(row[8].ToString()); double yHat = double.Parse(((ArrayList)X[0])[0].ToString()); for(int x = 1; x < X.Count; x++) { yHat += double.Parse(((ArrayList)X[x])[0].ToString()) * double.Parse(row[x-1].ToString()); } sum += Math.Pow(yHat-y, 2); } variance = sum / (n-k); } #endregion #region FullParameters public ArrayList FullParameters() { ArrayList A = new ArrayList(); for(int i = 0; i < data.Count; i++) { ArrayList dataRow = (ArrayList)data[i]; ArrayList newRow = new ArrayList(); newRow.Add(1.0); for(int j = 0; j < 9; j++) { if(j != 8) { newRow.Add(dataRow[j]); } else { ArrayList yRow = new ArrayList(); yRow.Add(double.Parse(dataRow[j].ToString())); Y.Add(yRow); } } A.Add(newRow); } ArrayList X = new ArrayList(); #region Print A and Y values //PrintMatrix(A); //Console.Write("Y: "); //for(int i = 0; i < Y.Count; i++) // { // Console.Write(((ArrayList)Y[i])[0] + " "); // } //Console.WriteLine(); #endregion X = ComputeParameters(A, Y); TextWriter tw = new StreamWriter("BetasLog.txt"); for(int i = 0; i < X.Count; i++) { Console.WriteLine("B" + i + " = " + double.Parse(((ArrayList)X[i])[0].ToString())); tw.WriteLine("B" + i + " = " + double.Parse(((ArrayList)X[i])[0].ToString())); } tw.Close(); return X; } #endregion #region Compute Parameters public ArrayList ComputeParameters(ArrayList A, ArrayList Y) { //Return X = (A^T *A)^-1 A^T*Y ArrayList AT = new ArrayList(); AT = MatrixTranspose(A); ArrayList ATA = new ArrayList(); ATA = MatrixMultiply(AT, A); ArrayList ATAInv = new ArrayList(); ATAInv = MatrixInverse(ATA); ArrayList ATY = MatrixMultiply(AT, Y); ArrayList X = new ArrayList(); X = MatrixMultiply(ATAInv, ATY); return X; } #endregion #region Matrix Inverse public ArrayList MatrixInverse(ArrayList AO) { ArrayList A = new ArrayList(AO); int n = A.Count; for(int i = 0; i < n; i++) { ArrayList row = (ArrayList)A[i]; for(int z = 0; z < i; z++) { row.Add(0.0); } row.Add(1.0); int c = 2*n - row.Count; for(int z = 0; z < c; z++) { row.Add(0.0); } A[i] = row; } //Console.WriteLine("Identity Added W = " + ((ArrayList)A[0]).Count + ", H = " + A.Count); //PrintMatrix(A); for(int p = 0; p < n; p++) { for(int k = p+1; k < n; k++) { if(Math.Abs(double.Parse(((ArrayList)A[k])[p].ToString())) > Math.Abs(double.Parse(((ArrayList)A[p])[p].ToString()))) { ArrayList temp = (ArrayList)A[p]; A[p] = A[k]; A[k] = temp; } } //Console.WriteLine("After Row Swap W = " + ((ArrayList)A[0]).Count + ", H = " + A.Count); //PrintMatrix(A); ArrayList rowP = (ArrayList)A[p]; double pp = double.Parse(rowP[p].ToString()); for(int a = 0; a < 2*n; a++) { ArrayList newP = new ArrayList(); double pa = double.Parse(rowP[a].ToString()); //Console.WriteLine("pp = " + pp + ", pa = " + pa); double newVal = pa / pp; //Console.WriteLine("newVal = " + newVal); rowP[a] = newVal; } //PrintRow((ArrayList)A[p]); for(int i = 0; i < n; i++) { if( i != p) { double product = 0.0; double ip = double.Parse(((ArrayList)A[i])[p].ToString()); for(int b = 0; b < 2*n; b++) { double pb = double.Parse(((ArrayList)A[p])[b].ToString()); product = ip * pb; double ib = double.Parse(((ArrayList)A[i])[b].ToString()); double newEntry = ib - product; //Console.WriteLine("ip= " + ip + ", pb= " + pb + ", product= " + product + ", ib= " + ib // + "newEntry= " + newEntry); ((ArrayList)A[i])[b] = newEntry; } } } } //Console.WriteLine("After Elimination W = " + ((ArrayList)A[0]).Count + ", H = " + A.Count); //PrintMatrix(A); ArrayList inverse = new ArrayList(); for(int i = 0; i < n; i++) { ArrayList row = (ArrayList)A[i]; ArrayList newRow = new ArrayList(); for(int x = n; x < row.Count; x++) { newRow.Add(row[x]); } inverse.Add(newRow); } //Console.WriteLine("Inverse W = " + ((ArrayList)inverse[0]).Count + ", H = " + inverse.Count); //PrintMatrix(inverse); return inverse; } #endregion #region Print Matrix public void PrintMatrix(ArrayList M) { for(int i = 0; i < M.Count; i++) { ArrayList row = (ArrayList)M[i]; for(int a = 0; a < row.Count; a++) { Console.Write(row[a] + " "); } Console.WriteLine(); } } #endregion #region Print Row public void PrintRow(ArrayList R) { for(int i = 0; i < R.Count; i++) { Console.Write(R[i] + " "); Console.WriteLine(); } } #endregion #region Matrix Transpose public ArrayList MatrixTranspose(ArrayList A) { ArrayList AT = new ArrayList(); int width = ((ArrayList)A[0]).Count; int height = A.Count; for(int w = 0; w < width; w++) { ArrayList newRow = new ArrayList(); for(int h = 0; h < height; h++) { ArrayList row = (ArrayList)A[h]; newRow.Add(row[w]); } AT.Add(newRow); } //Console.WriteLine("Transpose W = " + ((ArrayList)AT[0]).Count + ", H = " + AT.Count); //PrintMatrix(AT); return AT; } #endregion #region Matrix Multiply public ArrayList MatrixMultiply(ArrayList A, ArrayList B) { ArrayList product = new ArrayList(); for(int bw = 0; bw < ((ArrayList)B[0]).Count; bw++) { for(int ah = 0; ah < A.Count; ah++) { double sum = 0.0; ArrayList aRow = (ArrayList)A[ah]; for(int bh = 0; bh < B.Count; bh++) { ArrayList bRow = (ArrayList)B[bh]; double prod = double.Parse(bRow[bw].ToString())*double.Parse(aRow[bh].ToString()); sum += prod; } if(bw == 0) { ArrayList newRow = new ArrayList(); newRow.Add(sum); product.Add(newRow); } else { ArrayList row = (ArrayList)product[ah]; row.Add(sum); product[ah] = row; } }//for }//for //Console.WriteLine("Product W = " + ((ArrayList)product[0]).Count + ", H = " + product.Count); //PrintMatrix(product); return product; } #endregion #region MallowsCp (Greedy) public void MallowsCpGreedy() { TextWriter twCp = new StreamWriter("CpTable.txt"); ArrayList existingSet = new ArrayList(); ArrayList remainingSet = new ArrayList(); //double k = 9; //including intercept double n = data.Count; //Initialize remainingSet for(int i = 0; i < 8; i++) { remainingSet.Add(i); } //Cp Calculations while(remainingSet.Count > 0) { ArrayList CpList = new ArrayList(); for(int i = 0; i < remainingSet.Count; i++) { ArrayList currentSet = new ArrayList(existingSet); currentSet.Add(int.Parse(remainingSet[i].ToString())); double p = currentSet.Count; //Generate New Data Set ArrayList newData = new ArrayList(); ArrayList newY = new ArrayList(); for(int d = 0; d < data.Count; d++) { ArrayList row = (ArrayList)data[d]; ArrayList newRow = new ArrayList(); newRow.Add(1.0); for(int c = 0; c < currentSet.Count; c++) { newRow.Add(row[int.Parse(currentSet[c].ToString())]); }//for newData.Add(newRow); ArrayList newYRow = new ArrayList(); newYRow.Add(row[8]); newY.Add(newYRow); }//for //Find Optimal Parameters ArrayList newBs = new ArrayList(); newBs = ComputeParameters(newData, newY); //Find Residual Sum of Squares double sum = 0.0; for(int a = 0; a < newData.Count; a++) { ArrayList row = (ArrayList)newData[a]; double y = double.Parse(((ArrayList)newY[a])[0].ToString()); double yHat = 0.0; for(int x = 0; x < newBs.Count; x++) { yHat += double.Parse(((ArrayList)newBs[x])[0].ToString()) * double.Parse(row[x].ToString()); } sum += Math.Pow(yHat-y, 2); } //Calculate Cp and Add to List double Cp = sum/variance - (n-2*p); ArrayList CpRow = new ArrayList(); CpRow.Add(Cp); CpRow.Add(remainingSet[i]); CpRow.Add(i); CpList.Add(CpRow); }//for //Sort by Cp Values Quicksort(CpList, 0, CpList.Count-1); //Print Data for Set with Minumum Cp Value to File double minCp = double.Parse(((ArrayList)CpList[0])[0].ToString()); int addition = int.Parse(((ArrayList)CpList[0])[1].ToString()); int index = int.Parse(((ArrayList)CpList[0])[2].ToString()); existingSet.Add(addition); remainingSet.RemoveAt(index); twCp.Write((existingSet.Count+1) + " " + existingSet.Count + " "); Console.Write((existingSet.Count+1) + " " + existingSet.Count + " "); twCp.Write(existingSet[0]); Console.Write(existingSet[0]); if(existingSet.Count > 1) { for(int e = 1; e < existingSet.Count; e++) { twCp.Write(", " + existingSet[e]); Console.Write(", " + existingSet[e]); } } twCp.WriteLine(" " + minCp); Console.WriteLine(" " + minCp); }//while twCp.Close(); } #endregion #region MallowsCp (All Subsets) public void MallowsCpAll() { int count = 1; ArrayList one = new ArrayList(); ArrayList two = new ArrayList(); ArrayList three = new ArrayList(); ArrayList four = new ArrayList(); ArrayList five = new ArrayList(); ArrayList six = new ArrayList(); ArrayList seven = new ArrayList(); ArrayList eight = new ArrayList(); #region Generate Subsets //Create All Subsets of All Sizes while(count < 256) { ArrayList members = new ArrayList(); if((0x01 & count) == 1) members.Add(7); if((0x01 & (count>>1)) == 1) members.Add(6); if((0x01 & (count>>2)) == 1) members.Add(5); if((0x01 & (count>>3)) == 1) members.Add(4); if((0x01 & (count>>4)) == 1) members.Add(3); if((0x01 & (count>>5)) == 1) members.Add(2); if((0x01 & (count>>6)) == 1) members.Add(1); if((0x01 & (count>>7)) == 1) members.Add(0); int num = members.Count; switch(num) { case 1: { one.Add(members); break; } case 2: { two.Add(members); break; } case 3: { three.Add(members); break; } case 4: { four.Add(members); break; } case 5: { five.Add(members); break; } case 6: { six.Add(members); break; } case 7: { seven.Add(members); break; } case 8: { eight.Add(members); break; } default: { Console.WriteLine("Strange Subset!"); break; } }//switch count++; }//while #endregion CalculateCps(one, 1); CalculateCps(two, 2); CalculateCps(three, 3); CalculateCps(four, 4); CalculateCps(five, 5); CalculateCps(six, 6); CalculateCps(seven, 7); CalculateCps(eight, 8); twCpAll.Close(); } #endregion #region CalculateCps (All Subsets) public void CalculateCps(ArrayList subsetList, int size) { ArrayList CpList = new ArrayList(); //double k = 9; //including intercept double n = data.Count; //Generate Dataset for(int s = 0; s < subsetList.Count; s++) { ArrayList dataSet = new ArrayList(); ArrayList subY = new ArrayList(); ArrayList subset = (ArrayList)subsetList[s]; double p = subset.Count; for(int d = 0; d < data.Count; d++) { ArrayList row = (ArrayList)data[d]; ArrayList newRow = new ArrayList(); newRow.Add(1.0); for(int c = 0; c < subset.Count; c++) { newRow.Add(row[int.Parse(subset[c].ToString())]); }//for dataSet.Add(newRow); ArrayList newYRow = new ArrayList(); newYRow.Add(row[8]); subY.Add(newYRow); }//for //Find Optimal Parameters ArrayList X = new ArrayList(); X = ComputeParameters(dataSet, subY); //Find Residual Sum of Squares double sum = 0.0; for(int a = 0; a < dataSet.Count; a++) { ArrayList row = (ArrayList)dataSet[a]; double y = double.Parse(((ArrayList)subY[a])[0].ToString()); double yHat = 0.0; for(int x = 0; x < X.Count; x++) { yHat += double.Parse(((ArrayList)X[x])[0].ToString()) * double.Parse(row[x].ToString()); } sum += Math.Pow(yHat-y, 2); } //Find Cp Value and add to List double Cp = sum/variance - (n-2*p); ArrayList CpRow = new ArrayList(); CpRow.Add(Cp); CpRow.Add(subset); CpList.Add(CpRow); }//for //Sort by Cp Values Quicksort(CpList, 0, CpList.Count-1); //Print Data to File for(int t = 0; t < CpList.Count; t++) { double CpVal = double.Parse(((ArrayList)CpList[t])[0].ToString()); ArrayList subset = (ArrayList)((ArrayList)CpList[t])[1]; twCpAll.Write((size+1) + " " + size + " "); Console.Write((size+1) + " " + size + " "); twCpAll.Write(subset[0]); Console.Write(subset[0]); if(size > 1) { for(int s = 1; s < subset.Count; s++) { twCpAll.Write(", " + subset[s]); Console.Write(", " + subset[s]); } } twCpAll.WriteLine(" " + CpVal); Console.WriteLine(" " + CpVal); }//for } #endregion #region Quicksort public ArrayList Quicksort(ArrayList pairs, int low, int high) { // lo is the lower index, hi is the upper index // of the region of array a that is to be sorted int i=low; int j=high; ArrayList temp; double x = (double)(((ArrayList)pairs[(low+high)/2])[0]); // partition do { while ((double)((ArrayList)pairs[i])[0]x) j--; if (i<=j) { temp = new ArrayList((ArrayList)pairs[i]); //Console.WriteLine("temp: (i)" + temp[0].ToString() + ", " + temp[1].ToString() + ", " + temp[2].ToString()); pairs[i]=pairs[j]; //Console.WriteLine("pairs[i] (j): " +((ArrayList)pairs[i])[0].ToString() + ", " + ((ArrayList)pairs[i])[1].ToString() + ", " + ((ArrayList)pairs[i])[2].ToString()); pairs[j]=temp; //Console.WriteLine("pairs[j] (i): " +((ArrayList)pairs[j])[0].ToString() + ", " + ((ArrayList)pairs[j])[1].ToString() + ", " + ((ArrayList)pairs[j])[2].ToString()); i++; j--; } } while (i<=j); // recursion if (low < j) Quicksort(pairs, low, j); if (i < high) Quicksort(pairs, i, high); return pairs; } #endregion #region Generate Files for Logistic Regression public void GenerateFiles() { TextWriter twY = new StreamWriter("data_Y.txt"); ArrayList testSubsets = new ArrayList(); ArrayList info = new ArrayList(); ArrayList members = new ArrayList(); #region Add All Test Subsets members.Add(1); info.Add(members); info.Add("data_4B.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(5); info.Add(members); info.Add("data_3B.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(0); info.Add(members); info.Add("data_2B.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(6); info.Add(members); info.Add("data_1B.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(2); info.Add(members); info.Add("data_MS.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(7); info.Add(members); info.Add("data_1F.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(4); info.Add(members); info.Add("data_2F.txt"); testSubsets.Add(info); info = new ArrayList(); members = new ArrayList(members); members.Add(3); info.Add(members); info.Add("data_3F.txt"); testSubsets.Add(info); #endregion for(int t = 0; t < testSubsets.Count; t++) { ArrayList temp = (ArrayList)testSubsets[t]; ArrayList subset = (ArrayList)temp[0]; string fileName = (string)temp[1]; TextWriter twD = new StreamWriter(fileName); for(int i = 0; i < data.Count; i++) { ArrayList row = (ArrayList)data[i]; for(int s = 0; s < subset.Count; s++) { twD.Write(row[int.Parse(subset[s].ToString())] + ","); } twD.Write(row[8]); twD.WriteLine(); }//for twD.Close(); }//for twY.Close(); } #endregion #region GetPValues public ArrayList GetPValues(string FileName, int lines, int start) { ArrayList pValues = new ArrayList(); StreamReader reader = File.OpenText(FileName); string input = null; for(int t = 0; t < lines-1; t++) { reader.ReadLine(); } while ((input = reader.ReadLine()) != null) { string pvalue = input.Substring(start-1, 6); pValues.Add(pvalue); } //Console.WriteLine("pValues Count: " + pValues.Count); return pValues; } #endregion #region Find C-Index public void FindCIndex(ArrayList P) { ArrayList healthy = new ArrayList(); ArrayList sick = new ArrayList(); double CIndex = 0.0; double concordant = 0.0; double discordant = 0.0; double tie = 0.0; for(int i = 0; i < Y.Count; i++) { ArrayList ins = (ArrayList)Y[i]; if(int.Parse(ins[0].ToString()) == 0) { healthy.Add(double.Parse(P[i].ToString())); } else if(int.Parse(ins[0].ToString()) == 1) { sick.Add(double.Parse(P[i].ToString())); } else { Console.WriteLine("ERROR"); } } for(int i = 0; i < healthy.Count; i++) { concordant = 0.0; discordant = 0.0; tie = 0.0; for(int j = 0; j < sick.Count; j++) { double healthyVal = double.Parse(healthy[i].ToString()); double sickVal = double.Parse(sick[j].ToString()); //Console.WriteLine("healthy: " + healthyVal + ", sick: " + sickVal); if(healthyVal < sickVal) { concordant++; } else if(healthyVal > sickVal) { discordant++; } else if(healthyVal == sickVal) { tie++; } else { Console.WriteLine("ERROR2"); } } } CIndex = (concordant + 0.5*tie) / (concordant + discordant + tie); twCI.WriteLine(CIndex); Console.WriteLine(CIndex); } #endregion [STAThread] static void Main(string[] args) { Regression system = new Regression(); string filePath = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\data.txt"; system.ReadData(filePath); system.FindVariance(); //system.MallowsCpGreedy(); system.MallowsCpAll(); //system.GenerateFiles(); ArrayList P = new ArrayList(); string filePath4B = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\4B_Results.txt"; P = system.GetPValues(filePath4B, 29, 28); system.FindCIndex(P); P = new ArrayList(); string filePath3B = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\3B_Results.txt"; P = system.GetPValues(filePath3B, 33, 38); system.FindCIndex(P); P = new ArrayList(); string filePath2B = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\2B_Results.txt"; P = system.GetPValues(filePath2B, 37, 48); system.FindCIndex(P); P = new ArrayList(); string filePath1B = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\1B_Results.txt"; P = system.GetPValues(filePath1B, 41, 58); system.FindCIndex(P); P = new ArrayList(); string filePathMS = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\MS_Results.txt"; P = system.GetPValues(filePathMS, 45, 68); system.FindCIndex(P); P = new ArrayList(); string filePath1F = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\1F_Results.txt"; P = system.GetPValues(filePath1F, 49, 78); system.FindCIndex(P); P = new ArrayList(); string filePath2F = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\2F_Results.txt"; P = system.GetPValues(filePath2F, 53, 88); system.FindCIndex(P); P = new ArrayList(); string filePath3F = "C:\\Documents and Settings\\Jessie\\Desktop\\6.873 MDS\\FinalProject\\Regression\\3F_Results.txt"; P = system.GetPValues(filePath3F, 57, 98); system.FindCIndex(P); system.twCI.Close(); Console.WriteLine("DONE"); Console.ReadLine(); } #region Material for Logistic Regression (not used) //Parameters for Logistic Regression const double B0 = -8.4047; const double B1 = 0.1232; const double B2 = 0.0352; const double B3 = -0.0133; const double B4 = 0.0006; const double B5 = -0.0012; const double B6 = 0.0897; const double B7 = 0.9452; const double B8 = 0.0149; #region Calculate Pi's public void CalculatePs() { for(int i = 0; i < data.Count; i++) { double p; ArrayList ins = (ArrayList)data[i]; double exp = B0 + B1*double.Parse(ins[0].ToString()) + B2*double.Parse(ins[1].ToString()) + B3*double.Parse(ins[2].ToString()) + B4*double.Parse(ins[3].ToString()) + B5*double.Parse(ins[4].ToString()) + B6*double.Parse(ins[5].ToString()) + B7*double.Parse(ins[6].ToString()) + B8*double.Parse(ins[7].ToString()); p = 1.0 / (1.0 + Math.Exp(-exp)); ins.Add(p); } } #endregion #endregion } }