function [mismatchPositions, initialCodons, finalCodons]=parseBLATAlignment(filename); %Takes as input the BLAt output in txt format, %where the first row of data is the first row of the file (i.e., assumed no empty %lines at start of file). It returns the positions of the mismatching %nucleotides in the query sequence (human sequence), and for each mismatch, the codons before %and after the mutation in initialCodons and finalCodons. You can use this to figure out whether the change % is synonimous or non-synonimous. initialCodons=[]; finalCodons=[]; mismatchPositions=[]; fid=fopen(filename); pastLine = fgetl(fid); I=find(pastLine==' '); k=I(1)-1; startPos=str2num(pastLine(1:k)); curLine = fgetl(fid); c=0; %counts number of mismatches found while 1 nextLine= fgetl(fid); if ~ischar(nextLine); break, end; if length(curLine>k); curLine(1); if (curLine(1)=='>')|(curLine(1)=='<'), curPos=str2num(pastLine(1:k)); I=find(curLine=='|'); d=curLine(I(1): I(length(I))); J=find(d==' '); if isempty(J)==0, n=length(J); for i=1:n, c=c+1; mismatchPos=curPos+J(i)-1; m=mod(mismatchPos-startPos+1, 3); %to find proper reading frame if m==0, iCodon=pastLine(I(1)+J(i)-3:I(1)+J(i)-1); fCodon=strcat(iCodon(1:2), nextLine(I(1)+J(i)-1)); elseif m==1, iCodon=pastLine(I(1)+J(i)-1:I(1)+J(i)+1); fCodon=strcat(nextLine(I(1)+J(i)-1), iCodon(2:3)); elseif m==2, iCodon=pastLine(I(1)+J(i)-2:I(1)+J(i)); fCodon=strcat(iCodon(1), nextLine(I(1)+J(i)-1), iCodon(3)); end; mismatchPositions(c)=mismatchPos; initialCodons=[initialCodons; iCodon]; finalCodons=[finalCodons; fCodon]; end; end; end; end; pastLine=curLine; curLine=nextLine; end; fclose(fid);