def main(k, inter, con):
    """Run the three k-mer counting methods on one pair of input files.

    k     -- length of the motifs (k-mers) to count
    inter -- name of the file holding the intergenic sequence
    con   -- name of the file holding the conservation string

    Each method prints its own result.  The three results are also returned
    as a tuple (exhaustive, exhaustivec, exhaustiveone) so that callers can
    use them; callers that ignore the return value are unaffected.
    """
    # METHOD 1: enumerate every k-mer, then scan the sequence for each one.
    intergenic = read_data(inter)
    count = exhaustive(k, intergenic)

    # METHOD 2: same enumeration, but only fully conserved hits are counted.
    conservation = read_data(con)
    count_conserved = exhaustivec(k, intergenic, conservation)

    # METHOD 3: a single pass over the sequence with a dictionary.
    count_single_pass = exhaustiveone(k, intergenic)

    return count, count_conserved, count_single_pass


def enummotif(k, alphabet):
    """Exhaustively enumerate all motifs of length k over alphabet.

    alphabet is a list of letters.  The result is a new list of strings.
    For k <= 1 the result is just a copy of the alphabet, because the
    alphabet itself already counts as the length-1 motifs.

    The order of the returned motifs is the one the original prepend-based
    implementation produced: after every extension round the freshly built
    list is reversed.
    """
    motifs = list(alphabet)  # copy, so the caller's list is never aliased
    for _ in range(k - 1):
        extended = [prefix + letter
                    for prefix in motifs
                    for letter in alphabet]
        # Prepending each new motif one at a time (as the original did) is
        # the same as appending them all and reversing once.
        extended.reverse()
        motifs = extended
    return motifs


# Maps each nucleotide ambiguity code to the concrete bases it stands for.
profile_ambiguities = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
                       'S': 'CG', 'W': 'AT', 'R': 'AG', 'Y': 'CT',
                       'M': 'AC', 'K': 'TG',
                       'B': 'TCG', 'D': 'ATG', 'H': 'ATC', 'V': 'ACG',
                       'N': 'ATCG'}


def profile2seqs(profile):
    """Create all the instances of a motif that are possible.

    Every ambiguity code in profile is expanded into its concrete bases
    (Y into C and T, etc.).  An empty profile yields [''].  A symbol that is
    not a key of profile_ambiguities raises KeyError.
    """
    seqs = ['']
    for symbol in profile:
        letters = profile_ambiguities[symbol]
        # Extend every partial sequence with every base allowed here; the
        # last position varies fastest in the result.
        seqs = [prefix + letter for prefix in seqs for letter in letters]
    return seqs


def read_data(file):
    """Return the contents of the named file with every '-' removed.

    Only '-' characters are stripped; everything else in the file (line
    breaks included) is returned unchanged.
    """
    with open(file, 'r') as handle:
        return handle.read().replace('-', '')


def _occurrences(text, pattern):
    """Yield the start index of every match of pattern in text.

    Matches may overlap, unlike those counted by str.count.
    """
    start = text.find(pattern)
    while start != -1:
        yield start
        start = text.find(pattern, start + 1)


def exhaustive(k, intergenic):
    """Enumerate all k-mers and count each one in the intergenic sequence.

    intergenic is the string to be parsed (read with read_data)
    k is the length of the profile

    Overlapping occurrences are counted, so the counts agree with the
    window-by-window scan in exhaustiveone.  Prints and returns a list of
    (count, kmer) tuples sorted in ascending order; k-mers that never occur
    are included with a count of 0.

    written by Reina Riemann Spring05 6.096
    """
    alphabet = ['C', 'G', 'T', 'A']
    final = sorted(
        (sum(1 for _ in _occurrences(intergenic, kmer)), kmer)
        for kmer in enummotif(k, alphabet)
    )
    # print(x) with one argument gives the same output on Python 2 and 3.
    print(final)
    return final


def exhaustivec(k, intergenic, con):
    """Enumerate all k-mers and count their conserved occurrences.

    intergenic is the string to be parsed (read with read_data)
    k is the length of the profile
    con is the string with the conservation information

    An occurrence starting at index p is counted only when con[p:p+k] is a
    run of k '*' characters.
    NOTE(review): this assumes con is aligned index-for-index with
    intergenic and marks conserved positions with '*' -- confirm against
    the input file format.

    Prints and returns a list of (count, kmer) tuples sorted in ascending
    order; k-mers without a conserved occurrence have a count of 0.

    written by Reina Riemann Spring05 6.096
    """
    alphabet = ['C', 'G', 'T', 'A']
    stars = k * '*'
    final = sorted(
        (sum(1 for pos in _occurrences(intergenic, kmer)
             if con[pos:pos + k] == stars),
         kmer)
        for kmer in enummotif(k, alphabet)
    )
    print(final)
    return final


def exhaustiveone(k, intergenic):
    """Count all k-mers in a single pass over the intergenic sequence.

    intergenic is the string to be parsed (read with read_data)
    k is the length of the profile

    Every window of length k is counted as it appears in the string,
    whatever characters it contains.  Prints and returns a list of
    (count, kmer) tuples sorted in ascending order; only k-mers that occur
    at least once are present.

    written by Reina Riemann Spring 05
    """
    motif = {}  # maps each k-mer seen so far to its number of occurrences
    for j in range(0, len(intergenic) - k + 1):
        window = intergenic[j:j + k]
        motif[window] = motif.get(window, 0) + 1
    final = sorted((seen, kmer) for kmer, seen in motif.items())
    print(final)
    return final