#!/usr/bin/env python #### INSTRUCTIONS FOR USE: # call program as follows: ./pstemp.py # e.g. ./ps1temp.py 6 allinter allintercons # make sure the pstemp.py is marked as executable: chmod +x pstemp.py # or in windows with: python ps1temp.py 6 allinter allintercons # once you have put python in your path import sys mlen = int(sys.argv[1]) datafile = sys.argv[2] consfile = sys.argv[3] alphabet = ['A', 'T', 'G', 'C'] def main(): data = readdata(datafile) cons = readdata(consfile) motcount = countmotif(data) motcountcons = countmotifcons(data,cons) motcons = [] ########## YOUR CODE HERE # create a list motcons that fills the conservation score (motcons) of each motif # note: in python 3/4 = 0, therefore you should use float(3)/4 print "Top 50 motifs:" printtop(motcount,50) print "" print "Top 50 motifs when considering conserved regions:" printtop(motcountcons,50) print "" print "Top 50 most conserved motifs:" printtop(motcons,50) def countmotif(data): # function returns an array indexed by motif (see m2idx below) containing # counts for all motifs found in data counts = [] for i in range(pow(4,mlen)): counts.insert(i,0) ########## YOUR CODE HERE # write code to fill counts: the count for each motif. notice that counts is # indexed by the motif number # you will want to look up how to perform substrings. you will need to use # m2idx to produce your indexes return counts def countmotifcons(data,cons): # like countmotif below but only counts a motif if all positions are marked with # *'s in cons. counts = [] for i in range(pow(4,mlen)): counts.insert(i,0) ########## YOUR CODE HERE # fill in code to do the same as countmotif except to only count motifs where # all positions are conserved return counts def printtop(count,numprint): motcount = [(count[i], idx2m(i)) for i in range(len(count))] motcount.sort() motcount.reverse() for i in range(numprint): print motcount[i] def m2idx(motif): # maps a dna motif to a unique index number # returns -1 if motif contains characters not in alphabet idx=0 try: for i in range(len(motif)): idx = (idx << 2) + alphabet.index(motif[i]) except: return -1 return idx def idx2m(idx): # maps an index to a unique motif motif="" for i in range(mlen): motif = alphabet[idx % 4] + motif idx = idx >> 2 return motif def readdata(file): f = open(file,'r') data = f.read() f.close() return data main()