1
2
3
4
5
6 """Approximate calculation of appropriate thresholds for motif finding
7 """
8 import math,random
9
11 """ Class representing approximate score distribution for a given motif.
12
13 Utilizes a dynamic programming approch to calculate the distribution of
14 scores with a predefined precision. Provides a number of methods for calculating
15 thresholds for motif occurences.
16 """
17 - def __init__(self,motif,precision=10**3):
29
31 return int((x-y+0.5*self.step)//self.step)
32
34 return max(0,min(self.n_points-1,i+j))
35
36 - def modify(self,scores,mo_probs,bg_probs):
37 mo_new=[0.0]*self.n_points
38 bg_new=[0.0]*self.n_points
39 for k, v in scores.iteritems():
40 d=self._index_diff(v)
41 for i in range(self.n_points):
42 mo_new[self._add(i,d)]+=self.mo_density[i]*mo_probs[k]
43 bg_new[self._add(i,d)]+=self.bg_density[i]*bg_probs[k]
44 self.mo_density=mo_new
45 self.bg_density=bg_new
46
48 """
49 Approximate the log-odds threshold which makes the type I error (false positive rate).
50 """
51 i=self.n_points
52 prob=0.0
53 while prob<fpr:
54 i-=1
55 prob+=self.bg_density[i]
56 return self.min_score+i*self.step
57
59 """
60 Approximate the log-odds threshold which makes the type II error (false negative rate).
61 """
62 i=-1
63 prob=0.0
64 while prob<fnr:
65 i+=1
66 prob+=self.mo_density[i]
67 return self.min_score+i*self.step
68
70 """
71 Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion
72 """
73 i=self.n_points
74 fpr=0.0
75 fnr=1.0
76 while fpr*rate_proportion<fnr:
77 i-=1
78 fpr+=self.bg_density[i]
79 fnr-=self.mo_density[i]
80 if return_rate:
81 return self.min_score+i*self.step,fpr
82 else:
83 return self.min_score+i*self.step
84
86 """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software.
87
88 It selects such a threshold that the log(fpr)=-ic(M)
89 note: the actual patser software uses natural logarithms instead of log_2, so the numbers
90 are not directly comparable.
91 """
92 return self.threshold_fpr(fpr=2**-self.ic)
93