1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """ SCOP: Structural Classification of Proteins.
17
18 The SCOP database aims to provide a manually constructed classification of
19 all know protein structures into a hierarchy, the main levels of which
20 are family, superfamily and fold.
21
22 * "SCOP":http://scop.mrc-lmb.cam.ac.uk/scop/
23
24 * "Introduction":http://scop.mrc-lmb.cam.ac.uk/scop/intro.html
25
26 * "SCOP parsable files":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
27
28 The Scop object in this module represents the entire SCOP classification. It
29 can be built from the three SCOP parsable files, modified is so desired, and
30 converted back to the same file formats. A single SCOP domain (represented
31 by the Domain class) can be obtained from Scop using the domain's SCOP
32 identifier (sid).
33
34
35 nodeCodeDict -- A mapping between known 2 letter node codes and a longer
36 description. The known node types are 'cl' (class), 'cf'
37 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain),
38 'sp' (species), 'px' (domain). Additional node types may
39 be added in the future.
40
41 This module also provides code to access SCOP over the WWW.
42
43 Functions:
44 search -- Access the main CGI script.
45 _open -- Internally used function.
46
47 """
48
49 from types import *
50 import os
51
52 import Des
53 import Cla
54 import Hie
55 from Residues import *
56 from Bio import SeqIO
57 from Bio.Seq import Seq
58
59
60 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily',
61 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'}
62
63
64 _nodetype_to_code= { 'class': 'cl', 'fold': 'cf', 'superfamily': 'sf',
65 'family': 'fa', 'protein': 'dm', 'species': 'sp', 'domain': 'px'}
66
67 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ]
68
69 astralBibIds = [10,20,25,30,35,40,50,70,90,95,100]
70
71 astralEvs = [10, 5, 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-4, 1e-5, 1e-10, 1e-15,
72 1e-20, 1e-25, 1e-50]
73
74 astralEv_to_file = { 10: 'e+1', 5: 'e+0,7', 1: 'e+0', 0.5: 'e-0,3', 0.1: 'e-1',
75 0.05: 'e-1,3', 0.01: 'e-2', 0.005: 'e-2,3', 0.001: 'e-3',
76 1e-4: 'e-4', 1e-5: 'e-5', 1e-10: 'e-10', 1e-15: 'e-15',
77 1e-20: 'e-20', 1e-25: 'e-25', 1e-50: 'e-50' }
78
79 astralEv_to_sql = { 10: 'e1', 5: 'e0_7', 1: 'e0', 0.5: 'e_0_3', 0.1: 'e_1',
80 0.05: 'e_1_3', 0.01: 'e_2', 0.005: 'e_2_3', 0.001: 'e_3',
81 1e-4: 'e_4', 1e-5: 'e_5', 1e-10: 'e_10', 1e-15: 'e_15',
82 1e-20: 'e_20', 1e-25: 'e_25', 1e-50: 'e_50' }
83
84 try:
85
86 _cmp = cmp
87 except NameError:
89 """Implementation of cmp(x,y) for Python 3 (PRIVATE).
90
91 Based on Python 3 docs which say if you really need the cmp()
92 functionality, you could use the expression (a > b) - (a < b)
93 as the equivalent for cmp(a, b)
94 """
95 return (a > b) - (a < b)
96
98 """Order SCOP concise classification strings (sccs).
99
100 a.4.5.1 < a.4.5.11 < b.1.1.1
101
102 A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
103 The letter represents the class, and the numbers are the fold,
104 superfamily, and family, respectively.
105
106 """
107
108 s1 = sccs1.split(".")
109 s2 = sccs2.split(".")
110
111 if s1[0] != s2[0]: return _cmp(s1[0], s2[0])
112
113 s1 = list(map(int, s1[1:]))
114 s2 = list(map(int, s2[1:]))
115
116 return _cmp(s1,s2)
117
118
119 _domain_re = re.compile(r">?([\w_\.]*)\s+([\w\.]*)\s+\(([^)]*)\) (.*)")
120
121 -def parse_domain(str):
122 """Convert an ASTRAL header string into a Scop domain.
123
124 An ASTRAL (http://astral.stanford.edu/) header contains a concise
125 description of a SCOP domain. A very similar format is used when a
126 Domain object is converted into a string. The Domain returned by this
127 method contains most of the SCOP information, but it will not be located
128 within the SCOP hierarchy (i.e. The parent node will be None). The
129 description is composed of the SCOP protein and species descriptions.
130
131 A typical ASTRAL header looks like --
132 >d1tpt_1 a.46.2.1 (1-70) Thymidine phosphorylase {Escherichia coli}
133 """
134
135 m = _domain_re.match(str)
136 if (not m) : raise ValueError("Domain: "+ str)
137
138 dom = Domain()
139 dom.sid = m.group(1)
140 dom.sccs = m.group(2)
141 dom.residues = Residues(m.group(3))
142 if not dom.residues.pdbid:
143 dom.residues.pdbid= dom.sid[1:5]
144 dom.description = m.group(4).strip()
145
146 return dom
147
149 filename = "dir.%s.scop.txt_%s" % (filetype,version)
150 handle = open(os.path.join( scop_dir_path, filename))
151 return handle
152
153
155 """The entire SCOP hierarchy.
156
157 root -- The root node of the hierarchy
158 """
159 - def __init__(self, cla_handle=None, des_handle=None, hie_handle=None,
160 dir_path=None, db_handle=None, version=None):
161 """Build the SCOP hierarchy from the SCOP parsable files, or a sql backend.
162
163 If no file handles are given, then a Scop object with a single
164 empty root node is returned.
165
166 If a directory and version are given (with dir_path=.., version=...) or
167 file handles for each file, the whole scop tree will be built in memory.
168
169 If a MySQLdb database handle is given, the tree will be built as needed,
170 minimising construction times. To build the SQL database to the methods
171 write_xxx_sql to create the tables.
172
173 """
174 self._sidDict = {}
175 self._sunidDict = {}
176
177 if cla_handle==des_handle==hie_handle==dir_path==db_handle==None: return
178
179 if dir_path is None and db_handle is None:
180 if cla_handle == None or des_handle==None or hie_handle==None:
181 raise RuntimeError("Need CLA, DES and HIE files to build SCOP")
182
183 sunidDict = {}
184
185 self.db_handle = db_handle
186 try:
187
188 if db_handle:
189
190 pass
191
192 else:
193
194 if dir_path:
195 if not version:
196 raise RuntimeError("Need SCOP version to find parsable files in directory")
197 if cla_handle or des_handle or hie_handle:
198 raise RuntimeError("Cannot specify SCOP directory and specific files")
199
200 cla_handle = _open_scop_file( dir_path, version, 'cla')
201 des_handle = _open_scop_file( dir_path, version, 'des')
202 hie_handle = _open_scop_file( dir_path, version, 'hie')
203
204 root = Node()
205 domains = []
206 root.sunid=0
207 root.type='ro'
208 sunidDict[root.sunid] = root
209 self.root = root
210 root.description = 'SCOP Root'
211
212
213 records = Des.parse(des_handle)
214 for record in records:
215 if record.nodetype =='px':
216 n = Domain()
217 n.sid = record.name
218 domains.append(n)
219 else :
220 n = Node()
221 n.sunid = record.sunid
222 n.type = record.nodetype
223 n.sccs = record.sccs
224 n.description = record.description
225
226 sunidDict[n.sunid] = n
227
228
229 records = Hie.parse(hie_handle)
230 for record in records:
231 if record.sunid not in sunidDict:
232 print record.sunid
233
234 n = sunidDict[record.sunid]
235
236 if record.parent != '' :
237
238 if record.parent not in sunidDict:
239 raise ValueError("Incomplete data?")
240
241 n.parent = sunidDict[record.parent]
242
243 for c in record.children:
244 if c not in sunidDict:
245 raise ValueError("Incomplete data?")
246 n.children.append(sunidDict[c])
247
248
249
250 sidDict = {}
251 records = Cla.parse(cla_handle)
252 for record in records:
253 n = sunidDict[record.sunid]
254 assert n.sccs == record.sccs
255 assert n.sid == record.sid
256 n.residues = record.residues
257 sidDict[n.sid] = n
258
259
260 self._sunidDict = sunidDict
261 self._sidDict = sidDict
262 self._domains = tuple(domains)
263
264 finally:
265 if dir_path:
266
267 if cla_handle : cla_handle.close()
268 if des_handle : des_handle.close()
269 if hie_handle : hie_handle.close()
270
271
274
275
276 - def getDomainBySid(self, sid):
277 """Return a domain from its sid"""
278 if sid in self._sidDict:
279 return self._sidDict[sid]
280 if self.db_handle:
281 self.getDomainFromSQL(sid=sid)
282 if sid in self._sidDict:
283 return self._sidDict[sid]
284 else:
285 return None
286
287
289 """Return a node from its sunid"""
290 if sunid in self._sunidDict:
291 return self._sunidDict[sunid]
292 if self.db_handle:
293 self.getDomainFromSQL(sunid=sunid)
294 if sunid in self._sunidDict:
295 return self._sunidDict[sunid]
296 else:
297 return None
298
299
300 - def getDomains(self):
301 """Returns an ordered tuple of all SCOP Domains"""
302 if self.db_handle:
303 return self.getRoot().getDescendents('px')
304 else:
305 return self._domains
306
307
308
316
317
326
327
335
336
337 - def getDomainFromSQL(self, sunid=None, sid=None):
338 """Load a node from the SQL backend using sunid or sid"""
339 if sunid==sid==None: return None
340
341 cur = self.db_handle.cursor()
342
343 if sid:
344 cur.execute("SELECT sunid FROM cla WHERE sid=%s", sid)
345 res = cur.fetchone()
346 if res is None:
347 return None
348 sunid = res[0]
349
350 cur.execute("SELECT * FROM des WHERE sunid=%s", sunid)
351 data = cur.fetchone()
352
353 if data is not None:
354 n = None
355
356
357 if data[1] != "px":
358 n = Node(scop=self)
359
360 cur.execute("SELECT child FROM hie WHERE parent=%s", sunid)
361 children = []
362 for c in cur.fetchall():
363 children.append(c[0])
364 n.children = children
365
366
367 else:
368 n = Domain(scop=self)
369 cur.execute("select sid, residues, pdbid from cla where sunid=%s",
370 sunid)
371
372 [n.sid,n.residues,pdbid] = cur.fetchone()
373 n.residues = Residues(n.residues)
374 n.residues.pdbid=pdbid
375 self._sidDict[n.sid] = n
376
377 [n.sunid,n.type,n.sccs,n.description] = data
378
379 if data[1] != 'ro':
380 cur.execute("SELECT parent FROM hie WHERE child=%s", sunid)
381 n.parent = cur.fetchone()[0]
382
383 n.sunid = int(n.sunid)
384
385 self._sunidDict[n.sunid] = n
386
387
399
400
402 """Get descendents of a node using the database backend. This avoids
403 repeated iteration of SQL calls and is therefore much quicker than
404 repeatedly calling node.getChildren().
405 """
406 if nodeCodeOrder.index(type) <= nodeCodeOrder.index(node.type): return []
407
408 des_list = []
409
410
411
412 if node.type == 'ro':
413 for c in node.getChildren():
414 for d in self.getDescendentsFromSQL(c,type):
415 des_list.append(d)
416 return des_list
417
418 cur = self.db_handle.cursor()
419
420
421 if type != 'px':
422 cur.execute("SELECT DISTINCT des.sunid,des.type,des.sccs,description FROM \
423 cla,des WHERE cla."+node.type+"=%s AND cla."+type+"=des.sunid", (node.sunid))
424 data = cur.fetchall()
425 for d in data:
426 if int(d[0]) not in self._sunidDict:
427 n = Node(scop=self)
428 [n.sunid,n.type,n.sccs,n.description] = d
429 n.sunid=int(n.sunid)
430 self._sunidDict[n.sunid] = n
431
432 cur.execute("SELECT parent FROM hie WHERE child=%s", n.sunid)
433 n.parent = cur.fetchone()[0]
434
435 cur.execute("SELECT child FROM hie WHERE parent=%s", n.sunid)
436 children = []
437 for c in cur.fetchall():
438 children.append(c[0])
439 n.children = children
440
441
442 des_list.append( self._sunidDict[int(d[0])] )
443
444 else:
445 cur.execute("SELECT cla.sunid,sid,pdbid,residues,cla.sccs,type,description,sp\
446 FROM cla,des where cla.sunid=des.sunid and cla."+node.type+"=%s",
447 node.sunid)
448
449 data = cur.fetchall()
450 for d in data:
451 if int(d[0]) not in self._sunidDict:
452 n = Domain(scop=self)
453
454
455 [n.sunid,n.sid, pdbid,n.residues,n.sccs,n.type,n.description,
456 n.parent] = d[0:8]
457 n.residues = Residues(n.residues)
458 n.residues.pdbid = pdbid
459 n.sunid = int(n.sunid)
460 self._sunidDict[n.sunid] = n
461 self._sidDict[n.sid] = n
462
463
464 des_list.append( self._sunidDict[int(d[0])] )
465
466 return des_list
467
468
469
470
472 """Write HIE data to SQL database"""
473 cur = handle.cursor()
474
475 cur.execute("DROP TABLE IF EXISTS hie")
476 cur.execute("CREATE TABLE hie (parent INT, child INT, PRIMARY KEY (child),\
477 INDEX (parent) )")
478
479 for p in self._sunidDict.itervalues():
480 for c in p.children:
481 cur.execute("INSERT INTO hie VALUES (%s,%s)" % (p.sunid, c.sunid))
482
483
485 """Write CLA data to SQL database"""
486 cur = handle.cursor()
487
488 cur.execute("DROP TABLE IF EXISTS cla")
489 cur.execute("CREATE TABLE cla (sunid INT, sid CHAR(8), pdbid CHAR(4),\
490 residues VARCHAR(50), sccs CHAR(10), cl INT, cf INT, sf INT, fa INT,\
491 dm INT, sp INT, px INT, PRIMARY KEY (sunid), INDEX (SID) )")
492
493 for n in self._sidDict.itervalues():
494 c = n.toClaRecord()
495 cur.execute( "INSERT INTO cla VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
496 (n.sunid, n.sid, c.residues.pdbid, c.residues, n.sccs,
497 n.getAscendent('cl').sunid, n.getAscendent('cf').sunid,
498 n.getAscendent('sf').sunid, n.getAscendent('fa').sunid,
499 n.getAscendent('dm').sunid, n.getAscendent('sp').sunid,
500 n.sunid ))
501
502
504 """Write DES data to SQL database"""
505 cur = handle.cursor()
506
507 cur.execute("DROP TABLE IF EXISTS des")
508 cur.execute("CREATE TABLE des (sunid INT, type CHAR(2), sccs CHAR(10),\
509 description VARCHAR(255),\
510 PRIMARY KEY (sunid) )")
511
512 for n in self._sunidDict.itervalues():
513 cur.execute( "INSERT INTO des VALUES (%s,%s,%s,%s)",
514 ( n.sunid, n.type, n.sccs, n.description ) )
515
516
517
519 """ A node in the Scop hierarchy
520
521 sunid -- SCOP unique identifiers. e.g. '14986'
522
523 parent -- The parent node
524
525 children -- A list of child nodes
526
527 sccs -- SCOP concise classification string. e.g. 'a.1.1.2'
528
529 type -- A 2 letter node type code. e.g. 'px' for domains
530
531 description --
532
533 """
535 """Create a Node in the scop hierarchy. If a Scop instance is provided to the
536 constructor, this will be used to lookup related references using the SQL
537 methods. If no instance is provided, it is assumed the whole tree exists
538 and is connected."""
539 self.sunid=''
540 self.parent = None
541 self.children=[]
542 self.sccs = ''
543 self.type =''
544 self.description =''
545 self.scop=scop
546
555
557 """Return an Hie.Record"""
558 rec = Hie.Record()
559 rec.sunid = str(self.sunid)
560 if self.getParent() :
561 rec.parent = str(self.getParent().sunid)
562 else:
563 rec.parent = '-'
564 for c in self.getChildren():
565 rec.children.append(str(c.sunid))
566 return rec
567
569 """Return a Des.Record"""
570 rec = Des.Record()
571 rec.sunid = str(self.sunid)
572 rec.nodetype = self.type
573 rec.sccs = self.sccs
574 rec.description = self.description
575 return rec
576
578 """Return a list of children of this Node"""
579 if self.scop is None:
580 return self.children
581 else:
582 return map ( self.scop.getNodeBySunid, self.children )
583
585 """Return the parent of this Node"""
586 if self.scop is None:
587 return self.parent
588 else:
589 return self.scop.getNodeBySunid( self.parent )
590
592 """ Return a list of all decendent nodes of the given type. Node type can a
593 two letter code or longer description. e.g. 'fa' or 'family'
594 """
595 if node_type in _nodetype_to_code:
596 node_type = _nodetype_to_code[node_type]
597
598 nodes = [self]
599 if self.scop:
600 return self.scop.getDescendentsFromSQL(self,node_type)
601 while nodes[0].type != node_type:
602 if nodes[0].type == 'px' : return []
603 child_list = []
604 for n in nodes:
605 for child in n.getChildren():
606 child_list.append( child )
607 nodes = child_list
608
609 return nodes
610
611
613 """ Return the ancenstor node of the given type, or None.Node type can a
614 two letter code or longer description. e.g. 'fa' or 'family'"""
615 if node_type in _nodetype_to_code:
616 node_type = _nodetype_to_code[node_type]
617
618 if self.scop:
619 return self.scop.getAscendentFromSQL(self,node_type)
620 else:
621 n = self
622 if n.type == node_type: return None
623 while n.type != node_type:
624 if n.type == 'ro': return None
625 n = n.getParent()
626
627 return n
628
629
630
632 """ A SCOP domain. A leaf node in the Scop hierarchy.
633
634 sid -- The SCOP domain identifier. e.g. 'd5hbib_'
635
636 residues -- A Residue object. It defines the collection
637 of PDB atoms that make up this domain.
638 """
639 - def __init__(self,scop=None):
640 Node.__init__(self,scop=scop)
641 self.sid = ''
642 self.residues = None
643
645 s = []
646 s.append(self.sid)
647 s.append(self.sccs)
648 s.append("("+str(self.residues)+")")
649
650 if not self.getParent():
651 s.append(self.description)
652 else:
653 sp = self.getParent()
654 dm = sp.getParent()
655 s.append(dm.description)
656 s.append("{"+sp.description+"}")
657
658 return " ".join(s)
659
660 - def toDesRecord(self):
661 """Return a Des.Record"""
662 rec = Node.toDesRecord(self)
663 rec.name = self.sid
664 return rec
665
666 - def toClaRecord(self):
667 """Return a Cla.Record"""
668 rec = Cla.Record()
669 rec.sid = self.sid
670 rec.residues = self.residues
671 rec.sccs = self.sccs
672 rec.sunid = self.sunid
673
674 n = self
675 while n.sunid != 0:
676 rec.hierarchy.append( (n.type, str(n.sunid)) )
677 n = n.getParent()
678
679 rec.hierarchy.reverse()
680
681 return rec
682
684 """Abstraction of the ASTRAL database, which has sequences for all the SCOP domains,
685 as well as clusterings by percent id or evalue.
686 """
687
688 - def __init__( self, dir_path=None, version=None, scop=None,
689 astral_file=None, db_handle=None):
690 """
691 Initialise the astral database.
692
693 You must provide either a directory of SCOP files:
694
695 dir_path - string, the path to location of the scopseq-x.xx directory
696 (not the directory itself), and
697 version -a version number.
698
699 or, a FASTA file:
700
701 astral_file - string, a path to a fasta file (which will be loaded in memory)
702
703 or, a MYSQL database:
704
705 db_handle - a database handle for a MYSQL database containing a table
706 'astral' with the astral data in it. This can be created
707 using writeToSQL.
708 """
709
710 if astral_file==dir_path==db_handle==None:
711 raise RuntimeError("Need either file handle, or (dir_path + "\
712 + "version) or database handle to construct Astral")
713 if not scop:
714 raise RuntimeError("Must provide a Scop instance to construct")
715
716 self.scop = scop
717 self.db_handle = db_handle
718
719
720 if not astral_file and not db_handle:
721 if dir_path == None or version == None:
722 raise RuntimeError("must provide dir_path and version")
723
724 self.version = version
725 self.path = os.path.join( dir_path, "scopseq-%s" % version)
726 astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
727 astral_file = os.path.join (self.path, astral_file)
728
729 if astral_file:
730
731 self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
732
733 self.astral_file = astral_file
734 self.EvDatasets = {}
735 self.EvDatahash = {}
736 self.IdDatasets = {}
737 self.IdDatahash = {}
738
739
741 """get domains clustered by evalue"""
742 if id not in self.EvDatasets:
743 if self.db_handle:
744 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
745
746 else:
747 if not self.path:
748 raise RuntimeError("No scopseq directory specified")
749
750 file_prefix = "astral-scopdom-seqres-sel-gs"
751 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
752 self.version)
753 filename = os.path.join(self.path,filename)
754 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
755 return self.EvDatasets[id]
756
757
759 """get domains clustered by percent id"""
760 if id not in self.IdDatasets:
761 if self.db_handle:
762 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
763
764 else:
765 if not self.path:
766 raise RuntimeError("No scopseq directory specified")
767
768 file_prefix = "astral-scopdom-seqres-sel-gs"
769 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
770 filename = os.path.join(self.path,filename)
771 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
772 return self.IdDatasets[id]
773
774
775 - def getAstralDomainsFromFile(self,filename=None,file_handle=None):
776 """Get the scop domains from a file containing a list of sids"""
777 if file_handle == filename == None:
778 raise RuntimeError("You must provide a filename or handle")
779 if not file_handle:
780 file_handle = open(filename)
781 doms = []
782 while 1:
783 line = file_handle.readline()
784 if not line:
785 break
786 line = line.rstrip()
787 doms.append(line)
788 if filename:
789 file_handle.close()
790
791 doms = filter( lambda a: a[0]=='d', doms )
792 doms = map( self.scop.getDomainBySid, doms )
793 return doms
794
795 - def getAstralDomainsFromSQL(self, column):
796 """Load a set of astral domains from a column in the astral table of a MYSQL
797 database (which can be created with writeToSQL(...)"""
798 cur = self.db_handle.cursor()
799 cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
800 data = cur.fetchall()
801 data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
802
803 return data
804
805
807 """get the seq record of a given domain from its sid"""
808 if self.db_handle is None:
809 return self.fasta_dict[domain].seq
810
811 else:
812 cur = self.db_handle.cursor()
813 cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
814 return Seq(cur.fetchone()[0])
815
819
820
821 - def hashedDomainsById(self,id):
822 """Get domains clustered by sequence identity in a dict"""
823 if id not in self.IdDatahash:
824 self.IdDatahash[id] = {}
825 for d in self.domainsClusteredById(id):
826 self.IdDatahash[id][d] = 1
827 return self.IdDatahash[id]
828
829 - def hashedDomainsByEv(self,id):
830 """Get domains clustered by evalue in a dict"""
831 if id not in self.EvDatahash:
832 self.EvDatahash[id] = {}
833 for d in self.domainsClusteredByEv(id):
834 self.EvDatahash[id][d] = 1
835 return self.EvDatahash[id]
836
837
838 - def isDomainInId(self,dom,id):
839 """Returns true if the domain is in the astral clusters for percent ID"""
840 return dom in self.hashedDomainsById(id)
841
842 - def isDomainInEv(self,dom,id):
843 """Returns true if the domain is in the ASTRAL clusters for evalues"""
844 return dom in self.hashedDomainsByEv(id)
845
846
848 """Write the ASTRAL database to a MYSQL database"""
849 cur = db_handle.cursor()
850
851 cur.execute("DROP TABLE IF EXISTS astral")
852 cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
853
854 for dom in self.fasta_dict:
855 cur.execute("INSERT INTO astral (sid,seq) values (%s,%s)",
856 (dom, self.fasta_dict[dom].seq.data))
857
858 for i in astralBibIds:
859 cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
860
861 for d in self.domainsClusteredById(i):
862 cur.execute("UPDATE astral SET id"+str(i)+"=1 WHERE sid=%s",
863 d.sid)
864
865 for ev in astralEvs:
866 cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
867
868 for d in self.domainsClusteredByEv(ev):
869
870 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1 WHERE sid=%s",
871 d.sid)
872
873 -def search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
874 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds):
875 """search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
876 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds)
877
878 Access search.cgi and return a handle to the results. See the
879 online help file for an explanation of the parameters:
880 http://scop.mrc-lmb.cam.ac.uk/scop/help.html
881
882 Raises an IOError if there's a network error.
883
884 """
885 params = {'pdb' : pdb, 'key' : key, 'sid' : sid, 'disp' : disp,
886 'dir' : dir, 'loc' : loc}
887 variables = {}
888 for k, v in params.iteritmes():
889 if v is not None:
890 variables[k] = v
891 variables.update(keywds)
892 return _open(cgi, variables)
893
894 -def _open(cgi, params={}, get=1):
895 """_open(cgi, params={}, get=1) -> UndoHandle
896
897 Open a handle to SCOP. cgi is the URL for the cgi script to access.
898 params is a dictionary with the options to pass to it. get is a boolean
899 that describes whether a GET should be used. Does some
900 simple error checking, and will raise an IOError if it encounters one.
901
902 """
903 import urllib
904 from Bio import File
905
906 options = urllib.urlencode(params)
907 if get:
908 fullcgi = cgi
909 if options:
910 fullcgi = "%s?%s" % (cgi, options)
911 handle = urllib.urlopen(fullcgi)
912 else:
913 handle = urllib.urlopen(cgi, options)
914
915
916 uhandle = File.UndoHandle(handle)
917
918 return uhandle
919