Package BioSQL :: Module BioSeqDatabase
[hide private]
[frames | no frames]

Source Code for Module BioSQL.BioSeqDatabase

  1  # Copyright 2002 by Andrew Dalke.  All rights reserved. 
  2  # Revisions 2007-2009 copyright by Peter Cock.  All rights reserved. 
  3  # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  # 
  8  # Note that BioSQL (including the database schema and scripts) is 
  9  # available and licensed separately.  Please consult www.biosql.org 
 10  """Connect with a BioSQL database and load Biopython like objects from it. 
 11   
 12  This provides interfaces for loading biological objects from a relational 
 13  database, and is compatible with the BioSQL standards. 
 14  """ 
 15  import BioSeq 
 16  import Loader 
 17  import DBUtils 
 18   
 19  _POSTGRES_RULES_PRESENT = False # Hack for BioSQL Bug 2839 
 20   
def open_database(driver="MySQLdb", **kwargs):
    """Main interface for loading an existing BioSQL-style database.

    This function is the easiest way to retrieve a connection to a
    database, doing something like:

        >>> from BioSQL import BioSeqDatabase
        >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

    the various options are:
    driver -> The name of the database driver to use for connecting. The
    driver should implement the python DB API. By default, the MySQLdb
    driver is used.
    user -> the username to connect to the database with.
    password, passwd -> the password to connect with
    host -> the hostname of the database
    database or db -> the name of the database

    Any other keyword arguments are handed to the driver's connect()
    function unchanged.

    Returns a DBServer object wrapping the new connection.  Raises
    ValueError if the "psycopg" (version one) driver is requested.
    """
    # Declared up front; only assigned if the PostgreSQL rules are detected.
    global _POSTGRES_RULES_PRESENT

    if driver == "psycopg":
        raise ValueError("Using BioSQL with psycopg (version one) is no "
                         "longer supported. Use psycopg2 instead.")

    module = __import__(driver)
    connect = getattr(module, "connect")

    # Different drivers use different keywords, so rename the aliases on a
    # copy (the caller's dictionary is left untouched).
    kw = kwargs.copy()
    if driver == "MySQLdb":
        renames = (("database", "db"), ("password", "passwd"))
    else:
        # DB-API recommendations
        renames = (("db", "database"), ("passwd", "password"))
    for old_key, new_key in renames:
        if old_key in kw:
            kw[new_key] = kw.pop(old_key)

    if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
        kw["database"] = "template1"

    if driver in ["sqlite3"]:
        # SQLite connect takes the database name as input
        conn = connect(kw["database"])
    else:
        try:
            conn = connect(**kw)
        except module.InterfaceError:
            # Ok, so let's try building a DSN
            # (older releases of psycopg need this)
            if "database" in kw:
                kw["dbname"] = kw.pop("database")
            elif "db" in kw:
                kw["dbname"] = kw.pop("db")
            # Format each value explicitly: str.join() would raise a
            # TypeError for non-string values such as an integer port.
            dsn = " ".join(["%s=%s" % (key, value)
                            for key, value in kw.items()])
            conn = connect(dsn)

    server = DBServer(conn, module)

    # TODO - Remove the following once BioSQL Bug 2839 is fixed.
    # Test for RULES in PostgreSQL schema, see also Bug 2833.
    if driver in ["psycopg2", "pgdb"]:
        sql = "SELECT ev_class FROM pg_rewrite WHERE " + \
              "rulename='rule_bioentry_i1' OR " + \
              "rulename='rule_bioentry_i2';"
        if server.adaptor.execute_and_fetchall(sql):
            import warnings
            warnings.warn("Your BioSQL PostgreSQL schema includes some "
                          "rules currently required for bioperl-db but "
                          "which may cause problems loading data using "
                          "Biopython (see BioSQL Bug 2839). If you do not "
                          "use BioPerl, please remove these rules. "
                          "Biopython should cope with the rules present, "
                          "but with a performance penalty when loading "
                          "new records.")
            _POSTGRES_RULES_PRESENT = True

    return server
105
class DBServer:
    """Represents a BioSQL database containing namespaces (sub-databases).

    This acts like a Python dictionary, giving access to each namespace
    (defined by a row in the biodatabase table) as a BioSeqDatabase object.
    """

    def __init__(self, conn, module, module_name=None):
        """Wrap an open database connection.

        conn - the open connection object.
        module - the database driver module the connection came from.
        module_name - name used to pick the driver specific DBUtils helper
                      and the schema loading strategy; defaults to
                      module.__name__.
        """
        self.module = module
        if module_name is None:
            module_name = module.__name__
        self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name))
        self.module_name = module_name

    def __repr__(self):
        return self.__class__.__name__ + "(%r)" % self.adaptor.conn

    def __getitem__(self, name):
        """Return the namespace called name as a BioSeqDatabase object."""
        return BioSeqDatabase(self.adaptor, name)

    def __len__(self):
        """Number of namespaces (sub-databases) in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase;"
        return int(self.adaptor.execute_and_fetch_col0(sql)[0])

    def __contains__(self, value):
        """Check if a namespace (sub-database) is in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase WHERE name=%s;"
        return bool(self.adaptor.execute_and_fetch_col0(sql, (value,))[0])

    def __iter__(self):
        """Iterate over namespaces (sub-databases) in the database."""
        #TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_biodatabase_names())

    if hasattr(dict, "iteritems"):
        #Python 2, use iteritems etc
        def keys(self):
            """List of namespaces (sub-databases) in the database."""
            return self.adaptor.list_biodatabase_names()

        def values(self):
            """List of BioSeqDatabase objects in the database."""
            return [self[key] for key in self.keys()]

        def items(self):
            """List of (namespace, BioSeqDatabase) for entries in the database."""
            return [(key, self[key]) for key in self.keys()]

        def iterkeys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def itervalues(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]
    else:
        #Python 3, items etc are all iterators
        def keys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def values(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]

    def _remove_namespace(self, db_name):
        """Delete the namespace db_name and all its entries (PRIVATE).

        Raises KeyError (from the adaptor) if there is no such namespace.
        """
        db_id = self.adaptor.fetch_dbid_by_dbname(db_name)
        remover = Loader.DatabaseRemover(self.adaptor, db_id)
        remover.remove()

    def __delitem__(self, name):
        """Remove a namespace and all its entries.

        Raises KeyError if there is no namespace called name.
        """
        if name not in self:
            raise KeyError(name)
        # Do the removal directly rather than via remove_database(), so the
        # recommended 'del server[name]' spelling does not itself trigger
        # the obsolescence warning.
        self._remove_namespace(name)

    def remove_database(self, db_name):
        """Remove a namespace and all its entries (OBSOLETE).

        Try to remove all references to items in a database.

        server.remove_database(name)

        In keeping with the dictionary interface, you can now do this:

        del server[name]
        """
        import warnings
        warnings.warn("This method is obsolete.  In keeping with the dictionary interface, you can now use 'del server[name]' instead", PendingDeprecationWarning)
        self._remove_namespace(db_name)

    def new_database(self, db_name, authority=None, description=None):
        """Add a new database to the server and return it.

        Inserts a row into the biodatabase table and returns the matching
        BioSeqDatabase object.
        """
        # make the database
        sql = r"INSERT INTO biodatabase (name, authority, description)" \
              r" VALUES (%s, %s, %s)"
        self.adaptor.execute(sql, (db_name, authority, description))
        return BioSeqDatabase(self.adaptor, db_name)

    def load_database_sql(self, sql_file):
        """Load a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.

        Raises ValueError if the database driver is not one of psycopg2,
        pgdb, MySQLdb or sqlite3.
        """
        # Not sophisticated enough for PG schema. Is it needed by MySQL?
        # Looks like we need this more complicated way for both. Leaving it
        # the default and removing the simple-minded approach.

        # Read the whole file, making sure the handle is closed even if the
        # read fails.  The line endings are normalised by hand because the
        # old "rU" mode is not accepted by recent Python 3 releases.
        sql_handle = open(sql_file, "r")
        try:
            text = sql_handle.read()
        finally:
            sql_handle.close()
        lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

        # Drop comment lines and blank lines, strip the rest, and follow
        # each kept line with a single space.
        kept = []
        for line in lines:
            if line.startswith("--"):  # don't include comment lines
                continue
            if line.startswith("#"):  # ditto for MySQL comments
                continue
            stripped = line.strip()
            if stripped:  # only include non-blank lines
                kept.append(stripped + " ")
        sql = "".join(kept)

        # two ways to load the SQL
        # 1. PostgreSQL can load it all at once and actually needs to
        # due to FUNCTION defines at the end of the SQL which mess up
        # the splitting by semicolons
        if self.module_name in ["psycopg2", "pgdb"]:
            self.adaptor.cursor.execute(sql)
        # 2. MySQL needs the database loading split up into single lines of
        # SQL executed one at a time
        elif self.module_name in ["MySQLdb", "sqlite3"]:
            sql_parts = sql.split(";")  # one line per sql command
            for sql_line in sql_parts[:-1]:  # don't use the last item, it's blank
                self.adaptor.cursor.execute(sql_line)
        else:
            raise ValueError("Module %s not supported by the loader." %
                             (self.module_name))

    def commit(self):
        """Commits the current transaction to the database."""
        return self.adaptor.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.adaptor.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.adaptor.close()
265
class Adaptor:
    """Thin wrapper round a database connection and a single cursor.

    All SQL is sent through the supplied dbutils object, and the helper
    methods here turn common BioSQL queries into simple Python values.
    """

    def __init__(self, conn, dbutils):
        """Store the connection, open a cursor on it, and keep dbutils."""
        self.conn = conn
        self.cursor = conn.cursor()
        self.dbutils = dbutils

    def last_id(self, table):
        """Return whatever dbutils.last_id() reports for the given table."""
        return self.dbutils.last_id(self.cursor, table)

    def autocommit(self, y=True):
        """Set the autocommit mode. True values enable; False value disable."""
        return self.dbutils.autocommit(self.conn, y)

    def commit(self):
        """Commits the current transaction."""
        return self.conn.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.conn.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.conn.close()

    def _restrict_to_dbid(self, sql, fields, dbid):
        """Limit a bioentry query to one namespace if dbid is true (PRIVATE).

        Returns the (possibly extended) SQL string and argument list.
        """
        if dbid:
            sql += " and biodatabase_id = %s"
            fields.append(dbid)
        return sql, fields

    def fetch_dbid_by_dbname(self, dbname):
        """Return the biodatabase_id of the namespace called dbname.

        Raises KeyError if there is no such namespace.
        """
        self.execute(
            r"select biodatabase_id from biodatabase where name = %s",
            (dbname,))
        rv = self.cursor.fetchall()
        if not rv:
            raise KeyError("Cannot find biodatabase with name %r" % dbname)
        # Cannot happen (UK)
        ## assert len(rv) == 1, "More than one biodatabase with name %r" % dbname
        return rv[0][0]

    def fetch_seqid_by_display_id(self, dbid, name):
        """Return the bioentry_id of the entry whose name column is name.

        Raises IndexError if there is no match, or more than one.
        """
        sql, fields = self._restrict_to_dbid(
            r"select bioentry_id from bioentry where name = %s",
            [name], dbid)
        self.execute(sql, fields)
        rv = self.cursor.fetchall()
        if not rv:
            raise IndexError("Cannot find display id %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with display id %r" % name)
        return rv[0][0]

    def fetch_seqid_by_accession(self, dbid, name):
        """Return the bioentry_id of the entry with the given accession.

        Raises IndexError if there is no match, or more than one.
        """
        sql, fields = self._restrict_to_dbid(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        self.execute(sql, fields)
        rv = self.cursor.fetchall()
        if not rv:
            raise IndexError("Cannot find accession %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with accession %r" % name)
        return rv[0][0]

    def fetch_seqids_by_accession(self, dbid, name):
        """Return a list of bioentry_id values with the given accession."""
        sql, fields = self._restrict_to_dbid(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        return self.execute_and_fetch_col0(sql, fields)

    def fetch_seqid_by_version(self, dbid, name):
        """Return the bioentry_id matching an 'accession.version' string.

        A name with no "." in it is looked up as version "0".  Raises
        IndexError if name contains more than one ".", if there is no
        match, or if there is more than one match.
        """
        acc_version = name.split(".")
        if len(acc_version) > 2:
            raise IndexError("Bad version %r" % name)
        acc = acc_version[0]
        if len(acc_version) == 2:
            version = acc_version[1]
        else:
            version = "0"
        sql, fields = self._restrict_to_dbid(
            r"SELECT bioentry_id FROM bioentry WHERE accession = %s"
            r" AND version = %s",
            [acc, version], dbid)
        self.execute(sql, fields)
        rv = self.cursor.fetchall()
        if not rv:
            raise IndexError("Cannot find version %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with version %r" % name)
        return rv[0][0]

    def fetch_seqid_by_identifier(self, dbid, identifier):
        """Return the bioentry_id of the entry with the given identifier.

        Raises IndexError if there is no match.  If several rows match,
        the first one returned by the database is used.
        """
        # YB: was fetch_seqid_by_seqid
        sql, fields = self._restrict_to_dbid(
            "SELECT bioentry_id FROM bioentry WHERE identifier = %s",
            [identifier], dbid)
        self.execute(sql, fields)
        rv = self.cursor.fetchall()
        if not rv:
            # The lookup is on the identifier column, so say so (this used
            # to report a missing "display id").
            raise IndexError("Cannot find identifier %r" % identifier)
        return rv[0][0]

    def list_biodatabase_names(self):
        """Return a list of the names of all the namespaces."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM biodatabase")

    def list_bioentry_ids(self, dbid):
        """Return a list of the bioentry_id values in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_bioentry_display_ids(self, dbid):
        """Return a list of the bioentry names in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_any_ids(self, sql, args):
        """Return ids given a SQL statement to select for them.

        This assumes that the given SQL does a SELECT statement that
        returns a list of items. This parses them out of the 2D list
        they come as and just returns them in a list.
        """
        return self.execute_and_fetch_col0(sql, args)

    def execute_one(self, sql, args=None):
        """Execute sql and return its single result row.

        Fails an assertion if the query does not give exactly one row.
        """
        self.execute(sql, args or ())
        rv = self.cursor.fetchall()
        assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
        return rv[0]

    def execute(self, sql, args=None):
        """Just execute an sql command.
        """
        self.dbutils.execute(self.cursor, sql, args)

    def get_subseq_as_string(self, seqid, start, end):
        """Return the sequence of entry seqid from start up to end.

        start and end are Python style offsets; the SQL SUBSTR function
        counts from one, hence the start+1 below.
        """
        length = end - start
        # XXX Check this on MySQL and PostgreSQL. substr should be general,
        # does it need dbutils?
        #
        # Convert to a string on returning for databases that give back
        # unicode. Shouldn't need unicode for sequences so this seems safe.
        return str(self.execute_one(
            """select SUBSTR(seq, %s, %s)
                     from biosequence where bioentry_id = %s""",
            (start+1, length, seqid))[0])

    def execute_and_fetch_col0(self, sql, args=None):
        """Execute sql and return the first column of every result row."""
        self.execute(sql, args or ())
        return [field[0] for field in self.cursor.fetchall()]

    def execute_and_fetchall(self, sql, args=None):
        """Execute sql and return all the result rows."""
        self.execute(sql, args or ())
        return self.cursor.fetchall()

# Keyword names accepted by BioSeqDatabase.lookup(), each mapped to the
# name of the Adaptor method which turns that kind of value into a
# bioentry_id.  Several keywords are aliases sharing one method.
_allowed_lookups = dict(
    primary_id="fetch_seqid_by_identifier",
    gi="fetch_seqid_by_identifier",
    display_id="fetch_seqid_by_display_id",
    name="fetch_seqid_by_display_id",
    accession="fetch_seqid_by_accession",
    version="fetch_seqid_by_version",
)

class BioSeqDatabase:
    """Represents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all rows in the bioentry
    table associated with it.
    """

    def __init__(self, adaptor, name):
        """Look up the namespace called name using the given adaptor.

        Raises whatever adaptor.fetch_dbid_by_dbname() raises if the
        namespace cannot be found.
        """
        self.adaptor = adaptor
        self.name = name
        self.dbid = self.adaptor.fetch_dbid_by_dbname(name)

    def __repr__(self):
        return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)

    def get_Seq_by_id(self, name):
        """Gets a DBSeqRecord object by its name

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_acc(self, name):
        """Gets a DBSeqRecord object by accession number

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_ver(self, name):
        """Gets a DBSeqRecord object by version number

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seqs_by_acc(self, name):
        """Gets a list of DBSeqRecord objects by accession number

        Example: seq_recs = db.get_Seqs_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of DBSeq objects, and presumably
        was to mirror BioPerl.
        """
        seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
        return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]

    def get_all_primary_ids(self):
        """All the primary_ids of the sequences in the database (OBSOLETE).

        These maybe ids (display style) or accession numbers or
        something else completely different - they *are not*
        meaningful outside of this database implementation.

        Please use .keys() instead of .get_all_primary_ids()
        """
        import warnings
        warnings.warn("Use bio_seq_database.keys() instead of "
                      "bio_seq_database.get_all_primary_ids()",
                      PendingDeprecationWarning)
        return self.keys()

    def __getitem__(self, key):
        """Return a DBSeqRecord for the primary (internal) id key."""
        return BioSeq.DBSeqRecord(self.adaptor, key)

    def __delitem__(self, key):
        """Remove an entry and all its annotation.

        Raises KeyError if key is not an id in this namespace.
        """
        if key not in self:
            raise KeyError(key)
        #Assuming this will automatically cascade to the other tables...
        sql = "DELETE FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        self.adaptor.execute(sql, (self.dbid, key))

    def __len__(self):
        """Number of records in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s;"
        return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid,))[0])

    def __contains__(self, value):
        """Check if a primary (internal) id is in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        #The bioentry_id field is an integer in the schema.
        #PostgreSQL will throw an error if we use a non integer in the query.
        try:
            bioentry_id = int(value)
        except (TypeError, ValueError):
            # int() raises TypeError (not ValueError) for None and other
            # non-numeric objects; none of these can be a valid id.
            return False
        return bool(self.adaptor.execute_and_fetch_col0(sql,
                                                  (self.dbid, bioentry_id))[0])

    def __iter__(self):
        """Iterate over ids (which may not be meaningful outside this database)."""
        #TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_bioentry_ids(self.dbid))

    if hasattr(dict, "iteritems"):
        #Python 2, use iteritems etc
        def keys(self):
            """List of ids which may not be meaningful outside this database."""
            return self.adaptor.list_bioentry_ids(self.dbid)

        def values(self):
            """List of DBSeqRecord objects in the namespace (sub database)."""
            return [self[key] for key in self.keys()]

        def items(self):
            """List of (id, DBSeqRecord) for the namespace (sub database)."""
            return [(key, self[key]) for key in self.keys()]

        def iterkeys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def itervalues(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]
    else:
        #Python 3, items etc are all iterators
        def keys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def values(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]

    def lookup(self, **kwargs):
        """Return a DBSeqRecord found using exactly one keyword argument.

        The keyword must be one of the names in _allowed_lookups, e.g.
        db.lookup(accession='X77802').  Raises TypeError if zero or several
        keywords are given, or if the keyword is not recognised.
        """
        if len(kwargs) != 1:
            raise TypeError("single key/value parameter expected")
        # list() so this also works where items() and keys() are not lists
        k, v = list(kwargs.items())[0]
        if k not in _allowed_lookups:
            raise TypeError("lookup() expects one of %s, not %r" % \
                            (repr(list(_allowed_lookups.keys()))[1:-1], repr(k)))
        lookup_name = _allowed_lookups[k]
        lookup_func = getattr(self.adaptor, lookup_name)
        seqid = lookup_func(self.dbid, v)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_primary_id(self, seqid):
        """Get a DBSeqRecord by the primary (internal) id (OBSOLETE).

        Rather than db.get_Seq_by_primary_id(my_id) use db[my_id]

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        import warnings
        warnings.warn("Use bio_seq_database[my_id] instead of "
                      "bio_seq_database.get_Seq_by_primary_id(my_id)",
                      PendingDeprecationWarning)
        return self[seqid]

    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid, \
                                          fetch_NCBI_taxonomy)
        num_records = 0
        for cur_record in record_iterator:
            num_records += 1
            #Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            #the RULES are present check for a duplicate record before loading
            if _POSTGRES_RULES_PRESENT:
                #Recreate what the Loader's _load_bioentry_table will do:
                if cur_record.id.count(".") == 1:
                    accession, version = cur_record.id.split('.')
                    try:
                        version = int(version)
                    except ValueError:
                        accession = cur_record.id
                        version = 0
                else:
                    accession = cur_record.id
                    version = 0
                gi = cur_record.annotations.get("gi", None)
                if gi is not None:
                    # The old query always compared the identifier as a
                    # quoted string, so keep passing it as text.
                    gi = str(gi)
                # Let the driver quote the values: building the SQL by
                # string interpolation broke on values containing quotes
                # and compared a missing GI against the text 'None'.
                sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \
                      "= %s AND biodatabase_id = %s) OR (accession = " + \
                      "%s AND version = %s AND biodatabase_id = %s)"
                self.adaptor.execute(sql, (gi, self.dbid, accession,
                                           version, self.dbid))
                if self.adaptor.cursor.fetchone():
                    raise self.adaptor.conn.IntegrityError("Duplicate record "
                                     "detected: record has not been inserted")
            #End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records
671