1
2
3
4
5
6
7 """Provides code to access NCBI over the WWW.
8
9 The main Entrez web page is available at:
10 http://www.ncbi.nlm.nih.gov/Entrez/
11
12 A list of the Entrez utilities is available at:
13 http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
14
15 Variables:
16 email Set the Entrez email parameter (default is not set).
17 tool Set the Entrez tool parameter (default is biopython).
18
19 Functions:
20 efetch Retrieves records in the requested format from a list of one or
21 more primary IDs or from the user's environment
22 epost Posts a file containing a list of primary IDs for future use in
23 the user's environment to use with subsequent search strategies
24 esearch Searches and retrieves primary IDs (for use in EFetch, ELink,
25 and ESummary) and term translations and optionally retains
26 results for future use in the user's environment.
27 elink Checks for the existence of an external or Related Articles link
28 from a list of one or more primary IDs. Retrieves primary IDs
29 and relevancy scores for links to Entrez databases or Related
30 Articles; creates a hyperlink to the primary LinkOut provider
31 for a specific ID and database, or lists LinkOut URLs
32 and Attributes for multiple IDs.
33 einfo Provides field index term counts, last update, and available
34 links for each database.
35 esummary Retrieves document summaries from a list of primary IDs or from
36 the user's environment.
37 egquery Provides Entrez database counts in XML for a single search
38 using Global Query.
39 espell Retrieves spelling suggestions.
40
41 read Parses the XML results returned by any of the above functions.
42 Typical usage is:
43
44 >>> handle = Entrez.einfo() # or esearch, efetch, ...
45 >>> record = Entrez.read(handle)
46
47 where record is now a Python dictionary or list.
48
49 parse Parses the XML results returned by any of the above functions,
50 returning records one by one.
51 Typical usage is:
52
53 >>> handle = Entrez.efetch(...) # or esummary, elink, ...
54 >>> records = Entrez.parse(handle)
55 >>> for record in records:
56 ... # each record is a Python dictionary or list.
57 ... print record
58
59 This function is appropriate only if the XML file contains
multiple records, and is particularly useful for large files.
61
62 _open Internally used function.
63
64 """
65 import urllib, urllib2, time, warnings
66 import os.path
67
# Module-level settings read by _open() on every request.
# email: added to the request as the "email" parameter when it is not None
# and the caller did not pass one; if it is None, _open() issues a
# UserWarning instead.
email = None
# tool: added to the request as the "tool" parameter unless the caller
# passed one explicitly.
tool = "biopython"
70
71
72
def epost(db, **keywds):
    """Upload a list of identifiers to Entrez for later use.

    EPost stores a list of UIs in the user's environment so that
    subsequent search strategies can refer to them.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    db is sent as the 'db' parameter; any further keyword arguments are
    forwarded unchanged. The request is submitted with HTTP POST.

    Returns a handle to the results.

    Raises an IOError exception if there's a network error.
    """
    # 'db' goes in first, then the caller-supplied options are layered on.
    request_params = dict({'db': db}, **keywds)
    return _open('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi',
                 request_params,
                 post=True)
90
def efetch(db, **keywds):
    """Fetches Entrez results which are returned as a handle.

    EFetch retrieves records in the requested format from a list of one or
    more UIs or from user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    Arguments:
    db        Name of the database, sent as the 'db' parameter.
    **keywds  Any further E-utility parameters (for example id, rettype),
              passed through unchanged.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="gb")
    print handle.read()
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    variables = {'db': db}
    variables.update(keywds)
    return _open(cgi, variables)
114
def esearch(db, term, **keywds):
    """ESearch runs an Entrez search and returns a handle to the results.

    ESearch searches and retrieves primary IDs (for use in EFetch, ELink
    and ESummary) and term translations, and optionally retains results
    for future use in the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    Arguments:
    db        Name of the database, sent as the 'db' parameter.
    term      The search expression, sent as the 'term' parameter.
    **keywds  Any further E-utility parameters (for example retmax),
              passed through unchanged.

    Return a handle to the results which are always in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.esearch(db="nucleotide", retmax=10, term="Opuntia")
    record = Entrez.read(handle)
    print record["Count"]
    print record["IdList"]
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    variables = {'db': db,
                 'term': term}
    variables.update(keywds)
    return _open(cgi, variables)
142
def elink(**keywds):
    """ELink checks for linked external articles and returns a handle.

    ELink checks for the existence of an external or Related Articles link
    from a list of one or more primary IDs; retrieves IDs and relevancy
    scores for links to Entrez databases or Related Articles; creates a
    hyperlink to the primary LinkOut provider for a specific ID and
    database, or lists LinkOut URLs and attributes for multiple IDs.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html

    Arguments:
    **keywds  The E-utility parameters, passed through unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
163
def einfo(**keywds):
    """EInfo returns a summary of the Entrez databases as a results handle.

    EInfo provides field names, index term counts, last update, and
    available links for each Entrez database.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html

    Arguments:
    **keywds  The E-utility parameters, passed through unchanged; may be
              omitted entirely, as in the example below.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.einfo())
    print record['DbList']
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
187
def esummary(**keywds):
    """ESummary retrieves document summaries as a results handle.

    ESummary retrieves document summaries from a list of primary IDs or
    from the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html

    Arguments:
    **keywds  The E-utility parameters, passed through unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
205
def egquery(**keywds):
    """EGQuery provides Entrez database counts for a global search.

    EGQuery provides Entrez database counts in XML for a single search
    using Global Query.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html

    Arguments:
    **keywds  The E-utility parameters, passed through unchanged.

    Return a handle to the results in XML format.

    Raises an IOError exception if there's a network error.
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
223
def espell(**keywds):
    """ESpell retrieves spelling suggestions, returned in a results handle.

    ESpell retrieves spelling suggestions, if available.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/espell_help.html

    Arguments:
    **keywds  The E-utility parameters (for example term), passed through
              unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.espell(term="biopythooon"))
    print record["Query"]
    print record["CorrectedQuery"]
    """
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
247
def read(handle, validate=True):
    """Parse a complete Entrez XML result into Python objects.

    The XML produced by NCBI's Entrez Utilities is converted into a
    multilevel structure of Python lists and dictionaries. Most XML files
    returned by the Entrez Utilities can be handled, provided the
    corresponding DTD is available; Biopython ships the DTDs for the most
    commonly used utilities.

    With validate set to True (the default) the XML is checked against the
    DTD and an error is raised for tags the DTD does not describe. With
    validate set to False such tags are simply skipped.

    Although the result looks like plain Python lists, dictionaries,
    strings and so on, each object is an instance of a class derived from
    the base type. This makes it possible to keep the attributes (if any)
    of each element in my_element.attributes and the tag name in
    my_element.tag.
    """
    from Parser import DataHandler
    return DataHandler(validate).read(handle)
272
def parse(handle, validate=True):
    """Parse an Entrez XML result record by record.

    The XML produced by NCBI's Entrez Utilities is converted into a
    multilevel structure of Python lists and dictionaries. Use this
    function for XML files that (in Python) can be represented as a list
    of individual records: where 'read' consumes the whole file and
    returns a single Python list, 'parse' is a generator function handing
    back the records one at a time, which makes it particularly useful
    for large files.

    Most XML files returned by the Entrez Utilities can be handled,
    provided the corresponding DTD is available; Biopython ships the DTDs
    for the most commonly used utilities.

    With validate set to True (the default) the XML is checked against the
    DTD and an error is raised for tags the DTD does not describe. With
    validate set to False such tags are simply skipped.

    Although each record looks like plain Python lists, dictionaries,
    strings and so on, each object is an instance of a class derived from
    the base type. This makes it possible to keep the attributes (if any)
    of each element in my_element.attributes and the tag name in
    my_element.tag.
    """
    from Parser import DataHandler
    return DataHandler(validate).parse(handle)
303
def _open(cgi, params=None, post=False):
    """Helper function to build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
    cgi     URL of the cgi script to access.
    params  Dictionary with the options to pass to it. Entries whose value
            is None are left out of the request. The dictionary itself is
            never modified. Defaults to no options.
    post    If True, send the options as HTTP POST data; otherwise append
            them to the URL as a query string (HTTP GET).

    The module-level 'tool' value is added as the "tool" option, and the
    module-level 'email' value (when set) as the "email" option, unless
    the caller already supplied them. A UserWarning is issued when no
    email address is available.

    Returns the handle produced by urllib2.urlopen.

    Will raise an IOError if it encounters a network error; errors from
    urllib2.urlopen are propagated unchanged.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # NCBI requirement: at most three requests per second, so keep at
    # least a third of a second between consecutive calls.
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current

    # Work on a private copy without the None-valued entries, so that
    # neither the caller's dictionary nor a shared default is modified,
    # and no dictionary is altered while it is being iterated over.
    if params is None:
        params = {}
    params = dict((key, value) for key, value in params.items()
                  if value is not None)

    # Tell Entrez that we are using Biopython (or whatever the user has
    # specified explicitly in the parameters or by changing the default).
    if "tool" not in params:
        params["tool"] = tool

    # Tell Entrez who we are.
    if "email" not in params:
        if email is not None:
            params["email"] = email
        else:
            warnings.warn("""
Email address is not specified.

To make use of NCBI's E-utilities, NCBI strongly recommends you to specify
your email address with each request. From June 1, 2010, this will be
mandatory. As an example, if your email address is A.N.Other@example.com, you
can specify it as follows:
from Bio import Entrez
Entrez.email = 'A.N.Other@example.com'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)

    # doseq=True expands sequence values into repeated key=value pairs.
    options = urllib.urlencode(params, doseq=True)

    # No try/except here: re-raising a caught urllib2.HTTPError would only
    # discard the original traceback, so let it propagate directly.
    if post:
        # HTTP POST
        return urllib2.urlopen(cgi, data=options)
    # HTTP GET
    return urllib2.urlopen(cgi + "?" + options)


# Time stamp of the most recent request; used by _open() for rate limiting.
_open.previous = 0
365