1
2
3
4
5
6
7 """General mechanisms to access applications in Biopython.
8
9 This module is not intended for direct use. It provides the basic objects which
10 are subclassed by our command line wrappers, such as:
11
12 - Bio.Align.Applications
13 - Bio.Blast.Applications
14 - Bio.Emboss.Applications
15 - Bio.Sequencing.Applications
16
17 These modules provide wrapper classes for command line tools to help you
18 construct command line strings by setting the values of each parameter.
19 The finished command line strings are then normally invoked via the built-in
20 Python module subprocess.
21
22 This module also includes some deprecated functionality (function generic_run
23 and class ApplicationResult) which should not be used anymore.
24 """
25 import os, sys
26 import StringIO
27 import subprocess
28 import re
29
30 from Bio import File
31
32
33
34 _re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")
35 assert _re_prop_name.match("t")
36 assert _re_prop_name.match("test")
37 assert _re_prop_name.match("_test") is None
38 assert _re_prop_name.match("-test") is None
39 assert _re_prop_name.match("test_name")
40 assert _re_prop_name.match("test2")
41
42 _reserved_names = ["and", "del", "from", "not", "while", "as", "elif",
43 "global", "or", "with", "assert", "else", "if", "pass",
44 "yield", "break", "except", "import", "print", "class",
45 "exec", "in", "raise", "continue", "finally", "is",
46 "return", "def", "for", "lambda", "try"]
47
48 _local_reserved_names = ["set_parameter"]
49
def generic_run(commandline):
    """Run an application with the given commandline (DEPRECATED).

    The commandline argument is converted with str() and executed through
    subprocess.Popen (via the shell on every platform except win32).  Three
    values are returned: an ApplicationResult built from the commandline
    and the child's return code, followed by handles wrapping the captured
    standard output and standard error.

    WARNING - The full program output is read into memory!  This may be
    an issue when the program writes a large amount of data to standard
    output.

    NOTE - This function is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    We now recommend you invoke subprocess directly, using str(commandline)
    to turn an AbstractCommandline wrapper into a command line string. This
    will give you full control of the tool's input and output as well.
    """
    import warnings
    deprecation_text = ("Bio.Application.generic_run and the associated "
                        "Bio.Application.ApplicationResult are deprecated. "
                        "Please use the Bio.Application based wrappers with "
                        "the built in Python module subprocess instead, as "
                        "described in the Biopython Tutorial.")
    warnings.warn(deprecation_text, DeprecationWarning)

    # The command is passed as a single string; the shell is only bypassed
    # on Windows.
    use_shell = sys.platform != "win32"
    process = subprocess.Popen(str(commandline),
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=use_shell)

    # communicate() waits for the child to finish and returns all output.
    captured_out, captured_err = process.communicate()

    result = ApplicationResult(commandline, process.returncode)
    out_handle = File.UndoHandle(StringIO.StringIO(captured_out))
    err_handle = File.UndoHandle(StringIO.StringIO(captured_err))
    return result, out_handle, err_handle
91
class ApplicationResult:
    """Make results of a program available through a standard interface (DEPRECATED).

    This tries to pick up output information available from the program
    and make it available programmatically.

    NOTE - This class has been deprecated and we intend to remove it in
    a future release of Biopython.
    """

    def __init__(self, application_cl, return_code):
        """Initialize with the commandline from the program.

        application_cl is the command line wrapper (its parameters
        attribute is inspected), return_code is stored unchanged as
        self.return_code.
        """
        import warnings
        warnings.warn("Bio.Application.ApplicationResult and the "
                      "associated function Bio.Application.generic_run "
                      "are deprecated. Please use the Bio.Application "
                      "based wrappers with the built in Python module "
                      "subprocess instead, as described in the Biopython "
                      "Tutorial.", DeprecationWarning)
        self._cl = application_cl
        self.return_code = return_code

        # Maps the final name of every output file parameter that has
        # been set to that parameter's value.
        self._results = {}
        for parameter in self._cl.parameters:
            if "file" in parameter.param_types and \
               "output" in parameter.param_types:
                if parameter.is_set:
                    self._results[parameter.names[-1]] = parameter.value

    def get_result(self, output_name):
        """Retrieve result information for the given output.

        Supports any of the defined parameters aliases (assuming the
        parameter is defined as an output).

        Raises KeyError if output_name does not identify a recorded result.
        """
        try:
            return self._results[output_name]
        except KeyError:
            # Not stored under this name - it may be an alias, in which
            # case the result is filed under the parameter's final name.
            for parameter in self._cl.parameters:
                if output_name in parameter.names:
                    return self._results[parameter.names[-1]]
            # No parameter knows this name; re-raise the original KeyError
            # with its traceback intact.
            raise

    def available_results(self):
        """Retrieve a sorted list of all available result names.
        """
        return sorted(self._results)
148
150 """Generic interface for constructing command line strings.
151
152 This class shouldn't be called directly; it should be subclassed to
153 provide an implementation for a specific application.
154
155 For a usage example we'll show one of the EMBOSS wrappers. You can set
156 options when creating the wrapper object using keyword arguments - or later
157 using their corresponding properties:
158
159 >>> from Bio.Emboss.Applications import WaterCommandline
160 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
161 >>> cline
162 WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)
163
164 You can instead manipulate the parameters via their properties, e.g.
165
166 >>> cline.gapopen
167 10
168 >>> cline.gapopen = 20
169 >>> cline
170 WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)
171
172 You can clear a parameter you have already added by 'deleting' the
173 corresponding property:
174
175 >>> del cline.gapopen
176 >>> cline.gapopen
177 >>> cline
178 WaterCommandline(cmd='water', gapextend=0.5)
179
180 Once you have set the parameters you need, turn the object into a string:
181
182 >>> str(cline)
183 Traceback (most recent call last):
184 ...
185 ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).
186
187 In this case the wrapper knows certain arguments are required to construct
188 a valid command line for the tool. For a complete example,
189
190 >>> from Bio.Emboss.Applications import WaterCommandline
191 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
192 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
193 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
194 >>> cline.outfile = "temp_water.txt"
195 >>> print cline
196 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
197 >>> cline
198 WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
199
200 You would typically run the command line via a standard Python operating
201 system call (e.g. using the subprocess module).
202 """
def __init__(self, cmd, **kwargs):
    """Create a new instance of a command line wrapper object.

    Arguments:
     - cmd - stored as self.program_name.
     - kwargs - initial parameter values; each key/value pair is passed
       to self.set_parameter after the properties have been created.

    The subclass must have defined self.parameters beforehand.  For every
    parameter a property named after its final name (the last entry of
    its names list) is installed on the class, with the parameter's
    description as the start of the property's docstring.

    Raises AttributeError if self.parameters is missing, and ValueError
    if an alias is defined more than once or if a final name cannot be
    used as an argument/property name.
    """
    self.program_name = cmd
    try:
        parameters = self.parameters
    except AttributeError:
        raise AttributeError("Subclass should have defined self.parameters")

    # Factories for the property accessors.  Each call binds its own copy
    # of name, so the lambdas do not all end up sharing the loop variable.
    def getter(name):
        return lambda x: x._get_parameter(name)

    def setter(name):
        return lambda x, value: x.set_parameter(name, value)

    def deleter(name):
        return lambda x: x._clear_parameter(name)

    # Create properties for each parameter at run time
    aliases = set()
    for p in parameters:
        for name in p.names:
            if name in aliases:
                raise ValueError("Parameter alias %s multiply defined"
                                 % name)
            aliases.add(name)
        # The final name doubles as keyword argument and property name.
        name = p.names[-1]
        if _re_prop_name.match(name) is None:
            raise ValueError("Final parameter name %s cannot be used as "
                             "an argument or property name in python"
                             % repr(name))
        if name in _reserved_names:
            raise ValueError("Final parameter name %s cannot be used as "
                             "an argument or property name because it is "
                             "a reserved word in python" % repr(name))
        if name in _local_reserved_names:
            raise ValueError("Final parameter name %s cannot be used as "
                             "an argument or property name due to the "
                             "way the AbstractCommandline class works"
                             % repr(name))

        doc = p.description
        if isinstance(p, _Switch):
            doc += "\n\nThis property controls the addition of the %s " \
                   "switch, treat this property as a boolean." % p.names[0]
        else:
            doc += "\n\nThis controls the addition of the %s parameter " \
                   "and its associated value.  Set this property to the " \
                   "argument value required." % p.names[0]
        prop = property(getter(name), setter(name), deleter(name), doc)
        setattr(self.__class__, name, prop)

    # items() is available on both Python 2 and Python 3 (iteritems() is
    # Python 2 only).
    for key, value in kwargs.items():
        self.set_parameter(key, value)
267
269 """Make sure the required parameters have been set (PRIVATE).
270
271 No return value - it either works or raises a ValueError.
272
273 This is a separate method (called from __str__) so that subclasses may
274 override it.
275 """
276 for p in self.parameters:
277
278 if p.is_required and not(p.is_set):
279 raise ValueError("Parameter %s is not set." \
280 % p.names[-1])
281
282
284 """Make the commandline string with the currently set options.
285
286 e.g.
287 >>> from Bio.Emboss.Applications import WaterCommandline
288 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
289 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
290 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
291 >>> cline.outfile = "temp_water.txt"
292 >>> print cline
293 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
294 >>> str(cline)
295 'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
296 """
297 self._validate()
298 commandline = "%s " % self.program_name
299 for parameter in self.parameters:
300 if parameter.is_set:
301
302 commandline += str(parameter)
303 return commandline.strip()
304
def __repr__(self):
    """Return a representation of the command line object for debugging.

    The result looks like a constructor call: the class name, then the
    program name as cmd, then one keyword per parameter that is set
    (switches are shown as name=True, everything else with the repr of
    its value).

    e.g.
    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline.asequence = "asis:ACCCGGGCGCGGT"
    >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
    >>> cline.outfile = "temp_water.txt"
    >>> print cline
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> cline
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
    """
    fields = ["cmd=%s" % repr(self.program_name)]
    for parameter in self.parameters:
        if not parameter.is_set:
            continue
        label = parameter.names[-1]
        if isinstance(parameter, _Switch):
            # Switches carry no value; being set is all there is to show.
            fields.append("%s=True" % label)
        else:
            fields.append("%s=%s" % (label, repr(parameter.value)))
    return "%s(%s)" % (self.__class__.__name__, ", ".join(fields))
329
339
350
373
375 """Check whether the given value is valid.
376
377 No return value - it either works or raises a ValueError.
378
379 This uses the passed function 'check_function', which can either
380 return a [0, 1] (bad, good) value or raise an error. Either way
381 this function will raise an error if the value is not valid, or
382 finish silently otherwise.
383 """
384 if check_function is not None:
385 is_good = check_function(value)
386 assert is_good in [0,1,True,False]
387 if not is_good:
388 raise ValueError("Invalid parameter value %r for parameter %s" \
389 % (value, name))
390
392 """Set attribute name to value (PRIVATE).
393
394 This code implements a workaround for a user interface issue.
395 Without this __setattr__ attribute-based assignment of parameters
396 will silently accept invalid parameters, leading to known instances
397 of the user assuming that parameters for the application are set,
398 when they are not.
399
400 >>> from Bio.Emboss.Applications import WaterCommandline
401 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
402 >>> cline.asequence = "a.fasta"
403 >>> cline.bsequence = "b.fasta"
404 >>> cline.csequence = "c.fasta"
405 Traceback (most recent call last):
406 ...
407 ValueError: Option name csequence was not found.
408 >>> print cline
409 water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5
410
411 This workaround uses a whitelist of object attributes, and sets the
412 object attribute list as normal, for these. Other attributes are
413 assumed to be parameters, and passed to the self.set_parameter method
414 for validation and assignment.
415 """
416 if name in ['parameters', 'program_name']:
417 self.__dict__[name] = value
418 else:
419 self.set_parameter(name, value)
420
421
423 """A class to hold information about a parameter for a commandline.
424
425 Do not use this directly, instead use one of the subclasses.
426 """
428 raise NotImplementedError
429
431 raise NotImplementedError
432
434 """Represent an option that can be set for a program.
435
436 This holds UNIXish options like --append=yes and -a yes,
437 where a value (here "yes") is generally expected.
438
439 For UNIXish options like -kimura in clustalw which don't
440 take a value, use the _Switch object instead.
441
442 Attributes:
443
444 o names -- a list of string names by which the parameter can be
445 referenced (ie. ["-a", "--append", "append"]). The first name in
446 the list is considered to be the one that goes on the commandline,
447 for those parameters that print the option. The last name in the list
448 is assumed to be a "human readable" name describing the option in one
449 word.
450
451 o param_types -- a list of string describing the type of parameter,
452 which can help let programs know how to use it. Example descriptions
453 include 'input', 'output', 'file'. Note that if 'file' is included,
454 these argument values will automatically be escaped if the filename
455 contains spaces.
456
457 o checker_function -- a reference to a function that will determine
458 if a given value is valid for this parameter. This function can either
459 raise an error when given a bad value, or return a [0, 1] decision on
460 whether the value is correct.
461
462 o equate -- should an equals sign be inserted if a value is used?
463
464 o description -- a description of the option.
465
466 o is_required -- a flag to indicate if the parameter must be set for
467 the program to be run.
468
469 o is_set -- if the parameter has been set
470
471 o value -- the value of a parameter
472 """
473 - def __init__(self, names = [], types = [], checker_function = None,
474 is_required = False, description = "", equate=True):
475 self.names = names
476 self.param_types = types
477 self.checker_function = checker_function
478 self.description = description
479 self.equate = equate
480 self.is_required = is_required
481
482 self.is_set = False
483 self.value = None
484
486 """Return the value of this option for the commandline.
487
488 Includes a trailing space.
489 """
490
491
492
493
494 if self.value is None:
495 return "%s " % self.names[0]
496 if "file" in self.param_types:
497 v = _escape_filename(self.value)
498 else:
499 v = str(self.value)
500 if self.equate:
501 return "%s=%s " % (self.names[0], v)
502 else:
503 return "%s %s " % (self.names[0], v)
504
506 """Represent an optional argument switch for a program.
507
508 This holds UNIXish options like -kimura in clustalw which don't
509 take a value, they are either included in the command string
510 or omitted.
511
512 o names -- a list of string names by which the parameter can be
513 referenced (ie. ["-a", "--append", "append"]). The first name in
514 the list is considered to be the one that goes on the commandline,
515 for those parameters that print the option. The last name in the list
516 is assumed to be a "human readable" name describing the option in one
517 word.
518
519 o param_types -- a list of string describing the type of parameter,
520 which can help let programs know how to use it. Example descriptions
521 include 'input', 'output', 'file'. Note that if 'file' is included,
522 these argument values will automatically be escaped if the filename
523 contains spaces.
524
525 o description -- a description of the option.
526
527 o is_set -- if the parameter has been set
528
529 NOTE - There is no value attribute, see is_set instead,
530 """
531 - def __init__(self, names = [], types = [], description = ""):
537
539 """Return the value of this option for the commandline.
540
541 Includes a trailing space.
542 """
543 assert not hasattr(self, "value")
544 if self.is_set:
545 return "%s " % self.names[0]
546 else:
547 return ""
548
550 """Represent an argument on a commandline.
551 """
552 - def __init__(self, names = [], types = [], checker_function = None,
553 is_required = False, description = ""):
554 self.names = names
555 self.param_types = types
556 self.checker_function = checker_function
557 self.description = description
558 self.is_required = is_required
559 self.is_set = False
560 self.value = None
561
563 if self.value is None:
564 return " "
565 else:
566 return "%s " % self.value
567
569 """Escape filenames with spaces by adding quotes (PRIVATE).
570
571 Note this will not add quotes if they are already included:
572
573 >>> print _escape_filename('example with spaces')
574 "example with spaces"
575 >>> print _escape_filename('"example with spaces"')
576 "example with spaces"
577 """
578
579
580
581
582
583
584
585
586
587
588
589
590
591 if " " not in filename:
592 return filename
593
594 if filename.startswith('"') and filename.endswith('"'):
595
596 return filename
597 else:
598 return '"%s"' % filename
599
def _test():
    """Run the Bio.Application module's doctests."""
    from doctest import testmod
    testmod(verbose=1)


if __name__ == "__main__":
    # Executed as a script: run the doctests.
    _test()
608