Package Bio :: Package Application
[hide private]
[frames | no frames]

Source Code for Package Bio.Application

  1  # Copyright 2001-2004 Brad Chapman. 
  2  # Revisions copyright 2009-2010 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """General mechanisms to access applications in Biopython. 
  8   
  9  This module is not intended for direct use. It provides the basic objects which 
 10  are subclassed by our command line wrappers, such as: 
 11   
 12   - Bio.Align.Applications 
 13   - Bio.Blast.Applications 
 14   - Bio.Emboss.Applications 
 15   - Bio.Sequencing.Applications 
 16   
 17  These modules provide wrapper classes for command line tools to help you 
 18  construct command line strings by setting the values of each parameter. 
 19  The finished command line strings are then normally invoked via the built-in 
 20  Python module subprocess. 
 21   
 22  This module also includes some deprecated functionality (function generic_run 
 23  and class ApplicationResult) which should not be used anymore. 
 24  """ 
 25  import os, sys 
 26  import StringIO 
 27  import subprocess 
 28  import re 
 29   
 30  from Bio import File 
 31   
 32  #Use this regular expresion to test the property names are going to 
 33  #be valid as Python properties or arguments 
 34  _re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*") 
 35  assert _re_prop_name.match("t") 
 36  assert _re_prop_name.match("test") 
 37  assert _re_prop_name.match("_test") is None # we don't want private names 
 38  assert _re_prop_name.match("-test") is None 
 39  assert _re_prop_name.match("test_name") 
 40  assert _re_prop_name.match("test2") 
 41  #These are reserved names in Python itself, 
 42  _reserved_names = ["and", "del", "from", "not", "while", "as", "elif", 
 43                     "global", "or", "with", "assert", "else", "if", "pass", 
 44                     "yield", "break", "except", "import", "print", "class", 
 45                     "exec", "in", "raise", "continue", "finally", "is", 
 46                     "return", "def", "for", "lambda", "try"] 
 47  #These are reserved names due to the way the wrappers work 
 48  _local_reserved_names = ["set_parameter"] 
 49   
def generic_run(commandline):
    """Run an application with the given commandline (DEPRECATED).

    The argument is expected to be a pre-built commandline object derived
    from AbstractCommandline; it is converted to a string with str() and
    executed as a child process.

    Returns a 3-tuple: an ApplicationResult object built from the
    commandline and the child's return code, followed by handles giving
    access to the captured standard output and standard error.

    WARNING - This will read in the full program output into memory!
    This may be an issue when the program writes a large amount of
    data to standard output.

    NOTE - This function is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    We now recommend you invoke subprocess directly, using str(commandline)
    to turn an AbstractCommandline wrapper into a command line string. This
    will give you full control of the tool's input and output as well.
    """
    import warnings
    warnings.warn("Bio.Application.generic_run and the associated "
                  "Bio.Application.ApplicationResult are deprecated. "
                  "Please use the Bio.Application based wrappers with "
                  "the built in Python module subprocess instead, as "
                  "described in the Biopython Tutorial.",
                  DeprecationWarning)
    # No piped input is supplied, but the standard input pipe is set up
    # anyway as a work around for a python bug if this is called from a
    # Windows GUI program.  For details, see
    # http://bugs.python.org/issue1124861
    use_shell = (sys.platform != "win32")
    process = subprocess.Popen(str(commandline),
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=use_shell)
    # Use .communicate as might get deadlocks with .wait(), see Bug 2804/2806
    stdout_data, stderr_data = process.communicate()
    # The return code is only read once communicate() has returned.
    result = ApplicationResult(commandline, process.returncode)
    stdout_handle = File.UndoHandle(StringIO.StringIO(stdout_data))
    stderr_handle = File.UndoHandle(StringIO.StringIO(stderr_data))
    return result, stdout_handle, stderr_handle
91
class ApplicationResult:
    """Make results of a program available through a standard interface (DEPRECATED).

    Collects the output information that can be derived from the command
    line object of a program and exposes it programmatically.

    NOTE - This class has been deprecated and we intend to remove it in
    a future release of Biopython.
    """
    def __init__(self, application_cl, return_code):
        """Initialize with the commandline from the program.

        application_cl -- the command line object; its parameters attribute
        is scanned for output files.
        return_code -- the return code of the application, stored as the
        return_code attribute.
        """
        import warnings
        warnings.warn("Bio.Application.ApplicationResult and the "
                      "associated function Bio.Application.generic_run "
                      "are deprecated. Please use the Bio.Application "
                      "based wrappers with the built in Python module "
                      "subprocess instead, as described in the Biopython "
                      "Tutorial.", DeprecationWarning)
        self._cl = application_cl
        self.return_code = return_code

        # The only application dependent results handled right now are
        # output files: record the value of every parameter that is typed
        # as both "file" and "output" and has been set, keyed by its final
        # (human readable) name.
        self._results = {}
        for param in self._cl.parameters:
            kinds = param.param_types
            if "file" not in kinds or "output" not in kinds:
                continue
            if param.is_set:
                self._results[param.names[-1]] = param.value

    def get_result(self, output_name):
        """Retrieve result information for the given output.

        Supports any of the defined parameters aliases (assuming the
        parameter is defined as an output).  Raises KeyError if the name
        does not lead to a recorded result.
        """
        if output_name in self._results:
            return self._results[output_name]
        # Not a final name - try the aliases...
        for param in self._cl.parameters:
            if output_name in param.names:
                return self._results[param.names[-1]]
        # No, it really was a key error:
        raise KeyError(output_name)

    def available_results(self):
        """Retrieve a sorted list of the names of all available results."""
        return sorted(self._results)
148
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or later
    using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline.asequence = "asis:ACCCGGGCGCGGT"
    >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
    >>> cline.outfile = "temp_water.txt"
    >>> print cline
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> cline
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call (e.g. using the subprocess module).
    """
    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        cmd -- stored as the program_name attribute; it is the first item
        of the generated command line string.
        kwargs -- each key must be a name or alias of one of the parameters
        and is passed to set_parameter together with its value.

        Raises AttributeError if the subclass has not defined
        self.parameters, and ValueError for a duplicated alias, an unusable
        final parameter name, or an unknown keyword argument.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should have an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments.  It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Beware of binding-versus-assignment confusion issues: each factory
        # call captures its own 'name', so every property refers to the
        # right parameter.  They do not depend on the loop below, so they
        # are defined once rather than once per parameter.
        def getter(name):
            return lambda x : x._get_parameter(name)
        def setter(name):
            return lambda x, value : x.set_parameter(name, value)
        def deleter(name):
            return lambda x : x._clear_parameter(name)

        #Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined" \
                                     % name)
                aliases.add(name)
            # The last name is the one exposed as a property/keyword.
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))
            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value.  Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            # Properties only work when installed on the class, not on the
            # instance.
            setattr(self.__class__, name, prop) #magic!
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            #Check for missing required parameters:
            if p.is_required and not(p.is_set):
                raise ValueError("Parameter %s is not set." \
                                 % p.names[-1])
            #Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        Calls _validate first, so a ValueError is raised if a required
        parameter is missing.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        pieces = ["%s " % self.program_name]
        #Each set parameter renders itself including a trailing space:
        pieces.extend(str(parameter) for parameter in self.parameters
                      if parameter.is_set)
        return "".join(pieces).strip() # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        Only parameters which are currently set are listed; switches are
        shown as name=True.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%s" % (self.__class__.__name__, repr(self.program_name))
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%s" \
                              % (parameter.names[-1], repr(parameter.value))
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch this is its is_set flag, otherwise the parameter's
        value.  Raises ValueError if no parameter has the given name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Every parameter with the given name or alias is marked as not set,
        and its value (if it has one) is reset to None.  Raises ValueError
        if no parameter has the given name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Switches have no value attribute, and _Switch.__str__
                # asserts that none exists.  Creating one here would make a
                # cleared and later re-enabled switch fail when the command
                # line is built, so only reset a value that is already there.
                if hasattr(parameter, "value"):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value = None):
        """Set a commandline option for a program.

        name -- any name or alias of a parameter.
        value -- for a switch, interpreted as a boolean (None gives a
        warning and is treated as False); for other parameters the value
        to use, which is checked with the parameter's checker function
        unless it is None.

        Raises ValueError if no parameter has the given name, or if the
        value is rejected by the checker function.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean.  None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name, parameter.checker_function)
                    parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error.  Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.  Nothing is checked when check_function
        is None.
        """
        if check_function is not None:
            is_good = check_function(value) #May raise an exception
            assert is_good in [0,1,True,False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s" \
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print cline
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']: # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value) # treat as a parameter
420 421
422 -class _AbstractParameter:
423 """A class to hold information about a parameter for a commandline. 424 425 Do not use this directly, instead use one of the subclasses. 426 """
427 - def __init__(self):
428 raise NotImplementedError
429
430 - def __str__(self):
431 raise NotImplementedError
432
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of string describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'. Note that if 'file' is included,
    these argument values will automatically be escaped if the filename
    contains spaces.

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o description -- a description of the option.

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """
    def __init__(self, names = None, types = None, checker_function = None,
                 is_required = False, description = "", equate=True):
        """Store the option's description; it starts out unset with no value.

        names and types default to a new empty list for each instance.
        """
        # A literal [] default would be created once and then shared by
        # every instance built without the argument, so a change made
        # through one instance would show up in all of them.  Lists passed
        # in by the caller are stored as given (not copied).
        if names is None:
            names = []
        if types is None:
            types = []
        self.names = names
        self.param_types = types
        self.checker_function = checker_function
        self.description = description
        self.equate = equate
        self.is_required = is_required

        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.  With no value only the first name is
        given; otherwise the name and value are joined by an equals sign
        or a space depending on the equate attribute.
        """
        # Note: Before equate was handled explicitly, the old
        # code would do either "--name " or "--name=value ",
        # or " -name " or " -name value ".  This choice is
        # now made explicitly when setting up the option.
        if self.value is None:
            return "%s " % self.names[0]
        if "file" in self.param_types:
            # Filenames containing spaces get quoted.
            v = _escape_filename(self.value)
        else:
            v = str(self.value)
        if self.equate:
            return "%s=%s " % (self.names[0], v)
        else:
            return "%s %s " % (self.names[0], v)
504
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of string describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'. Note that if 'file' is included,
    these argument values will automatically be escaped if the filename
    contains spaces.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    o is_required -- always False for a switch.

    NOTE - There is no value attribute, see is_set instead,
    """
    def __init__(self, names = None, types = None, description = ""):
        """Store the switch's description; it starts out not set.

        names and types default to a new empty list for each instance.
        """
        # A literal [] default would be created once and then shared by
        # every instance built without the argument.  Lists passed in by
        # the caller are stored as given (not copied).
        if names is None:
            names = []
        if types is None:
            types = []
        self.names = names
        self.param_types = types
        self.description = description
        self.is_set = False
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space when the switch is set; an unset switch
        gives the empty string.
        """
        # A switch must never carry a value attribute.
        assert not hasattr(self, "value")
        if self.is_set:
            return "%s " % self.names[0]
        else:
            return ""
548
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, the command line text is just the value itself
    (the names are not printed).
    """
    def __init__(self, names = None, types = None, checker_function = None,
                 is_required = False, description = ""):
        """Store the argument's description; it starts out unset with no value.

        names and types default to a new empty list for each instance.
        """
        # A literal [] default would be created once and then shared by
        # every instance built without the argument.  Lists passed in by
        # the caller are stored as given (not copied).
        if names is None:
            names = []
        if types is None:
            types = []
        self.names = names
        self.param_types = types
        self.checker_function = checker_function
        self.description = description
        self.is_required = is_required
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value followed by a space, or a single space if unset."""
        if self.value is None:
            return " "
        else:
            return "%s " % self.value
567
568 -def _escape_filename(filename):
569 """Escape filenames with spaces by adding quotes (PRIVATE). 570 571 Note this will not add quotes if they are already included: 572 573 >>> print _escape_filename('example with spaces') 574 "example with spaces" 575 >>> print _escape_filename('"example with spaces"') 576 "example with spaces" 577 """ 578 #Is adding the following helpful 579 #if os.path.isfile(filename): 580 # #On Windows, if the file exists, we can ask for 581 # #its alternative short name (DOS style 8.3 format) 582 # #which has no spaces in it. Note that this name 583 # #is not portable between machines, or even folder! 584 # try: 585 # import win32api 586 # short = win32api.GetShortPathName(filename) 587 # assert os.path.isfile(short) 588 # return short 589 # except ImportError: 590 # pass 591 if " " not in filename: 592 return filename 593 #We'll just quote it - works on Windows, Mac OS X etc 594 if filename.startswith('"') and filename.endswith('"'): 595 #Its already quoted 596 return filename 597 else: 598 return '"%s"' % filename
599
def _test():
    """Run the Bio.Application module's doctests.

    The doctests are run in verbose mode.
    """
    from doctest import testmod
    testmod(verbose=1)
if __name__ == "__main__":
    # Executed as a script rather than imported: run the doctests.
    _test()