Package pyxb :: Package utils :: Module utility
[hide private]
[frames] | no frames]

Source Code for Module pyxb.utils.utility

   1  # -*- coding: utf-8 -*- 
   2  # Copyright 2009-2013, Peter A. Bigot 
   3  # 
   4  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
   5  # not use this file except in compliance with the License. You may obtain a 
   6  # copy of the License at: 
   7  # 
   8  #            http://www.apache.org/licenses/LICENSE-2.0 
   9  # 
  10  # Unless required by applicable law or agreed to in writing, software 
  11  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
  12  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
  13  # License for the specific language governing permissions and limitations 
  14  # under the License. 
  15   
  16  """Utility functions and classes.""" 
  17   
  18  import re 
  19  import os 
  20  import errno 
  21  import pyxb 
  22  from pyxb.utils.six.moves.urllib import parse as urlparse 
  23  import time 
  24  import datetime 
  25  import logging 
  26  from pyxb.utils import six 
  27   
  28  _log = logging.getLogger(__name__) 
def BackfillComparisons (cls):
    """Class decorator that derives the remaining rich comparisons.

    The decorated class must already provide C{__eq__} and C{__lt__}.
    The methods C{__gt__}, C{__le__}, C{__ge__}, and C{__ne__} are
    unconditionally replaced by versions expressed in terms of those
    two.  Each replacement is given the matching C{__name__} and
    borrows its C{__doc__} from the same-named method of C{int}.

    The concept is derived from Python 2.7.5
    C{functools.total_ordering}.  It is still needed in Python 3,
    where C{x >= y} would otherwise use the C{__ge__} inherited from
    C{object}, which does not handle operands of different types even
    when the underlying C{y < x} would convert C{x} to be compatible.

    @param cls: the class to be augmented

    @return: C{cls}, modified in place
    """

    # Order matters only in that it mirrors installation order; each
    # entry pairs the special method name with its derived implementation.
    replacements = (
        ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))),
        ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)),
        ('__ge__', lambda self, other: not self.__lt__(other)),
        ('__ne__', lambda self, other: not self.__eq__(other)),
        )
    for (method_name, implementation) in replacements:
        implementation.__name__ = method_name
        implementation.__doc__ = getattr(int, method_name).__doc__
        setattr(cls, method_name, implementation)
    return cls

def IteratedCompareMixed (lhs, rhs):
    """Compare two sequences element by element, ranking C{None} lowest.

    Elements are compared pairwise from the start.  At the first
    position where they differ, C{None} sorts below any other value;
    otherwise the C{<} operator decides.  If one sequence is a prefix
    of the other, the shorter sequence is the lesser.

    @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
    for (left, right) in zip(lhs, rhs):
        if left is None:
            if right is None:
                continue
            return -1
        if right is None:
            return 1
        if left == right:
            continue
        return -1 if left < right else 1
    # All shared positions matched; fall back to comparing lengths.
    left_len = len(lhs)
    right_len = len(rhs)
    if left_len == right_len:
        return 0
    return -1 if left_len < right_len else 1
88
def QuotedEscaped (s):
    """Return a literal for C{s} that can be pasted into Python source.

    The work is delegated entirely to C{repr}; the language already
    provides the quoting and escaping that is needed.

    @param s: the value to render

    @rtype: C{str}
    """
    literal = repr(s)
    return literal
98
def _DefaultXMLIdentifierToPython (identifier):
    """Default pre-processor used as L{_XMLIdentifierToPython}.

    For historical reasons the identifier is passed through
    C{six.text_type}, i.e. converted to the text (unicode) type.  No
    characters are replaced.

    @param identifier : some XML identifier

    @return: C{six.text_type(identifier)}
    """
    as_text = six.text_type(identifier)
    return as_text
112
def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Install the callable L{MakeIdentifier} uses to pre-process an XML identifier.

    In Python3, identifiers can be full Unicode tokens, but in Python2,
    all identifiers must be ASCII characters.  L{MakeIdentifier} enforces
    this by removing all characters that are not valid within an
    identifier.

    An application generating bindings may be able to transliterate
    code points that are not valid Python identifier characters into
    something else.  The callable installed here gets the chance to do
    that before the invalid characters are stripped.

    The callable is responsible only for replacing whatever characters
    it wishes to.  Every transformation performed by L{MakeIdentifier}
    is still applied afterwards, so the output is a legal identifier
    regardless.

    @param xml_identifier_to_python : A callable that takes a string
    and returns a Unicode string, possibly with non-identifier
    characters replaced by other characters.  Pass C{None} to restore
    the default implementation, L{_DefaultXMLIdentifierToPython}.
    """
    global _XMLIdentifierToPython
    _XMLIdentifierToPython = (
        _DefaultXMLIdentifierToPython
        if xml_identifier_to_python is None
        else xml_identifier_to_python)

# The callable MakeIdentifier applies to its input before anything else.
# Replaced through _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# Dash, space, and period: each occurrence becomes an underscore.
_UnderscoreSubstitute_re = re.compile(r'[- .]')
# Anything other than an ASCII letter, digit, or underscore: stripped.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
# A run of underscores at the start of the string: stripped.
_PrefixUnderscore_re = re.compile(r'^_+')
# A run of digits at the start of the string: triggers the 'n' prefix.
_PrefixDigit_re = re.compile(r'^\d+')
# An underscore followed by a word character: used for camel-casing.
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Turn a string into something usable as a Python identifier.

    The steps, in order:

     - the string is handed to L{_XMLIdentifierToPython};
     - dashes, spaces, and periods become underscores;
     - characters not permitted in Python identifiers are removed;
     - leading underscores are removed;
     - optionally, the result is camel-cased (see C{camel_case});
     - if the result begins with a digit, the character 'n' is prepended;
     - if the result is empty, 'emptyString' is substituted.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, any underscore in the result
    string that is immediately followed by an alphanumeric is replaced
    by the capitalized version of that alphanumeric.  Thus,
    'one_or_two' becomes 'oneOrTwo'.  If C{False} (default), has no
    effect.

    @rtype: C{str}
    """
    ident = _XMLIdentifierToPython(s)
    ident = _UnderscoreSubstitute_re.sub('_', ident)
    ident = _NonIdentifier_re.sub('', ident)
    ident = _PrefixUnderscore_re.sub('', ident)
    if camel_case:
        # The match is '_' plus one character; keep only that character, upper-cased.
        ident = _CamelCase_re.sub(lambda match: match.group(0)[1].upper(), ident)
    if _PrefixDigit_re.match(ident):
        ident = 'n' + ident
    if not ident:
        ident = 'emptyString'
    return ident

# Union of the reserved words of Python 2 and Python 3, so generated
# bindings are valid whichever interpreter loads them.  "exec" and
# "print" are reserved only in Python 2; "nonlocal", "async", and
# "await" are reserved only in Python 3.
_PythonKeywords = frozenset( (
    "and", "as", "assert", "async", "await", "break", "class", "continue",
    "def", "del", "elif", "else", "except", "exec", "finally", "for",
    "from", "global", "if", "import", "in", "is", "lambda", "nonlocal",
    "not", "or", "pass", "print", "raise", "return", "try", "while",
    "with", "yield"
    ) )
"""Python keywords.  Note that types like int and float are not
keywords.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset( (
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    # "set" is neither a keyword nor a constant, but if some fool
    # like {http://www.w3.org/2001/SMIL20/}set gets defined there's
    # no way to access the builtin constructor.
    "set"
    ) )
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Append an underscore to C{s} if it collides with a reserved word.

    A collision is membership in L{_Keywords} or in C{aux_keywords}.
    Strings that do not collide are returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional container of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    is_reserved = (s in _Keywords) or (s in aux_keywords)
    return ('%s_' % (s,)) if is_reserved else s
226
def MakeUnique (s, in_use):
    """Return an identifier derived from C{s} that is absent from C{in_use}.

    If C{s} itself is free it is used as is.  Otherwise trailing
    underscores are stripped from C{s}, and candidates are tried in
    the order C{x_}, C{x_2}, C{x_3}, ... until a free one is found.

    @param in_use: The set of identifiers already in use in the
    relevant scope.  C{in_use} is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    if s not in in_use:
        in_use.add(s)
        return s
    stem = s.rstrip('_')
    unique = '%s_' % (stem,)
    serial = 2
    while unique in in_use:
        unique = '%s_%d' % (stem, serial)
        serial += 1
    in_use.add(unique)
    return unique
251
def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Run the full pipeline that produces a unique identifier.

    The string is converted with L{MakeIdentifier}, leading and
    trailing underscores are stripped, keyword conflicts are resolved
    with L{DeconflictKeyword}, the visibility prefix (if any) is
    added, and the result is made unique with L{MakeUnique}.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is also set.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    bare = MakeIdentifier(s).strip('_')
    ident = DeconflictKeyword(bare, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + ident, in_use)
287
# @todo: descend from pyxb.cscRoot, if we import pyxb
class _DeconflictSymbols_mixin (object):
    """Mix-in used to deconflict public symbols in classes that may be
    inherited by generated binding classes.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  It is possible that an XML schema might
    include tags and attribute names that match these symbols.  To
    avoid conflict, the reserved symbols marked in this class are
    added to the pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no leading underscores) are
    currently supported.  (Private symbols can't be deconflicted that
    easily, and no protected symbols that derive from the XML are
    created by the binding generator.)
    """

    # Empty here; subclasses replace it with their own set as shown in
    # the class docstring.
    _ReservedSymbols = set()
    """There are no reserved symbols in the base class."""

# Regular expression detecting tabs, carriage returns, and line feeds
__TabCRLF_re = re.compile("[\t\n\r]")
# Regular expression detecting sequences of one or more spaces; each
# such run is replaced by a single space when collapsing whitespace.
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Apply one of the XML Schema whitespace normalizations to C{text}.

    The caller must set exactly one of the C{preserve}, C{replace},
    and C{collapse} keyword parameters to C{True}.  They are examined
    in that order.

     - C{preserve}: the text is returned unchanged.

     - C{replace}: all tabs, newlines, and carriage returns are
     replaced with ASCII spaces.

     - C{collapse}: the C{replace} normalization is done, then each
     run of spaces is replaced by a single space and leading and
     trailing whitespace is stripped.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @raise Exception: if none of the three keywords is true.

    @rtype: C{str}
    """
    if preserve:
        return text
    replaced = __TabCRLF_re.sub(' ', text)
    if replace:
        return replaced
    if not collapse:
        # pyxb not imported here; could be.
        raise Exception('NormalizeWhitespace: No normalization specified')
    return __MultiSpace_re.sub(' ', replaced).strip()
353
class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    Nodes are stored in sets and used as dictionary keys, so they must
    be hashable.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.
    """

    # Class-level defaults so that cached results read as "not yet
    # computed" until the corresponding method has been invoked.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: If not C{None}, the initial content of the
        cached root set.  The node is not added to the node set.
        """
        self.__roots = None
        if root is not None:
            self.__roots = set([root])
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop is not recorded as an incoming edge, so a node
        # whose only incoming edge is from itself is still a root.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any nodes that had been manually added
        using L{addRoot} will no longer be in the set, unless they
        also have no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has incoming edges.

        The node need not be present in the graph (if necessary, it is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The walk starts from each node in L{roots}, and then from every
        node that was not reached that way, so cycles that no root
        leads to are still found.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged.

        @raise Exception: if the graph has nodes but no roots."""

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        # Pick up nodes no root leads to (e.g. a cycle disconnected from
        # every root).  _tarjan returns immediately for visited nodes.
        for v in self.__nodes:
            self._tarjan(v)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                # Unvisited: recurse, then inherit its low link.
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__stack:
                # Still on the stack: part of the component being built.
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v heads a component: everything above it on the stack,
            # together with v itself, belongs to it.
            scc = []
            while True:
                scc.append(self.__stack.pop())
                if v == scc[-1]:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
                for member in scc:
                    self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the strongly-connected components of the graph that
        contain more than one node.

        The data structure is a list, each element of which is itself
        a list containing two or more nodes from the graph.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        Only nodes that belong to a component with more than one node
        appear in the map.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a component earlier in the list indicates that it
        has no dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the map produced by Tarjan's algorithm."""

        return self.sccMap(**kw).get(node)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        Computed as edges - nodes + 2 * (number of components
        returned by L{scc})."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        """Post-order depth-first walk from C{source}, appending to the DFS order."""
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return a GraphViz DOT description of the graph as a string.

        @keyword title: the name given to the digraph.

        @keyword labeller: optional callable mapping a node to its
        label text; if C{None}, C{str} is used.  Self-loops are not
        emitted.
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.

        @keyword reset: If C{True}, discard cached results and recompute the order.

        @raise Exception: if the walk from the roots does not reach
        every node."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

    def rootSetOrder (self):
        """Return the nodes of the graph as a sequence of sets.

        The first set holds the nodes that have no outgoing edges,
        i.e. that depend on nothing.  Each following set holds the
        nodes whose outgoing edges all lead to nodes in earlier sets.
        This continues until all nodes have been placed.  The sets
        impose a partial order on the nodes, without being as
        constraining as L{sccOrder}.

        @return: a list of the sets, or C{None} (after logging an
        error) if a dependency cycle prevents completion."""
        order = []
        nodes = set(self.__nodes)
        # Work on a copy: the target sets are consumed as nodes are placed.
        edge_map = {}
        for (source, targets) in six.iteritems(self.__edgeMap):
            edge_map[source] = targets.copy()
        while nodes:
            freeset = set()
            for n in nodes:
                if not (n in edge_map):
                    freeset.add(n)
            if 0 == len(freeset):
                _log.error('dependency cycle in named components')
                return None
            order.append(freeset)
            nodes.difference_update(freeset)
            new_edge_map = {}
            for (source, targets) in six.iteritems(edge_map):
                targets.difference_update(freeset)
                if 0 != len(targets):
                    new_edge_map[source] = targets
            edge_map = new_edge_map
        return order

# Module-wide prefix rewrite table consulted by NormalizeLocation when
# the caller does not supply one.  Updated in place, never rebound.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Replace the contents of the map L{NormalizeLocation} uses to rewrite URI prefixes.

    The module-level map is emptied and then filled from
    C{prefix_map}; the map object itself stays the same.

    @param prefix_map: a mapping from URI prefix to its replacement
    """

    rewrite_table = LocationPrefixRewriteMap_
    rewrite_table.clear()
    rewrite_table.update(prefix_map)
669
def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that is
    done for C{schemaLocation} attribute values.

    Processing happens in three stages:

     - C{uri} is joined to C{parent_uri} (when one is given) to form
     an absolute URI;
     - every prefix in the rewrite map that the URI starts with is
     replaced by its substitute, e.g. to redirect a remote URI to a
     local repository;
     - if the result contains no colon it is treated as a file system
     path and resolved with C{os.path.realpath}.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.

    """
    if uri is None:
        return uri
    abs_uri = uri if parent_uri is None else urlparse.urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    for (old_prefix, new_prefix) in six.iteritems(prefix_map):
        if abs_uri.startswith(old_prefix):
            abs_uri = new_prefix + abs_uri[len(old_prefix):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri
710

def DataFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as raw data.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    @param uri: the location to read.

    @keyword archive_directory: if set, and the data did not come
    from a local file, a copy of the data is saved in this directory
    under the base name of the URI path (with a numeric suffix if
    that name is taken).  Failure to save is logged, not raised.

    @return: the data read from the stream.

    @raise Exception: whatever prevented the location from being
    opened; it is logged before being raised."""

    from pyxb.utils.six.moves.urllib.request import urlopen
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urlopen(uri)
        except Exception as e:
            exc = e
        if (stream is None) and six.PY2:
            import urllib
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the exception recorded from the first attempt.
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri, 'rb')
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    try:
        try:
            # Protect this in case whatever stream is doesn't have an fp
            # attribute.  Data read from a local file is not archived.
            if isinstance(stream, six.file) or isinstance(stream.fp, six.file):
                archive_directory = None
        except Exception:
            pass
        xmld = stream.read()
    finally:
        # Release the file or connection whether or not the read worked.
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmld)
            finally:
                archive_fp.close()
        except OSError as e:
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmld
768
def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories in the path to the file are created.

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8, ignoring undecodable bytes) causes an
    C{OSError} to be raised with C{errno} set to C{EEXIST}.  I.e.,
    only files with this value in the first 4KB will be returned for
    writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: if the directory cannot be created, or the tag
    check fails.
    """
    (path, leaf) = os.path.split(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            fp.seek(0, os.SEEK_SET)
            blockd = fp.read(4096)
            # The 4096-byte window can end in the middle of a multi-byte
            # character; ignore undecodable bytes instead of failing.
            blockt = blockd.decode('utf-8', 'ignore')
            if 0 > blockt.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0, os.SEEK_SET)
            fp.truncate()
        else:
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6.
# __Hasher is the hash constructor used by HashForText: SHA-1 from
# hashlib when that module can be imported, otherwise the constructor
# from the legacy sha module.
__Hasher = None
try:
    import hashlib
    __Hasher = hashlib.sha1
except ImportError:
    import sha
    __Hasher = sha.new

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  Text of the Unicode string type
    is encoded as UTF-8 before hashing; anything else is hashed as
    given.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    data = text.encode('utf-8') if isinstance(text, six.text_type) else text
    return __Hasher(data).hexdigest()

# uuid didn't show up until 2.5
# __HaveUUID records whether the uuid module could be imported; when
# it could not, random is imported instead for use by _NewUUIDString.
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import random
def _NewUUIDString ():
    """Obtain a UUID using the best available method.

    When the C{uuid} module is available the result is the URN of a
    C{uuid1} value.  Otherwise the result is a string combining the
    current date and time (to the second) with a random 32-bit number
    in hexadecimal.

    @rtype: C{str}
    """
    if not __HaveUUID:
        timestamp = time.strftime('%Y%m%d%H%M%S')
        return '%s:%08.8x' % (timestamp, random.randint(0, 0xFFFFFFFF))
    return uuid.uuid1().urn
853
854 -class UniqueIdentifier (object):
855 """Records a unique identifier, generally associated with a 856 binding generation action. 857 858 The identifier is a string, but gets wrapped in an instance of 859 this class to optimize comparisons and reduce memory footprint. 860 861 Invoking the constructor for this class on the same string 862 multiple times will return the same Python object. 863 864 An instance of this class compares equal to, and hashes equivalent 865 to, the uid string. When C{str}'d, the result is the uid; when 866 C{repr}'d, the result is a constructor call to 867 C{pyxb.utils.utility.UniqueIdentifier}. 868 """ 869 870 # A map from UID string to the instance that represents it 871 __ExistingUIDs = {} 872
873 - def uid (self):
874 """The string unique identifier""" 875 return self.__uid
876 __uid = None 877 878 # Support pickling, which is done using only the UID.
879 - def __getnewargs__ (self):
880 return (self.__uid,)
881
882 - def __getstate__ (self):
883 return self.__uid
884
885 - def __setstate__ (self, state):
886 assert self.__uid == state
887 888 # Singleton-like
889 - def __new__ (cls, *args):
890 if 0 == len(args): 891 uid = _NewUUIDString() 892 else: 893 uid = args[0] 894 if isinstance(uid, UniqueIdentifier): 895 uid = uid.uid() 896 if not isinstance(uid, six.string_types): 897 raise TypeError('UniqueIdentifier uid must be a string') 898 rv = cls.__ExistingUIDs.get(uid) 899 if rv is None: 900 rv = super(UniqueIdentifier, cls).__new__(cls) 901 rv.__uid = uid 902 cls.__ExistingUIDs[uid] = rv 903 return rv
904
905 - def associateObject (self, obj):
906 """Associate the given object witth this identifier. 907 908 This is a one-way association: the object is not provided with 909 a return path to this identifier instance.""" 910 self.__associatedObjects.add(obj)
911 - def associatedObjects (self):
912 """The set of objects that have been associated with this 913 identifier instance.""" 914 return self.__associatedObjects
915 __associatedObjects = None 916
917 - def __init__ (self, uid=None):
918 """Create a new UniqueIdentifier instance. 919 920 @param uid: The unique identifier string. If present, it is 921 the callers responsibility to ensure the value is universally 922 unique. If C{None}, one will be provided. 923 @type uid: C{str} or C{unicode} 924 """ 925 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid()) 926 self.__associatedObjects = set()
927
928 - def __eq__ (self, other):
929 if other is None: 930 return False 931 elif isinstance(other, UniqueIdentifier): 932 other_uid = other.uid() 933 elif isinstance(other, six.string_types): 934 other_uid = other 935 else: 936 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),)) 937 return self.uid() == other_uid
938
def __hash__ (self):
    """Hash on the UID string, consistent with equality being decided
    by the UID."""
    uid = self.uid()
    return hash(uid)
941
def __str__ (self):
    """The string form of an identifier is its UID."""
    uid = self.uid()
    return uid
944
def __repr__ (self):
    """Return a constructor-style expression naming the fully
    qualified class and the quoted UID."""
    quoted_uid = repr(self.uid())
    return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (quoted_uid,)
947
@BackfillComparisons
class UTCOffsetTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass that helps deal with UTC
    conversions in an ISO8601 world.

    This class only supports fixed offsets from UTC.
    """

    # Regular expression that matches valid ISO8601 time zone suffixes.
    # Written as a raw string so the \d escapes reach the regex engine
    # instead of being treated as (invalid) string escapes.
    __Lexical_re = re.compile(r'^([-+])(\d\d):(\d\d)$')

    # The offset in minutes east of UTC.
    __utcOffset_min = 0

    # Same as __utcOffset_min, but as a datetime.timedelta
    __utcOffset_td = None

    # A zero-length duration
    __ZeroDuration = datetime.timedelta(0)

    # Range limits
    __MaxOffset_td = datetime.timedelta(hours=14)

    def __init__ (self, spec=None):
        """Create a time zone instance with a fixed offset from UTC.

        @param spec: Specifies the offset.  Can be an integer counting
        minutes east of UTC, a C{datetime.timedelta}, the value
        C{None} (equal to 0 minutes east), or a string that conforms
        to the ISO8601 time zone sequence (B{Z}, or B{[+-]HH:MM}).
        @raise ValueError: the string is not a valid time zone suffix,
        or the offset is more than 14 hours from UTC.
        @raise TypeError: C{spec} is of an unsupported type.
        """

        if spec is not None:
            if isinstance(spec, six.string_types):
                if 'Z' == spec:
                    self.__utcOffset_min = 0
                else:
                    match = self.__Lexical_re.match(spec)
                    if match is None:
                        raise ValueError('Bad time zone: %s' % (spec,))
                    self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
                    if '-' == match.group(1):
                        self.__utcOffset_min = - self.__utcOffset_min
            elif isinstance(spec, int):
                self.__utcOffset_min = spec
            elif isinstance(spec, datetime.timedelta):
                # A negative timedelta is normalized to a negative day
                # count plus a non-negative seconds field, so the days
                # must be included or western offsets come out wrong.
                self.__utcOffset_min = (spec.days * 86400 + spec.seconds) // 60
            else:
                raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
        self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
        if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
            raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
        if 0 == self.__utcOffset_min:
            self.__tzName = 'Z'
        elif 0 > self.__utcOffset_min:
            self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
        else:
            self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

    def utcoffset (self, dt):
        """Returns the constant offset for this zone."""
        return self.__utcOffset_td

    def tzname (self, dt):
        """Return the name of the timezone in the format expected by XML Schema."""
        return self.__tzName

    def dst (self, dt):
        """Returns a constant zero duration."""
        return self.__ZeroDuration

    def __otherForComparison (self, other):
        """Return the offset of C{other} in minutes east of UTC, so it
        can be compared with this instance's integer minute count.

        For a foreign C{tzinfo} the offset in effect at the current
        local time is used.
        """
        if isinstance(other, UTCOffsetTimeZone):
            return other.__utcOffset_min
        offset = other.utcoffset(datetime.datetime.now())
        if isinstance(offset, datetime.timedelta):
            # Convert to minutes: an int never equals a timedelta, and
            # ordering the two raises on Python 3.
            return (offset.days * 86400 + offset.seconds + offset.microseconds / 1e6) / 60.0
        return offset

    def __hash__ (self):
        return hash(self.__utcOffset_min)

    def __eq__ (self, other):
        return self.__utcOffset_min == self.__otherForComparison(other)

    def __lt__ (self, other):
        return self.__utcOffset_min < self.__otherForComparison(other)
1032
class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass describing the local time zone.

    Offsets come from the C{time} module's C{timezone}, C{altzone}
    and C{daylight} values, read once when the class is defined.
    Largely borrowed from the C{datetime.tzinfo} documentation in
    Python 2.5.1.
    """

    # Offset from UTC in standard time, and in daylight time (the
    # same value when the platform reports no DST rules).
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    __DSTOffset = datetime.timedelta(seconds=-time.altzone) if time.daylight else __STDOffset
    __ZeroDelta = datetime.timedelta(0)
    # The DST adjustment itself: daylight offset minus standard offset.
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the local offset from UTC in effect at C{dt}."""
        return self.__DSTOffset if self.__isDST(dt) else self.__STDOffset

    def dst (self, dt):
        """Return the DST adjustment in effect at C{dt} (zero outside DST)."""
        return self.__DSTDelta if self.__isDST(dt) else self.__ZeroDelta

    def tzname (self, dt):
        """Return the platform's zone name for C{dt}, picked from
        C{time.tzname} by whether DST is in effect."""
        in_dst = self.__isDST(dt)
        return time.tzname[in_dst]

    def __isDST (self, dt):
        """Ask the C library whether DST applies to the wall-clock
        fields of C{dt}, read as local time."""
        # The trailing -1 leaves the DST flag for mktime to determine.
        fields = (dt.year, dt.month, dt.day,
                  dt.hour, dt.minute, dt.second,
                  0, 0, -1)
        local = time.localtime(time.mktime(fields))
        return 0 < local.tm_isdst
1065
class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    The C{__getstate__} defined here returns a copy of
    C{self.__dict__} from which selected members have been dropped.
    If a class "Class" defines a class member variable named
    C{__PrivateTransient} that contains the string "s", then the
    private variable "_Class__s" is removed from the state
    dictionary.  This keeps unnecessary fields out of instances
    placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without
    requiring a C{__getstate__} method in every class of the
    instance hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    Users of this mixin are responsible for defining the
    C{__PrivateTransient} class variable and for adding the required
    variable names to it.

    Subclasses may define their own C{__getstate__}, which may or
    may not call the superclass one.  In that case the class
    defining C{__getstate__} should list L{PrivateTransient_mixin}
    among its direct superclasses, lest the latter end up earlier in
    the mro and consequently bypass the local override.
    """

    # Suffix used when creating the class member variable in which the
    # transient members are cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without the
        members declared transient anywhere in the class hierarchy."""
        klass = self.__class__
        suffix = self.__Attribute
        # The aggregate set is cached on the class under a name that
        # differs slightly (trailing underscore) from the per-class
        # declarations, so the scan below does not pick it up.
        cache_name = '_%s%s_' % (klass.__name__, suffix)
        transient = getattr(klass, cache_name, None)
        if transient is None:
            transient = set()
            for ancestor in klass.mro():
                for (key, names) in six.iteritems(ancestor.__dict__):
                    if not key.endswith(suffix):
                        continue
                    # Strip the suffix to recover the mangled class
                    # prefix, then rebuild each private member name.
                    owner = key[:-len(suffix)]
                    transient.update('%s__%s' % (owner, _n) for _n in names)
            setattr(klass, cache_name, transient)
        state = self.__dict__.copy()
        for key in transient:
            # Entries that are absent or hold None are left alone.
            if state.get(key) is not None:
                del state[key]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in six.iteritems(state):
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state
1128
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile(r'.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a file rather than a directory
    is itself the candidate.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.  The
    pattern is searched in the bare file name for directory entries,
    and in the whole path for an entry that names a file.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.  It is substituted at most once.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: A list of matching file paths, in the order found.  The
    paths are built from the supplied entries, so they are absolute
    only if those entries are.
    """
    matching_files = []
    path_set = path.split(os.pathsep)
    while path_set:
        path = path_set.pop(0)
        if default_path_wildcard == path:
            if default_path is not None:
                # Splice the default entries in at the front so they
                # are searched at the wildcard's position; clearing
                # default_path makes later wildcards expand to nothing.
                path_set[0:0] = default_path.split(os.pathsep)
                default_path = None
            continue
        recursive = False
        if (prefix_pattern is not None) and path.startswith(prefix_pattern):
            path = os.path.join(prefix_substituend, path[len(prefix_pattern):])
        if path.endswith('//'):
            recursive = True
            path = path[:-2]
        if os.path.isfile(path):
            if (pattern is None) or (pattern.search(path) is not None):
                matching_files.append(path)
        else:
            for (root, dirs, files) in os.walk(path):
                for f in files:
                    if (pattern is None) or (pattern.search(f) is not None):
                        matching_files.append(os.path.join(root, f))
                if not recursive:
                    # Only the top directory is wanted: stop after the
                    # first triple yielded by os.walk.
                    break
    return matching_files
1201
@BackfillComparisons
class Location (object):
    """A position within some resource: a location base (expected to
    be a URI), a line number and a column number.  Any of the three
    may be C{None}.

    Instances are ordered and compared by location base, then line
    number, then column number; within a field C{None} sorts after
    any real value.
    """

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """@param location_base: Identifies the resource; a C{str}
        value is interned.
        @param line_number: Line within the resource, or C{None}.
        @param column_number: Column within the line, or C{None}.
        """
        if isinstance(location_base, str):
            # Interning lets locations with the same base share a
            # single string object.
            location_base = six.intern(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Return a new L{Location} with the same base as this one.

        @param locator: Optional object providing C{getLineNumber()}
        and C{getColumnNumber()}; when given, its values replace the
        explicit C{line_number} and C{column_number}.
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: a locator that cannot report a position
                # leaves whatever values are already in hand.  Only
                # ordinary errors are swallowed, so KeyboardInterrupt
                # and SystemExit still propagate.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __cmpSingleUnlessNone (self, v1, v2):
        """Three-way compare of one field.

        @return: C{None} if both values are C{None}; otherwise -1, 0
        or 1, with a lone C{None} ordered after the other value.
        """
        if v1 is None:
            if v2 is None:
                return None
            return 1
        if v2 is None:
            return -1
        if v1 < v2:
            return -1
        if v1 == v2:
            return 0
        return 1

    def __cmpTupleUnlessNone (self, v1, v2):
        """Compare two locations field by field.

        @return: The first non-zero field comparison, or C{0}/C{None}
        if no field distinguishes the two locations.
        """
        # Move on to the next field whenever the current one does not
        # decide the order: both absent (None) or both equal (0).
        # Stopping at 0 would make every pair of locations with the
        # same base compare equal regardless of line and column.
        rv = self.__cmpSingleUnlessNone(v1.__locationBase, v2.__locationBase)
        if not rv:
            rv = self.__cmpSingleUnlessNone(v1.__lineNumber, v2.__lineNumber)
        if not rv:
            rv = self.__cmpSingleUnlessNone(v1.__columnNumber, v2.__columnNumber)
        return rv

    def __hash__ (self):
        return hash((self.__locationBase, self.__lineNumber, self.__columnNumber))

    def __eq__ (self, other):
        """Comparison by locationBase, then lineNumber, then columnNumber."""
        if other is None:
            return False
        if not isinstance(other, Location):
            # Something that is not a location is simply unequal,
            # rather than failing on its missing private fields.
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return True
        return 0 == rv

    def __lt__ (self, other):
        """Ordering by locationBase, then lineNumber, then columnNumber.
        Nothing is less than C{None}."""
        if other is None:
            return False
        rv = self.__cmpTupleUnlessNone(self, other)
        if rv is None:
            return False
        return -1 == rv

    def __str__ (self):
        if self.locationBase is None:
            lb = '<unknown>'
        else:
            # No, this should not be os.sep.  The location is
            # expected to be a URI.
            lb = self.locationBase.rsplit('/', 1)[-1]
        return '%s[%s:%s]' % (lb, self.lineNumber, self.columnNumber)

    def __repr__ (self):
        t = type(self)
        ctor = '%s.%s' % (t.__module__, t.__name__)
        return '%s(%s, %r, %r)' % (ctor, repr2to3(self.__locationBase), self.__lineNumber, self.__columnNumber)
1282
class Locatable_mixin (pyxb.cscRoot):
    """Mixin that gives an object an optional location, supplied at
    construction through the C{location} keyword or assigned later."""

    # Location associated with the instance; None until provided.
    __location = None

    def __init__ (self, *args, **kw):
        """Remove the C{location} keyword (default C{None}) from the
        arguments, keep it, and pass everything else up the
        constructor chain."""
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the stored location."""
        self.__location = location

    def _location (self):
        """Return the stored location, or C{None} if there is none."""
        location = self.__location
        return location
1295
def repr2to3 (v):
    """Filtered built-in repr for python 2/3 compatibility in
    generated bindings.

    Every generated string value is meant to be unicode, and
    unicode_literals is always imported from __future__, so strings
    should appear in plain quotes: a leading C{u} prefix is removed.

    Integer constants lose any trailing C{L} suffix, even when they
    would not fit in a Python2 int.  The references generated through
    this function are never used for calculations, so the implicit
    cast to a larger type is sufficient.

    Any other value is rendered with its standard representation.
    """
    if isinstance(v, six.string_types):
        quoted = QuotedEscaped(v)
        return quoted[1:] if 'u' == quoted[0] else quoted
    text = repr(v)
    if isinstance(v, six.integer_types) and text.endswith('L'):
        return text[:-1]
    return text
1322