1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Utility functions and classes."""
17
18 import re
19 import os
20 import errno
21 import pyxb
22 from pyxb.utils.six.moves.urllib import parse as urlparse
23 import time
24 import datetime
25 import logging
26 from pyxb.utils import six
27
28 _log = logging.getLogger(__name__)
31 """Class decorator that fills in missing ordering methods.
32
33 Concept derived from Python 2.7.5 functools.total_ordering,
34 but this version requires that __eq__ and __lt__ be provided,
35 and unconditionally overrides __ne__, __gt__, __le__, and __ge__
36 with the derived versions.
37
38 This is still necessary in Python 3 because in Python 3 the
39 comparison x >= y is done by the __ge__ inherited from object,
40 which does not handle the case where x and y are not the same type
41 even if the underlying y < x would convert x to be compatible. """
42
43 def applyconvert (cls, derived):
44 for (opn, opx) in derived:
45 opx.__name__ = opn
46 opx.__doc__ = getattr(int, opn).__doc__
47 setattr(cls, opn, opx)
48
49 applyconvert(cls, (
50 ('__gt__', lambda self, other: not (self.__lt__(other) or self.__eq__(other))),
51 ('__le__', lambda self, other: self.__lt__(other) or self.__eq__(other)),
52 ('__ge__', lambda self, other: not self.__lt__(other))
53 ))
54 applyconvert(cls, (
55 ('__ne__', lambda self, other: not self.__eq__(other)),
56 ))
57 return cls
58
60 """Tuple comparison that permits C{None} as lower than any value,
61 and defines other cross-type comparison.
62
63 @return: -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs."""
64 li = iter(lhs)
65 ri = iter(rhs)
66 while True:
67 try:
68 (lv, rv) = (next(li), next(ri))
69 if lv is None:
70 if rv is None:
71 continue
72 return -1
73 if rv is None:
74 return 1
75 if lv == rv:
76 continue
77 if lv < rv:
78 return -1
79 return 1
80 except StopIteration:
81 nl = len(lhs)
82 nr = len(rhs)
83 if nl < nr:
84 return -1
85 if nl == nr:
86 return 0
87 return 1
88
90 """Convert a string into a literal value that can be used in Python source.
91
92 This just calls C{repr}. No point in getting all complex when the language
93 already gives us what we need.
94
95 @rtype: C{str}
96 """
97 return repr(s)
98
100 """Default implementation for _XMLIdentifierToPython
101
102 For historical reasons, this converts the identifier from a str to
103 unicode in the system default encoding. This should have no
104 practical effect.
105
106 @param identifier : some XML identifier
107
108 @return: C{unicode(identifier)}
109 """
110
111 return six.text_type(identifier)
112
114 """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier.
115
116 In Python3, identifiers can be full Unicode tokens, but in Python2,
117 all identifiers must be ASCII characters. L{MakeIdentifier} enforces
118 this by removing all characters that are not valid within an
119 identifier.
120
121 In some cases, an application generating bindings may be able to
122 transliterate Unicode code points that are not valid Python identifier
123 characters into something else. This callable can be assigned to
124 perform that translation before the invalid characters are
125 stripped.
126
127 It is not the responsibility of this callable to do anything other
128 than replace whatever characters it wishes to. All
129 transformations performed by L{MakeIdentifier} will still be
130 applied, to ensure the output is in fact a legal identifier.
131
132 @param xml_identifier_to_python : A callable that takes a string
133 and returns a Unicode, possibly with non-identifier characters
134 replaced by other characters. Pass C{None} to reset to the
135 default implementation, which is L{_DefaultXMLIdentifierToPython}.
136
137 @rtype: C{unicode}
138 """
139 global _XMLIdentifierToPython
140 if xml_identifier_to_python is None:
141 xml_identifier_to_python = _DefaultXMLIdentifierToPython
142 _XMLIdentifierToPython = xml_identifier_to_python
143
144 _XMLIdentifierToPython = _DefaultXMLIdentifierToPython
145
146 _UnderscoreSubstitute_re = re.compile(r'[- .]')
147 _NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
148 _PrefixUnderscore_re = re.compile(r'^_+')
149 _PrefixDigit_re = re.compile(r'^\d+')
150 _CamelCase_re = re.compile(r'_\w')
153 """Convert a string into something suitable to be a Python identifier.
154
155 The string is processed by L{_XMLIdentifierToPython}. Following
156 this, dashes, spaces, and periods are replaced by underscores, and
157 characters not permitted in Python identifiers are stripped.
158 Furthermore, any leading underscores are removed. If the result
159 begins with a digit, the character 'n' is prepended. If the
160 result is the empty string, the string 'emptyString' is
161 substituted.
162
163 No check is made for L{conflicts with keywords <DeconflictKeyword>}.
164
165 @keyword camel_case : If C{True}, any underscore in the result
166 string that is immediately followed by an alphanumeric is replaced
167 by the capitalized version of that alphanumeric. Thus,
168 'one_or_two' becomes 'oneOrTwo'. If C{False} (default), has no
169 effect.
170
171 @rtype: C{str}
172 """
173 s = _XMLIdentifierToPython(s)
174 s = _PrefixUnderscore_re.sub('', _NonIdentifier_re.sub('', _UnderscoreSubstitute_re.sub('_', s)))
175 if camel_case:
176 s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s)
177 if _PrefixDigit_re.match(s):
178 s = 'n' + s
179 if 0 == len(s):
180 s = 'emptyString'
181 return s
182
# Reserved words across supported Python versions.  "exec" and "print"
# are keywords only in Python 2; "nonlocal", "async" and "await" are
# keywords only in Python 3.  All are listed so that an identifier
# generated under one interpreter remains legal under the other.
_PythonKeywords = frozenset( (
    "and", "as", "assert", "async", "await", "break", "class", "continue",
    "def", "del", "elif", "else", "except", "exec", "finally", "for",
    "from", "global", "if", "import", "in", "is", "lambda", "nonlocal",
    "not", "or", "pass", "print", "raise", "return", "try", "while",
    "with", "yield"
    ) )
189 """Python keywords. Note that types like int and float are not
190 keywords.
191
192 @see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""
193
194 _PythonBuiltInConstants = frozenset( (
195 "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
196
197
198
199 "set"
200 ) )
201 """Other symbols that aren't keywords but that can't be used.
202
203 @see: U{http://docs.python.org/library/constants.html}."""
204
205 _Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
206 """The keywords reserved for Python, derived from L{_PythonKeywords}
207 and L{_PythonBuiltInConstants}."""
210 """If the provided string C{s} matches a Python language keyword,
211 append an underscore to distinguish them.
212
213 See also L{MakeUnique}.
214
215 @param s: string to be deconflicted
216
217 @keyword aux_keywords: optional iterable of additional strings
218 that should be treated as keywords.
219
220 @rtype: C{str}
221
222 """
223 if (s in _Keywords) or (s in aux_keywords):
224 return '%s_' % (s,)
225 return s
226
228 """Return an identifier based on C{s} that is not in the given set.
229
230 The returned identifier is made unique by appending an underscore
231 and, if necessary, a serial number.
232
233 The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...
234
235 @param in_use: The set of identifiers already in use in the
236 relevant scope. C{in_use} is updated to contain the returned
237 identifier.
238
239 @rtype: C{str}
240 """
241 if s in in_use:
242 ctr = 2
243 s = s.rstrip('_')
244 candidate = '%s_' % (s,)
245 while candidate in in_use:
246 candidate = '%s_%d' % (s, ctr)
247 ctr += 1
248 s = candidate
249 in_use.add(s)
250 return s
251
def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Build a unique, keyword-safe Python identifier from C{s}.

    The text is passed through L{MakeIdentifier}, stripped of leading
    and trailing underscores, and passed through L{DeconflictKeyword}.
    A visibility prefix is then applied and the result is made unique
    with L{MakeUnique}.

    @param s: the text from which the identifier is derived.

    @param in_use: the set of identifiers already taken; it is handed
    to L{MakeUnique}, which records the returned identifier in it.

    @keyword aux_keywords: additional symbols to be treated as
    keywords when deconflicting, e.g. known method names.

    @keyword private: if C{True}, the identifier is prefixed with two
    underscores.  Takes precedence over C{protected}.

    @keyword protected: if C{True} (and C{private} is not), the
    identifier is prefixed with a single underscore.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    base = MakeIdentifier(s).strip('_')
    base = DeconflictKeyword(base, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + base, in_use)
287
290 """Mix-in used to deconflict public symbols in classes that may be
291 inherited by generated binding classes.
292
293 Some classes, like the L{pyxb.binding.basis.element} or
294 L{pyxb.binding.basis.simpleTypeDefinition} classes in
295 L{pyxb.binding.basis}, have public symbols associated with
296 functions and variables. It is possible that an XML schema might
297 include tags and attribute names that match these symbols. To
298 avoid conflict, the reserved symbols marked in this class are
299 added to the pre-defined identifier set.
300
301 Subclasses should create a class-level variable
302 C{_ReservedSymbols} that contains a set of strings denoting the
303 symbols reserved in this class, combined with those from any
304 superclasses that also have reserved symbols. Code like the
305 following is suggested::
306
307 # For base classes (direct mix-in):
308 _ReservedSymbols = set([ 'one', 'two' ])
309 # For subclasses:
310 _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))
311
312 Only public symbols (those with no underscores) are currently
313 supported. (Private symbols can't be deconflicted that easily,
314 and no protected symbols that derive from the XML are created by
315 the binding generator.)
316 """
317
318 _ReservedSymbols = set()
319 """There are no reserved symbols in the base class."""
320
321
322 __TabCRLF_re = re.compile("[\t\n\r]")
323
324 __MultiSpace_re = re.compile(" +")
327 """Normalize the given string.
328
329 Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
330 parameters must be assigned the value C{True} by the caller.
331
332 - C{preserve}: the text is returned unchanged.
333
334 - C{replace}: all tabs, newlines, and carriage returns are
335 replaced with ASCII spaces.
336
337 - C{collapse}: the C{replace} normalization is done, then
338 sequences of two or more spaces are replaced by a single space.
339
340 See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.
341
342 @rtype: C{str}
343 """
344 if preserve:
345 return text
346 text = __TabCRLF_re.sub(' ', text)
347 if replace:
348 return text
349 if collapse:
350 return __MultiSpace_re.sub(' ', text).strip()
351
352 raise Exception('NormalizeWhitespace: No normalization specified')
353
355 """Represent a directed graph with arbitrary objects as nodes.
356
357 This is used in the L{code
358 generator<pyxb.binding.generate.Generator>} to determine order
359 dependencies among components within a namespace, and schema that
360 comprise various namespaces. An edge from C{source} to C{target}
361 indicates that some aspect of C{source} requires that some aspect
362 of C{target} already be available.
363 """
364
366 self.__roots = None
367 if root is not None:
368 self.__roots = set([root])
369 self.__edges = set()
370 self.__edgeMap = { }
371 self.__reverseMap = { }
372 self.__nodes = set()
373
374 __scc = None
375 __sccMap = None
376 __dfsOrder = None
377
def addEdge (self, source, target):
    """Record a directed edge running from C{source} to C{target}.

    Both endpoints become nodes of the graph if they were not
    already.  A self-edge (C{source == target}) is stored in the edge
    set and the forward map, but not in the reverse map, so it does
    not count as an incoming edge.
    """
    edge = (source, target)
    self.__edges.add(edge)
    successors = self.__edgeMap.setdefault(source, set())
    successors.add(target)
    if source != target:
        predecessors = self.__reverseMap.setdefault(target, set())
        predecessors.add(source)
    self.__nodes.update(edge)
389
391 """Add the given node to the graph."""
392 self.__nodes.add(node)
393
394 __roots = None
def roots (self, reset=False):
    """Return the set of root nodes, i.e. nodes with no incoming edges.

    The result is cached: later calls return the same set object
    unless C{reset} is C{True}.

    @note: A recomputation rebuilds the set purely from the nodes
    and the reverse edge map, so anything else that had been placed
    in the cached set beforehand is dropped.

    @keyword reset: If C{True}, any cached value is discarded and
    recomputed.  No effect if C{False} (default).

    @rtype: C{set}
    """
    if (not reset) and (self.__roots is not None):
        return self.__roots
    # A node is a root exactly when nothing points at it.
    self.__roots = set(_n for _n in self.__nodes if _n not in self.__reverseMap)
    return self.__roots
416 """Add the provided node as a root node, even if it has incoming edges.
417
418 The node need not be present in the graph (if necessary, it is added).
419
420 Note that roots added in this way do not survive a reset using
421 L{roots}.
422
423 @return: C{self}
424 """
425 if self.__roots is None:
426 self.__roots = set()
427 self.__nodes.add(root)
428 self.__roots.add(root)
429 return self
430
432 """Return the edges in the graph.
433
434 The edge data structure is a map from the source node to the
435 set of nodes that can be reached in a single step from the
436 source.
437 """
438 return self.__edgeMap
439 __edgeMap = None
440
442 """Return the edges in the graph.
443
444 The edge data structure is a set of node pairs represented as C{( source, target )}.
445 """
446 return self.__edges
447
449 """Return the set of nodes in the graph.
450
451 The node collection data structure is a set containing node
452 objects, whatever they may be."""
453 return self.__nodes
454
def tarjan (self, reset=False):
    """Execute Tarjan's algorithm on the graph.

    U{Tarjan's
    algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
    computes the U{strongly-connected
    components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
    of the graph: i.e., the sets of nodes that form a minimal
    closed set under edge transition.  In essence, the loops.  We
    use this to detect groups of components that have a dependency
    cycle.

    The walk starts from each node returned by L{roots}; the
    per-node work is delegated to C{_tarjan}.

    @keyword reset: If C{True}, any cached component set is erased
    and recomputed.  If C{False} (default), an existing previous
    result is left unchanged.

    @raise Exception: the graph has nodes but none of them is a
    root."""

    if (not reset) and (self.__scc is not None):
        return
    # Start from a clean slate for every (re)computation.
    self.__sccMap = { }
    self.__stack = []
    self.__sccOrder = []
    self.__scc = []
    self.__index = 0
    self.__tarjanLowLink = { }
    # Every node starts unvisited, which is represented by None.
    self.__tarjanIndex = dict.fromkeys(self.__nodes)
    roots = self.roots()
    if (not roots) and self.__nodes:
        raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
    for start in roots:
        self._tarjan(start)
    self.__didTarjan = True
488
490 """Do the work of Tarjan's algorithm for a given root node."""
491 if self.__tarjanIndex.get(v) is not None:
492
493 return
494 self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
495 self.__index += 1
496 self.__stack.append(v)
497 source = v
498 for target in self.__edgeMap.get(source, []):
499 if self.__tarjanIndex[target] is None:
500 self._tarjan(target)
501 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
502 elif target in self.__stack:
503 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
504 else:
505 pass
506
507 if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
508 scc = []
509 while True:
510 scc.append(self.__stack.pop())
511 if v == scc[-1]:
512 break
513 self.__sccOrder.append(scc)
514 if 1 < len(scc):
515 self.__scc.append(scc)
516 [ self.__sccMap.setdefault(_v, scc) for _v in scc ]
517
def scc (self, reset=False):
    """Return the strongly-connected components found by L{tarjan}.

    The value is the list built by L{tarjan}; each element is a list
    of the nodes making up one component.  Only components with more
    than one node are recorded there.

    @keyword reset: If C{True}, L{tarjan} is re-run with the same
    C{reset} value before returning.  If C{False} (default), L{tarjan}
    is run only when no result is cached.

    @see: L{tarjan}.
    """
    must_compute = reset or (self.__scc is None)
    if must_compute:
        self.tarjan(reset)
    return self.__scc
529 __scc = None
530
def sccMap (self, reset=False):
    """Return the mapping from each node to the strongly-connected
    component that contains it.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), L{tarjan} is invoked only when no map is cached.

    @see: L{tarjan}.
    """
    must_compute = reset or (self.__sccMap is None)
    if must_compute:
        self.tarjan(reset)
    return self.__sccMap
544 __sccMap = None
545
547 """Return the strongly-connected components in order.
548
549 The data structure is a list, in dependency order, of strongly
550 connected components (which can be single nodes). Appearance
551 of a node in a set earlier in the list indicates that it has
552 no dependencies on any node that appears in a subsequent set.
553 This order is preferred over L{dfsOrder} for code generation,
554 since it detects loops.
555
556 @see: L{tarjan}.
557 """
558 if reset or (self.__sccOrder is None):
559 self.tarjan(reset)
560 return self.__sccOrder
561 __sccOrder = None
562
564 """Return the strongly-connected component to which the given
565 node belongs.
566
567 Any keywords supplied when invoking this method are passed to
568 the L{sccMap} method.
569
570 @return: The SCC set, or C{None} if the node is not present in
571 the results of Tarjan's algorithm."""
572
573 return self.sccMap(**kw).get(node)
574
576 """Return the cyclomatic complexity of the graph."""
577 self.tarjan()
578 return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)
579
581 assert not (source in self.__dfsWalked)
582 self.__dfsWalked.add(source)
583 for target in self.__edgeMap.get(source, []):
584 if not (target in self.__dfsWalked):
585 self.__dfsWalk(target)
586 self.__dfsOrder.append(source)
587
589 node_map = { }
590 idx = 1
591 for n in self.__nodes:
592 node_map[n] = idx
593 idx += 1
594 text = []
595 text.append('digraph "%s" {' % (title,))
596 for n in self.__nodes:
597 if labeller is not None:
598 nn = labeller(n)
599 else:
600 nn = str(n)
601 text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
602 for s in self.__nodes:
603 for d in self.__edgeMap.get(s, []):
604 if s != d:
605 text.append('%s -> %s;' % (node_map[s], node_map[d]))
606 text.append("};")
607 return "\n".join(text)
608
610 """Return the nodes of the graph in U{depth-first-search
611 order<http://en.wikipedia.org/wiki/Depth-first_search>}.
612
613 The data structure is a list. Calculated lists are retained
614 and returned on future invocations, subject to the C{reset}
615 keyword.
616
617 @keyword reset: If C{True}, discard cached results and recompute the order."""
618 if reset or (self.__dfsOrder is None):
619 self.__dfsWalked = set()
620 self.__dfsOrder = []
621 for root in self.roots(reset=reset):
622 self.__dfsWalk(root)
623 self.__dfsWalked = None
624 if len(self.__dfsOrder) != len(self.__nodes):
625 raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
626 return self.__dfsOrder
627
629 """Return the nodes of the graph as a sequence of root sets.
630
631 The first root set is the set of nodes that are roots: i.e.,
632 have no incoming edges. The second root set is the set of
633 nodes that have incoming nodes in the first root set. This
634 continues until all nodes have been reached. The sets impose
635 a partial order on the nodes, without being as constraining as
636 L{sccOrder}.
637
638 @return: a list of the root sets."""
639 order = []
640 nodes = set(self.__nodes)
641 edge_map = {}
642 for (d, srcs) in six.iteritems(self.__edgeMap):
643 edge_map[d] = srcs.copy()
644 while nodes:
645 freeset = set()
646 for n in nodes:
647 if not (n in edge_map):
648 freeset.add(n)
649 if 0 == len(freeset):
650 _log.error('dependency cycle in named components')
651 return None
652 order.append(freeset)
653 nodes.difference_update(freeset)
654 new_edge_map = {}
655 for (d, srcs) in six.iteritems(edge_map):
656 srcs.difference_update(freeset)
657 if 0 != len(srcs):
658 new_edge_map[d] = srcs
659 edge_map = new_edge_map
660 return order
661
662 LocationPrefixRewriteMap_ = { }
669
671 """Normalize a URI against an optional parent_uri in the way that is
672 done for C{schemaLocation} attribute values.
673
674 If no URI schema is present, this will normalize a file system
675 path.
676
677 Optionally, the resulting absolute URI can subsequently be
678 rewritten to replace specified prefix strings with alternative
679 strings, e.g. to convert a remote URI to a local repository. This
680 rewriting is done after the conversion to an absolute URI, but
681 before normalizing file system URIs.
682
683 @param uri : The URI to normalize. If C{None}, function returns
684 C{None}
685
686 @param parent_uri : The base URI against which normalization is
687 done, if C{uri} is a relative URI.
688
689 @param prefix_map : A map used to rewrite URI prefixes. If
690 C{None}, the value defaults to that stored by
691 L{SetLocationPrefixRewriteMap}.
692
693 """
694 if uri is None:
695 return uri
696 if parent_uri is None:
697 abs_uri = uri
698 else:
699
700
701 abs_uri = urlparse.urljoin(parent_uri, uri)
702 if prefix_map is None:
703 prefix_map = LocationPrefixRewriteMap_
704 for (pfx, sub) in six.iteritems(prefix_map):
705 if abs_uri.startswith(pfx):
706 abs_uri = sub + abs_uri[len(pfx):]
707 if 0 > abs_uri.find(':'):
708 abs_uri = os.path.realpath(abs_uri)
709 return abs_uri
710
713 """Retrieve the contents of the uri as raw data.
714
715 If the uri does not include a scheme (e.g., C{http:}), it is
716 assumed to be a file path on the local system."""
717
718 from pyxb.utils.six.moves.urllib.request import urlopen
719 stream = None
720 exc = None
721
722
723
724 if 0 <= uri.find(':'):
725 try:
726 stream = urlopen(uri)
727 except Exception as e:
728 exc = e
729 if (stream is None) and six.PY2:
730 import urllib
731 try:
732 stream = urllib.urlopen(uri)
733 exc = None
734 except:
735
736 pass
737 if stream is None:
738
739 try:
740 stream = open(uri, 'rb')
741 exc = None
742 except Exception as e:
743 if exc is None:
744 exc = e
745 if exc is not None:
746 _log.error('open %s', uri, exc_info=exc)
747 raise exc
748 try:
749
750
751 if isinstance(stream, six.file) or isinstance(stream.fp, six.file):
752 archive_directory = None
753 except:
754 pass
755 xmld = stream.read()
756 if archive_directory:
757 base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
758 counter = 1
759 dest_file = os.path.join(archive_directory, base_name)
760 while os.path.isfile(dest_file):
761 dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
762 counter += 1
763 try:
764 OpenOrCreate(dest_file).write(xmld)
765 except OSError as e:
766 _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
767 return xmld
768
def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write binary data into the given file.

    Any missing directories leading to C{file_name} are created.  The
    file is opened in binary append/update mode (C{'ab+'}).

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file (decoded as UTF-8, with undecodable bytes replaced)
    causes an C{OSError} to be raised with C{errno} set to C{EEXIST}.
    I.e., only files with this value in the first 4KB will be
    returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: the directory could not be created, or the
    C{tag} check failed.  In the latter case the file is closed
    before the exception propagates.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is fine; anything else is not.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            fp.seek(0)
            blockd = fp.read(4096)
            # The 4096-byte window may split a multi-byte character, and
            # a foreign file need not be UTF-8 at all; neither case may
            # mask the tag check with a decode error.
            blockt = blockd.decode('utf-8', 'replace')
            if 0 > blockt.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0)
            fp.truncate()
        else:
            # Position at end of file (offset 0 relative to the end).
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the handle when the file is rejected.
        fp.close()
        raise
    return fp
811
812
813 __Hasher = None
814 try:
815 import hashlib
816 __Hasher = hashlib.sha1
817 except ImportError:
818 import sha
819 __Hasher = sha.new
820
def HashForText (text):
    """Return the hex digest of C{text} under the module's hash function.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.

    @param text: the data to hash.  Unicode text is encoded as UTF-8
    first; anything else is handed to the hasher as is.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    data = text.encode('utf-8') if isinstance(text, six.text_type) else text
    digest = __Hasher(data)
    return digest.hexdigest()
834
835
836 __HaveUUID = False
837 try:
838 import uuid
839 __HaveUUID = True
840 except ImportError:
841 import random
843 """Obtain a UUID using the best available method. On a version of
844 python that does not incorporate the C{uuid} class, this creates a
845 string combining the current date and time (to the second) with a
846 random number.
847
848 @rtype: C{str}
849 """
850 if __HaveUUID:
851 return uuid.uuid1().urn
852 return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))
853
855 """Records a unique identifier, generally associated with a
856 binding generation action.
857
858 The identifier is a string, but gets wrapped in an instance of
859 this class to optimize comparisons and reduce memory footprint.
860
861 Invoking the constructor for this class on the same string
862 multiple times will return the same Python object.
863
864 An instance of this class compares equal to, and hashes equivalent
865 to, the uid string. When C{str}'d, the result is the uid; when
866 C{repr}'d, the result is a constructor call to
867 C{pyxb.utils.utility.UniqueIdentifier}.
868 """
869
870
871 __ExistingUIDs = {}
872
874 """The string unique identifier"""
875 return self.__uid
876 __uid = None
877
878
881
884
887
888
904
906 """Associate the given object with this identifier.
907
908 This is a one-way association: the object is not provided with
909 a return path to this identifier instance."""
910 self.__associatedObjects.add(obj)
912 """The set of objects that have been associated with this
913 identifier instance."""
914 return self.__associatedObjects
915 __associatedObjects = None
916
918 """Create a new UniqueIdentifier instance.
919
920 @param uid: The unique identifier string. If present, it is
921 the caller's responsibility to ensure the value is universally
922 unique. If C{None}, one will be provided.
923 @type uid: C{str} or C{unicode}
924 """
925 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
926 self.__associatedObjects = set()
927
929 if other is None:
930 return False
931 elif isinstance(other, UniqueIdentifier):
932 other_uid = other.uid()
933 elif isinstance(other, six.string_types):
934 other_uid = other
935 else:
936 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
937 return self.uid() == other_uid
938
940 return hash(self.uid())
941
944
946 return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)
947
950 """A C{datetime.tzinfo} subclass that helps deal with UTC
951 conversions in an ISO8601 world.
952
953 This class only supports fixed offsets from UTC.
954 """
955
956
957 __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$')
958
959
960 __utcOffset_min = 0
961
962
963 __utcOffset_td = None
964
965
966 __ZeroDuration = datetime.timedelta(0)
967
968
969 __MaxOffset_td = datetime.timedelta(hours=14)
970
972 """Create a time zone instance with a fixed offset from UTC.
973
974 @param spec: Specifies the offset. Can be an integer counting
975 minutes east of UTC, the value C{None} (equal to 0 minutes
976 east), or a string that conform to the ISO8601 time zone
977 sequence (B{Z}, or B{[+-]HH:MM}).
978 """
979
980 if spec is not None:
981 if isinstance(spec, six.string_types):
982 if 'Z' == spec:
983 self.__utcOffset_min = 0
984 else:
985 match = self.__Lexical_re.match(spec)
986 if match is None:
987 raise ValueError('Bad time zone: %s' % (spec,))
988 self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
989 if '-' == match.group(1):
990 self.__utcOffset_min = - self.__utcOffset_min
991 elif isinstance(spec, int):
992 self.__utcOffset_min = spec
993 elif isinstance(spec, datetime.timedelta):
994 self.__utcOffset_min = spec.seconds // 60
995 else:
996 raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
997 self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
998 if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
999 raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
1000 if 0 == self.__utcOffset_min:
1001 self.__tzName = 'Z'
1002 elif 0 > self.__utcOffset_min:
1003 self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
1004 else:
1005 self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)
1006
1008 """Returns the constant offset for this zone."""
1009 return self.__utcOffset_td
1010
1012 """Return the name of the timezone in the format expected by XML Schema."""
1013 return self.__tzName
1014
def dst (self, dt):
    """Return the daylight-saving adjustment, which is always the
    zero duration held by the class.

    @param dt: ignored.
    """
    zero = self.__ZeroDuration
    return zero
1018
1023
1026
1029
1032
1034 """A C{datetime.tzinfo} subclass for the local time zone.
1035
1036 Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
1037 """
1038
1039 __STDOffset = datetime.timedelta(seconds=-time.timezone)
1040 __DSTOffset = __STDOffset
1041 if time.daylight:
1042 __DSTOffset = datetime.timedelta(seconds=-time.altzone)
1043 __ZeroDelta = datetime.timedelta(0)
1044 __DSTDelta = __DSTOffset - __STDOffset
1045
1050
1051 - def dst (self, dt):
1055
1058
1060 tt = (dt.year, dt.month, dt.day,
1061 dt.hour, dt.minute, dt.second,
1062 0, 0, -1)
1063 tt = time.localtime(time.mktime(tt))
1064 return tt.tm_isdst > 0
1065
1067 """Emulate the B{transient} keyword from Java for private member
1068 variables.
1069
1070 This class defines a C{__getstate__} method which returns a copy
1071 of C{self.__dict__} with certain members removed. Specifically,
1072 if a string "s" appears in a class member variable named
1073 C{__PrivateTransient} defined in the "Class" class, then the
1074 corresponding private variable "_Class__s" will be removed from
1075 the state dictionary. This is used to eliminate unnecessary
1076 fields from instances placed in L{namespace
1077 archives<pyxb.namespace.archive.NamespaceArchive>} without having
1078 to implement a C{__getstate__} method in every class in the
1079 instance hierarchy.
1080
1081 For an example, see
1082 L{pyxb.xmlschema.structures._SchemaComponent_mixin}
1083
1084 If you use this, it is your responsibility to define the
1085 C{__PrivateTransient} class variable and add to it the required
1086 variable names.
1087
1088 Classes that inherit from this are free to define their own
1089 C{__getstate__} method, which may or may not invoke the superclass
1090 one. If you do this, be sure that the class defining
1091 C{__getstate__} lists L{PrivateTransient_mixin} as one of its
1092 direct superclasses, lest the latter end up earlier in the mro and
1093 consequently bypass the local override.
1094 """
1095
1096
1097
1098 __Attribute = '__PrivateTransient'
1099
1101 state = self.__dict__.copy()
1102
1103
1104 attr = '_%s%s_' % (self.__class__.__name__, self.__Attribute)
1105 skipped = getattr(self.__class__, attr, None)
1106 if skipped is None:
1107 skipped = set()
1108 for cl in self.__class__.mro():
1109 for (k, v) in six.iteritems(cl.__dict__):
1110 if k.endswith(self.__Attribute):
1111 cl2 = k[:-len(self.__Attribute)]
1112 skipped.update([ '%s__%s' % (cl2, _n) for _n in v ])
1113 setattr(self.__class__, attr, skipped)
1114 for k in skipped:
1115 if state.get(k) is not None:
1116 del state[k]
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 return state
1128
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile('.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    Returned paths are built by joining file names onto the path
    entries as given; they are absolute only when the entries (after
    prefix substitution) are absolute.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a regular file is tested
    against C{pattern} using its whole path.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.  Only the first occurrence is expanded;
    later occurrences are dropped.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.  It may itself hold several entries separated by
    os.pathsep.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.  If left as C{None}, no prefix substitution is performed.

    @return: A list of the matching file paths, in the order found.
    """
    matching_files = []
    path_set = path.split(os.pathsep)
    while path_set:
        entry = path_set.pop(0)
        if default_path_wildcard == entry:
            if default_path is not None:
                # Splice the defaults in at the front so they are
                # processed in the wildcard's position.
                path_set[0:0] = default_path.split(os.pathsep)
                # Clearing it ensures the wildcard expands only once.
                default_path = None
            continue
        # Substitute only when both halves of the rule are supplied;
        # os.path.join would raise TypeError on a None substituend.
        if ((prefix_pattern is not None)
            and (prefix_substituend is not None)
            and entry.startswith(prefix_pattern)):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            # A file entry is matched against its full path.
            if (pattern is None) or (pattern.search(entry) is not None):
                matching_files.append(entry)
            continue
        for (root, _dirs, files) in os.walk(entry):
            # Files found by walking are matched by bare name only.
            matching_files.extend(os.path.join(root, f) for f in files
                                  if (pattern is None) or (pattern.search(f) is not None))
            if not recursive:
                # The first os.walk result is the top directory itself.
                break
    return matching_files
1201
1202 @BackfillComparisons
1203 -class Location (object):
1204 __locationBase = None
1205 __lineNumber = None
1206 __columnNumber = None
1207
1208 - def __init__ (self, location_base=None, line_number=None, column_number=None):
1214
def newLocation (self, locator=None, line_number=None, column_number=None):
    """Create a new L{Location} that shares this instance's location base.

    @keyword locator: Optional object exposing C{getLineNumber()} and
    C{getColumnNumber()}.  When provided, the values it reports
    replace C{line_number} and C{column_number}.

    @keyword line_number: Line number to record when no usable
    C{locator} is provided.

    @keyword column_number: Column number to record when no usable
    C{locator} is provided.

    @return: A L{Location} built from this instance's location base
    and the resulting line and column numbers.
    """
    if locator is not None:
        try:
            line_number = locator.getLineNumber()
            column_number = locator.getColumnNumber()
        except Exception:
            # Best effort: a locator that cannot report its position
            # leaves whatever values were obtained so far.  Only
            # ordinary errors are suppressed, so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
    return Location(self.__locationBase, line_number, column_number)
1223
1224 locationBase = property(lambda _s: _s.__locationBase)
1225 lineNumber = property(lambda _s: _s.__lineNumber)
1226 columnNumber = property(lambda _s: _s.__columnNumber)
1227
1229 if v1 is None:
1230 if v2 is None:
1231 return None
1232 return 1
1233 if v2 is None:
1234 return -1
1235 if v1 < v2:
1236 return -1
1237 if v1 == v2:
1238 return 0
1239 return 1
1240
1248
1251
1253 """Comparison by locationBase, then lineNumber, then columnNumber."""
1254 if other is None:
1255 return False
1256 rv = self.__cmpTupleUnlessNone(self, other)
1257 if rv is None:
1258 return True
1259 return 0 == rv
1260
1262 if other is None:
1263 return False
1264 rv = self.__cmpTupleUnlessNone(self, other)
1265 if rv is None:
1266 return False
1267 return -1 == rv
1268
1277
1282
1295
1297 """Filtered built-in repr for python 2/3 compatibility in
1298 generated bindings.
1299
1300 All generated string values are to be unicode. We always import
1301 unicode_literals from __future__, so we want plain quotes with no
1302 prefix u. Strip that off.
1303
1304 Integer constants should not have the suffix L even if they do not
1305 fit in a Python2 int. The references generated through this
1306 function are never used for calculations, so the implicit cast to
1307 a larger type is sufficient.
1308
1309 All other values use their standard representations.
1310 """
1311 if isinstance(v, six.string_types):
1312 qu = QuotedEscaped(v)
1313 if 'u' == qu[0]:
1314 return qu[1:]
1315 return qu
1316 if isinstance(v, six.integer_types):
1317 vs = repr(v)
1318 if vs.endswith('L'):
1319 return vs[:-1]
1320 return vs
1321 return repr(v)
1322