8 #ifndef ORCUS_SAX_PARSER_HPP
9 #define ORCUS_SAX_PARSER_HPP
11 #include "sax_parser_base.hpp"
30 template<
typename _Handler,
typename _Config = sax_parser_default_config>
34 typedef _Handler handler_type;
35 typedef _Config config_type;
37 sax_parser(
const char* content,
const size_t size, handler_type& handler);
51 void element_open(
const char* begin_pos);
52 void element_close(
const char* begin_pos);
54 void declaration(
const char* name_check);
61 handler_type& m_handler;
64 template<
typename _Handler,
typename _Config>
66 const char* content,
const size_t size, handler_type& handler) :
72 template<
typename _Handler,
typename _Config>
73 sax_parser<_Handler,_Config>::~sax_parser()
77 template<
typename _Handler,
typename _Config>
78 void sax_parser<_Handler,_Config>::parse()
86 assert(m_buffer_pos == 0);
89 template<
typename _Handler,
typename _Config>
90 void sax_parser<_Handler,_Config>::header()
95 if (!has_char() || cur_char() !=
'<')
96 throw sax::malformed_xml_error(
"xml file must begin with '<'.", offset());
98 if (config_type::strict_xml_declaration)
100 if (next_char_checked() !=
'?')
101 throw sax::malformed_xml_error(
"xml file must begin with '<?'.", offset());
107 template<
typename _Handler,
typename _Config>
108 void sax_parser<_Handler,_Config>::body()
112 if (cur_char() ==
'<')
115 if (!m_root_elem_open)
119 else if (m_nest_level)
127 template<
typename _Handler,
typename _Config>
128 void sax_parser<_Handler,_Config>::element()
130 assert(cur_char() ==
'<');
131 const char* pos = mp_char;
132 char c = next_char_checked();
146 throw sax::malformed_xml_error(
"expected an alphabet.", offset());
151 template<
typename _Handler,
typename _Config>
152 void sax_parser<_Handler,_Config>::element_open(
const char* begin_pos)
154 assert(is_alpha(cur_char()));
156 sax::parser_element elem;
157 element_name(elem, begin_pos);
166 if (next_and_char() !=
'>')
167 throw sax::malformed_xml_error(
"expected '/>' to self-close the element.", offset());
169 elem.end_pos = mp_char;
170 m_handler.start_element(elem);
172 m_handler.end_element(elem);
173 #if ORCUS_DEBUG_SAX_PARSER
174 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
182 elem.end_pos = mp_char;
184 m_handler.start_element(elem);
186 #if ORCUS_DEBUG_SAX_PARSER
187 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
196 template<
typename _Handler,
typename _Config>
197 void sax_parser<_Handler,_Config>::element_close(
const char* begin_pos)
199 assert(cur_char() ==
'/');
202 sax::parser_element elem;
203 element_name(elem, begin_pos);
205 if (cur_char() !=
'>')
206 throw sax::malformed_xml_error(
"expected '>' to close the element.", offset());
208 elem.end_pos = mp_char;
210 m_handler.end_element(elem);
211 #if ORCUS_DEBUG_SAX_PARSER
212 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
215 m_root_elem_open =
false;
218 template<
typename _Handler,
typename _Config>
219 void sax_parser<_Handler,_Config>::special_tag()
221 assert(cur_char() ==
'!');
223 size_t len = remains();
225 throw sax::malformed_xml_error(
"special tag too short.", offset());
227 switch (next_and_char())
232 if (next_and_char() !=
'-')
233 throw sax::malformed_xml_error(
"comment expected.", offset());
237 throw sax::malformed_xml_error(
"malformed comment.", offset());
246 expects_next(
"CDATA[", 6);
254 expects_next(
"OCTYPE", 6);
261 throw sax::malformed_xml_error(
"failed to parse special tag.", offset());
265 template<
typename _Handler,
typename _Config>
266 void sax_parser<_Handler,_Config>::declaration(
const char* name_check)
268 assert(cur_char() ==
'?');
274 #if ORCUS_DEBUG_SAX_PARSER
275 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
278 if (name_check && decl_name != name_check)
280 std::ostringstream os;
281 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
282 throw sax::malformed_xml_error(os.str(), offset());
285 m_handler.start_declaration(decl_name);
289 while (cur_char_checked() !=
'?')
294 if (next_char_checked() !=
'>')
295 throw sax::malformed_xml_error(
"declaration must end with '?>'.", offset());
297 m_handler.end_declaration(decl_name);
300 #if ORCUS_DEBUG_SAX_PARSER
301 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
305 template<
typename _Handler,
typename _Config>
306 void sax_parser<_Handler,_Config>::cdata()
308 size_t len = remains();
312 const char* p0 = mp_char;
313 size_t i = 0, match = 0;
314 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
328 else if (c ==
'>' && match == 2)
331 size_t cdata_len = i - 2;
332 m_handler.characters(pstring(p0, cdata_len),
false);
339 throw sax::malformed_xml_error(
"malformed CDATA section.", offset());
342 template<
typename _Handler,
typename _Config>
343 void sax_parser<_Handler,_Config>::doctype()
346 sax::doctype_declaration param;
347 name(param.root_element);
351 size_t len = remains();
353 throw sax::malformed_xml_error(
"DOCTYPE section too short.", offset());
355 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
359 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
360 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
362 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
366 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
367 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
372 has_char_throw(
"DOCTYPE section too short.");
375 value(param.fpi,
false);
377 has_char_throw(
"DOCTYPE section too short.");
379 has_char_throw(
"DOCTYPE section too short.");
381 if (cur_char() ==
'>')
384 #if ORCUS_DEBUG_SAX_PARSER
385 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
387 m_handler.doctype(param);
393 value(param.uri,
false);
395 has_char_throw(
"DOCTYPE section too short.");
397 has_char_throw(
"DOCTYPE section too short.");
399 if (cur_char() !=
'>')
400 throw sax::malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
402 #if ORCUS_DEBUG_SAX_PARSER
403 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
405 m_handler.doctype(param);
409 template<
typename _Handler,
typename _Config>
410 void sax_parser<_Handler,_Config>::characters()
412 const char* p0 = mp_char;
413 for (; has_char(); next())
415 if (cur_char() ==
'<')
418 if (cur_char() ==
'&')
421 cell_buffer& buf = get_cell_buffer();
423 buf.append(p0, mp_char-p0);
424 characters_with_encoded_char(buf);
426 m_handler.characters(pstring(),
false);
428 m_handler.characters(pstring(buf.get(), buf.size()),
true);
435 pstring val(p0, mp_char-p0);
436 m_handler.characters(val,
false);
440 template<
typename _Handler,
typename _Config>
441 void sax_parser<_Handler,_Config>::attribute()
443 sax::parser_attribute attr;
444 pstring attr_ns_name, attr_name, attr_value;
445 attribute_name(attr.ns, attr.name);
447 #if ORCUS_DEBUG_SAX_PARSER
448 std::ostringstream os;
449 os <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'";
455 std::ostringstream os;
456 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
457 throw sax::malformed_xml_error(os.str(), offset());
461 attr.transient = value(attr.value,
true);
466 #if ORCUS_DEBUG_SAX_PARSER
467 os <<
" value='" << attr.value <<
"'" << endl;
471 m_handler.attribute(attr);
Definition: sax_parser.hpp:15
static const bool strict_xml_declaration
Definition: sax_parser.hpp:23
Definition: parser_base.hpp:34
Definition: base64.hpp:15
Definition: sax_parser.hpp:31
Definition: sax_parser_base.hpp:95