8 #ifndef ORCUS_SAX_PARSER_HPP 9 #define ORCUS_SAX_PARSER_HPP 11 #include "sax_parser_base.hpp" 96 (void)val; (void)
transient;
// SAX parser class template, parameterized on the handler type and on a
// config type that defaults to sax_parser_default_config.
// NOTE(review): the class head and several members fall on lines that are
// not visible in this listing.
117 template<
typename _Handler,
typename _Config = sax_parser_default_config>
// Aliases that expose the two template arguments under fixed names.
121 typedef _Handler handler_type;
122 typedef _Config config_type;
// Takes the content pointer, its size, and the handler by reference.
124 sax_parser(
const char* content,
const size_t size, handler_type& handler);
// Overload that additionally takes a transient_stream flag.
125 sax_parser(
const char* content,
const size_t size,
bool transient_stream, handler_type& handler);
// Element open/close parsing; the definitions pass begin_pos on to
// element_name().
139 void element_open(std::ptrdiff_t begin_pos);
140 void element_close(std::ptrdiff_t begin_pos);
// Parses a declaration; when name_check is non-null the parsed declaration
// name must equal it, otherwise the definition throws malformed_xml_error.
142 void declaration(
const char* name_check);
// Handler that receives the parser callbacks; held by reference.
149 handler_type& m_handler;
// Out-of-class definition of the (content, size, handler) constructor.
// NOTE(review): the qualified-name line and the member-initializer list are
// not visible in this listing.
152 template<
typename _Handler,
typename _Config>
154 const char* content,
const size_t size, handler_type& handler) :
// Out-of-class definition of the constructor overload that also takes
// transient_stream.
// NOTE(review): the remaining initializers and the body are not visible in
// this listing.
160 template<
typename _Handler,
typename _Config>
161 sax_parser<_Handler,_Config>::sax_parser(
162 const char* content,
const size_t size,
bool transient_stream, handler_type& handler) :
// content, size and transient_stream are forwarded to the base class.
163 sax::parser_base(content, size, transient_stream),
// Destructor definition.
// NOTE(review): the body is not visible in this listing.
168 template<
typename _Handler,
typename _Config>
169 sax_parser<_Handler,_Config>::~sax_parser()
// parse(): of its body, only a skip_space_and_control() call and a closing
// assertion are visible in this listing.
// NOTE(review): the other steps fall on lines that are not shown.
173 template<
typename _Handler,
typename _Config>
174 void sax_parser<_Handler,_Config>::parse()
179 skip_space_and_control();
// m_buffer_pos is expected to be zero at this point.
182 assert(m_buffer_pos == 0);
// header(): validates the very beginning of the stream.
// Throws sax::malformed_xml_error when the first character after leading
// whitespace/control characters is not '<', or when the following character
// is not '?'.
185 template<
typename _Handler,
typename _Config>
186 void sax_parser<_Handler,_Config>::header()
190 skip_space_and_control();
// An empty stream or any first character other than '<' is rejected.
191 if (!has_char() || cur_char() !=
'<')
192 throw sax::malformed_xml_error(
"xml file must begin with '<'.", offset());
// NOTE(review): the body of this branch is not visible; 11 presumably stands
// for XML version 1.1 - TODO confirm against the config type.
194 if (config_type::baseline_version >= 11)
// The character right after '<' must be '?'.
198 if (next_char_checked() !=
'?')
199 throw sax::malformed_xml_error(
"xml file must begin with '<?'.", offset());
// body(): branches on whether the current character is '<', and on the
// m_root_elem_open and m_nest_level state.
// NOTE(review): the loop and all branch bodies are not visible in this
// listing.
205 template<
typename _Handler,
typename _Config>
206 void sax_parser<_Handler,_Config>::body()
210 if (cur_char() ==
'<')
213 if (!m_root_elem_open)
217 else if (m_nest_level)
// element(): entered with the current character on '<'. Records the offset
// of that '<', reads the next character and dispatches on it.
// Throws sax::malformed_xml_error when that character is neither alphabetic
// nor '_' on the path visible here.
// NOTE(review): the dispatch statement and its other branches are not
// visible in this listing.
225 template<
typename _Handler,
typename _Config>
226 void sax_parser<_Handler,_Config>::element()
228 assert(cur_char() ==
'<');
// Offset of the opening '<'.
229 std::ptrdiff_t pos = offset();
230 char c = next_char_checked();
// A declaration is parsed without a name check here.
240 declaration(
nullptr);
// An element name must start with an alphabetic character or '_'.
243 if (!is_alpha(c) && c !=
'_')
244 throw sax::malformed_xml_error(
"expected an alphabet.", offset());
// element_open(): parses an opening tag whose name starts at the current
// character and reports it to the handler.
// begin_pos is passed on to element_name().
// A self-closing element produces start_element() followed immediately by
// end_element(); otherwise only start_element() is called.
// Throws sax::malformed_xml_error when a self-closing tag is not terminated
// with '>'.
// NOTE(review): the attribute loop and the branch conditions are not visible
// in this listing.
249 template<
typename _Handler,
typename _Config>
250 void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos)
252 assert(is_alpha(cur_char()) || cur_char() ==
'_');
254 sax::parser_element elem;
255 element_name(elem, begin_pos);
259 skip_space_and_control();
// Self-closing path: the next character must be '>'.
264 if (next_and_char() !=
'>')
265 throw sax::malformed_xml_error(
"expected '/>' to self-close the element.", offset());
267 elem.end_pos = offset();
268 m_handler.start_element(elem);
270 m_handler.end_element(elem);
// NOTE(review): the condition guarding this reset is not visible -
// presumably it applies when the self-closed element is the root.
272 m_root_elem_open =
false;
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
273 #if ORCUS_DEBUG_SAX_PARSER 274 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
// Regular (non-self-closing) path: only the start of the element is
// reported.
282 elem.end_pos = offset();
284 m_handler.start_element(elem);
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
286 #if ORCUS_DEBUG_SAX_PARSER 287 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
// element_close(): entered with the current character on '/'. Parses the
// element name, requires a closing '>' and reports end_element() to the
// handler.
// begin_pos is passed on to element_name().
// Throws sax::malformed_xml_error when the name is not followed by '>'.
// NOTE(review): some intermediate lines are not visible in this listing.
296 template<
typename _Handler,
typename _Config>
297 void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos)
299 assert(cur_char() ==
'/');
302 sax::parser_element elem;
303 element_name(elem, begin_pos);
305 if (cur_char() !=
'>')
306 throw sax::malformed_xml_error(
"expected '>' to close the element.", offset());
308 elem.end_pos = offset();
310 m_handler.end_element(elem);
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
311 #if ORCUS_DEBUG_SAX_PARSER 312 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
// NOTE(review): the condition guarding this reset is not visible -
// presumably it applies once the root element has been closed.
315 m_root_elem_open =
false;
// special_tag(): entered with the current character on '!'. Dispatches on
// the next character to the comment, CDATA and DOCTYPE paths.
// Throws sax::malformed_xml_error when the tag is too short, when a comment
// is malformed, or when no path matches.
// NOTE(review): the case labels and the length condition are not visible in
// this listing.
318 template<
typename _Handler,
typename _Config>
319 void sax_parser<_Handler,_Config>::special_tag()
321 assert(cur_char() ==
'!');
323 size_t len = remains();
325 throw sax::malformed_xml_error(
"special tag too short.", offset());
327 switch (next_and_char())
// Comment path: the next character must be '-'.
332 if (next_and_char() !=
'-')
333 throw sax::malformed_xml_error(
"comment expected.", offset());
337 throw sax::malformed_xml_error(
"malformed comment.", offset());
// CDATA path: the six characters "CDATA[" must follow.
346 expects_next(
"CDATA[", 6);
// DOCTYPE path: the six characters "OCTYPE" must follow.
354 expects_next(
"OCTYPE", 6);
355 skip_space_and_control();
// Reached when none of the paths above applies.
361 throw sax::malformed_xml_error(
"failed to parse special tag.", offset());
// declaration(): entered with the current character on '?'. Reports
// start_declaration() and end_declaration() to the handler with the parsed
// declaration name.
// name_check: when non-null, the parsed name must equal it.
// Throws sax::malformed_xml_error on a name mismatch or when the
// declaration does not end with '?>'.
// NOTE(review): the lines that parse decl_name and the loop body are not
// visible in this listing.
365 template<
typename _Handler,
typename _Config>
366 void sax_parser<_Handler,_Config>::declaration(
const char* name_check)
368 assert(cur_char() ==
'?');
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
374 #if ORCUS_DEBUG_SAX_PARSER 375 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
// The check is skipped when name_check is null.
378 if (name_check && decl_name != name_check)
380 std::ostringstream os;
381 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
382 throw sax::malformed_xml_error(os.str(), offset());
385 m_handler.start_declaration(decl_name);
386 skip_space_and_control();
// Loop until the current character is '?'.
389 while (cur_char_checked() !=
'?')
392 skip_space_and_control();
// The '?' must be followed by '>'.
394 if (next_char_checked() !=
'>')
395 throw sax::malformed_xml_error(
"declaration must end with '?>'.", offset());
397 m_handler.end_declaration(decl_name);
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
400 #if ORCUS_DEBUG_SAX_PARSER 401 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
// cdata(): scans at most remains() characters from the current position
// and, once the terminator is seen, passes the text before it to
// m_handler.characters().
// Throws sax::malformed_xml_error when the scan ends without a terminator.
// NOTE(review): the branches that update `match` are not visible in this
// listing.
405 template<
typename _Handler,
typename _Config>
406 void sax_parser<_Handler,_Config>::cdata()
408 size_t len = remains();
// Start of the CDATA text.
412 const char* p0 = mp_char;
413 size_t i = 0, match = 0;
414 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
// '>' ends the section only after two matched characters (presumably
// the "]]" of the terminator - TODO confirm).
428 else if (c ==
'>' && match == 2)
// Leave the two matched characters out of the reported length.
431 size_t cdata_len = i - 2;
432 m_handler.characters(pstring(p0, cdata_len), transient_stream());
439 throw sax::malformed_xml_error(
"malformed CDATA section.", offset());
// doctype(): parses a DOCTYPE declaration into a sax::doctype_declaration
// (root element name, keyword, fpi and optionally uri) and passes it to
// m_handler.doctype().
// Throws sax::malformed_xml_error when the section is too short, when the
// keyword is not spelled as expected, or when the closing '>' is missing.
// NOTE(review): the branch conditions and early returns are not visible in
// this listing.
442 template<
typename _Handler,
typename _Config>
443 void sax_parser<_Handler,_Config>::doctype()
446 sax::doctype_declaration param;
447 name(param.root_element);
448 skip_space_and_control();
451 size_t len = remains();
453 throw sax::malformed_xml_error(
"DOCTYPE section too short.", offset());
// Set to dtd_private here; the PUBLIC path below overwrites it.
455 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
// The next five characters must be "UBLIC".
459 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
460 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
462 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
// The next five characters must be "YSTEM".
466 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
467 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
471 skip_space_and_control();
472 has_char_throw(
"DOCTYPE section too short.");
// Read the value that follows the keyword into param.fpi.
475 value(param.fpi,
false);
477 has_char_throw(
"DOCTYPE section too short.");
478 skip_space_and_control();
479 has_char_throw(
"DOCTYPE section too short.");
// A '>' here ends the declaration without a uri.
481 if (cur_char() ==
'>')
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
484 #if ORCUS_DEBUG_SAX_PARSER 485 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
487 m_handler.doctype(param);
// Otherwise a second value is read into param.uri.
493 value(param.uri,
false);
495 has_char_throw(
"DOCTYPE section too short.");
496 skip_space_and_control();
497 has_char_throw(
"DOCTYPE section too short.");
499 if (cur_char() !=
'>')
500 throw sax::malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
502 #if ORCUS_DEBUG_SAX_PARSER 503 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
505 m_handler.doctype(param);
// characters(): scans text content from the current position and passes it
// to m_handler.characters().
// Text without '&' is reported as a pstring over the input itself. When
// '&' is met, the text so far goes into the cell buffer and
// characters_with_encoded_char() takes over.
// NOTE(review): the loop exits and the buffer-empty condition are not
// visible in this listing.
509 template<
typename _Handler,
typename _Config>
510 void sax_parser<_Handler,_Config>::characters()
// Start of the current text run.
512 const char* p0 = mp_char;
513 for (; has_char(); next())
515 if (cur_char() ==
'<')
518 if (cur_char() ==
'&')
521 cell_buffer& buf = get_cell_buffer();
// Append the text scanned so far before handling the '&'.
523 buf.append(p0, mp_char-p0);
524 characters_with_encoded_char(buf);
// An empty pstring is reported with the stream's own transient flag.
526 m_handler.characters(pstring(), transient_stream());
// Buffer contents are reported with transient set to true.
528 m_handler.characters(pstring(buf.get(), buf.size()),
true);
// Plain run: report the scanned range of the input directly.
535 pstring val(p0, mp_char-p0);
536 m_handler.characters(val, transient_stream());
// attribute(): parses one attribute (namespace, name and value) into a
// sax::parser_attribute and passes it to m_handler.attribute().
// Throws sax::malformed_xml_error with the "name=.." message below.
// NOTE(review): the condition guarding that throw is not visible in this
// listing.
540 template<
typename _Handler,
typename _Config>
541 void sax_parser<_Handler,_Config>::attribute()
543 sax::parser_attribute attr;
// NOTE(review): none of these three is referenced on the visible lines -
// confirm whether they are unused.
544 pstring attr_ns_name, attr_name, attr_value;
545 attribute_name(attr.ns, attr.name);
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
547 #if ORCUS_DEBUG_SAX_PARSER 548 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
551 skip_space_and_control();
556 std::ostringstream os;
557 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
558 throw sax::malformed_xml_error(os.str(), offset());
562 skip_space_and_control();
// value() fills attr.value; its return value is kept as the transient flag.
564 attr.transient = value(attr.value,
true);
// Debug trace, present only under ORCUS_DEBUG_SAX_PARSER.
569 #if ORCUS_DEBUG_SAX_PARSER 570 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
573 m_handler.attribute(attr);
Definition: pstring.hpp:27
Definition: sax_parser.hpp:15
void start_declaration(const orcus::pstring &decl)
Definition: sax_parser.hpp:45
void end_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:75
static const uint8_t baseline_version
Definition: sax_parser.hpp:22
void attribute(const orcus::sax::parser_attribute &attr)
Definition: sax_parser.hpp:107
Definition: sax_parser_base.hpp:100
Definition: sax_parser.hpp:25
void end_declaration(const orcus::pstring &decl)
Definition: sax_parser.hpp:55
Definition: parser_base.hpp:40
void start_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:65
void doctype(const orcus::sax::doctype_declaration &param)
Definition: sax_parser.hpp:33
Definition: sax_parser_base.hpp:85
Definition: sax_parser_base.hpp:45
Definition: base64.hpp:15
Definition: sax_parser.hpp:118
Definition: sax_parser_base.hpp:108
void characters(const orcus::pstring &val, bool transient)
Definition: sax_parser.hpp:94