Orcus
css_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9 #define INCLUDED_ORCUS_CSS_PARSER_HPP
10 
11 #define ORCUS_DEBUG_CSS 0
12 
13 #include "orcus/parser_global.hpp"
14 #include "orcus/css_parser_base.hpp"
15 #include "orcus/global.hpp"
16 
17 #include <cassert>
18 
19 #if ORCUS_DEBUG_CSS
20 #include <iostream>
21 using std::cout;
22 using std::endl;
23 #endif
24 
25 namespace orcus {
26 
32 {
33 public:
34  void at_rule_name(const char* p, size_t n)
35  {
36  (void)p; (void)n;
37  }
38 
39  void simple_selector_type(const char* p, size_t n)
40  {
41  (void)p; (void)n;
42  }
43 
44  void simple_selector_class(const char* p, size_t n)
45  {
46  (void)p; (void)n;
47  }
48 
49  void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
50  {
51  (void)pe;
52  }
53 
54  void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
55  {
56  (void)pc;
57  }
58 
59  void simple_selector_id(const char* p, size_t n)
60  {
61  (void)p; (void)n;
62  }
63 
64  void end_simple_selector() {}
65 
66  void end_selector() {}
67 
68  void combinator(orcus::css::combinator_t combinator)
69  {
70  (void)combinator;
71  }
72 
79  void property_name(const char* p, size_t n)
80  {
81  (void)p; (void)n;
82  }
83 
90  void value(const char* p, size_t n)
91  {
92  (void)p; (void)n;
93  }
94 
102  void rgb(uint8_t red, uint8_t green, uint8_t blue)
103  {
104  (void)red; (void)green; (void)blue;
105  }
106 
116  void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
117  {
118  (void)red; (void)green; (void)blue; (void)alpha;
119  }
120 
128  void hsl(uint8_t hue, uint8_t sat, uint8_t light)
129  {
130  (void)hue; (void)sat; (void)light;
131  }
132 
142  void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
143  {
144  (void)hue; (void)sat; (void)light; (void)alpha;
145  }
146 
153  void url(const char* p, size_t n)
154  {
155  (void)p; (void)n;
156  }
157 
161  void begin_parse() {}
162 
166  void end_parse() {}
167 
172  void begin_block() {}
173 
178  void end_block() {}
179 
183  void begin_property() {}
184 
188  void end_property() {}
189 };
190 
191 template<typename _Handler>
193 {
194 public:
195  typedef _Handler handler_type;
196 
197  css_parser(const char* p, size_t n, handler_type& hdl);
198  void parse();
199 
200 private:
201  // Handlers - at the time a handler is called the current position is
202  // expected to point to the first unprocessed non-blank character, and
203  // each handler must set the current position to the next unprocessed
204  // non-blank character when it finishes.
205  void rule();
206  void at_rule_name();
207  void simple_selector_name();
208  void property_name();
209  void property();
210  void quoted_value(char c);
211  void value();
212  void function_value(const char* p, size_t len);
213  void function_rgb(bool alpha);
214  void function_hsl(bool alpha);
215  void function_url();
216  void name_sep();
217  void property_sep();
218  void block();
219 
220  handler_type& m_handler;
221 };
222 
223 template<typename _Handler>
224 css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
225  css::parser_base(p, n), m_handler(hdl) {}
226 
227 template<typename _Handler>
228 void css_parser<_Handler>::parse()
229 {
230  shrink_stream();
231 
232 #if ORCUS_DEBUG_CSS
233  std::cout << "compressed: '";
234  const char* p = mp_char;
235  for (; p != mp_end; ++p)
236  std::cout << *p;
237  std::cout << "'" << std::endl;
238 #endif
239  m_handler.begin_parse();
240  while (has_char())
241  rule();
242  m_handler.end_parse();
243 }
244 
245 template<typename _Handler>
246 void css_parser<_Handler>::rule()
247 {
248  // <selector name> , ... , <selector name> <block>
249  while (has_char())
250  {
251  if (skip_comment())
252  continue;
253 
254  char c = cur_char();
255  if (is_alpha(c))
256  {
257  simple_selector_name();
258  continue;
259  }
260 
261  switch (c)
262  {
263  case '>':
264  set_combinator(c, css::combinator_t::direct_child);
265  break;
266  case '+':
267  set_combinator(c, css::combinator_t::next_sibling);
268  break;
269  case '.':
270  case '#':
271  case '@':
272  simple_selector_name();
273  break;
274  case ',':
275  name_sep();
276  break;
277  case '{':
278  reset_before_block();
279  block();
280  break;
281  default:
282  css::parse_error::throw_with("rule: failed to parse '", c, "'");
283  }
284  }
285 }
286 
287 template<typename _Handler>
288 void css_parser<_Handler>::at_rule_name()
289 {
290  assert(has_char());
291  assert(cur_char() == '@');
292  next();
293  char c = cur_char();
294  if (!is_alpha(c))
295  throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
296 
297  const char* p;
298  size_t len;
299  identifier(p, len);
300  skip_blanks();
301 
302  m_handler.at_rule_name(p, len);
303 #if ORCUS_DEBUG_CSS
304  std::string foo(p, len);
305  std::cout << "at-rule name: " << foo.c_str() << std::endl;
306 #endif
307 }
308 
309 template<typename _Handler>
310 void css_parser<_Handler>::simple_selector_name()
311 {
312  assert(has_char());
313  char c = cur_char();
314  if (c == '@')
315  {
316  // This is the name of an at-rule.
317  at_rule_name();
318  return;
319  }
320 
321  if (m_simple_selector_count)
322  {
323 #if ORCUS_DEBUG_CSS
324  cout << "combinator: " << m_combinator << endl;
325 #endif
326  m_handler.combinator(m_combinator);
327  m_combinator = css::combinator_t::descendant;
328  }
329  assert(is_alpha(c) || c == '.' || c == '#');
330 
331  const char* p = nullptr;
332  size_t n = 0;
333 
334 #if ORCUS_DEBUG_CSS
335  cout << "simple_selector_name: (" << m_simple_selector_count << ")";
336 #endif
337 
338  if (c != '.' && c != '#')
339  {
340  identifier(p, n);
341 #if ORCUS_DEBUG_CSS
342  std::string s(p, n);
343  cout << " type=" << s;
344 #endif
345  m_handler.simple_selector_type(p, n);
346  }
347 
348  bool in_loop = true;
349  while (in_loop && has_char())
350  {
351  switch (cur_char())
352  {
353  case '.':
354  {
355  next();
356  identifier(p, n);
357  m_handler.simple_selector_class(p, n);
358 #if ORCUS_DEBUG_CSS
359  std::string s(p, n);
360  std::cout << " class=" << s;
361 #endif
362  }
363  break;
364  case '#':
365  {
366  next();
367  identifier(p, n);
368  m_handler.simple_selector_id(p, n);
369 #if ORCUS_DEBUG_CSS
370  std::string s(p, n);
371  std::cout << " id=" << s;
372 #endif
373  }
374  break;
375  case ':':
376  {
377  // This could be either a pseudo element or pseudo class.
378  next();
379  if (cur_char() == ':')
380  {
381  // pseudo element.
382  next();
383  identifier(p, n);
384  css::pseudo_element_t elem = css::to_pseudo_element(p, n);
385  if (!elem)
386  css::parse_error::throw_with(
387  "selector_name: unknown pseudo element '", p, n, "'");
388 
389  m_handler.simple_selector_pseudo_element(elem);
390  }
391  else
392  {
393  // pseudo class (or pseudo element in the older version of CSS).
394  identifier(p, n);
395  css::pseudo_class_t pc = css::to_pseudo_class(p, n);
396  if (!pc)
397  css::parse_error::throw_with(
398  "selector_name: unknown pseudo class '", p, n, "'");
399 
400  m_handler.simple_selector_pseudo_class(pc);
401  }
402  }
403  break;
404  default:
405  in_loop = false;
406  }
407  }
408 
409  m_handler.end_simple_selector();
410  skip_comments_and_blanks();
411 
412  ++m_simple_selector_count;
413 
414 #if ORCUS_DEBUG_CSS
415  std::cout << std::endl;
416 #endif
417 }
418 
419 template<typename _Handler>
420 void css_parser<_Handler>::property_name()
421 {
422  // <identifier>
423 
424  assert(has_char());
425  char c = cur_char();
426  if (!is_alpha(c) && c != '.')
427  css::parse_error::throw_with(
428  "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
429 
430  const char* p;
431  size_t len;
432  identifier(p, len);
433  skip_comments_and_blanks();
434 
435  m_handler.property_name(p, len);
436 #if ORCUS_DEBUG_CSS
437  std::string foo(p, len);
438  std::cout << "property name: " << foo.c_str() << std::endl;
439 #endif
440 }
441 
442 template<typename _Handler>
443 void css_parser<_Handler>::property()
444 {
445  // <property name> : <value> , ... , <value>
446 
447  m_handler.begin_property();
448  property_name();
449  if (cur_char() != ':')
450  throw css::parse_error("property: ':' expected.");
451  next();
452  skip_comments_and_blanks();
453 
454  bool in_loop = true;
455  while (in_loop && has_char())
456  {
457  value();
458  char c = cur_char();
459  switch (c)
460  {
461  case ',':
462  {
463  // separated by commas.
464  next();
465  skip_comments_and_blanks();
466  }
467  break;
468  case ';':
469  case '}':
470  in_loop = false;
471  break;
472  default:
473  ;
474  }
475  }
476 
477  skip_comments_and_blanks();
478  m_handler.end_property();
479 }
480 
481 template<typename _Handler>
482 void css_parser<_Handler>::quoted_value(char c)
483 {
484  // Parse until the the end quote is reached.
485  const char* p = nullptr;
486  size_t len = 0;
487  literal(p, len, c);
488  next();
489  skip_blanks();
490 
491  m_handler.value(p, len);
492 #if ORCUS_DEBUG_CSS
493  std::string foo(p, len);
494  std::cout << "quoted value: " << foo.c_str() << std::endl;
495 #endif
496 }
497 
498 template<typename _Handler>
499 void css_parser<_Handler>::value()
500 {
501  assert(has_char());
502  char c = cur_char();
503  if (c == '"' || c == '\'')
504  {
505  quoted_value(c);
506  return;
507  }
508 
509  if (!is_alpha(c) && !is_numeric(c) && !is_in(c, ORCUS_ASCII("-+.#")))
510  css::parse_error::throw_with("value:: illegal first character of a value '", c, "'");
511 
512  const char* p = nullptr;
513  size_t len = 0;
514  identifier(p, len, ORCUS_ASCII(".%"));
515  if (cur_char() == '(')
516  {
517  function_value(p, len);
518  return;
519  }
520 
521  m_handler.value(p, len);
522 
523  skip_comments_and_blanks();
524 
525 #if ORCUS_DEBUG_CSS
526  std::string foo(p, len);
527  std::cout << "value: " << foo.c_str() << std::endl;
528 #endif
529 }
530 
531 template<typename _Handler>
532 void css_parser<_Handler>::function_value(const char* p, size_t len)
533 {
534  assert(cur_char() == '(');
535  css::property_function_t func = css::to_property_function(p, len);
536  if (func == css::property_function_t::unknown)
537  css::parse_error::throw_with("function_value: unknown function '", p, len, "'");
538 
539  // Move to the first character of the first argument.
540  next();
541  skip_comments_and_blanks();
542 
543  switch (func)
544  {
545  case css::property_function_t::rgb:
546  function_rgb(false);
547  break;
548  case css::property_function_t::rgba:
549  function_rgb(true);
550  break;
551  case css::property_function_t::hsl:
552  function_hsl(false);
553  break;
554  case css::property_function_t::hsla:
555  function_hsl(true);
556  break;
557  case css::property_function_t::url:
558  function_url();
559  break;
560  default:
561  css::parse_error::throw_with("function_value: unhandled function '", p, len, "'");
562  }
563 
564  char c = cur_char();
565  if (c != ')')
566  css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
567 
568  next();
569  skip_comments_and_blanks();
570 }
571 
572 template<typename _Handler>
573 void css_parser<_Handler>::function_rgb(bool alpha)
574 {
575  // rgb(num, num, num) rgba(num, num, num, float)
576 
577  uint8_t vals[3];
578  uint8_t* p = vals;
579  const uint8_t* plast = p + 2;
580  char c = 0;
581 
582  for (; ; ++p)
583  {
584  *p = parse_uint8();
585 
586  skip_comments_and_blanks();
587 
588  if (p == plast)
589  break;
590 
591  c = cur_char();
592 
593  if (c != ',')
594  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
595 
596  next();
597  skip_comments_and_blanks();
598  }
599 
600  if (alpha)
601  {
602  c = cur_char();
603  if (c != ',')
604  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
605 
606  next();
607  skip_comments_and_blanks();
608 
609  double alpha_val = parse_double_or_throw();
610 
611  alpha_val = clip(alpha_val, 0.0, 1.0);
612  m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
613  }
614  else
615  m_handler.rgb(vals[0], vals[1], vals[2]);
616 
617 #if ORCUS_DEBUG_CSS
618  std::cout << "rgb";
619  if (alpha)
620  std::cout << 'a';
621  std::cout << '(';
622  p = vals;
623  const uint8_t* pend = plast + 1;
624  for (; p != pend; ++p)
625  std::cout << ' ' << (int)*p;
626  std::cout << " )" << std::endl;
627 #endif
628 }
629 
630 template<typename _Handler>
631 void css_parser<_Handler>::function_hsl(bool alpha)
632 {
633  // hsl(num, percent, percent) hsla(num, percent, percent, float)
634 
635  double hue = parse_double_or_throw(); // casted to uint8_t eventually.
636  hue = clip(hue, 0.0, 360.0);
637  skip_comments_and_blanks();
638 
639  char c = cur_char();
640  if (c != ',')
641  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
642 
643  next();
644  skip_comments_and_blanks();
645 
646  double sat = parse_percent();
647  sat = clip(sat, 0.0, 100.0);
648  skip_comments_and_blanks();
649 
650  c = cur_char();
651  if (c != ',')
652  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
653 
654  next();
655  skip_comments_and_blanks();
656 
657  double light = parse_percent();
658  light = clip(light, 0.0, 100.0);
659  skip_comments_and_blanks();
660 
661  if (!alpha)
662  {
663  m_handler.hsl(hue, sat, light);
664  return;
665  }
666 
667  c = cur_char();
668  if (c != ',')
669  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
670 
671  next();
672  skip_comments_and_blanks();
673 
674  double alpha_val = parse_double_or_throw();
675  alpha_val = clip(alpha_val, 0.0, 1.0);
676  skip_comments_and_blanks();
677  m_handler.hsla(hue, sat, light, alpha_val);
678 }
679 
680 template<typename _Handler>
681 void css_parser<_Handler>::function_url()
682 {
683  char c = cur_char();
684 
685  if (c == '"' || c == '\'')
686  {
687  // Quoted URL value.
688  const char* p;
689  size_t len;
690  literal(p, len, c);
691  next();
692  skip_comments_and_blanks();
693  m_handler.url(p, len);
694 #if ORCUS_DEBUG_CSS
695  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
696 #endif
697  return;
698  }
699 
700  // Unquoted URL value.
701  const char* p;
702  size_t len;
703  skip_to_or_blank(p, len, ORCUS_ASCII(")"));
704  skip_comments_and_blanks();
705  m_handler.url(p, len);
706 #if ORCUS_DEBUG_CSS
707  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
708 #endif
709 }
710 
711 template<typename _Handler>
712 void css_parser<_Handler>::name_sep()
713 {
714  assert(cur_char() == ',');
715 #if ORCUS_DEBUG_CSS
716  std::cout << "," << std::endl;
717 #endif
718  next();
719  skip_blanks();
720  m_handler.end_selector();
721 }
722 
723 template<typename _Handler>
724 void css_parser<_Handler>::property_sep()
725 {
726 #if ORCUS_DEBUG_CSS
727  std::cout << ";" << std::endl;
728 #endif
729  next();
730  skip_comments_and_blanks();
731 }
732 
733 template<typename _Handler>
734 void css_parser<_Handler>::block()
735 {
736  // '{' <property> ';' ... ';' <property> ';'(optional) '}'
737 
738  assert(cur_char() == '{');
739 #if ORCUS_DEBUG_CSS
740  std::cout << "{" << std::endl;
741 #endif
742  m_handler.end_selector();
743  m_handler.begin_block();
744 
745  next();
746  skip_comments_and_blanks();
747 
748  // parse properties.
749  while (has_char())
750  {
751  property();
752  if (cur_char() != ';')
753  break;
754  property_sep();
755  if (cur_char() == '}')
756  // ';' after the last property. This is optional but allowed.
757  break;
758  }
759 
760  if (cur_char() != '}')
761  throw css::parse_error("block: '}' expected.");
762 
763  m_handler.end_block();
764 
765  next();
766  skip_comments_and_blanks();
767 
768 #if ORCUS_DEBUG_CSS
769  std::cout << "}" << std::endl;
770 #endif
771 }
772 
773 }
774 
775 #endif
776 
777 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:30
void begin_parse()
Definition: css_parser.hpp:161
void rgb(uint8_t red, uint8_t green, uint8_t blue)
Definition: css_parser.hpp:102
void end_property()
Definition: css_parser.hpp:188
void end_parse()
Definition: css_parser.hpp:166
void value(const char *p, size_t n)
Definition: css_parser.hpp:90
void hsl(uint8_t hue, uint8_t sat, uint8_t light)
Definition: css_parser.hpp:128
Definition: css_parser.hpp:192
void begin_property()
Definition: css_parser.hpp:183
Definition: css_parser.hpp:31
void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
Definition: css_parser.hpp:142
Definition: parser_base.hpp:40
void url(const char *p, size_t n)
Definition: css_parser.hpp:153
void property_name(const char *p, size_t n)
Definition: css_parser.hpp:79
void begin_block()
Definition: css_parser.hpp:172
Definition: base64.hpp:15
void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
Definition: css_parser.hpp:116
void end_block()
Definition: css_parser.hpp:178