1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
// file : web/xhtml-fragment.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
#include <web/xhtml-fragment.hxx>
#include <string>
#include <cassert>
#include <libstudxml/parser.hxx>
#include <libstudxml/serializer.hxx>
#include <web/xhtml.hxx>
using namespace std;
using namespace xml;
namespace web
{
namespace xhtml
{
fragment::
fragment (const string& text, const string& name, size_t length)
{
  // Parse the XHTML fragment text into the list of events that can later
  // be replayed into a serializer.
  //
  // text   - the fragment text to parse.
  // name   - the input name passed to the parser and to the parsing
  //          exception.
  // length - the content length limit; zero means no limit.
  //
  // The fragment is enclosed into an artificial root element so that it
  // can be parsed as an XML document. If the limit is specified and the
  // amount of character data seen so far has reached it, then the fragment
  // is cut right before the next first-level element and the truncated
  // flag is set. Throw parsing if an element or attribute name is
  // namespace-qualified.
  //
  const string wrapped ("<d>" + text + "</d>");

  parser p (wrapped.c_str (),
            wrapped.size (),
            name,
            parser::receive_elements   |
            parser::receive_characters |
            parser::receive_attributes_event);

  size_t content_size (0); // Character data seen, attribute values excluded.
  size_t depth (0);        // Number of currently open elements (with root).

  for (parser::event_type e: p)
  {
    switch (e)
    {
    case parser::start_element:
    case parser::start_attribute:
      {
        // Only the element start can trigger the truncation and only it
        // affects the nesting depth.
        //
        if (e == parser::start_element)
        {
          truncated = length != 0 && depth == 1 && content_size >= length;

          if (truncated)
            break; // Leave the switch, the root is closed below.

          ++depth;
        }

        const auto& qn (p.qname ());

        if (!qn.namespace_ ().empty ())
          throw parsing (
            name, p.line (), p.column (), "namespace is not allowed");

        events_.emplace_back (e, qn.name ());
        break;
      }
    case parser::end_element:
    case parser::end_attribute:
      {
        if (e == parser::end_element)
          --depth;

        events_.emplace_back (e, "");
        break;
      }
    case parser::characters:
      {
        string& v (p.value ());

        // At the very least the root element start is already there.
        //
        assert (!events_.empty ());

        // Character data that immediately follows the attribute start is
        // not counted towards the content length.
        //
        const bool counted (events_.back ().first != parser::start_attribute);

        if (counted)
          content_size += v.size ();

        events_.emplace_back (e, move (v));
        break;
      }
    default:
      assert (false);
    }

    // If truncated, then synthesize the root element end (which the parser
    // will not get to) and stop parsing.
    //
    if (truncated)
    {
      events_.emplace_back (parser::end_element, "");
      break;
    }
  }

  // Strip the artificial root element: its start is the first event and
  // its end is the last one.
  //
  assert (events_.size () >= 2);
  events_.erase (events_.begin ());
  events_.pop_back ();
}
void fragment::
operator() (serializer& s) const
{
  // Replay the stored events into the serializer in the order they were
  // recorded. Elements are started in the xmlns namespace while attributes
  // are started with the name only.
  //
  for (const auto& ev: events_)
  {
    const auto kind (ev.first);
    const auto& data (ev.second); // Name or character data, if any.

    if (kind == parser::start_element)
      s.start_element (xmlns, data);
    else if (kind == parser::start_attribute)
      s.start_attribute (data);
    else if (kind == parser::end_element)
      s.end_element ();
    else if (kind == parser::end_attribute)
      s.end_attribute ();
    else if (kind == parser::characters)
      s.characters (data);
    else
      assert (false); // No other event kinds are expected to be stored.
  }
}
}
}
|