Line data Source code
1 : /**
2 : Copyright (c) 2016-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #include "SPHtmlParser.h"
25 :
26 : namespace STAPPLER_VERSIONIZED stappler::html {
27 :
28 : using HtmlIdentifier16 = chars::Compose<char16_t,
29 : chars::Range<char16_t, u'0', u'9'>,
30 : chars::Range<char16_t, u'A', u'Z'>,
31 : chars::Range<char16_t, u'a', u'z'>,
32 : chars::Chars<char16_t, u'_', u'-', u'!', u'/', u':'>
33 : >;
34 :
35 : using HtmlIdentifier8 = chars::Compose<char,
36 : chars::Range<char, u'0', u'9'>,
37 : chars::Range<char, u'A', u'Z'>,
38 : chars::Range<char, u'a', u'z'>,
39 : chars::Chars<char, u'_', u'-', u'!', u'/', u':'>
40 : >;
41 :
42 :
43 100 : template <> StringViewUtf8 Tag_readName<StringViewUtf8>(StringViewUtf8 &is) {
44 100 : StringViewUtf8 s = is;
45 100 : s.skipUntil<HtmlIdentifier16, StringViewUtf8::MatchChars<'>', '?'>>();
46 100 : if (s.is("!--")) {
47 25 : auto ret = StringViewUtf8(s, "!--"_len);
48 25 : s += "!--"_len;
49 25 : is = s;
50 25 : return ret;
51 : }
52 75 : StringViewUtf8 name(s.readChars<HtmlIdentifier16, StringViewUtf8::MatchChars<'?'>>());
53 75 : if (name.size() > 1 && name.back() == '/') {
54 25 : name.set(name.data(), name.size() - 1);
55 25 : is += (is.size() - s.size() - 1);
56 : } else {
57 50 : s.skipUntil<HtmlIdentifier16, StringViewUtf8::MatchChars<'>'>>();
58 50 : is = s;
59 : }
60 75 : return name;
61 : }
62 :
63 25 : template <> StringViewUtf8 Tag_readAttrName<StringViewUtf8>(StringViewUtf8 &s) {
64 25 : s.skipUntil<HtmlIdentifier16>();
65 25 : StringViewUtf8 name(s.readChars<HtmlIdentifier16>());
66 25 : return name;
67 : }
68 :
69 150 : template <> StringViewUtf8 Tag_readAttrValue<StringViewUtf8>(StringViewUtf8 &s) {
70 150 : s.skipChars<StringViewUtf8::WhiteSpace>();
71 150 : if (!s.is('=')) {
72 25 : s.skipUntil<HtmlIdentifier16>();
73 25 : return StringViewUtf8();
74 : }
75 :
76 125 : s ++;
77 125 : char quoted = 0;
78 125 : if (s.is('"') || s.is('\'')) {
79 100 : quoted = s[0];
80 100 : s ++;
81 100 : StringViewUtf8 tmp = s;
82 300 : while (!s.empty() && !s.is(quoted)) {
83 200 : if (quoted == '"') {
84 100 : s.skipUntil<StringViewUtf8::MatchChars<u'\\', u'"'>>();
85 : } else {
86 100 : s.skipUntil<StringViewUtf8::MatchChars<u'\\', u'\''>>();
87 : }
88 200 : if (s.is('\\')) {
89 100 : s += 2;
90 : }
91 : }
92 :
93 100 : StringViewUtf8 ret(tmp.data(), tmp.size() - s.size());
94 100 : if (s.is(quoted)) {
95 100 : s ++;
96 : }
97 100 : s.skipUntil<HtmlIdentifier16, StringViewUtf8::MatchChars<'>'>>();
98 100 : return ret;
99 : }
100 :
101 25 : return s.readChars<HtmlIdentifier16>();
102 : }
103 :
104 :
105 6375 : template <> StringView Tag_readName<StringView>(StringView &is) {
106 6375 : StringView s = is;
107 6375 : s.skipUntil<HtmlIdentifier8, StringView::MatchChars<'>', '?'>>();
108 6375 : if (s.is("!--")) {
109 50 : auto ret = StringView(s, "!--"_len);
110 50 : s += "!--"_len;
111 50 : is = s;
112 50 : return ret;
113 : }
114 6325 : StringView name(s.readChars<HtmlIdentifier8, StringView::MatchChars<'?'>>());
115 6325 : if (name.size() > 1 && name.back() == '/') {
116 25 : name.set(name.data(), name.size() - 1);
117 25 : is += (is.size() - s.size() - 1);
118 : } else {
119 6300 : s.skipUntil<HtmlIdentifier8, StringView::MatchChars<'>'>>();
120 6300 : is = s;
121 : }
122 6325 : return name;
123 : }
124 :
125 14000 : template <> StringView Tag_readAttrName<StringView>(StringView &s) {
126 14000 : s.skipUntil<HtmlIdentifier8>();
127 14000 : StringView name(s.readChars<HtmlIdentifier8>());
128 14000 : return name;
129 : }
130 :
131 14125 : template <> StringView Tag_readAttrValue<StringView>(StringView &s) {
132 14125 : s.skipChars<StringView::WhiteSpace>();
133 14125 : if (!s.is('=')) {
134 25 : s.skipUntil<HtmlIdentifier8>();
135 25 : return StringView();
136 : }
137 :
138 14100 : s ++;
139 14100 : char quoted = 0;
140 14100 : if (s.is('"') || s.is('\'')) {
141 14050 : quoted = s[0];
142 14050 : s ++;
143 14050 : StringView tmp = s;
144 28200 : while (!s.empty() && !s.is(quoted)) {
145 14150 : if (quoted == '"') {
146 14050 : s.skipUntil<StringView::MatchChars<'\\', '"'>>();
147 : } else {
148 100 : s.skipUntil<StringView::MatchChars<'\\', '\''>>();
149 : }
150 14150 : if (s.is('\\')) {
151 100 : s += 2;
152 : }
153 : }
154 :
155 14050 : StringView ret(tmp.data(), tmp.size() - s.size());
156 14050 : if (s.is(quoted)) {
157 14050 : s ++;
158 : }
159 14050 : s.skipUntil<HtmlIdentifier8, StringView::MatchChars<'>'>>();
160 14050 : return ret;
161 : }
162 :
163 50 : return s.readChars<HtmlIdentifier8>();
164 : }
165 :
166 :
167 100 : template <> WideStringView Tag_readName<WideStringView>(WideStringView &is) {
168 100 : WideStringView s = is;
169 100 : s.skipUntil<HtmlIdentifier16, WideStringView::MatchChars<u'>', u'?'>>();
170 100 : if (s.is(u"!--")) {
171 25 : auto ret = WideStringView(s, u"!--"_len);
172 25 : s += u"!--"_len;
173 25 : is = s;
174 25 : return ret;
175 : }
176 75 : WideStringView name(s.readChars<HtmlIdentifier16, WideStringView::MatchChars<u'?'>>());
177 75 : if (name.size() > 1 && name.back() == '/') {
178 25 : name.set(name.data(), name.size() - 1);
179 25 : is += (is.size() - s.size() - 1);
180 : } else {
181 50 : s.skipUntil<HtmlIdentifier16, WideStringView::MatchChars<u'>'>>();
182 50 : is = s;
183 : }
184 75 : return name;
185 : }
186 :
187 25 : template <> WideStringView Tag_readAttrName<WideStringView>(WideStringView &s) {
188 25 : s.skipUntil<HtmlIdentifier16>();
189 25 : WideStringView name(s.readChars<HtmlIdentifier16>());
190 25 : return name;
191 : }
192 :
193 150 : template <> WideStringView Tag_readAttrValue<WideStringView>(WideStringView &s) {
194 150 : s.skipChars<WideStringView::WhiteSpace>();
195 150 : if (!s.is('=')) {
196 25 : s.skipUntil<HtmlIdentifier16>();
197 25 : return WideStringView();
198 : }
199 :
200 125 : s ++;
201 125 : char16_t quoted = 0;
202 125 : if (s.is(u'"') || s.is(u'\'')) {
203 100 : quoted = s[0];
204 100 : s ++;
205 100 : WideStringView tmp = s;
206 300 : while (!s.empty() && !s.is(quoted)) {
207 200 : if (quoted == '"') {
208 100 : s.skipUntil<WideStringView::MatchChars<u'\\', u'"'>>();
209 : } else {
210 100 : s.skipUntil<WideStringView::MatchChars<u'\\', u'\''>>();
211 : }
212 200 : if (s.is('\\')) {
213 100 : s += 2;
214 : }
215 : }
216 :
217 100 : WideStringView ret(tmp.data(), tmp.size() - s.size());
218 100 : if (s.is(quoted)) {
219 100 : s ++;
220 : }
221 100 : s.skipUntil<HtmlIdentifier16, WideStringView::MatchChars<u'>'>>();
222 100 : return ret;
223 : }
224 :
225 25 : return s.readChars<HtmlIdentifier16>();
226 : }
227 :
228 : }
|