libfilezilla
Loading...
Searching...
No Matches
xml.hpp
Go to the documentation of this file.
1#ifndef LIBFILEZILLA_XML_HEADER
2#define LIBFILEZILLA_XML_HEADER
3
10#include <functional>
11#include <string>
12#include <tuple>
13#include <vector>
14
15#include "buffer.hpp"
16#include "logger.hpp"
17
18namespace fz {
19
20namespace xml {
21
/// Types of callbacks when parsing XML (enumeration callback_event).
///
/// NOTE(review): this listing elides the enum's declaration line as well as
/// the lines of the enumerators parsing_instruction ("Parsing instruction,
/// includes the <?xml?> declaration. Until first space in name, rest in
/// value.") and attribute ("Attribute in name and value."). Consult the real
/// header for their exact position within the enumeration.
24{
/// An element is opened, passed name.
26 open,
27
/// An element is closed, passed name.
29 close,
30
33
// NOTE(review): not described in this listing; presumably reports a value
// collected by the parser - confirm against the full header.
36 value,
37
40
// NOTE(review): not described in this listing; presumably reports a
// document type declaration - confirm against the full header.
41 doctype,
42};
43
/// A streaming XML parser.
///
/// Data is passed in through parse(), which accepts partial blocks, and
/// events are delivered through a callback of type callback_t.
53class FZ_PUBLIC_SYMBOL parser final
54{
55public:
/// Callback signature: receives the event type, a path, a name and a value.
/// The value is passed as an rvalue reference, so the callee may move from it.
/// NOTE(review): the meaning of the bool result is not visible in this
/// listing. get_error() is documented as returning an empty string if parsing
/// was stopped by a callback, so presumably returning false stops parsing -
/// confirm.
62 typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string && value)> callback_t;
63
/// Default constructor.
64 parser();
/// Constructs the parser, taking the callback by const reference.
65 parser(callback_t const& cb);
/// Constructs the parser, taking the callback by rvalue reference.
66 parser(callback_t && cb);
67
/// Sets the callback, taking it by const reference.
/// NOTE(review): a set_callback(callback_t &&) overload also exists according
/// to the tooltips of this listing; its line is elided here.
71 void set_callback(callback_t const& cb);
72
/// Processes the block of data. Can be partial.
74 bool parse(std::string_view data);
75
/// NOTE(review): undocumented in this listing; presumably signals that no
/// further data follows and reports whether the document is complete - confirm.
78 bool finalize();
79
/// Returns an error description. Empty if parsing was stopped by a callback.
81 std::string get_error() const;
82
/// Takes new limits for value size and path size.
/// NOTE(review): presumably stored in value_size_limit_ and path_size_limit_,
/// whose default initializers are 10*1024*1024 and 1024*1024 - confirm.
85 void set_limits(size_t value_size_limit, size_t path_size_limit);
86
87private:
// Private helpers, each marked FZ_PRIVATE_SYMBOL.
88 bool FZ_PRIVATE_SYMBOL decode_ref();
89 bool FZ_PRIVATE_SYMBOL is_valid_tag_or_attr(std::string_view s) const;
90 bool FZ_PRIVATE_SYMBOL normalize_value();
91
92 bool FZ_PRIVATE_SYMBOL parse_valid_utf8(std::string_view data);
// Private overload of parse() operating on a [begin, end) character range.
93 bool FZ_PRIVATE_SYMBOL parse(char const* const begin, char const* const end);
94 void FZ_PRIVATE_SYMBOL set_error(std::string_view msg, size_t offset);
95
// Takes data by non-const reference, so it may modify the caller's view.
96 bool FZ_PRIVATE_SYMBOL deduce_encoding(std::string_view & data);
97
// States of the parser; the member s_ below is initialized to state::content.
98 enum class state {
99 content,
100 tag_start, // Just after reading <
101 tag_name, // Reading tag name
102 tag_closing, // In a closing tag, matching the tag name
103 tag_end, // Just before reading >
104
105 attributes,
106 attribute_name,
107 attribute_equal,
108 attribute_quote,
109 attribute_value,
110
111 // <?xml and other parsing instructions
112 pi,
113 pi_value,
114
115 // entity and character references
116 reference,
117 attrvalue_reference,
118
119 comment_start,
120 comment_end,
121
122 doctype_start,
123 doctype_name,
124 doctype_value,
125
126 cdata_start,
127 cdata_end,
128
129 done,
130 error
131 };
132
// The callback of type callback_t held by this parser.
133 callback_t cb_;
134
135 std::string path_;
136 std::vector<size_t> nodes_;
137 std::string name_;
138 std::string value_;
139 size_t processed_{};
140 std::string converted_{};
141
// Size limits with their default values.
142 size_t path_size_limit_{1024*1024};
143 size_t value_size_limit_{10*1024*1024};
144
// Anonymous union: both members share the same storage.
// NOTE(review): presumably which member is in use depends on encoding_ - confirm.
145 union {
146 size_t utf8_state_{};
147 uint32_t utf16_state_;
148 };
149
// Current state, initially state::content.
150 state s_{ state::content };
151
152 enum class encoding {
153 unknown,
154 utf8,
155 utf16le,
156 utf16be
157 };
// Value-initialized, i.e. starts out as encoding::unknown.
158 encoding encoding_{};
159
// Anonymous union: the three members share the same storage, so only one of
// them is meaningful at any time.
// NOTE(review): presumably selected by the current state s_ - confirm.
160 union {
161 size_t tag_match_pos_{};
162 char quotes_;
163 unsigned char dashes_;
164 };
165
// Flags, all initially false.
// NOTE(review): presumably record whether an XML declaration, a doctype and
// an element have been encountered so far - confirm.
166 bool got_xmldecl_{};
167 bool got_doctype_{};
168 bool got_element_{};
169};
170
/// A streamable XML parser that resolves namespace declarations and namespace
/// prefixes.
///
/// Holds a parser instance (parser_) as a member.
183class FZ_PUBLIC_SYMBOL namespace_parser final
184{
185public:
// NOTE(review): lines 186-188 of the original header are elided in this
// listing; presumably the constructors are declared there - confirm.
189
/// Sets the callback, taking it by rvalue reference.
190 void set_callback(parser::callback_t && cb);
/// Sets the callback, taking it by const reference.
191 void set_callback(parser::callback_t const& cb);
192
// Same signatures as parser::parse(std::string_view), parser::finalize() and
// parser::get_error().
// NOTE(review): presumably these behave like their parser counterparts - confirm.
193 bool parse(std::string_view data);
194 bool finalize();
195
196 std::string get_error() const;
197
/// Additional raw callback to look at events before namespace processing
/// takes place. Unlike parser::callback_t, the value is passed as a
/// std::string_view.
199 typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t;
/// Sets the raw callback, taking it by rvalue reference.
200 void set_raw_callback(raw_callback_t && cb);
/// Sets the raw callback, taking it by const reference.
201 void set_raw_callback(raw_callback_t const& cb);
202private:
// Private helpers, each marked FZ_PRIVATE_SYMBOL.
203 std::string_view FZ_PRIVATE_SYMBOL apply_namespaces(std::string_view in);
204 bool FZ_PRIVATE_SYMBOL apply_namespace_to_path();
205
// Has the same parameter list and return type as parser::callback_t.
// NOTE(review): presumably installed as the callback of parser_ - confirm.
206 bool FZ_PRIVATE_SYMBOL on_callback(callback_event type, std::string_view path, std::string_view name, std::string && value);
207
// The parser instance held by this object.
208 parser parser_;
209
// NOTE(review): line 210 of the original header is elided in this listing.
211 raw_callback_t raw_cb_;
212
213 std::string path_;
214 fz::buffer applied_;
215 std::vector<size_t> nodes_;
216 std::vector<std::pair<std::string, std::string>> attributes_;
217 std::vector<std::tuple<size_t, std::string, std::string>> namespaces_;
// Both flags are initially false.
218 bool needs_namespace_expansion_{};
219 bool error_{};
220};
221
/// A slow pretty printer for XML as it is being parsed.
///
/// Abstract class: on_line() is pure virtual and must be implemented by
/// derived classes.
223class FZ_PUBLIC_SYMBOL pretty_printer
224{
225public:
226 pretty_printer() = default;
// Virtual destructor, declared here and defined elsewhere.
227 virtual ~pretty_printer();
228
/// Takes the same parameters as namespace_parser::raw_callback_t but returns
/// void. The second parameter is unnamed.
229 void log(callback_event type, std::string_view, std::string_view name, std::string_view value);
230
231protected:
/// Pure virtual hook receiving a line.
/// NOTE(review): presumably invoked once per line of pretty-printed output -
/// confirm.
232 virtual void on_line(std::string_view line) = 0;
233
234private:
// Private helpers, each marked FZ_PRIVATE_SYMBOL.
235 void FZ_PRIVATE_SYMBOL finish_line();
236 void FZ_PRIVATE_SYMBOL print_line();
237
// Initially zero.
// NOTE(review): presumably the current element nesting depth - confirm.
238 size_t depth_{};
239 std::string value_;
240 std::string line_;
241};
242
/// Pretty-prints XML as it is being parsed to a logger.
///
/// Derives from pretty_printer and overrides on_line().
244class FZ_PUBLIC_SYMBOL pretty_logger final : public pretty_printer
245{
246public:
// NOTE(review): line 247 of the original header is elided in this listing;
// presumably the constructor initializing level_ and logger_ is declared
// there - confirm.
248
249protected:
/// Implements pretty_printer::on_line().
/// NOTE(review): presumably forwards the line to logger_ at level_ - confirm.
250 virtual void on_line(std::string_view line) override;
251
252 logmsg::type level_;
// Held by reference.
253 logger_interface & logger_;
254};
255
256
257}
258}
259
260#endif
Declares fz::buffer.
The buffer class is a simple buffer where data can be appended at the end and consumed at the front....
Definition: buffer.hpp:27
Abstract interface for logging strings.
Definition: logger.hpp:51
A streamable XML parser that resolves namespace declarations and namespace prefixes.
Definition: xml.hpp:184
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t
Additional raw callback to look at events before namespace processing takes place.
Definition: xml.hpp:199
A streaming XML parser.
Definition: xml.hpp:54
void set_callback(callback_t &&cb)
std::string get_error() const
Returns an error description. Empty if parsing was stopped by a callback.
void set_limits(size_t value_size_limit, size_t path_size_limit)
bool parse(std::string_view data)
Processes the block of data. Can be partial.
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string &&value)> callback_t
Definition: xml.hpp:62
Pretty-prints XML as it is being parsed to a logger.
Definition: xml.hpp:245
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:224
Interface for logging.
type
Definition: logger.hpp:16
The namespace used by libfilezilla.
Definition: apply.hpp:17
callback_event
Types of callbacks when parsing XML.
Definition: xml.hpp:24
@ close
An element is closed, passed name.
@ parsing_instruction
Parsing instruction, includes the <?xml?> declaration. Until first space in name, rest in value.
@ open
An element is opened, passed name.
@ attribute
Attribute in name and value.