1 /*************************************************************************
6 * [2011] - [2015] Realm Inc
9 * NOTICE: All information contained herein is, and remains
10 * the property of Realm Incorporated and its suppliers,
11 * if any. The intellectual and technical concepts contained
12 * herein are proprietary to Realm Incorporated
13 * and its suppliers and may be covered by U.S. and Foreign Patents,
14 * patents in process, and are protected by trade secret or copyright law.
15 * Dissemination of this information or reproduction of this material
16 * is strictly forbidden unless prior written permission is obtained
17 * from Realm Incorporated.
19 **************************************************************************/
20 #ifndef REALM_UTIL_URI_HPP
21 #define REALM_UTIL_URI_HPP
29 /// \brief A decomposed URI reference.
31 /// A Uri object contains a URI reference decomposed into its 5 main component
32 /// parts (scheme, authority, path, query, and fragment identifier).
34 /// The decomposition process (as carried out by the constructor) performs a
35 /// maximally lenient parsing of the specified URI reference. It does that
36 /// according to the following regular expression (copied verbatim from
37 /// http://tools.ietf.org/html/rfc3986#appendix-B):
39 /// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
43 /// ------------------------
48 /// 8 Fragment identifier part
50 /// NOTE: Since this regular expression matches every string, every string is
53 /// NOTE: This class does not attempt to perform any level of validation of URI
54 /// references against the grammar specified in the RFC. Such validation could
55 /// be added later, for example through a new `Uri::validate()`.
57 /// For example, the decomposition of
58 /// "http://www.ietf.org/rfc/rfc2396.txt?foo=bar#chp3" is:
63 /// auth -> "//www.ietf.org"
64 /// path -> "/rfc/rfc2396.txt"
65 /// query -> "?foo=bar"
70 /// This class also provides recomposition of URI references from their
71 /// component parts, where the parts can be specified individually, or be a
72 /// result of URI resolution.
74 /// It is important to understand, however, that certain restrictions need to
75 /// apply to each component part in order that the URI reference as a whole is
76 /// self-consistent. More concretely, it is necessary to require that the
77 /// component parts at any time must have values that will be preserved across a
78 /// recomposition -> decomposition cycle.
80 /// The actual restrictions on each component part are specified for the
81 /// corresponding setter-method (e.g., set_scheme()).
83 /// Note that component parts resulting from decomposition, canonicalization, or
84 /// from resolution (resolve()) will automatically (by design of the underlying
85 /// algorithm) adhere to these rules.
87 /// Decomposition, recomposition, canonicalization, and resolution algorithms
88 /// are taken from RFC 3986.
90 /// \sa http://tools.ietf.org/html/rfc3986
95 /// Decompose the specified URI reference into its five main parts.
96 Uri(const std::string&);
98 /// Reconstruct a URI reference from its 5 components.
99 std::string recompose() const;
102 /// Resolve this URI reference against the specified base URI reference
103 /// according to the rules described in section 5.2 of RFC 3986.
105 /// Be aware that a fragment identifier on the base URI reference is never
106 /// carried over to the result. This is in accordance with the RFC.
107 void resolve(const Uri& base, bool strict = true);
110 /// Remove empty URI components. Also, for URI references having either a
111 /// scheme part or an authority part, replace an absent path with "/".
114 /// Get the scheme part of this URI reference including the trailing ":", or
115 /// the empty string if there is no scheme part.
116 const std::string& get_scheme() const;
118 /// Get the authority part of this URI reference including the leading "//",
119 /// or the empty string if there is no authority part.
120 const std::string& get_auth() const;
122 /// Same as get_auth() (with no arguments), but parse the authority component
123 /// into userinfo, host, and port subcomponents.
125 /// \return True if, and only if the authority component was present (i.e.,
126 /// not the empty string). When false is returned, none of the specified
127 /// strings will have been modified.
128 bool get_auth(std::string& userinfo, std::string& host, std::string& port) const;
130 /// Get the path part of this URI reference, or the empty string if there is
132 const std::string& get_path() const;
134 /// Get the query part of this URI reference including the leading "?", or
135 /// the empty string if there is no query part.
136 const std::string& get_query() const;
138 /// Get the fragment identifier of this URI reference including the leading
139 /// "#", or the empty string if there is no fragment identifier.
140 const std::string& get_frag() const;
142 /// The specified string must either be empty or have a final ":". Also, it
143 /// must not contain "/", "?", or "#", nor may it contain more than one ":".
145 /// \throw std::invalid_argument If the specified string is not valid
146 /// according to the specified rules.
147 void set_scheme(const std::string&);
149 /// The specified string must either be empty or have "//" as a
150 /// prefix. Also, it must not contain "?" or "#", nor may it contain "/"
151 /// beyond the first two.
153 /// \throw std::invalid_argument If the specified string is not valid
154 /// according to the specified rules.
155 void set_auth(const std::string&);
157 /// The specified string must not contain "?" or "#".
159 /// \throw std::invalid_argument If the specified string is not valid
160 /// according to the specified rules.
161 void set_path(const std::string&);
163 /// The specified string must either be empty or have a leading "?". Also,
164 /// it must not contain "#".
166 /// \throw std::invalid_argument If the specified string is not valid
167 /// according to the specified rules.
168 void set_query(const std::string&);
171 /// Set the query string to the serialized form of the specified set of
172 /// query parameters. This is slightly faster than set_query(q.encode())
173 /// because it avoids the validity check on the string.
174 void set_query(const Params&);
177 /// The specified string must either be empty or have a leading "#".
179 /// \throw std::invalid_argument If the specified string is not valid
180 /// according to the specified rules.
181 void set_frag(const std::string&);
183 bool is_absolute() const; ///< True if, and only if this URI reference has a non-empty scheme part.
186 std::string m_scheme, m_auth, m_path, m_query, m_frag; ///< Component parts; recompose() concatenates them as-is.
190 /// uri_percent_encode() percent-encodes a string according to
191 /// https://tools.ietf.org/html/rfc3986#section-2.1
192 /// The unescaped input must be UTF-8 encoded. uri_percent_encode() works
193 /// by replacing each UTF-8 character by three characters.
194 /// pct-encoded = "%" HEXDIG HEXDIG
195 /// where HEXDIG HEXDIG is the hexadecimal value of the character.
196 /// HEXDIG is a capital letter for A - F.
197 /// Unreserved characters are not encoded.
198 /// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
200 /// uri_percent_decode() is the inverse of uri_percent_encode().
201 /// uri_percent_decode() throws std::runtime_error if the input
202 /// is invalid and cannot be decoded.
203 std::string uri_percent_encode(const std::string& unescaped);
204 std::string uri_percent_decode(const std::string& escaped);
213 inline std::string Uri::recompose() const
215 return m_scheme + m_auth + m_path + m_query + m_frag;
218 inline const std::string& Uri::get_scheme() const
223 inline const std::string& Uri::get_auth() const
228 inline const std::string& Uri::get_path() const
233 inline const std::string& Uri::get_query() const
238 inline const std::string& Uri::get_frag() const
243 inline bool Uri::is_absolute() const
245 return !m_scheme.empty();
251 #endif // REALM_UTIL_URI_HPP