1 /*************************************************************************
3 * Copyright 2016 Realm Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 **************************************************************************/
19 #ifndef REALM_STRING_HPP
20 #define REALM_STRING_HPP
22 #include <realm/null.hpp>
23 #include <realm/util/features.h>
24 #include <realm/util/optional.hpp>
37 /// A reference to a chunk of character data.
39 /// An instance of this class can be thought of as a type tag on a region of
40 /// memory. It does not own the referenced memory, nor does it in any other way
41 /// attempt to manage the lifetime of it.
43 /// A null character inside the referenced region is considered a part of the string by Realm.
46 /// For compatibility with C-style strings, when a string is stored in a Realm
47 /// database, it is always followed by a terminating null character, regardless
48 /// of whether the string itself has internal null characters. This means that
49 /// when a StringData object is extracted from Realm, the referenced region is
50 /// guaranteed to be followed immediately by an extra null character, but that
51 /// null character is not inside the referenced region. Therefore, all of the
52 /// following forms are guaranteed to return a pointer to a null-terminated string:
57 /// group.get_table_name(...).data()
58 /// table.get_column_name().data()
59 /// table.get_string(...).data()
60 /// table.get_mixed(...).get_string().data()
64 /// Note that in general, no assumptions can be made about what follows a string
65 /// that is referenced by a StringData object, or whether anything follows it at
66 /// all. In particular, the receiver of a StringData object cannot assume that
67 /// the referenced string is followed by a null character unless there is an
68 /// externally provided guarantee.
70 /// This class makes it possible to distinguish between a 'null' reference and a
71 /// reference to the empty string (see is_null()).
77 /// Construct a null reference.
78 StringData() noexcept;
/// Construct a reference to \a data_size bytes starting at \a external_data.
80 /// If \a external_data is 'null', \a data_size must be zero.
81 StringData(const char* external_data, size_t data_size) noexcept;
/// Construct a reference to the characters of an existing std::basic_string.
/// StringData does not own the referenced memory, so the string must outlive
/// the StringData object.
83 template <class T, class A>
84 StringData(const std::basic_string<char, T, A>&);
/// Convert to a std::basic_string holding a copy of the referenced characters.
86 template <class T, class A>
87 operator std::basic_string<char, T, A>() const;
89 // StringData does not store data, callers must manage their own strings.
// Construction from an rvalue string is therefore deleted.
90 template <class T, class A>
91 StringData(const std::basic_string<char, T, A>&&) = delete;
/// Construct from an optional string. An empty Optional gives a null pointer
/// and a zero size (a null reference); otherwise the data and size of the
/// contained string are referenced.
93 template <class T, class A>
94 StringData(const util::Optional<std::basic_string<char, T, A>>&);
/// Construct from the `null` tag type.
/// NOTE(review): presumably yields a null reference -- confirm against the definition.
96 StringData(const null&) noexcept;
98 /// Initialize from a zero terminated C style string. Pass null to construct a null reference.
100 StringData(const char* c_str) noexcept;
/// Return the character at index \a i of the referenced region.
102 char operator[](size_t i) const noexcept;
/// The stored pointer, i.e. the start of the referenced region.
104 const char* data() const noexcept;
/// The stored size, i.e. the number of characters in the referenced region.
105 size_t size() const noexcept;
107 /// Is this a null reference?
///
109 /// An instance of StringData is a null reference when, and only when the
110 /// stored size is zero (size()) and the stored pointer is the null pointer (data()).
///
113 /// In the case of the empty string, the stored size is still zero, but the
114 /// stored pointer is **not** the null pointer. It could for example point
115 /// to the empty string literal. Note that the actual value of the pointer
116 /// is immaterial in this case (as long as it is not zero), because when the
117 /// size is zero, it is an error to dereference the pointer.
///
119 /// Conversion of a StringData object to `bool` yields the logical negation
120 /// of the result of calling this function. In other words, a StringData
121 /// object is converted to true if it is not the null reference, otherwise
122 /// it is converted to false.
123 bool is_null() const noexcept;
/// Two objects compare equal when their sizes match, they agree on being
/// null, and their bytes are equal. A null reference is therefore never equal
/// to the empty string.
125 friend bool operator==(const StringData&, const StringData&) noexcept;
126 friend bool operator!=(const StringData&, const StringData&) noexcept;
129 /// Trivial bytewise lexicographical comparison.
/// operator< special-cases a null left operand with a non-null right operand:
/// null strings are smaller than all other strings.
130 friend bool operator<(const StringData&, const StringData&) noexcept;
131 friend bool operator>(const StringData&, const StringData&) noexcept;
132 friend bool operator<=(const StringData&, const StringData&) noexcept;
133 friend bool operator>=(const StringData&, const StringData&) noexcept;
/// Test whether the argument occurs at the start of this string.
136 bool begins_with(StringData) const noexcept;
/// Test whether the argument occurs at the end of this string.
137 bool ends_with(StringData) const noexcept;
/// Test whether the argument occurs anywhere in this string. A zero-sized
/// argument is always considered contained.
138 bool contains(StringData) const noexcept;
/// Substring test using a precomputed 256-entry character map \a charmap
/// (chars mapped to skip distances), enabling a Boyer-Moore style search.
139 bool contains(StringData d, const std::array<uint8_t, 256> &charmap) const noexcept;
141 // Wildcard matching ('?' for single char, '*' for zero or more chars)
142 // case insensitive version in unicode.hpp
// If either side is null, the result is true only when both are null.
143 bool like(StringData) const noexcept;
/// Sub-range accessors. Each returns a StringData referring to part of this
/// object's region; no data is copied.
///
/// - suffix(n) is substr(size() - n), i.e. the last \a n characters.
/// - substr(i, n) refers to \a n characters starting at offset \a i.
/// - substr(i) refers to everything from offset \a i to the end.
///
146 /// Undefined behavior if \a n, \a i, or <tt>i+n</tt> is greater than size().
148 StringData prefix(size_t n) const noexcept;
149 StringData suffix(size_t n) const noexcept;
150 StringData substr(size_t i, size_t n) const noexcept;
151 StringData substr(size_t i) const noexcept;
/// Stream insertion: the definition walks the referenced characters from
/// data() up to data() + size().
154 template <class C, class T>
155 friend std::basic_ostream<C, T>& operator<<(std::basic_ostream<C, T>&, const StringData&);
/// Explicit conversion to bool; documented under is_null() as the logical
/// negation of is_null().
157 explicit operator bool() const noexcept;
// Wildcard matcher used by like(): like() forwards to matchlike(*this, pattern)
// once both operands are known to be non-null.
163 static bool matchlike(const StringData& text, const StringData& pattern) noexcept;
// NOTE(review): presumably the case-insensitive counterpart of matchlike,
// taking upper- and lower-cased forms of the pattern -- confirm against the
// definition.
164 static bool matchlike_ins(const StringData& text, const StringData& pattern_upper,
165 const StringData& pattern_lower) noexcept;
// Friend declarations granting string_like_ins access to the private members.
// NOTE(review): presumably the case-insensitive `like` mentioned as living in
// unicode.hpp -- confirm.
167 friend bool string_like_ins(StringData, StringData) noexcept;
168 friend bool string_like_ins(StringData, StringData, StringData) noexcept;
/// Default constructor. Documented at its declaration as constructing a null
/// reference.
174 inline StringData::StringData() noexcept
/// Construct a reference to \a data_size bytes starting at \a external_data.
/// The pointer is stored as given; the data itself is not copied.
180 inline StringData::StringData(const char* external_data, size_t data_size) noexcept
181 : m_data(external_data)
// Check the documented precondition: a null pointer is only acceptable
// together with a zero size.
184 REALM_ASSERT_DEBUG(external_data || data_size == 0);
/// Construct a StringData referring to the characters of \a s.
/// StringData does not own the referenced memory, so \a s must outlive the
/// constructed object.
187 template <class T, class A>
188 inline StringData::StringData(const std::basic_string<char, T, A>& s)
/// Conversion to std::basic_string: returns a new string built from the
/// m_size characters starting at m_data, i.e. a copy of the referenced region.
194 template <class T, class A>
195 inline StringData::operator std::basic_string<char, T, A>() const
197 return std::basic_string<char, T, A>(m_data, m_size);
/// Construct from an optional string.
/// When \a s holds a value, its data pointer and size are referenced; when it
/// is empty, the pointer is null and the size is zero (a null reference).
200 template <class T, class A>
201 inline StringData::StringData(const util::Optional<std::basic_string<char, T, A>>& s)
202 : m_data(s ? s->data() : nullptr)
203 , m_size(s ? s->size() : 0)
/// Construct from the `null` tag type.
/// NOTE(review): presumably produces a null reference (null pointer, zero
/// size) -- confirm.
207 inline StringData::StringData(const null&) noexcept
/// Construct from a zero terminated C style string.
/// The size is the number of characters preceding the terminating null, as
/// computed by std::char_traits<char>::length.
/// NOTE(review): char_traits::length must not be given a null pointer, and the
/// declaration says null is an accepted argument, so this assignment is
/// presumably guarded by a null check -- confirm.
213 inline StringData::StringData(const char* c_str) noexcept
218 m_size = std::char_traits<char>::length(c_str);
/// Return the character at index \a i of the referenced region.
221 inline char StringData::operator[](size_t i) const noexcept
/// Return the stored pointer to the start of the referenced region.
226 inline const char* StringData::data() const noexcept
/// Return the stored size of the referenced region.
231 inline size_t StringData::size() const noexcept
/// Tell whether this is a null reference. The declaration documents how a null
/// reference differs from a reference to the empty string.
236 inline bool StringData::is_null() const noexcept
/// Equality comparison.
/// True when both operands have the same size, agree on being null, and
/// safe_equal reports the bytes of \a a equal to those starting at b.m_data.
/// Because nullness must match, a null reference is not equal to the empty
/// string even though both have size zero.
241 inline bool operator==(const StringData& a, const StringData& b) noexcept
243 return a.m_size == b.m_size && a.is_null() == b.is_null() && safe_equal(a.m_data, a.m_data + a.m_size, b.m_data);
/// Inequality comparison.
/// NOTE(review): presumably the negation of operator== -- confirm.
246 inline bool operator!=(const StringData& a, const StringData& b) noexcept
/// Less-than comparison.
/// A null left operand paired with a non-null right operand is handled first;
/// otherwise the two byte ranges are compared with
/// std::lexicographical_compare (plain `char` comparison).
251 inline bool operator<(const StringData& a, const StringData& b) noexcept
253 if (a.is_null() && !b.is_null()) {
254 // Null strings are smaller than all other strings, and not
255 // equal to empty strings.
258 return std::lexicographical_compare(a.m_data, a.m_data + a.m_size, b.m_data, b.m_data + b.m_size);
/// Greater-than comparison.
/// NOTE(review): presumably expressed through operator< with the operands
/// swapped -- confirm.
261 inline bool operator>(const StringData& a, const StringData& b) noexcept
/// Less-than-or-equal comparison.
/// NOTE(review): presumably expressed through operator< -- confirm.
266 inline bool operator<=(const StringData& a, const StringData& b) noexcept
/// Greater-than-or-equal comparison.
/// NOTE(review): presumably expressed through operator< -- confirm.
271 inline bool operator>=(const StringData& a, const StringData& b) noexcept
/// Test whether this string starts with \a d.
276 inline bool StringData::begins_with(StringData d) const noexcept
// A null receiver combined with a non-null argument is treated as a special
// case before any bytes are compared.
278 if (is_null() && !d.is_null())
// The argument must fit, and the first d.m_size bytes of this string must
// equal the bytes of d.
280 return d.m_size <= m_size && safe_equal(m_data, m_data + d.m_size, d.m_data);
/// Test whether this string ends with \a d.
283 inline bool StringData::ends_with(StringData d) const noexcept
// A null receiver combined with a non-null argument is treated as a special
// case before any bytes are compared.
285 if (is_null() && !d.is_null())
// The argument must fit, and the last d.m_size bytes of this string must
// equal the bytes of d. The size check comes first, so the pointer
// subtraction is only evaluated when d.m_size <= m_size.
287 return d.m_size <= m_size && safe_equal(m_data + m_size - d.m_size, m_data + m_size, d.m_data);
/// Test whether \a d occurs anywhere inside this string.
290 inline bool StringData::contains(StringData d) const noexcept
// A null receiver combined with a non-null argument is treated as a special
// case before searching.
292 if (is_null() && !d.is_null())
// A zero-sized needle is always contained; otherwise std::search must find
// the needle before the end of this string's byte range.
295 return d.m_size == 0 || std::search(m_data, m_data + m_size, d.m_data, d.m_data + d.m_size) != m_data + m_size;
298 /// This method takes an array that maps chars to distance that can be moved (and zero for chars not in needle),
299 /// allowing the method to apply Boyer-Moore for quick substring search
300 /// The map is calculated in the StringNode<Contains> class (so it can be reused across searches)
///
/// \param d the needle to look for in this string.
/// \param charmap 256-entry table indexed by character value, as described
/// above.
301 inline bool StringData::contains(StringData d, const std::array<uint8_t, 256> &charmap) const noexcept
// A null receiver combined with a non-null needle is treated as a special
// case before searching.
303 if (is_null() && !d.is_null())
306 size_t needle_size = d.size();
// The empty needle is special-cased here, which also keeps the
// `d.size()-1` computation below from being evaluated for a zero size.
307 if (needle_size == 0)
310 // Prepare vars to avoid lookups in loop
311 size_t last_char_pos = d.size()-1;
312 unsigned char lastChar = d[last_char_pos];
314 // Do Boyer-Moore search
// p is the haystack index aligned with the last character of the needle.
315 size_t p = last_char_pos;
317 unsigned char c = m_data[p]; // Get candidate for last char
// The window of needle_size bytes that ends at haystack index p.
320 StringData candidate = substr(p-needle_size+1, needle_size);
322 return true; // text found!
325 // If we don't have a match, see how far we can move char_pos
327 p += needle_size; // char was not present in search string
/// Wildcard match of this string against the pattern \a d.
/// If either side is null, the result is true only when both are null;
/// otherwise the decision is delegated to matchlike(*this, d).
335 inline bool StringData::like(StringData d) const noexcept
337 if (is_null() || d.is_null()) {
338 return (is_null() && d.is_null());
341 return matchlike(*this, d);
/// Return a StringData referring to the leading \a n characters.
/// The declaration documents \a n greater than size() as undefined behavior.
344 inline StringData StringData::prefix(size_t n) const noexcept
/// Return a StringData referring to the last \a n characters, i.e. the
/// sub-range starting at offset m_size - n.
/// \a n must not exceed the size: the unsigned subtraction would otherwise
/// wrap around.
349 inline StringData StringData::suffix(size_t n) const noexcept
351 return substr(m_size - n);
/// Return a StringData referring to \a n characters starting at offset \a i.
/// No bounds checking is performed here; the result simply points at
/// m_data + i with size \a n.
354 inline StringData StringData::substr(size_t i, size_t n) const noexcept
356 return StringData(m_data + i, n);
/// Return a StringData referring to everything from offset \a i to the end,
/// i.e. substr(i, m_size - i).
/// \a i must not exceed the size: the unsigned subtraction would otherwise
/// wrap around.
359 inline StringData StringData::substr(size_t i) const noexcept
361 return substr(i, m_size - i);
/// Stream insertion for StringData.
/// Walks the referenced characters one by one, from d.m_data up to (but not
/// including) d.m_data + d.m_size; a zero-sized object produces no iterations.
364 template <class C, class T>
365 inline std::basic_ostream<C, T>& operator<<(std::basic_ostream<C, T>& out, const StringData& d)
367 for (const char* i = d.m_data; i != d.m_data + d.m_size; ++i)
/// Explicit conversion to bool. Documented under is_null() as the logical
/// negation of is_null(): true when this is not the null reference.
372 inline StringData::operator bool() const noexcept
379 #endif // REALM_STRING_HPP