1//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_STRINGREF_H
10#define LLVM_ADT_STRINGREF_H
11
12#include "llvm/ADT/DenseMapInfo.h"
13#include "llvm/ADT/STLFunctionalExtras.h"
14#include "llvm/ADT/iterator_range.h"
15#include "llvm/Support/Compiler.h"
16#include <algorithm>
17#include <cassert>
18#include <cstddef>
19#include <cstring>
20#include <iterator>
21#include <limits>
22#include <string>
23#include <string_view>
24#include <type_traits>
25#include <utility>
26
27namespace llvm {
28
29 class APInt;
30 class hash_code;
31 template <typename T> class SmallVectorImpl;
32 class StringRef;
33
34 /// Helper functions for StringRef::getAsInteger.
35 LLVM_ABI bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
36 unsigned long long &Result);
37
38 LLVM_ABI bool getAsSignedInteger(StringRef Str, unsigned Radix,
39 long long &Result);
40
41 LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
42 unsigned long long &Result);
43 LLVM_ABI bool consumeSignedInteger(StringRef &Str, unsigned Radix,
44 long long &Result);
45
46 /// StringRef - Represent a constant reference to a string, i.e. a character
47 /// array and a length, which need not be null terminated.
48 ///
49 /// This class does not own the string data, it is expected to be used in
50 /// situations where the character data resides in some other buffer, whose
51 /// lifetime extends past that of the StringRef. For this reason, it is not in
52 /// general safe to store a StringRef.
53 class LLVM_GSL_POINTER StringRef {
54 public:
55 static constexpr size_t npos = ~size_t(0);
56
57 using iterator = const char *;
58 using const_iterator = const char *;
59 using size_type = size_t;
60 using value_type = char;
61 using reverse_iterator = std::reverse_iterator<iterator>;
62 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
63
64 private:
65 /// The start of the string, in an external buffer.
66 const char *Data = nullptr;
67
68 /// The length of the string.
69 size_t Length = 0;
70
71 // Workaround memcmp issue with null pointers (undefined behavior)
72 // by providing a specialized version
73 static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
74 if (Length == 0) { return 0; }
75 return ::memcmp(s1: Lhs,s2: Rhs,n: Length);
76 }
77
78 public:
79 /// @name Constructors
80 /// @{
81
82 /// Construct an empty string ref.
83 /*implicit*/ StringRef() = default;
84
85 /// Disable conversion from nullptr. This prevents things like
86 /// if (S == nullptr)
87 StringRef(std::nullptr_t) = delete;
88
89 /// Construct a string ref from a cstring.
90 /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND)
91 : Data(Str), Length(Str ?
92 // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
93#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
94 __builtin_strlen(Str)
95#else
96 std::char_traits<char>::length(s: Str)
97#endif
98 : 0) {
99 }
100
101 /// Construct a string ref from a pointer and length.
102 /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND,
103 size_t length)
104 : Data(data), Length(length) {}
105
106 /// Construct a string ref from an std::string.
107 /*implicit*/ StringRef(const std::string &Str)
108 : Data(Str.data()), Length(Str.length()) {}
109
110 /// Construct a string ref from an std::string_view.
111 /*implicit*/ constexpr StringRef(std::string_view Str)
112 : Data(Str.data()), Length(Str.size()) {}
113
114 /// @}
115 /// @name Iterators
116 /// @{
117
118 iterator begin() const { return data(); }
119
120 iterator end() const { return data() + size(); }
121
122 reverse_iterator rbegin() const {
123 return std::make_reverse_iterator(i: end());
124 }
125
126 reverse_iterator rend() const {
127 return std::make_reverse_iterator(i: begin());
128 }
129
130 const unsigned char *bytes_begin() const {
131 return reinterpret_cast<const unsigned char *>(begin());
132 }
133 const unsigned char *bytes_end() const {
134 return reinterpret_cast<const unsigned char *>(end());
135 }
136 iterator_range<const unsigned char *> bytes() const {
137 return make_range(x: bytes_begin(), y: bytes_end());
138 }
139
140 /// @}
141 /// @name String Operations
142 /// @{
143
144 /// data - Get a pointer to the start of the string (which may not be null
145 /// terminated).
146 [[nodiscard]] constexpr const char *data() const { return Data; }
147
148 /// empty - Check if the string is empty.
149 [[nodiscard]] constexpr bool empty() const { return size() == 0; }
150
151 /// size - Get the string size.
152 [[nodiscard]] constexpr size_t size() const { return Length; }
153
154 /// front - Get the first character in the string.
155 [[nodiscard]] char front() const {
156 assert(!empty());
157 return data()[0];
158 }
159
160 /// back - Get the last character in the string.
161 [[nodiscard]] char back() const {
162 assert(!empty());
163 return data()[size() - 1];
164 }
165
166 // copy - Allocate copy in Allocator and return StringRef to it.
167 template <typename Allocator>
168 [[nodiscard]] StringRef copy(Allocator &A) const {
169 // Don't request a length 0 copy from the allocator.
170 if (empty())
171 return StringRef();
172 char *S = A.template Allocate<char>(size());
173 std::copy(first: begin(), last: end(), result: S);
174 return StringRef(S, size());
175 }
176
177 /// Check for string equality, ignoring case.
178 [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
179 return size() == RHS.size() && compare_insensitive(RHS) == 0;
180 }
181
182 /// compare - Compare two strings; the result is negative, zero, or positive
183 /// if this string is lexicographically less than, equal to, or greater than
184 /// the \p RHS.
185 [[nodiscard]] int compare(StringRef RHS) const {
186 // Check the prefix for a mismatch.
187 if (int Res =
188 compareMemory(Lhs: data(), Rhs: RHS.data(), Length: std::min(a: size(), b: RHS.size())))
189 return Res < 0 ? -1 : 1;
190
191 // Otherwise the prefixes match, so we only need to check the lengths.
192 if (size() == RHS.size())
193 return 0;
194 return size() < RHS.size() ? -1 : 1;
195 }
196
197 /// Compare two strings, ignoring case.
198 [[nodiscard]] LLVM_ABI int compare_insensitive(StringRef RHS) const;
199
200 /// compare_numeric - Compare two strings, treating sequences of digits as
201 /// numbers.
202 [[nodiscard]] LLVM_ABI int compare_numeric(StringRef RHS) const;
203
204 /// Determine the edit distance between this string and another
205 /// string.
206 ///
207 /// \param Other the string to compare this string against.
208 ///
209 /// \param AllowReplacements whether to allow character
210 /// replacements (change one character into another) as a single
211 /// operation, rather than as two operations (an insertion and a
212 /// removal).
213 ///
214 /// \param MaxEditDistance If non-zero, the maximum edit distance that
215 /// this routine is allowed to compute. If the edit distance will exceed
216 /// that maximum, returns \c MaxEditDistance+1.
217 ///
218 /// \returns the minimum number of character insertions, removals,
219 /// or (if \p AllowReplacements is \c true) replacements needed to
220 /// transform one of the given strings into the other. If zero,
221 /// the strings are identical.
222 [[nodiscard]] LLVM_ABI unsigned
223 edit_distance(StringRef Other, bool AllowReplacements = true,
224 unsigned MaxEditDistance = 0) const;
225
226 [[nodiscard]] LLVM_ABI unsigned
227 edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
228 unsigned MaxEditDistance = 0) const;
229
230 /// str - Get the contents as an std::string.
231 [[nodiscard]] std::string str() const {
232 if (!data())
233 return std::string();
234 return std::string(data(), size());
235 }
236
237 /// @}
238 /// @name Operator Overloads
239 /// @{
240
241 [[nodiscard]] char operator[](size_t Index) const {
242 assert(Index < size() && "Invalid index!");
243 return data()[Index];
244 }
245
246 /// Disallow accidental assignment from a temporary std::string.
247 ///
248 /// The declaration here is extra complicated so that `stringRef = {}`
249 /// and `stringRef = "abc"` continue to select the move assignment operator.
250 template <typename T>
251 std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
252 operator=(T &&Str) = delete;
253
254 /// @}
255 /// @name Type Conversions
256 /// @{
257
258 constexpr operator std::string_view() const {
259 return std::string_view(data(), size());
260 }
261
262 /// @}
263 /// @name String Predicates
264 /// @{
265
266 /// Check if this string starts with the given \p Prefix.
267 [[nodiscard]] bool starts_with(StringRef Prefix) const {
268 return size() >= Prefix.size() &&
269 compareMemory(Lhs: data(), Rhs: Prefix.data(), Length: Prefix.size()) == 0;
270 }
271 [[nodiscard]] bool starts_with(char Prefix) const {
272 return !empty() && front() == Prefix;
273 }
274
275 /// Check if this string starts with the given \p Prefix, ignoring case.
276 [[nodiscard]] LLVM_ABI bool starts_with_insensitive(StringRef Prefix) const;
277
278 /// Check if this string ends with the given \p Suffix.
279 [[nodiscard]] bool ends_with(StringRef Suffix) const {
280 return size() >= Suffix.size() &&
281 compareMemory(Lhs: end() - Suffix.size(), Rhs: Suffix.data(),
282 Length: Suffix.size()) == 0;
283 }
284 [[nodiscard]] bool ends_with(char Suffix) const {
285 return !empty() && back() == Suffix;
286 }
287
288 /// Check if this string ends with the given \p Suffix, ignoring case.
289 [[nodiscard]] LLVM_ABI bool ends_with_insensitive(StringRef Suffix) const;
290
291 /// @}
292 /// @name String Searching
293 /// @{
294
295 /// Search for the first character \p C in the string.
296 ///
297 /// \returns The index of the first occurrence of \p C, or npos if not
298 /// found.
299 [[nodiscard]] size_t find(char C, size_t From = 0) const {
300 return std::string_view(*this).find(c: C, pos: From);
301 }
302
303 /// Search for the first character \p C in the string, ignoring case.
304 ///
305 /// \returns The index of the first occurrence of \p C, or npos if not
306 /// found.
307 [[nodiscard]] LLVM_ABI size_t find_insensitive(char C,
308 size_t From = 0) const;
309
310 /// Search for the first character satisfying the predicate \p F
311 ///
312 /// \returns The index of the first character satisfying \p F starting from
313 /// \p From, or npos if not found.
314 [[nodiscard]] size_t find_if(function_ref<bool(char)> F,
315 size_t From = 0) const {
316 StringRef S = drop_front(N: From);
317 while (!S.empty()) {
318 if (F(S.front()))
319 return size() - S.size();
320 S = S.drop_front();
321 }
322 return npos;
323 }
324
325 /// Search for the first character not satisfying the predicate \p F
326 ///
327 /// \returns The index of the first character not satisfying \p F starting
328 /// from \p From, or npos if not found.
329 [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
330 size_t From = 0) const {
331 return find_if(F: [F](char c) { return !F(c); }, From);
332 }
333
334 /// Search for the first string \p Str in the string.
335 ///
336 /// \returns The index of the first occurrence of \p Str, or npos if not
337 /// found.
338 [[nodiscard]] LLVM_ABI size_t find(StringRef Str, size_t From = 0) const;
339
340 /// Search for the first string \p Str in the string, ignoring case.
341 ///
342 /// \returns The index of the first occurrence of \p Str, or npos if not
343 /// found.
344 [[nodiscard]] LLVM_ABI size_t find_insensitive(StringRef Str,
345 size_t From = 0) const;
346
347 /// Search for the last character \p C in the string.
348 ///
349 /// \returns The index of the last occurrence of \p C, or npos if not
350 /// found.
351 [[nodiscard]] size_t rfind(char C, size_t From = npos) const {
352 size_t I = std::min(a: From, b: size());
353 while (I) {
354 --I;
355 if (data()[I] == C)
356 return I;
357 }
358 return npos;
359 }
360
361 /// Search for the last character \p C in the string, ignoring case.
362 ///
363 /// \returns The index of the last occurrence of \p C, or npos if not
364 /// found.
365 [[nodiscard]] LLVM_ABI size_t rfind_insensitive(char C,
366 size_t From = npos) const;
367
368 /// Search for the last string \p Str in the string.
369 ///
370 /// \returns The index of the last occurrence of \p Str, or npos if not
371 /// found.
372 [[nodiscard]] LLVM_ABI size_t rfind(StringRef Str) const;
373
374 /// Search for the last string \p Str in the string, ignoring case.
375 ///
376 /// \returns The index of the last occurrence of \p Str, or npos if not
377 /// found.
378 [[nodiscard]] LLVM_ABI size_t rfind_insensitive(StringRef Str) const;
379
380 /// Find the first character in the string that is \p C, or npos if not
381 /// found. Same as find.
382 [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
383 return find(C, From);
384 }
385
386 /// Find the first character in the string that is in \p Chars, or npos if
387 /// not found.
388 ///
389 /// Complexity: O(size() + Chars.size())
390 [[nodiscard]] LLVM_ABI size_t find_first_of(StringRef Chars,
391 size_t From = 0) const;
392
393 /// Find the first character in the string that is not \p C or npos if not
394 /// found.
395 [[nodiscard]] LLVM_ABI size_t find_first_not_of(char C,
396 size_t From = 0) const;
397
398 /// Find the first character in the string that is not in the string
399 /// \p Chars, or npos if not found.
400 ///
401 /// Complexity: O(size() + Chars.size())
402 [[nodiscard]] LLVM_ABI size_t find_first_not_of(StringRef Chars,
403 size_t From = 0) const;
404
405 /// Find the last character in the string that is \p C, or npos if not
406 /// found.
407 [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
408 return rfind(C, From);
409 }
410
411 /// Find the last character in the string that is in \p C, or npos if not
412 /// found.
413 ///
414 /// Complexity: O(size() + Chars.size())
415 [[nodiscard]] LLVM_ABI size_t find_last_of(StringRef Chars,
416 size_t From = npos) const;
417
418 /// Find the last character in the string that is not \p C, or npos if not
419 /// found.
420 [[nodiscard]] LLVM_ABI size_t find_last_not_of(char C,
421 size_t From = npos) const;
422
423 /// Find the last character in the string that is not in \p Chars, or
424 /// npos if not found.
425 ///
426 /// Complexity: O(size() + Chars.size())
427 [[nodiscard]] LLVM_ABI size_t find_last_not_of(StringRef Chars,
428 size_t From = npos) const;
429
430 /// Return true if the given string is a substring of *this, and false
431 /// otherwise.
432 [[nodiscard]] bool contains(StringRef Other) const {
433 return find(Str: Other) != npos;
434 }
435
436 /// Return true if the given character is contained in *this, and false
437 /// otherwise.
438 [[nodiscard]] bool contains(char C) const {
439 return find_first_of(C) != npos;
440 }
441
442 /// Return true if the given string is a substring of *this, and false
443 /// otherwise.
444 [[nodiscard]] bool contains_insensitive(StringRef Other) const {
445 return find_insensitive(Str: Other) != npos;
446 }
447
448 /// Return true if the given character is contained in *this, and false
449 /// otherwise.
450 [[nodiscard]] bool contains_insensitive(char C) const {
451 return find_insensitive(C) != npos;
452 }
453
454 /// @}
455 /// @name Helpful Algorithms
456 /// @{
457
458 /// Return the number of occurrences of \p C in the string.
459 [[nodiscard]] size_t count(char C) const {
460 size_t Count = 0;
461 for (size_t I = 0; I != size(); ++I)
462 if (data()[I] == C)
463 ++Count;
464 return Count;
465 }
466
467 /// Return the number of non-overlapped occurrences of \p Str in
468 /// the string.
469 LLVM_ABI size_t count(StringRef Str) const;
470
471 /// Parse the current string as an integer of the specified radix. If
472 /// \p Radix is specified as zero, this does radix autosensing using
473 /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
474 ///
475 /// If the string is invalid or if only a subset of the string is valid,
476 /// this returns true to signify the error. The string is considered
477 /// erroneous if empty or if it overflows T.
478 template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
479 if constexpr (std::numeric_limits<T>::is_signed) {
480 long long LLVal;
481 if (getAsSignedInteger(Str: *this, Radix, Result&: LLVal) ||
482 static_cast<T>(LLVal) != LLVal)
483 return true;
484 Result = LLVal;
485 } else {
486 unsigned long long ULLVal;
487 // The additional cast to unsigned long long is required to avoid the
488 // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
489 // 'unsigned __int64' when instantiating getAsInteger with T = bool.
490 if (getAsUnsignedInteger(Str: *this, Radix, Result&: ULLVal) ||
491 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
492 return true;
493 Result = ULLVal;
494 }
495 return false;
496 }
497
498 /// Parse the current string as an integer of the specified radix. If
499 /// \p Radix is specified as zero, this does radix autosensing using
500 /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
501 ///
502 /// If the string does not begin with a number of the specified radix,
503 /// this returns true to signify the error. The string is considered
504 /// erroneous if empty or if it overflows T.
505 /// The portion of the string representing the discovered numeric value
506 /// is removed from the beginning of the string.
507 template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
508 if constexpr (std::numeric_limits<T>::is_signed) {
509 long long LLVal;
510 if (consumeSignedInteger(Str&: *this, Radix, Result&: LLVal) ||
511 static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
512 return true;
513 Result = LLVal;
514 } else {
515 unsigned long long ULLVal;
516 if (consumeUnsignedInteger(Str&: *this, Radix, Result&: ULLVal) ||
517 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
518 return true;
519 Result = ULLVal;
520 }
521 return false;
522 }
523
524 /// Parse the current string as an integer of the specified \p Radix, or of
525 /// an autosensed radix if the \p Radix given is 0. The current value in
526 /// \p Result is discarded, and the storage is changed to be wide enough to
527 /// store the parsed integer.
528 ///
529 /// \returns true if the string does not solely consist of a valid
530 /// non-empty number in the appropriate base.
531 ///
532 /// APInt::fromString is superficially similar but assumes the
533 /// string is well-formed in the given radix.
534 LLVM_ABI bool getAsInteger(unsigned Radix, APInt &Result) const;
535
536 /// Parse the current string as an integer of the specified \p Radix. If
537 /// \p Radix is specified as zero, this does radix autosensing using
538 /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
539 ///
540 /// If the string does not begin with a number of the specified radix,
541 /// this returns true to signify the error. The string is considered
542 /// erroneous if empty.
543 /// The portion of the string representing the discovered numeric value
544 /// is removed from the beginning of the string.
545 LLVM_ABI bool consumeInteger(unsigned Radix, APInt &Result);
546
547 /// Parse the current string as an IEEE double-precision floating
548 /// point value. The string must be a well-formed double.
549 ///
550 /// If \p AllowInexact is false, the function will fail if the string
551 /// cannot be represented exactly. Otherwise, the function only fails
552 /// in case of an overflow or underflow, or an invalid floating point
553 /// representation.
554 LLVM_ABI bool getAsDouble(double &Result, bool AllowInexact = true) const;
555
556 /// @}
557 /// @name String Operations
558 /// @{
559
560 // Convert the given ASCII string to lowercase.
561 [[nodiscard]] LLVM_ABI std::string lower() const;
562
563 /// Convert the given ASCII string to uppercase.
564 [[nodiscard]] LLVM_ABI std::string upper() const;
565
566 /// @}
567 /// @name Substring Operations
568 /// @{
569
570 /// Return a reference to the substring from [Start, Start + N).
571 ///
572 /// \param Start The index of the starting character in the substring; if
573 /// the index is npos or greater than the length of the string then the
574 /// empty substring will be returned.
575 ///
576 /// \param N The number of characters to included in the substring. If N
577 /// exceeds the number of characters remaining in the string, the string
578 /// suffix (starting with \p Start) will be returned.
579 [[nodiscard]] constexpr StringRef substr(size_t Start,
580 size_t N = npos) const {
581 Start = std::min(a: Start, b: size());
582 return StringRef(data() + Start, std::min(a: N, b: size() - Start));
583 }
584
585 /// Return a StringRef equal to 'this' but with only the first \p N
586 /// elements remaining. If \p N is greater than the length of the
587 /// string, the entire string is returned.
588 [[nodiscard]] StringRef take_front(size_t N = 1) const {
589 if (N >= size())
590 return *this;
591 return drop_back(N: size() - N);
592 }
593
594 /// Return a StringRef equal to 'this' but with only the last \p N
595 /// elements remaining. If \p N is greater than the length of the
596 /// string, the entire string is returned.
597 [[nodiscard]] StringRef take_back(size_t N = 1) const {
598 if (N >= size())
599 return *this;
600 return drop_front(N: size() - N);
601 }
602
603 /// Return the longest prefix of 'this' such that every character
604 /// in the prefix satisfies the given predicate.
605 [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
606 return substr(Start: 0, N: find_if_not(F));
607 }
608
609 /// Return the longest prefix of 'this' such that no character in
610 /// the prefix satisfies the given predicate.
611 [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
612 return substr(Start: 0, N: find_if(F));
613 }
614
615 /// Return a StringRef equal to 'this' but with the first \p N elements
616 /// dropped.
617 [[nodiscard]] StringRef drop_front(size_t N = 1) const {
618 assert(size() >= N && "Dropping more elements than exist");
619 return substr(Start: N);
620 }
621
622 /// Return a StringRef equal to 'this' but with the last \p N elements
623 /// dropped.
624 [[nodiscard]] StringRef drop_back(size_t N = 1) const {
625 assert(size() >= N && "Dropping more elements than exist");
626 return substr(Start: 0, N: size()-N);
627 }
628
629 /// Return a StringRef equal to 'this', but with all characters satisfying
630 /// the given predicate dropped from the beginning of the string.
631 [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
632 return substr(Start: find_if_not(F));
633 }
634
635 /// Return a StringRef equal to 'this', but with all characters not
636 /// satisfying the given predicate dropped from the beginning of the string.
637 [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
638 return substr(Start: find_if(F));
639 }
640
641 /// Returns true if this StringRef has the given prefix and removes that
642 /// prefix.
643 bool consume_front(StringRef Prefix) {
644 if (!starts_with(Prefix))
645 return false;
646
647 *this = substr(Start: Prefix.size());
648 return true;
649 }
650
651 /// Returns true if this StringRef has the given prefix, ignoring case,
652 /// and removes that prefix.
653 bool consume_front_insensitive(StringRef Prefix) {
654 if (!starts_with_insensitive(Prefix))
655 return false;
656
657 *this = substr(Start: Prefix.size());
658 return true;
659 }
660
661 /// Returns true if this StringRef has the given suffix and removes that
662 /// suffix.
663 bool consume_back(StringRef Suffix) {
664 if (!ends_with(Suffix))
665 return false;
666
667 *this = substr(Start: 0, N: size() - Suffix.size());
668 return true;
669 }
670
671 /// Returns true if this StringRef has the given suffix, ignoring case,
672 /// and removes that suffix.
673 bool consume_back_insensitive(StringRef Suffix) {
674 if (!ends_with_insensitive(Suffix))
675 return false;
676
677 *this = substr(Start: 0, N: size() - Suffix.size());
678 return true;
679 }
680
681 /// Return a reference to the substring from [Start, End).
682 ///
683 /// \param Start The index of the starting character in the substring; if
684 /// the index is npos or greater than the length of the string then the
685 /// empty substring will be returned.
686 ///
687 /// \param End The index following the last character to include in the
688 /// substring. If this is npos or exceeds the number of characters
689 /// remaining in the string, the string suffix (starting with \p Start)
690 /// will be returned. If this is less than \p Start, an empty string will
691 /// be returned.
692 [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
693 Start = std::min(a: Start, b: size());
694 End = std::clamp(val: End, lo: Start, hi: size());
695 return StringRef(data() + Start, End - Start);
696 }
697
698 /// Split into two substrings around the first occurrence of a separator
699 /// character.
700 ///
701 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
702 /// such that (*this == LHS + Separator + RHS) is true and RHS is
703 /// maximal. If \p Separator is not in the string, then the result is a
704 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
705 ///
706 /// \param Separator The character to split on.
707 /// \returns The split substrings.
708 [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
709 return split(Separator: StringRef(&Separator, 1));
710 }
711
712 /// Split into two substrings around the first occurrence of a separator
713 /// string.
714 ///
715 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
716 /// such that (*this == LHS + Separator + RHS) is true and RHS is
717 /// maximal. If \p Separator is not in the string, then the result is a
718 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
719 ///
720 /// \param Separator - The string to split on.
721 /// \return - The split substrings.
722 [[nodiscard]] std::pair<StringRef, StringRef>
723 split(StringRef Separator) const {
724 size_t Idx = find(Str: Separator);
725 if (Idx == npos)
726 return std::make_pair(x: *this, y: StringRef());
727 return std::make_pair(x: slice(Start: 0, End: Idx), y: substr(Start: Idx + Separator.size()));
728 }
729
730 /// Split into two substrings around the last occurrence of a separator
731 /// string.
732 ///
733 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
734 /// such that (*this == LHS + Separator + RHS) is true and RHS is
735 /// minimal. If \p Separator is not in the string, then the result is a
736 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
737 ///
738 /// \param Separator - The string to split on.
739 /// \return - The split substrings.
740 [[nodiscard]] std::pair<StringRef, StringRef>
741 rsplit(StringRef Separator) const {
742 size_t Idx = rfind(Str: Separator);
743 if (Idx == npos)
744 return std::make_pair(x: *this, y: StringRef());
745 return std::make_pair(x: slice(Start: 0, End: Idx), y: substr(Start: Idx + Separator.size()));
746 }
747
748 /// Split into substrings around the occurrences of a separator string.
749 ///
750 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
751 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
752 /// elements are added to A.
753 /// If \p KeepEmpty is false, empty strings are not added to \p A. They
754 /// still count when considering \p MaxSplit
755 /// An useful invariant is that
756 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
757 ///
758 /// \param A - Where to put the substrings.
759 /// \param Separator - The string to split on.
760 /// \param MaxSplit - The maximum number of times the string is split.
761 /// \param KeepEmpty - True if empty substring should be added.
762 LLVM_ABI void split(SmallVectorImpl<StringRef> &A, StringRef Separator,
763 int MaxSplit = -1, bool KeepEmpty = true) const;
764
765 /// Split into substrings around the occurrences of a separator character.
766 ///
767 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
768 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
769 /// elements are added to A.
770 /// If \p KeepEmpty is false, empty strings are not added to \p A. They
771 /// still count when considering \p MaxSplit
772 /// An useful invariant is that
773 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
774 ///
775 /// \param A - Where to put the substrings.
776 /// \param Separator - The string to split on.
777 /// \param MaxSplit - The maximum number of times the string is split.
778 /// \param KeepEmpty - True if empty substring should be added.
779 LLVM_ABI void split(SmallVectorImpl<StringRef> &A, char Separator,
780 int MaxSplit = -1, bool KeepEmpty = true) const;
781
782 /// Split into two substrings around the last occurrence of a separator
783 /// character.
784 ///
785 /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
786 /// such that (*this == LHS + Separator + RHS) is true and RHS is
787 /// minimal. If \p Separator is not in the string, then the result is a
788 /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
789 ///
790 /// \param Separator - The character to split on.
791 /// \return - The split substrings.
792 [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
793 return rsplit(Separator: StringRef(&Separator, 1));
794 }
795
796 /// Return string with consecutive \p Char characters starting from the
797 /// the left removed.
798 [[nodiscard]] StringRef ltrim(char Char) const {
799 return drop_front(N: std::min(a: size(), b: find_first_not_of(C: Char)));
800 }
801
802 /// Return string with consecutive characters in \p Chars starting from
803 /// the left removed.
804 [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
805 return drop_front(N: std::min(a: size(), b: find_first_not_of(Chars)));
806 }
807
808 /// Return string with consecutive \p Char characters starting from the
809 /// right removed.
810 [[nodiscard]] StringRef rtrim(char Char) const {
811 return drop_back(N: size() - std::min(a: size(), b: find_last_not_of(C: Char) + 1));
812 }
813
814 /// Return string with consecutive characters in \p Chars starting from
815 /// the right removed.
816 [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
817 return drop_back(N: size() - std::min(a: size(), b: find_last_not_of(Chars) + 1));
818 }
819
820 /// Return string with consecutive \p Char characters starting from the
821 /// left and right removed.
822 [[nodiscard]] StringRef trim(char Char) const {
823 return ltrim(Char).rtrim(Char);
824 }
825
826 /// Return string with consecutive characters in \p Chars starting from
827 /// the left and right removed.
828 [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
829 return ltrim(Chars).rtrim(Chars);
830 }
831
832 /// Detect the line ending style of the string.
833 ///
834 /// If the string contains a line ending, return the line ending character
835 /// sequence that is detected. Otherwise return '\n' for unix line endings.
836 ///
837 /// \return - The line ending character sequence.
838 [[nodiscard]] StringRef detectEOL() const {
839 size_t Pos = find(C: '\r');
840 if (Pos == npos) {
841 // If there is no carriage return, assume unix
842 return "\n";
843 }
844 if (Pos + 1 < size() && data()[Pos + 1] == '\n')
845 return "\r\n"; // Windows
846 if (Pos > 0 && data()[Pos - 1] == '\n')
847 return "\n\r"; // You monster!
848 return "\r"; // Classic Mac
849 }
850 /// @}
851 };
852
853 /// A wrapper around a string literal that serves as a proxy for constructing
854 /// global tables of StringRefs with the length computed at compile time.
855 /// In order to avoid the invocation of a global constructor, StringLiteral
856 /// should *only* be used in a constexpr context, as such:
857 ///
858 /// constexpr StringLiteral S("test");
859 ///
860 class StringLiteral : public StringRef {
861 private:
862 constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
863 }
864
865 public:
866 template <size_t N>
867 constexpr StringLiteral(const char (&Str)[N])
868#if defined(__clang__) && __has_attribute(enable_if)
869#pragma clang diagnostic push
870#pragma clang diagnostic ignored "-Wgcc-compat"
871 __attribute((enable_if(__builtin_strlen(Str) == N - 1,
872 "invalid string literal")))
873#pragma clang diagnostic pop
874#endif
875 : StringRef(Str, N - 1) {
876 }
877
878 // Explicit construction for strings like "foo\0bar".
879 template <size_t N>
880 static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
881 return StringLiteral(Str, N - 1);
882 }
883 };
884
885 /// @name StringRef Comparison Operators
886 /// @{
887
888 inline bool operator==(StringRef LHS, StringRef RHS) {
889 if (LHS.size() != RHS.size())
890 return false;
891 if (LHS.empty())
892 return true;
893 return ::memcmp(s1: LHS.data(), s2: RHS.data(), n: LHS.size()) == 0;
894 }
895
896 inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
897
898 inline bool operator<(StringRef LHS, StringRef RHS) {
899 return LHS.compare(RHS) < 0;
900 }
901
902 inline bool operator<=(StringRef LHS, StringRef RHS) {
903 return LHS.compare(RHS) <= 0;
904 }
905
906 inline bool operator>(StringRef LHS, StringRef RHS) {
907 return LHS.compare(RHS) > 0;
908 }
909
910 inline bool operator>=(StringRef LHS, StringRef RHS) {
911 return LHS.compare(RHS) >= 0;
912 }
913
914 inline std::string &operator+=(std::string &buffer, StringRef string) {
915 return buffer.append(s: string.data(), n: string.size());
916 }
917
918 /// @}
919
920 /// Compute a hash_code for a StringRef.
921 [[nodiscard]] LLVM_ABI hash_code hash_value(StringRef S);
922
923 // Provide DenseMapInfo for StringRefs.
924 template <> struct DenseMapInfo<StringRef, void> {
925 static inline StringRef getEmptyKey() {
926 return StringRef(
927 reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
928 }
929
930 static inline StringRef getTombstoneKey() {
931 return StringRef(
932 reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
933 }
934
935 LLVM_ABI static unsigned getHashValue(StringRef Val);
936
937 static bool isEqual(StringRef LHS, StringRef RHS) {
938 if (RHS.data() == getEmptyKey().data())
939 return LHS.data() == getEmptyKey().data();
940 if (RHS.data() == getTombstoneKey().data())
941 return LHS.data() == getTombstoneKey().data();
942 return LHS == RHS;
943 }
944 };
945
946} // end namespace llvm
947
948#endif // LLVM_ADT_STRINGREF_H
949

// Source: llvm/include/llvm/ADT/StringRef.h