[libc] Change ctype to be encoding independent (#110574)

The previous implementation of the ctype functions assumed ASCII. This patch changes them to a switch/case implementation that looks odd, but is actually easier for the compiler to understand and optimize, and does not depend on the character encoding.
2025-04-18 19:26:45 +00:00 · 2024-12-03 12:36:04 -08:00 · 2024-12-03 12:36:04 -08:00 · a0c4f854ca
commit a0c4f854ca
parent e0ae7793fc
33 changed files with 922 additions and 192 deletions
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@ -15,44 +15,567 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {

-// ------------------------------------------------------
-// Rationale: Since these classification functions are
-// called in other functions, we will avoid the overhead
-// of a function call by inlining them.
-// ------------------------------------------------------
+// -----------------------------------------------------------------------------
+// ******************                 WARNING                 ******************
+// ****************** DO NOT TRY TO OPTIMIZE THESE FUNCTIONS! ******************
+// -----------------------------------------------------------------------------
+// This switch/case form is easier for the compiler to understand, and is
+// optimized into a form that is almost always the same as or better than
+// versions written by hand (see https://godbolt.org/z/qvrebqvvr). Also this
+// form makes these functions encoding independent. If you want to rewrite these
+// functions, make sure you have benchmarks to show your new solution is faster,
+// as well as a way to support non-ASCII character encodings.

-LIBC_INLINE static constexpr bool isalpha(unsigned ch) {
-  return (ch | 32) - 'a' < 26;
+// Similarly, do not change these functions to use case ranges, e.g.
+//  bool islower(int ch) {
+//    switch(ch) {
+//    case 'a'...'z': return true;
+//    default: return false;
+//    }
+//  }
+// A case range assumes the characters are contiguous, which they aren't in
+// EBCDIC. Technically we could use some smaller ranges, but that's even harder
+// to read.
+
+LIBC_INLINE static constexpr bool islower(int ch) {
+  switch (ch) {
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+    return true;
+  default:
+    return false;
+  }
 }

-LIBC_INLINE static constexpr bool isdigit(unsigned ch) {
-  return (ch - '0') < 10;
+LIBC_INLINE static constexpr bool isupper(int ch) {
+  switch (ch) {
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+    return true;
+  default:
+    return false;
+  }
 }

-LIBC_INLINE static constexpr bool isalnum(unsigned ch) {
-  return isalpha(ch) || isdigit(ch);
-}
-
-LIBC_INLINE static constexpr bool isgraph(unsigned ch) {
-  return 0x20 < ch && ch < 0x7f;
-}
-
-LIBC_INLINE static constexpr bool islower(unsigned ch) {
-  return (ch - 'a') < 26;
-}
-
-LIBC_INLINE static constexpr bool isupper(unsigned ch) {
-  return (ch - 'A') < 26;
-}
-
-LIBC_INLINE static constexpr bool isspace(unsigned ch) {
-  return ch == ' ' || (ch - '\t') < 5;
+LIBC_INLINE static constexpr bool isdigit(int ch) {
+  switch (ch) {
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+    return true;
+  default:
+    return false;
+  }
 }

 LIBC_INLINE static constexpr int tolower(int ch) {
-  if (isupper(ch))
-    return ch + ('a' - 'A');
-  return ch;
+  switch (ch) {
+  case 'A':
+    return 'a';
+  case 'B':
+    return 'b';
+  case 'C':
+    return 'c';
+  case 'D':
+    return 'd';
+  case 'E':
+    return 'e';
+  case 'F':
+    return 'f';
+  case 'G':
+    return 'g';
+  case 'H':
+    return 'h';
+  case 'I':
+    return 'i';
+  case 'J':
+    return 'j';
+  case 'K':
+    return 'k';
+  case 'L':
+    return 'l';
+  case 'M':
+    return 'm';
+  case 'N':
+    return 'n';
+  case 'O':
+    return 'o';
+  case 'P':
+    return 'p';
+  case 'Q':
+    return 'q';
+  case 'R':
+    return 'r';
+  case 'S':
+    return 's';
+  case 'T':
+    return 't';
+  case 'U':
+    return 'u';
+  case 'V':
+    return 'v';
+  case 'W':
+    return 'w';
+  case 'X':
+    return 'x';
+  case 'Y':
+    return 'y';
+  case 'Z':
+    return 'z';
+  default:
+    return ch;
+  }
+}
+
+LIBC_INLINE static constexpr int toupper(int ch) {
+  switch (ch) {
+  case 'a':
+    return 'A';
+  case 'b':
+    return 'B';
+  case 'c':
+    return 'C';
+  case 'd':
+    return 'D';
+  case 'e':
+    return 'E';
+  case 'f':
+    return 'F';
+  case 'g':
+    return 'G';
+  case 'h':
+    return 'H';
+  case 'i':
+    return 'I';
+  case 'j':
+    return 'J';
+  case 'k':
+    return 'K';
+  case 'l':
+    return 'L';
+  case 'm':
+    return 'M';
+  case 'n':
+    return 'N';
+  case 'o':
+    return 'O';
+  case 'p':
+    return 'P';
+  case 'q':
+    return 'Q';
+  case 'r':
+    return 'R';
+  case 's':
+    return 'S';
+  case 't':
+    return 'T';
+  case 'u':
+    return 'U';
+  case 'v':
+    return 'V';
+  case 'w':
+    return 'W';
+  case 'x':
+    return 'X';
+  case 'y':
+    return 'Y';
+  case 'z':
+    return 'Z';
+  default:
+    return ch;
+  }
+}
+
+LIBC_INLINE static constexpr bool isalpha(int ch) {
+  switch (ch) {
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+    return true;
+  default:
+    return false;
+  }
+}
+
+LIBC_INLINE static constexpr bool isalnum(int ch) {
+  switch (ch) {
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+    return true;
+  default:
+    return false;
+  }
+}
+
+LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
+  switch (ch) {
+  case '0':
+    return 0;
+  case '1':
+    return 1;
+  case '2':
+    return 2;
+  case '3':
+    return 3;
+  case '4':
+    return 4;
+  case '5':
+    return 5;
+  case '6':
+    return 6;
+  case '7':
+    return 7;
+  case '8':
+    return 8;
+  case '9':
+    return 9;
+  case 'a':
+  case 'A':
+    return 10;
+  case 'b':
+  case 'B':
+    return 11;
+  case 'c':
+  case 'C':
+    return 12;
+  case 'd':
+  case 'D':
+    return 13;
+  case 'e':
+  case 'E':
+    return 14;
+  case 'f':
+  case 'F':
+    return 15;
+  case 'g':
+  case 'G':
+    return 16;
+  case 'h':
+  case 'H':
+    return 17;
+  case 'i':
+  case 'I':
+    return 18;
+  case 'j':
+  case 'J':
+    return 19;
+  case 'k':
+  case 'K':
+    return 20;
+  case 'l':
+  case 'L':
+    return 21;
+  case 'm':
+  case 'M':
+    return 22;
+  case 'n':
+  case 'N':
+    return 23;
+  case 'o':
+  case 'O':
+    return 24;
+  case 'p':
+  case 'P':
+    return 25;
+  case 'q':
+  case 'Q':
+    return 26;
+  case 'r':
+  case 'R':
+    return 27;
+  case 's':
+  case 'S':
+    return 28;
+  case 't':
+  case 'T':
+    return 29;
+  case 'u':
+  case 'U':
+    return 30;
+  case 'v':
+  case 'V':
+    return 31;
+  case 'w':
+  case 'W':
+    return 32;
+  case 'x':
+  case 'X':
+    return 33;
+  case 'y':
+  case 'Y':
+    return 34;
+  case 'z':
+  case 'Z':
+    return 35;
+  default:
+    return 0;
+  }
+}
+
+LIBC_INLINE static constexpr int int_to_b36_char(int num) {
+  // Can't actually use LIBC_ASSERT here because it depends on integer_to_string
+  // which depends on this. Out-of-range values (anything outside 0-35) return
+  // '!' instead.
+  // LIBC_ASSERT(num < 36);
+  switch (num) {
+  case 0:
+    return '0';
+  case 1:
+    return '1';
+  case 2:
+    return '2';
+  case 3:
+    return '3';
+  case 4:
+    return '4';
+  case 5:
+    return '5';
+  case 6:
+    return '6';
+  case 7:
+    return '7';
+  case 8:
+    return '8';
+  case 9:
+    return '9';
+  case 10:
+    return 'a';
+  case 11:
+    return 'b';
+  case 12:
+    return 'c';
+  case 13:
+    return 'd';
+  case 14:
+    return 'e';
+  case 15:
+    return 'f';
+  case 16:
+    return 'g';
+  case 17:
+    return 'h';
+  case 18:
+    return 'i';
+  case 19:
+    return 'j';
+  case 20:
+    return 'k';
+  case 21:
+    return 'l';
+  case 22:
+    return 'm';
+  case 23:
+    return 'n';
+  case 24:
+    return 'o';
+  case 25:
+    return 'p';
+  case 26:
+    return 'q';
+  case 27:
+    return 'r';
+  case 28:
+    return 's';
+  case 29:
+    return 't';
+  case 30:
+    return 'u';
+  case 31:
+    return 'v';
+  case 32:
+    return 'w';
+  case 33:
+    return 'x';
+  case 34:
+    return 'y';
+  case 35:
+    return 'z';
+  default:
+    return '!';
+  }
+}
+
+LIBC_INLINE static constexpr bool isspace(int ch) {
+  switch (ch) {
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\v':
+  case '\f':
+  case '\r':
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Not yet encoding independent: the range check below assumes ASCII.
+LIBC_INLINE static constexpr bool isgraph(int ch) {
+  return 0x20 < ch && ch < 0x7f;
 }

 } // namespace internal
--- a/libc/src/__support/high_precision_decimal.h
+++ b/libc/src/__support/high_precision_decimal.h
@ -178,9 +178,11 @@ private:
      if (digit_index >= this->num_digits) {
        return new_digits - 1;
      }
-      if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
+      if (this->digits[digit_index] !=
+          internal::b36_char_to_int(power_of_five[digit_index])) {
        return new_digits -
-               ((this->digits[digit_index] < power_of_five[digit_index] - '0')
+               ((this->digits[digit_index] <
+                 internal::b36_char_to_int(power_of_five[digit_index]))
                    ? 1
                    : 0);
      }
@ -337,8 +339,8 @@ public:
        }
        ++total_digits;
        if (this->num_digits < MAX_NUM_DIGITS) {
-          this->digits[this->num_digits] =
-              static_cast<uint8_t>(num_string[num_cur] - '0');
+          this->digits[this->num_digits] = static_cast<uint8_t>(
+              internal::b36_char_to_int(num_string[num_cur]));
          ++this->num_digits;
        } else if (num_string[num_cur] != '0') {
          this->truncated = true;
--- a/libc/src/__support/integer_literals.h
+++ b/libc/src/__support/integer_literals.h
@ -13,12 +13,13 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H

-#include "src/__support/CPP/limits.h"        // CHAR_BIT
+#include "src/__support/CPP/limits.h" // CHAR_BIT
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
-#include "src/__support/uint128.h"           // UInt128
-#include <stddef.h>                          // size_t
-#include <stdint.h>                          // uintxx_t
+#include "src/__support/uint128.h" // UInt128
+#include <stddef.h>                // size_t
+#include <stdint.h>                // uintxx_t

 namespace LIBC_NAMESPACE_DECL {

@ -75,26 +76,13 @@ template <typename T, int base> struct DigitBuffer {
      push(*str);
  }

-  // Returns the digit for a particular character.
-  // Returns INVALID_DIGIT if the character is invalid.
-  LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) {
-    const auto to_lower = [](char c) { return c | 32; };
-    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
-    const auto is_alpha = [](char c) {
-      return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
-    };
-    if (is_digit(c))
-      return static_cast<uint8_t>(c - '0');
-    if (base > 10 && is_alpha(c))
-      return static_cast<uint8_t>(to_lower(c) - 'a' + 10);
-    return INVALID_DIGIT;
-  }
-
  // Adds a single character to this buffer.
  LIBC_INLINE constexpr void push(char c) {
    if (c == '\'')
      return; // ' is valid but not taken into account.
-    const uint8_t value = get_digit_value(c);
+    const int b36_val = internal::b36_char_to_int(c);
+    const uint8_t value = static_cast<uint8_t>(
+        b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT);
    if (value == INVALID_DIGIT || size >= MAX_DIGITS) {
      // During constant evaluation `__builtin_unreachable` will halt the
      // compiler as it is not executable. This is preferable over `assert` that
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@ -69,6 +69,7 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
 #include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"

 namespace LIBC_NAMESPACE_DECL {
@ -214,9 +215,9 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
    using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;

    LIBC_INLINE static char digit_char(uint8_t digit) {
-      if (digit < 10)
-        return '0' + static_cast<char>(digit);
-      return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
+      const int result = internal::int_to_b36_char(digit);
+      return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
+                                                 : result);
    }

    LIBC_INLINE static void
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
      cpp::numeric_limits<StorageType>::max() / BASE;
  while (true) {
    if (isdigit(src[index])) {
-      uint32_t digit = src[index] - '0';
+      uint32_t digit = b36_char_to_int(src[index]);
      seen_digit = true;

      if (mantissa < bitstype_max_div_by_base) {
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@ -42,14 +42,6 @@ first_non_whitespace(const char *__restrict src,
  return src + src_cur;
 }

-LIBC_INLINE int b36_char_to_int(char input) {
-  if (isdigit(input))
-    return input - '0';
-  if (isalpha(input))
-    return (input | 32) + 10 - 'a';
-  return 0;
-}
-
 // checks if the next 3 characters of the string pointer are the start of a
 // hexadecimal number. Does not advance the string pointer.
 LIBC_INLINE bool
@ -57,7 +49,7 @@ is_hex_start(const char *__restrict src,
             size_t src_len = cpp::numeric_limits<size_t>::max()) {
  if (src_len < 3)
    return false;
-  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+  return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
         b36_char_to_int(*(src + 2)) < 16;
 }

--- a/libc/src/ctype/isxdigit.cpp
+++ b/libc/src/ctype/isxdigit.cpp
@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {

 LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) {
  const unsigned ch = static_cast<unsigned>(c);
-  return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+  return static_cast<int>(internal::isalnum(ch) &&
+                          internal::b36_char_to_int(ch) < 16);
 }

 } // namespace LIBC_NAMESPACE_DECL
--- a/libc/src/ctype/isxdigit_l.cpp
+++ b/libc/src/ctype/isxdigit_l.cpp
@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {

 LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) {
  const unsigned ch = static_cast<unsigned>(c);
-  return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+  return static_cast<int>(internal::isalnum(ch) &&
+                          internal::b36_char_to_int(ch) < 16);
 }

 } // namespace LIBC_NAMESPACE_DECL
--- a/libc/src/ctype/toupper.cpp
+++ b/libc/src/ctype/toupper.cpp
@ -14,10 +14,6 @@

 namespace LIBC_NAMESPACE_DECL {

-LLVM_LIBC_FUNCTION(int, toupper, (int c)) {
-  if (internal::islower(c))
-    return c - ('a' - 'A');
-  return c;
-}
+LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); }

 } // namespace LIBC_NAMESPACE_DECL
--- a/libc/src/ctype/toupper_l.cpp
+++ b/libc/src/ctype/toupper_l.cpp
@ -15,9 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {

 LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) {
-  if (internal::islower(c))
-    return c - ('a' - 'A');
-  return c;
+  return internal::toupper(c);
 }

 } // namespace LIBC_NAMESPACE_DECL
--- a/libc/src/stdio/printf_core/fixed_converter.h
+++ b/libc/src/stdio/printf_core/fixed_converter.h
@ -11,6 +11,7 @@

 #include "include/llvm-libc-macros/stdfix-macros.h"
 #include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/fixed_point/fx_bits.h"
 #include "src/__support/fixed_point/fx_rep.h"
 #include "src/__support/integer_to_string.h"
@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
  using LARep = fixed_point::FXRep<unsigned long accum>;
  using StorageType = LARep::StorageType;

-  // All of the letters will be defined relative to variable a, which will be
-  // the appropriate case based on the name of the conversion. This converts any
-  // conversion name into the letter 'a' with the appropriate case.
-  const char a = (to_conv.conv_name & 32) | 'A';
  FormatFlags flags = to_conv.flags;

  bool is_negative;
@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
    // unspecified.
    RoundDirection round;
    char first_digit_after = fraction_digits[precision];
-    if (first_digit_after > '5') {
+    if (internal::b36_char_to_int(first_digit_after) > 5) {
      round = RoundDirection::Up;
-    } else if (first_digit_after < '5') {
+    } else if (internal::b36_char_to_int(first_digit_after) < 5) {
      round = RoundDirection::Down;
    } else {
      // first_digit_after == '5'
@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
        keep_rounding = false;
        char cur_digit = fraction_digits[digit_to_round];
        // if the digit should not be rounded up
-        if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) {
+        if (round == RoundDirection::Even &&
+            (internal::b36_char_to_int(cur_digit) % 2) == 0) {
          // break out of the loop
          break;
        }
@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
  char sign_char = 0;

  // Check if the conv name is uppercase
-  if (a == 'A') {
+  if (internal::isupper(to_conv.conv_name)) {
    // These flags are only for signed conversions, so this removes them if the
    // conversion is unsigned.
    flags = FormatFlags(flags &
--- a/libc/src/stdio/printf_core/float_dec_converter.h
+++ b/libc/src/stdio/printf_core/float_dec_converter.h
@ -13,6 +13,7 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/big_int.h" // is_big_int_v
+#include "src/__support/ctype_utils.h"
 #include "src/__support/float_to_string.h"
 #include "src/__support/integer_to_string.h"
 #include "src/__support/libc_assert.h"
@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
  int exponent = float_bits.get_explicit_exponent();
  StorageType mantissa = float_bits.get_explicit_mantissa();

-  const char a = (to_conv.conv_name & 32) | 'A';
-
  char sign_char = 0;

  if (float_bits.is_neg())
@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
  round = get_round_direction(last_digit, truncated, float_bits.sign());

  RET_IF_RESULT_NEGATIVE(float_writer.write_last_block(
-      digits, maximum, round, final_exponent, a + 'E' - 'A'));
+      digits, maximum, round, final_exponent,
+      internal::islower(to_conv.conv_name) ? 'e' : 'E'));

  RET_IF_RESULT_NEGATIVE(float_writer.right_pad());
  return WRITE_OK;
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@ -12,6 +12,7 @@
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@ -28,10 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
                                      const FormatSection &to_conv) {
  using LDBits = fputil::FPBits<long double>;
  using StorageType = LDBits::StorageType;
-  // All of the letters will be defined relative to variable a, which will be
-  // the appropriate case based on the name of the conversion. This converts any
-  // conversion name into the letter 'a' with the appropriate case.
-  const char a = (to_conv.conv_name & 32) | 'A';

  bool is_negative;
  int exponent;
@ -138,9 +135,10 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
  size_t mant_cur = mant_len;
  size_t first_non_zero = 1;
  for (; mant_cur > 0; --mant_cur, mantissa >>= 4) {
-    char mant_mod_16 = static_cast<char>(mantissa) & 15;
-    char new_digit = static_cast<char>(
-        (mant_mod_16 > 9) ? (mant_mod_16 - 10 + a) : (mant_mod_16 + '0'));
+    char mant_mod_16 = static_cast<char>(mantissa % 16);
+    char new_digit = static_cast<char>(internal::int_to_b36_char(mant_mod_16));
+    if (internal::isupper(to_conv.conv_name))
+      new_digit = static_cast<char>(internal::toupper(new_digit));
    mant_buffer[mant_cur - 1] = new_digit;
    if (new_digit != '0' && first_non_zero < mant_cur)
      first_non_zero = mant_cur;
@ -168,7 +166,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,

  size_t exp_cur = EXP_LEN;
  for (; exponent > 0; --exp_cur, exponent /= 10) {
-    exp_buffer[exp_cur - 1] = static_cast<char>((exponent % 10) + '0');
+    exp_buffer[exp_cur - 1] =
+        static_cast<char>(internal::int_to_b36_char(exponent % 10));
  }
  if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0.
    exp_buffer[EXP_LEN - 1] = '0';
@ -187,7 +186,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
  constexpr size_t PREFIX_LEN = 2;
  char prefix[PREFIX_LEN];
  prefix[0] = '0';
-  prefix[1] = a + ('x' - 'a');
+  prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
  const cpp::string_view prefix_str(prefix, PREFIX_LEN);

  // If the precision is greater than the actual result, pad with 0s
@ -200,7 +199,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
  constexpr cpp::string_view HEXADECIMAL_POINT(".");

  // This is for the letter 'p' before the exponent.
-  const char exp_separator = a + ('p' - 'a');
+  const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P';
  constexpr int EXP_SEPARATOR_LEN = 1;

  padding = static_cast<int>(to_conv.min_width - (sign_char > 0 ? 1 : 0) -
--- a/libc/src/stdio/printf_core/float_inf_nan_converter.h
+++ b/libc/src/stdio/printf_core/float_inf_nan_converter.h
@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H

 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@ -26,8 +27,6 @@ using StorageType = fputil::FPBits<long double>::StorageType;
 LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
  // All of the letters will be defined relative to variable a, which will be
  // the appropriate case based on the case of the conversion.
-  const char a = (to_conv.conv_name & 32) | 'A';
-
  bool is_negative;
  StorageType mantissa;
  if (to_conv.length_modifier == LengthModifier::L) {
@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
  if (sign_char)
    RET_IF_RESULT_NEGATIVE(writer->write(sign_char));
  if (mantissa == 0) { // inf
-    RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF"));
+    RET_IF_RESULT_NEGATIVE(
+        writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF"));
  } else { // nan
-    RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN"));
+    RET_IF_RESULT_NEGATIVE(
+        writer->write(internal::islower(to_conv.conv_name) ? "nan" : "NAN"));
  }

  if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@ -11,6 +11,7 @@

 #include "src/__support/CPP/span.h"
 #include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/integer_to_string.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
@ -23,11 +24,6 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace printf_core {

-// These functions only work on characters that are already known to be in the
-// alphabet. Their behavior is undefined otherwise.
-LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
-LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; }
-
 namespace details {

 using HexFmt = IntegerToString<uintmax_t, radix::Hex>;
@ -49,14 +45,14 @@ LIBC_INLINE constexpr size_t num_buf_size() {

 LIBC_INLINE cpp::optional<cpp::string_view>
 num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
-  if (to_lower(conv_name) == 'x') {
-    if (is_lower(conv_name))
+  if (internal::tolower(conv_name) == 'x') {
+    if (internal::islower(conv_name))
      return HexFmt::format_to(bufref, num);
    else
      return HexFmtUppercase::format_to(bufref, num);
  } else if (conv_name == 'o') {
    return OctFmt::format_to(bufref, num);
-  } else if (to_lower(conv_name) == 'b') {
+  } else if (internal::tolower(conv_name) == 'b') {
    return BinFmt::format_to(bufref, num);
  } else {
    return DecFmt::format_to(bufref, num);
@ -72,7 +68,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
  uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
  bool is_negative = false;
  FormatFlags flags = to_conv.flags;
-  const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';

  // If the conversion is signed, then handle negative values.
  if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
@ -116,16 +111,16 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
  // conversions. Since hexadecimal is unsigned these will never conflict.
  size_t prefix_len;
  char prefix[2];
-  if ((to_lower(to_conv.conv_name) == 'x') &&
+  if ((internal::tolower(to_conv.conv_name) == 'x') &&
      ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
    prefix_len = 2;
    prefix[0] = '0';
-    prefix[1] = a + ('x' - 'a');
-  } else if ((to_lower(to_conv.conv_name) == 'b') &&
+    prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
+  } else if ((internal::tolower(to_conv.conv_name) == 'b') &&
             ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
    prefix_len = 2;
    prefix[0] = '0';
-    prefix[1] = a + ('b' - 'a');
+    prefix[1] = internal::islower(to_conv.conv_name) ? 'b' : 'B';
  } else {
    prefix_len = (sign_char == 0 ? 0 : 1);
    prefix[0] = sign_char;
--- a/libc/src/stdio/scanf_core/converter_utils.h
+++ b/libc/src/stdio/scanf_core/converter_utils.h
@ -19,16 +19,6 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {

-LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
-
-LIBC_INLINE constexpr int b36_char_to_int(char input) {
-  if (internal::isdigit(input))
-    return input - '0';
-  if (internal::isalpha(input))
-    return to_lower(input) + 10 - 'a';
-  return 0;
-}
-
 LIBC_INLINE void write_int_with_length(uintmax_t output_val,
                                       const FormatSection &to_conv) {
  if ((to_conv.flags & NO_WRITE) != 0) {
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ b/libc/src/stdio/scanf_core/float_converter.cpp
@ -55,11 +55,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {

  // Handle inf

-  if (to_lower(cur_char) == inf_string[0]) {
+  if (internal::tolower(cur_char) == inf_string[0]) {
    size_t inf_index = 0;

-    for (; inf_index < sizeof(inf_string) && out_str.length() < max_width &&
-           to_lower(cur_char) == inf_string[inf_index];
+    for (;
+         inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
+         internal::tolower(cur_char) == inf_string[inf_index];
         ++inf_index) {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
@ -78,11 +79,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
  static const char nan_string[] = "nan";

  // Handle nan
-  if (to_lower(cur_char) == nan_string[0]) {
+  if (internal::tolower(cur_char) == nan_string[0]) {
    size_t nan_index = 0;

-    for (; nan_index < sizeof(nan_string) && out_str.length() < max_width &&
-           to_lower(cur_char) == nan_string[nan_index];
+    for (;
+         nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
+         internal::tolower(cur_char) == nan_string[nan_index];
         ++nan_index) {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
@ -117,7 +119,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
    }

    // If that next character is an 'x' then this is a hexadecimal number.
-    if (to_lower(cur_char) == 'x') {
+    if (internal::tolower(cur_char) == 'x') {
      base = 16;

      if (!out_str.append(cur_char)) {
@ -163,7 +165,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {

  // Handle the exponent, which has an exponent mark, an optional sign, and
  // decimal digits.
-  if (to_lower(cur_char) == exponent_mark) {
+  if (internal::tolower(cur_char) == exponent_mark) {
    if (!out_str.append(cur_char)) {
      return ALLOCATION_FAILURE;
    }
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@ -80,7 +80,8 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
    is_signed = true;
  } else if (to_conv.conv_name == 'o') {
    base = 8;
-  } else if (to_lower(to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') {
+  } else if (internal::tolower(to_conv.conv_name) == 'x' ||
+             to_conv.conv_name == 'p') {
    base = 16;
  } else if (to_conv.conv_name == 'd') {
    base = 10;
@ -122,7 +123,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
        return READ_OK;
      }

-      if (to_lower(cur_char) == 'x') {
+      if (internal::tolower(cur_char) == 'x') {
        // This is a valid hex prefix.

        is_number = false;
@ -175,17 +176,18 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {

  const uintmax_t max_div_by_base = MAX / base;

-  if (internal::isalnum(cur_char) && b36_char_to_int(cur_char) < base) {
+  if (internal::isalnum(cur_char) &&
+      internal::b36_char_to_int(cur_char) < base) {
    is_number = true;
  }

  bool has_overflow = false;
  size_t i = 0;
  for (; i < max_width && internal::isalnum(cur_char) &&
-         b36_char_to_int(cur_char) < base;
+         internal::b36_char_to_int(cur_char) < base;
       ++i, cur_char = reader->getc()) {

-    uintmax_t cur_digit = b36_char_to_int(cur_char);
+    uintmax_t cur_digit = internal::b36_char_to_int(cur_char);

    if (result == MAX) {
      has_overflow = true;
--- a/libc/src/stdio/scanf_core/ptr_converter.cpp
+++ b/libc/src/stdio/scanf_core/ptr_converter.cpp
@ -8,6 +8,7 @@

 #include "src/stdio/scanf_core/ptr_converter.h"

+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/scanf_core/converter_utils.h"
 #include "src/stdio/scanf_core/core_structs.h"
@ -24,7 +25,8 @@ int convert_pointer(Reader *reader, const FormatSection &to_conv) {
  // Check if it's exactly the nullptr string, if so then it's a nullptr.
  char cur_char = reader->getc();
  size_t i = 0;
-  for (; i < sizeof(nullptr_string) && to_lower(cur_char) == nullptr_string[i];
+  for (; i < (sizeof(nullptr_string) - 1) &&
+         internal::tolower(cur_char) == nullptr_string[i];
       ++i) {
    cur_char = reader->getc();
  }
--- a/libc/test/UnitTest/MemoryMatcher.cpp
+++ b/libc/test/UnitTest/MemoryMatcher.cpp
@ -8,6 +8,7 @@

 #include "MemoryMatcher.h"

+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "test/UnitTest/Test.h"

@ -40,7 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) {

 static void display(char C) {
  const auto print = [](unsigned char I) {
-    tlog << static_cast<char>(I < 10 ? '0' + I : 'A' + I - 10);
+    tlog << static_cast<char>(LIBC_NAMESPACE::internal::toupper(
+        LIBC_NAMESPACE::internal::int_to_b36_char(I)));
  };
  print(static_cast<unsigned char>(C) / 16);
  print(static_cast<unsigned char>(C) & 15);
--- a/libc/test/src/__support/CPP/stringview_test.cpp
+++ b/libc/test/src/__support/CPP/stringview_test.cpp
@ -109,8 +109,6 @@ TEST(LlvmLibcStringViewTest, Observer) {
  ASSERT_EQ(ABC.back(), 'c');
 }

-bool isDigit(char c) { return c >= '0' && c <= '9'; }
-
 TEST(LlvmLibcStringViewTest, FindFirstOf) {
  string_view Tmp("abca");
  ASSERT_TRUE(Tmp.find_first_of('a') == 0);
@ -236,6 +234,9 @@ TEST(LlvmLibcStringViewTest, FindFirstNotOf) {

 TEST(LlvmLibcStringViewTest, Contains) {
  string_view Empty;
+  static_assert(
+      'a' < 'z',
+      "This test only supports character encodings where 'a' is below 'z'");
  for (char c = 'a'; c < 'z'; ++c)
    EXPECT_FALSE(Empty.contains(c));

--- a/libc/test/src/ctype/isalnum_test.cpp
+++ b/libc/test/src/ctype/isalnum_test.cpp
@ -6,18 +6,45 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/isalnum.h"

 #include "test/UnitTest/Test.h"

+// Spot-checks isalnum on a handful of representative inputs.
+TEST(LlvmLibcIsAlNum, SimpleTest) {
+  // A lowercase letter, an uppercase letter and a digit must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::isalnum('a'), 0);
+  EXPECT_NE(LIBC_NAMESPACE::isalnum('B'), 0);
+  EXPECT_NE(LIBC_NAMESPACE::isalnum('3'), 0);
+
+  // A space, punctuation, the NUL character and -1 must be rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::isalnum(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalnum('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalnum('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalnum(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character isalnum is expected to accept, spelled out one by one so the
+// test does not rely on letters or digits being contiguous in the encoding.
+constexpr char ALNUM_ARRAY[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
 TEST(LlvmLibcIsAlNum, DefaultLocale) {
  // Loops through all characters, verifying that numbers and letters
  // return non-zero integer and everything else returns a zero.
-  for (int c = -255; c < 255; ++c) {
-    if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
-        ('0' <= c && c <= '9'))
-      EXPECT_NE(LIBC_NAMESPACE::isalnum(c), 0);
+  for (int ch = -255; ch < 255; ++ch) {
+    if (in_span(ch, ALNUM_ARRAY))
+      EXPECT_NE(LIBC_NAMESPACE::isalnum(ch), 0);
    else
-      EXPECT_EQ(LIBC_NAMESPACE::isalnum(c), 0);
+      EXPECT_EQ(LIBC_NAMESPACE::isalnum(ch), 0);
  }
 }
--- a/libc/test/src/ctype/isalpha_test.cpp
+++ b/libc/test/src/ctype/isalpha_test.cpp
@ -6,15 +6,43 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/isalpha.h"

 #include "test/UnitTest/Test.h"

+// Spot-checks isalpha on a handful of representative inputs.
+TEST(LlvmLibcIsAlpha, SimpleTest) {
+  // A lowercase letter and an uppercase letter must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::isalpha('a'), 0);
+  EXPECT_NE(LIBC_NAMESPACE::isalpha('B'), 0);
+
+  // A digit, a space, punctuation, the NUL character and -1 must be rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::isalpha('3'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalpha(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalpha('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalpha('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isalpha(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character isalpha is expected to accept, spelled out one by one so the
+// test does not rely on the letters being contiguous in the encoding.
+constexpr char ALPHA_ARRAY[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
 TEST(LlvmLibcIsAlpha, DefaultLocale) {
  // Loops through all characters, verifying that letters return a
  // non-zero integer and everything else returns zero.
+  // TODO: Make this test encoding independent.
  for (int ch = -255; ch < 255; ++ch) {
-    if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
+    if (in_span(ch, ALPHA_ARRAY))
      EXPECT_NE(LIBC_NAMESPACE::isalpha(ch), 0);
    else
      EXPECT_EQ(LIBC_NAMESPACE::isalpha(ch), 0);
--- a/libc/test/src/ctype/isdigit_test.cpp
+++ b/libc/test/src/ctype/isdigit_test.cpp
@ -6,15 +6,39 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/isdigit.h"

 #include "test/UnitTest/Test.h"

+// Spot-checks isdigit on a handful of representative inputs.
+TEST(LlvmLibcIsDigit, SimpleTest) {
+  // A decimal digit must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::isdigit('3'), 0);
+
+  // Letters, a space, punctuation, the NUL character and -1 must be rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit('a'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit('B'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isdigit(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character isdigit is expected to accept, spelled out one by one so the
+// test does not rely on the digits being contiguous in the encoding.
+constexpr char DIGIT_ARRAY[] = {
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
 TEST(LlvmLibcIsDigit, DefaultLocale) {
-  // Loops through all characters, verifying that numbers return a
-  // non-zero integer and everything else returns zero.
+  // Loops through all characters, verifying that only decimal digits
+  // return a non-zero integer and everything else returns zero.
  for (int ch = -255; ch < 255; ++ch) {
-    if ('0' <= ch && ch <= '9')
+    if (in_span(ch, DIGIT_ARRAY))
      EXPECT_NE(LIBC_NAMESPACE::isdigit(ch), 0);
    else
      EXPECT_EQ(LIBC_NAMESPACE::isdigit(ch), 0);
--- a/libc/test/src/ctype/islower_test.cpp
+++ b/libc/test/src/ctype/islower_test.cpp
@ -6,14 +6,40 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/islower.h"
+
 #include "test/UnitTest/Test.h"

+// Spot-checks islower on a handful of representative inputs.
+TEST(LlvmLibcIsLower, SimpleTest) {
+  // A lowercase letter must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::islower('a'), 0);
+
+  // An uppercase letter, a digit, a space, punctuation, the NUL character and
+  // -1 must be rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::islower('B'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::islower('3'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::islower(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::islower('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::islower('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::islower(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character islower is expected to accept, spelled out one by one so the
+// test does not rely on the letters being contiguous in the encoding.
+constexpr char LOWER_ARRAY[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
 TEST(LlvmLibcIsLower, DefaultLocale) {
-  // Loops through all characters, verifying that lowercase letters
-  // return a non-zero integer and everything else returns zero.
+  // Loops through all characters, verifying that only lowercase letters
+  // return a non-zero integer and everything else returns zero.
  for (int ch = -255; ch < 255; ++ch) {
-    if ('a' <= ch && ch <= 'z')
+    if (in_span(ch, LOWER_ARRAY))
      EXPECT_NE(LIBC_NAMESPACE::islower(ch), 0);
    else
      EXPECT_EQ(LIBC_NAMESPACE::islower(ch), 0);
--- a/libc/test/src/ctype/isupper_test.cpp
+++ b/libc/test/src/ctype/isupper_test.cpp
@ -6,14 +6,40 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/isupper.h"
+
 #include "test/UnitTest/Test.h"

+// Spot-checks isupper on a handful of representative inputs.
+TEST(LlvmLibcIsUpper, SimpleTest) {
+  // An uppercase letter must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::isupper('B'), 0);
+
+  // A lowercase letter, a digit, a space, punctuation, the NUL character and
+  // -1 must be rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::isupper('a'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isupper('3'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isupper(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isupper('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isupper('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isupper(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character isupper is expected to accept, spelled out one by one so the
+// test does not rely on the letters being contiguous in the encoding.
+constexpr char UPPER_ARRAY[] = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
 TEST(LlvmLibcIsUpper, DefaultLocale) {
-  // Loops through all characters, verifying that uppercase letters
-  // return a non-zero integer and everything else returns zero.
+  // Loops through all characters, verifying that only uppercase letters
+  // return a non-zero integer and everything else returns zero.
  for (int ch = -255; ch < 255; ++ch) {
-    if ('A' <= ch && ch <= 'Z')
+    if (in_span(ch, UPPER_ARRAY))
      EXPECT_NE(LIBC_NAMESPACE::isupper(ch), 0);
    else
      EXPECT_EQ(LIBC_NAMESPACE::isupper(ch), 0);
--- a/libc/test/src/ctype/isxdigit_test.cpp
+++ b/libc/test/src/ctype/isxdigit_test.cpp
@ -6,13 +6,41 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/isxdigit.h"
+
 #include "test/UnitTest/Test.h"

-TEST(LlvmLibcIsXDigit, DefaultLocale) {
+// Spot-checks isxdigit on a handful of representative inputs.
+TEST(LlvmLibcIsXdigit, SimpleTest) {
+  // Hex letters of either case and a decimal digit must be accepted.
+  EXPECT_NE(LIBC_NAMESPACE::isxdigit('a'), 0);
+  EXPECT_NE(LIBC_NAMESPACE::isxdigit('B'), 0);
+  EXPECT_NE(LIBC_NAMESPACE::isxdigit('3'), 0);
+
+  // A letter past 'f', a space, punctuation, the NUL character and -1 must be
+  // rejected.
+  EXPECT_EQ(LIBC_NAMESPACE::isxdigit('z'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isxdigit(' '), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isxdigit('?'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isxdigit('\0'), 0);
+  EXPECT_EQ(LIBC_NAMESPACE::isxdigit(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Every character isxdigit is expected to accept, spelled out one by one so
+// the test does not rely on letters or digits being contiguous in the encoding.
+constexpr char XDIGIT_ARRAY[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E',
+    'F', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+// Reports whether `ch` equals one of the characters held in `arr`. Each
+// element is converted to int before the comparison.
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Stopping before the end means a matching element was found.
+  return pos < arr.size();
+}
+
+TEST(LlvmLibcIsXdigit, DefaultLocale) {
+  // Loops through all characters, verifying that only hexadecimal digits
+  // return a non-zero integer and everything else returns zero.
  for (int ch = -255; ch < 255; ++ch) {
-    if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') ||
-        ('A' <= ch && ch <= 'F'))
+    if (in_span(ch, XDIGIT_ARRAY))
      EXPECT_NE(LIBC_NAMESPACE::isxdigit(ch), 0);
    else
      EXPECT_EQ(LIBC_NAMESPACE::isxdigit(ch), 0);
--- a/libc/test/src/ctype/tolower_test.cpp
+++ b/libc/test/src/ctype/tolower_test.cpp
@ -6,14 +6,51 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/tolower.h"
+
 #include "test/UnitTest/Test.h"

+// Spot-checks tolower on a handful of representative inputs.
+TEST(LlvmLibcToLower, SimpleTest) {
+  // Only the uppercase 'B' is converted; the lowercase letter and the digit
+  // come back as they were passed in.
+  EXPECT_EQ(LIBC_NAMESPACE::tolower('a'), int('a'));
+  EXPECT_EQ(LIBC_NAMESPACE::tolower('B'), int('b'));
+  EXPECT_EQ(LIBC_NAMESPACE::tolower('3'), int('3'));
+
+  // A space, punctuation, the NUL character and -1 are returned unchanged.
+  EXPECT_EQ(LIBC_NAMESPACE::tolower(' '), int(' '));
+  EXPECT_EQ(LIBC_NAMESPACE::tolower('?'), int('?'));
+  EXPECT_EQ(LIBC_NAMESPACE::tolower('\0'), int('\0'));
+  EXPECT_EQ(LIBC_NAMESPACE::tolower(-1), int(-1));
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same
+// order, so the element at a given index in one array is the other-case form
+// of the element at the same index in the other array.
+constexpr char UPPER_ARR[] = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+constexpr char LOWER_ARR[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+// An index found in one array is used to read the other, so the two arrays
+// must have the same length.
+static_assert(
+    sizeof(UPPER_ARR) == sizeof(LOWER_ARR),
+    "There must be the same number of uppercase and lowercase letters.");
+
+// Returns the position of the first element of `arr` equal to `ch`, or -1 when
+// no element matches. Each element is converted to int before the comparison.
+int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Running off the end means nothing matched.
+  return pos < arr.size() ? static_cast<int>(pos) : -1;
+}
+
 TEST(LlvmLibcToLower, DefaultLocale) {
  for (int ch = -255; ch < 255; ++ch) {
-    // This follows pattern 'A' + 32 = 'a'.
-    if ('A' <= ch && ch <= 'Z')
-      EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch + 32);
+    int char_index = span_index(ch, UPPER_ARR);
+    if (char_index != -1)
+      EXPECT_EQ(LIBC_NAMESPACE::tolower(ch),
+                static_cast<int>(LOWER_ARR[char_index]));
    else
      EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch);
  }
--- a/libc/test/src/ctype/toupper_test.cpp
+++ b/libc/test/src/ctype/toupper_test.cpp
@ -6,14 +6,51 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/CPP/span.h"
 #include "src/ctype/toupper.h"
+
 #include "test/UnitTest/Test.h"

+// Spot-checks toupper on a handful of representative inputs.
+TEST(LlvmLibcToUpper, SimpleTest) {
+  // Only the lowercase 'a' is converted; the uppercase letter and the digit
+  // come back as they were passed in.
+  EXPECT_EQ(LIBC_NAMESPACE::toupper('a'), int('A'));
+  EXPECT_EQ(LIBC_NAMESPACE::toupper('B'), int('B'));
+  EXPECT_EQ(LIBC_NAMESPACE::toupper('3'), int('3'));
+
+  // A space, punctuation, the NUL character and -1 are returned unchanged.
+  EXPECT_EQ(LIBC_NAMESPACE::toupper(' '), int(' '));
+  EXPECT_EQ(LIBC_NAMESPACE::toupper('?'), int('?'));
+  EXPECT_EQ(LIBC_NAMESPACE::toupper('\0'), int('\0'));
+  EXPECT_EQ(LIBC_NAMESPACE::toupper(-1), int(-1));
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same
+// order, so the element at a given index in one array is the other-case form
+// of the element at the same index in the other array.
+constexpr char UPPER_ARR[] = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+constexpr char LOWER_ARR[] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+// An index found in one array is used to read the other, so the two arrays
+// must have the same length.
+static_assert(
+    sizeof(UPPER_ARR) == sizeof(LOWER_ARR),
+    "There must be the same number of uppercase and lowercase letters.");
+
+// Returns the position of the first element of `arr` equal to `ch`, or -1 when
+// no element matches. Each element is converted to int before the comparison.
+int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+  size_t pos = 0;
+  while (pos < arr.size() && static_cast<int>(arr[pos]) != ch)
+    ++pos;
+  // Running off the end means nothing matched.
+  return pos < arr.size() ? static_cast<int>(pos) : -1;
+}
+
 TEST(LlvmLibcToUpper, DefaultLocale) {
  for (int ch = -255; ch < 255; ++ch) {
-    // This follows pattern 'a' - 32 = 'A'.
-    if ('a' <= ch && ch <= 'z')
-      EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch - 32);
+    int char_index = span_index(ch, LOWER_ARR);
+    if (char_index != -1)
+      EXPECT_EQ(LIBC_NAMESPACE::toupper(ch),
+                static_cast<int>(UPPER_ARR[char_index]));
    else
      EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch);
  }
--- a/libc/test/src/stdlib/StrtolTest.h
+++ b/libc/test/src/stdlib/StrtolTest.h
@ -8,6 +8,7 @@

 #include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/properties/architectures.h"
 #include "src/errno/libc_errno.h"
 #include "test/UnitTest/Test.h"
@ -16,14 +17,6 @@

 using LIBC_NAMESPACE::cpp::is_signed_v;

-static inline char int_to_b36_char(int input) {
-  if (input < 0 || input > 36)
-    return '0';
-  if (input < 10)
-    return static_cast<char>('0' + input);
-  return static_cast<char>('A' + input - 10);
-}
-
 template <typename ReturnT>
 struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
  using FunctionT = ReturnT (*)(const char *, char **, int);
@ -207,7 +200,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
    char small_string[4] = {'\0', '\0', '\0', '\0'};
    for (int base = 2; base <= 36; ++base) {
      for (int first_digit = 0; first_digit <= 36; ++first_digit) {
-        small_string[0] = int_to_b36_char(first_digit);
+        small_string[0] =
+            LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
        if (first_digit < base) {
          LIBC_NAMESPACE::libc_errno = 0;
          ASSERT_EQ(func(small_string, nullptr, base),
@ -223,9 +217,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {

    for (int base = 2; base <= 36; ++base) {
      for (int first_digit = 0; first_digit <= 36; ++first_digit) {
-        small_string[0] = int_to_b36_char(first_digit);
+        small_string[0] =
+            LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
        for (int second_digit = 0; second_digit <= 36; ++second_digit) {
-          small_string[1] = int_to_b36_char(second_digit);
+          small_string[1] =
+              LIBC_NAMESPACE::internal::int_to_b36_char(second_digit);
          if (first_digit < base && second_digit < base) {
            LIBC_NAMESPACE::libc_errno = 0;
            ASSERT_EQ(
@ -248,11 +244,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {

    for (int base = 2; base <= 36; ++base) {
      for (int first_digit = 0; first_digit <= 36; ++first_digit) {
-        small_string[0] = int_to_b36_char(first_digit);
+        small_string[0] =
+            LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
        for (int second_digit = 0; second_digit <= 36; ++second_digit) {
-          small_string[1] = int_to_b36_char(second_digit);
+          small_string[1] =
+              LIBC_NAMESPACE::internal::int_to_b36_char(second_digit);
          for (int third_digit = 0; third_digit <= limit; ++third_digit) {
-            small_string[2] = int_to_b36_char(third_digit);
+            small_string[2] =
+                LIBC_NAMESPACE::internal::int_to_b36_char(third_digit);

            if (first_digit < base && second_digit < base &&
                third_digit < base) {
--- a/libc/test/src/string/strcmp_test.cpp
+++ b/libc/test/src/string/strcmp_test.cpp
@ -25,13 +25,13 @@ TEST(LlvmLibcStrCmpTest, EmptyStringShouldNotEqualNonEmptyString) {
  const char *s2 = "abc";
  int result = LIBC_NAMESPACE::strcmp(empty, s2);
  // This should be '\0' - 'a' = -97
-  ASSERT_EQ(result, -97);
+  ASSERT_EQ(result, '\0' - 'a');

  // Similar case if empty string is second argument.
  const char *s3 = "123";
  result = LIBC_NAMESPACE::strcmp(s3, empty);
  // This should be '1' - '\0' = 49
-  ASSERT_EQ(result, 49);
+  ASSERT_EQ(result, '1' - '\0');
 }

 TEST(LlvmLibcStrCmpTest, EqualStringsShouldReturnZero) {
@ -50,12 +50,12 @@ TEST(LlvmLibcStrCmpTest, ShouldReturnResultOfFirstDifference) {
  const char *s2 = "___C55__";
  int result = LIBC_NAMESPACE::strcmp(s1, s2);
  // This should return 'B' - 'C' = -1.
-  ASSERT_EQ(result, -1);
+  ASSERT_EQ(result, 'B' - 'C');

  // Verify operands reversed.
  result = LIBC_NAMESPACE::strcmp(s2, s1);
  // This should return 'C' - 'B' = 1.
-  ASSERT_EQ(result, 1);
+  ASSERT_EQ(result, 'C' - 'B');
 }

 TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) {
@ -63,12 +63,12 @@ TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) {
  const char *s2 = "abCd";
  int result = LIBC_NAMESPACE::strcmp(s1, s2);
  // 'c' - 'C' = 32.
-  ASSERT_EQ(result, 32);
+  ASSERT_EQ(result, 'c' - 'C');

  // Verify operands reversed.
  result = LIBC_NAMESPACE::strcmp(s2, s1);
  // 'C' - 'c' = -32.
-  ASSERT_EQ(result, -32);
+  ASSERT_EQ(result, 'C' - 'c');
 }

 TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) {
@ -76,12 +76,12 @@ TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) {
  const char *s2 = "abcd";
  int result = LIBC_NAMESPACE::strcmp(s1, s2);
  // '\0' - 'd' = -100.
-  ASSERT_EQ(result, -100);
+  ASSERT_EQ(result, '\0' - 'd');

  // Verify operands reversed.
  result = LIBC_NAMESPACE::strcmp(s2, s1);
  // 'd' - '\0' = 100.
-  ASSERT_EQ(result, 100);
+  ASSERT_EQ(result, 'd' - '\0');
 }

 TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) {
@ -89,11 +89,11 @@ TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) {
  const char *b = "b";
  int result = LIBC_NAMESPACE::strcmp(b, a);
  // 'b' - 'a' = 1.
-  ASSERT_EQ(result, 1);
+  ASSERT_EQ(result, 'b' - 'a');

  result = LIBC_NAMESPACE::strcmp(a, b);
  // 'a' - 'b' = -1.
-  ASSERT_EQ(result, -1);
+  ASSERT_EQ(result, 'a' - 'b');
 }

 TEST(LlvmLibcStrCmpTest, Case) {
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@ -740,6 +740,7 @@ libc_support_library(
    hdrs = ["src/__support/integer_literals.h"],
    deps = [
        ":__support_cpp_limits",
+        ":__support_ctype_utils",
        ":__support_uint128",
    ],
 )
@ -772,6 +773,7 @@ libc_support_library(
        ":__support_cpp_span",
        ":__support_cpp_string_view",
        ":__support_cpp_type_traits",
+        ":__support_ctype_utils",
    ],
 )

@ -4450,6 +4452,7 @@ libc_support_library(
        ":__support_cpp_limits",
        ":__support_cpp_span",
        ":__support_cpp_string_view",
+        ":__support_ctype_utils",
        ":__support_float_to_string",
        ":__support_fputil_fenv_impl",
        ":__support_fputil_fp_bits",
--- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
@ -63,12 +63,12 @@ libc_support_library(
        "//libc:__support_stringutil",
        "//libc:__support_uint128",
        "//libc:errno",
-        "//libc:llvm_libc_macros_stdfix_macros",
-        "//llvm:Support",
        "//libc:func_aligned_alloc",
        "//libc:func_free",
        "//libc:func_malloc",
        "//libc:func_realloc",
+        "//libc:llvm_libc_macros_stdfix_macros",
+        "//llvm:Support",
    ],
 )

@ -121,6 +121,7 @@ libc_support_library(
        "//libc:__support_cpp_bitset",
        "//libc:__support_cpp_span",
        "//libc:__support_cpp_type_traits",
+        "//libc:__support_ctype_utils",
        "//libc:__support_macros_config",
    ],
 )