diff --git a/src/string.cc b/src/string.cc
index edfd4cf6..d7522bd8 100644
--- a/src/string.cc
+++ b/src/string.cc
@@ -17,6 +17,7 @@ String::Data::Data(const char* data, size_t size, size_t capacity)
         if (capacity & 1)
             ++capacity;
 
+        kak_assert(capacity < Long::max_capacity);
         l.ptr = Alloc{}.allocate(capacity+1);
         l.size = size;
         l.capacity = capacity;
@@ -71,6 +72,10 @@ void String::Data::reserve(size_t new_capacity)
     if (is_long())
         new_capacity = std::max(l.capacity * 2, new_capacity);
 
+    if (new_capacity & 1)
+        ++new_capacity;
+
+    kak_assert(new_capacity < Long::max_capacity);
     char* new_ptr = Alloc{}.allocate(new_capacity+1);
     if (copy)
     {
diff --git a/src/string.hh b/src/string.hh
index be140888..3fa08a83 100644
--- a/src/string.hh
+++ b/src/string.hh
@@ -130,12 +130,22 @@ public:
     static const String ms_empty;
     static constexpr const char* option_type_name = "str";
 
+    // String data storage using small string optimization.
+    //
+    // The LSB of the last byte is used to flag whether we are using the small
+    // buffer or an allocated one. On big endian systems that means the
+    // allocated capacity must be even; on little endian systems it means the
+    // allocated capacity cannot use its most significant byte, so we
+    // effectively limit capacity to 2^24 on 32-bit archs, and 2^56 on 64-bit.
     union Data
     {
         using Alloc = Allocator<char, MemoryDomain::String>;
 
         struct Long
         {
+            static constexpr size_t max_capacity =
+                (size_t)1 << 8 * (sizeof(size_t) - 1);
+
             char* ptr;
             size_t size;
             size_t capacity;