Browse Source

LibJS: Remove ByteString internals from PrimitiveString

PrimitiveString is now internally either UTF-8, UTF-16, or both.
We no longer convert it to/from ByteString anywhere, nor does the VM have
a ByteString cache.
Andreas Kling 1 week ago
parent
commit
c71772126f

+ 1 - 1
Libraries/LibJS/Runtime/AbstractOperations.cpp

@@ -583,7 +583,7 @@ ThrowCompletionOr<Value> perform_eval(VM& vm, Value x, CallerMode strict_caller,
         .in_class_field_initializer = in_class_field_initializer,
     };
 
-    Parser parser { Lexer { code_string.byte_string() }, Program::Type::Script, move(initial_state) };
+    Parser parser { Lexer { code_string.utf8_string_view() }, Program::Type::Script, move(initial_state) };
     auto program = parser.parse_program(strict_caller == CallerMode::Strict);
 
     //     b. If script is a List of errors, throw a SyntaxError exception.

+ 3 - 3
Libraries/LibJS/Runtime/DatePrototype.cpp

@@ -1213,11 +1213,11 @@ JS_DEFINE_NATIVE_FUNCTION(DatePrototype::symbol_to_primitive)
     auto hint_value = vm.argument(0);
     if (!hint_value.is_string())
         return vm.throw_completion<TypeError>(ErrorType::InvalidHint, hint_value.to_string_without_side_effects());
-    auto hint = hint_value.as_string().byte_string();
+    auto hint = hint_value.as_string().utf8_string_view();
     Value::PreferredType try_first;
-    if (hint == "string" || hint == "default")
+    if (hint == "string"sv || hint == "default"sv)
         try_first = Value::PreferredType::String;
-    else if (hint == "number")
+    else if (hint == "number"sv)
         try_first = Value::PreferredType::Number;
     else
         return vm.throw_completion<TypeError>(ErrorType::InvalidHint, hint);

+ 15 - 15
Libraries/LibJS/Runtime/ObjectPrototype.cpp

@@ -147,53 +147,53 @@ JS_DEFINE_NATIVE_FUNCTION(ObjectPrototype::to_string)
     // 4. Let isArray be ? IsArray(O).
     auto is_array = TRY(Value(object).is_array(vm));
 
-    ByteString builtin_tag;
+    StringView builtin_tag;
 
     // 5. If isArray is true, let builtinTag be "Array".
     if (is_array)
-        builtin_tag = "Array";
+        builtin_tag = "Array"sv;
     // 6. Else if O has a [[ParameterMap]] internal slot, let builtinTag be "Arguments".
     else if (object->has_parameter_map())
-        builtin_tag = "Arguments";
+        builtin_tag = "Arguments"sv;
     // 7. Else if O has a [[Call]] internal method, let builtinTag be "Function".
     else if (object->is_function())
-        builtin_tag = "Function";
+        builtin_tag = "Function"sv;
     // 8. Else if O has an [[ErrorData]] internal slot, let builtinTag be "Error".
     else if (is<Error>(*object))
-        builtin_tag = "Error";
+        builtin_tag = "Error"sv;
     // 9. Else if O has a [[BooleanData]] internal slot, let builtinTag be "Boolean".
     else if (is<BooleanObject>(*object))
-        builtin_tag = "Boolean";
+        builtin_tag = "Boolean"sv;
     // 10. Else if O has a [[NumberData]] internal slot, let builtinTag be "Number".
     else if (is<NumberObject>(*object))
-        builtin_tag = "Number";
+        builtin_tag = "Number"sv;
     // 11. Else if O has a [[StringData]] internal slot, let builtinTag be "String".
     else if (is<StringObject>(*object))
-        builtin_tag = "String";
+        builtin_tag = "String"sv;
     // 12. Else if O has a [[DateValue]] internal slot, let builtinTag be "Date".
     else if (is<Date>(*object))
-        builtin_tag = "Date";
+        builtin_tag = "Date"sv;
     // 13. Else if O has a [[RegExpMatcher]] internal slot, let builtinTag be "RegExp".
     else if (is<RegExpObject>(*object))
-        builtin_tag = "RegExp";
+        builtin_tag = "RegExp"sv;
     // 14. Else, let builtinTag be "Object".
     else
-        builtin_tag = "Object";
+        builtin_tag = "Object"sv;
 
     // 15. Let tag be ? Get(O, @@toStringTag).
     auto to_string_tag = TRY(object->get(vm.well_known_symbol_to_string_tag()));
 
     // Optimization: Instead of creating another PrimitiveString from builtin_tag, we separate tag and to_string_tag and add an additional branch to step 16.
-    ByteString tag;
+    StringView tag;
 
     // 16. If Type(tag) is not String, set tag to builtinTag.
     if (!to_string_tag.is_string())
-        tag = move(builtin_tag);
+        tag = builtin_tag;
     else
-        tag = to_string_tag.as_string().byte_string();
+        tag = to_string_tag.as_string().utf8_string_view();
 
     // 17. Return the string-concatenation of "[object ", tag, and "]".
-    return PrimitiveString::create(vm, ByteString::formatted("[object {}]", tag));
+    return PrimitiveString::create(vm, MUST(String::formatted("[object {}]", tag)));
 }
 
 // 20.1.3.7 Object.prototype.valueOf ( ), https://tc39.es/ecma262/#sec-object.prototype.valueof

+ 5 - 59
Libraries/LibJS/Runtime/PrimitiveString.cpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
+ * Copyright (c) 2020-2025, Andreas Kling <andreas@ladybird.org>
  * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
@@ -33,11 +33,6 @@ PrimitiveString::PrimitiveString(String string)
 {
 }
 
-PrimitiveString::PrimitiveString(ByteString string)
-    : m_byte_string(move(string))
-{
-}
-
 PrimitiveString::PrimitiveString(Utf16String string)
     : m_utf16_string(move(string))
 {
@@ -49,8 +44,6 @@ PrimitiveString::~PrimitiveString()
         vm().string_cache().remove(*m_utf8_string);
     if (has_utf16_string())
         vm().utf16_string_cache().remove(*m_utf16_string);
-    if (has_byte_string())
-        vm().byte_string_cache().remove(*m_byte_string);
 }
 
 void PrimitiveString::visit_edges(Cell::Visitor& visitor)
@@ -73,8 +66,6 @@ bool PrimitiveString::is_empty() const
         return m_utf16_string->is_empty();
     if (has_utf8_string())
         return m_utf8_string->is_empty();
-    if (has_byte_string())
-        return m_byte_string->is_empty();
     VERIFY_NOT_REACHED();
 }
 
@@ -83,12 +74,8 @@ String PrimitiveString::utf8_string() const
     resolve_rope_if_needed(EncodingPreference::UTF8);
 
     if (!has_utf8_string()) {
-        if (has_byte_string())
-            m_utf8_string = MUST(String::from_byte_string(*m_byte_string));
-        else if (has_utf16_string())
-            m_utf8_string = m_utf16_string->to_utf8();
-        else
-            VERIFY_NOT_REACHED();
+        VERIFY(has_utf16_string());
+        m_utf8_string = m_utf16_string->to_utf8();
     }
 
     return *m_utf8_string;
@@ -100,33 +87,13 @@ StringView PrimitiveString::utf8_string_view() const
     return m_utf8_string->bytes_as_string_view();
 }
 
-ByteString PrimitiveString::byte_string() const
-{
-    resolve_rope_if_needed(EncodingPreference::UTF8);
-
-    if (!has_byte_string()) {
-        if (has_utf8_string())
-            m_byte_string = m_utf8_string->to_byte_string();
-        else if (has_utf16_string())
-            m_byte_string = m_utf16_string->to_byte_string();
-        else
-            VERIFY_NOT_REACHED();
-    }
-
-    return *m_byte_string;
-}
-
 Utf16String PrimitiveString::utf16_string() const
 {
     resolve_rope_if_needed(EncodingPreference::UTF16);
 
     if (!has_utf16_string()) {
-        if (has_utf8_string()) {
-            m_utf16_string = Utf16String::create(m_utf8_string->bytes_as_string_view());
-        } else {
-            VERIFY(has_byte_string());
-            m_utf16_string = Utf16String::create(*m_byte_string);
-        }
+        VERIFY(has_utf8_string());
+        m_utf16_string = Utf16String::create(m_utf8_string->bytes_as_string_view());
     }
 
     return *m_utf16_string;
@@ -208,27 +175,6 @@ GC::Ref<PrimitiveString> PrimitiveString::create(VM& vm, StringView string)
     return create(vm, String::from_utf8(string).release_value());
 }
 
-GC::Ref<PrimitiveString> PrimitiveString::create(VM& vm, ByteString string)
-{
-    if (string.is_empty())
-        return vm.empty_string();
-
-    if (string.length() == 1) {
-        auto ch = static_cast<u8>(string.characters()[0]);
-        if (is_ascii(ch))
-            return vm.single_ascii_character_string(ch);
-    }
-
-    auto& string_cache = vm.byte_string_cache();
-    auto it = string_cache.find(string);
-    if (it == string_cache.end()) {
-        auto new_string = vm.heap().allocate<PrimitiveString>(string);
-        string_cache.set(move(string), new_string);
-        return *new_string;
-    }
-    return *it->value;
-}
-
 GC::Ref<PrimitiveString> PrimitiveString::create(VM& vm, PrimitiveString& lhs, PrimitiveString& rhs)
 {
     // We're here to concatenate two strings into a new rope string.

+ 1 - 8
Libraries/LibJS/Runtime/PrimitiveString.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
+ * Copyright (c) 2020-2025, Andreas Kling <andreas@ladybird.org>
  * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
@@ -7,7 +7,6 @@
 
 #pragma once
 
-#include <AK/ByteString.h>
 #include <AK/Optional.h>
 #include <AK/String.h>
 #include <AK/StringView.h>
@@ -28,7 +27,6 @@ public:
     [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, Utf16String);
     [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, String);
     [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, FlyString const&);
-    [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, ByteString);
     [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, PrimitiveString&, PrimitiveString&);
     [[nodiscard]] static GC::Ref<PrimitiveString> create(VM&, StringView);
 
@@ -43,9 +41,6 @@ public:
     [[nodiscard]] StringView utf8_string_view() const;
     bool has_utf8_string() const { return m_utf8_string.has_value(); }
 
-    [[nodiscard]] ByteString byte_string() const;
-    bool has_byte_string() const { return m_byte_string.has_value(); }
-
     [[nodiscard]] Utf16String utf16_string() const;
     [[nodiscard]] Utf16View utf16_string_view() const;
     bool has_utf16_string() const { return m_utf16_string.has_value(); }
@@ -55,7 +50,6 @@ public:
 private:
     explicit PrimitiveString(PrimitiveString&, PrimitiveString&);
     explicit PrimitiveString(String);
-    explicit PrimitiveString(ByteString);
     explicit PrimitiveString(Utf16String);
 
     virtual void visit_edges(Cell::Visitor&) override;
@@ -72,7 +66,6 @@ private:
     mutable GC::Ptr<PrimitiveString> m_rhs;
 
     mutable Optional<String> m_utf8_string;
-    mutable Optional<ByteString> m_byte_string;
     mutable Optional<Utf16String> m_utf16_string;
 };
 

+ 1 - 1
Libraries/LibJS/Runtime/ShadowRealmPrototype.cpp

@@ -51,7 +51,7 @@ JS_DEFINE_NATIVE_FUNCTION(ShadowRealmPrototype::evaluate)
     auto& eval_realm = object->shadow_realm();
 
     // 6. Return ? PerformShadowRealmEval(sourceText, callerRealm, evalRealm).
-    return perform_shadow_realm_eval(vm, source_text.as_string().byte_string(), *caller_realm, eval_realm);
+    return perform_shadow_realm_eval(vm, source_text.as_string().utf8_string_view(), *caller_realm, eval_realm);
 }
 
 // 3.4.2 ShadowRealm.prototype.importValue ( specifier, exportName ), https://tc39.es/proposal-shadowrealm/#sec-shadowrealm.prototype.importvalue

+ 8 - 8
Libraries/LibJS/Runtime/VM.cpp

@@ -75,14 +75,14 @@ VM::VM(OwnPtr<CustomData> custom_data, ErrorMessages error_messages)
     m_empty_string = m_heap.allocate<PrimitiveString>(String {});
 
     typeof_strings = {
-        .number = m_heap.allocate<PrimitiveString>("number"),
-        .undefined = m_heap.allocate<PrimitiveString>("undefined"),
-        .object = m_heap.allocate<PrimitiveString>("object"),
-        .string = m_heap.allocate<PrimitiveString>("string"),
-        .symbol = m_heap.allocate<PrimitiveString>("symbol"),
-        .boolean = m_heap.allocate<PrimitiveString>("boolean"),
-        .bigint = m_heap.allocate<PrimitiveString>("bigint"),
-        .function = m_heap.allocate<PrimitiveString>("function"),
+        .number = m_heap.allocate<PrimitiveString>("number"_string),
+        .undefined = m_heap.allocate<PrimitiveString>("undefined"_string),
+        .object = m_heap.allocate<PrimitiveString>("object"_string),
+        .string = m_heap.allocate<PrimitiveString>("string"_string),
+        .symbol = m_heap.allocate<PrimitiveString>("symbol"_string),
+        .boolean = m_heap.allocate<PrimitiveString>("boolean"_string),
+        .bigint = m_heap.allocate<PrimitiveString>("bigint"_string),
+        .function = m_heap.allocate<PrimitiveString>("function"_string),
     };
 
     for (size_t i = 0; i < single_ascii_character_strings.size(); ++i)

+ 0 - 6
Libraries/LibJS/Runtime/VM.h

@@ -77,11 +77,6 @@ public:
         return m_string_cache;
     }
 
-    HashMap<ByteString, GC::Ptr<PrimitiveString>>& byte_string_cache()
-    {
-        return m_byte_string_cache;
-    }
-
     HashMap<Utf16String, GC::Ptr<PrimitiveString>>& utf16_string_cache()
     {
         return m_utf16_string_cache;
@@ -310,7 +305,6 @@ private:
     void set_well_known_symbols(WellKnownSymbols well_known_symbols) { m_well_known_symbols = move(well_known_symbols); }
 
     HashMap<String, GC::Ptr<PrimitiveString>> m_string_cache;
-    HashMap<ByteString, GC::Ptr<PrimitiveString>> m_byte_string_cache;
     HashMap<Utf16String, GC::Ptr<PrimitiveString>> m_utf16_string_cache;
 
     GC::Heap m_heap;

+ 8 - 8
Libraries/LibJS/Runtime/Value.cpp

@@ -724,7 +724,7 @@ ThrowCompletionOr<Value> Value::to_number_slow_case(VM& vm) const
         return Value(as_bool() ? 1 : 0);
     // 6. If argument is a String, return StringToNumber(argument).
     case STRING_TAG:
-        return string_to_number(as_string().byte_string());
+        return string_to_number(as_string().utf8_string_view());
     // 7. Assert: argument is an Object.
     case OBJECT_TAG: {
         // 8. Let primValue be ? ToPrimitive(argument, number).
@@ -778,7 +778,7 @@ ThrowCompletionOr<GC::Ref<BigInt>> Value::to_bigint(VM& vm) const
         return primitive.as_bigint();
     case STRING_TAG: {
         // 1. Let n be ! StringToBigInt(prim).
-        auto bigint = string_to_bigint(vm, primitive.as_string().byte_string());
+        auto bigint = string_to_bigint(vm, primitive.as_string().utf8_string_view());
 
         // 2. If n is undefined, throw a SyntaxError exception.
         if (!bigint.has_value())
@@ -2226,7 +2226,7 @@ bool same_value_non_number(Value lhs, Value rhs)
     // 5. If x is a String, then
     if (lhs.is_string()) {
         // a. If x and y are exactly the same sequence of code units (same length and same code units at corresponding indices), return true; otherwise, return false.
-        return lhs.as_string().byte_string() == rhs.as_string().byte_string();
+        return lhs.as_string().utf8_string_view() == rhs.as_string().utf8_string_view();
     }
 
     // 3. If x is undefined, return true.
@@ -2307,7 +2307,7 @@ ThrowCompletionOr<bool> is_loosely_equal(VM& vm, Value lhs, Value rhs)
     // 7. If Type(x) is BigInt and Type(y) is String, then
     if (lhs.is_bigint() && rhs.is_string()) {
         // a. Let n be StringToBigInt(y).
-        auto bigint = string_to_bigint(vm, rhs.as_string().byte_string());
+        auto bigint = string_to_bigint(vm, rhs.as_string().utf8_string_view());
 
         // b. If n is undefined, return false.
         if (!bigint.has_value())
@@ -2388,8 +2388,8 @@ ThrowCompletionOr<TriState> is_less_than(VM& vm, Value lhs, Value rhs, bool left
 
     // 3. If px is a String and py is a String, then
     if (x_primitive.is_string() && y_primitive.is_string()) {
-        auto x_string = x_primitive.as_string().byte_string();
-        auto y_string = y_primitive.as_string().byte_string();
+        auto x_string = x_primitive.as_string().utf8_string_view();
+        auto y_string = y_primitive.as_string().utf8_string_view();
 
         Utf8View x_code_points { x_string };
         Utf8View y_code_points { y_string };
@@ -2424,7 +2424,7 @@ ThrowCompletionOr<TriState> is_less_than(VM& vm, Value lhs, Value rhs, bool left
     // a. If px is a BigInt and py is a String, then
     if (x_primitive.is_bigint() && y_primitive.is_string()) {
         // i. Let ny be StringToBigInt(py).
-        auto y_bigint = string_to_bigint(vm, y_primitive.as_string().byte_string());
+        auto y_bigint = string_to_bigint(vm, y_primitive.as_string().utf8_string_view());
 
         // ii. If ny is undefined, return undefined.
         if (!y_bigint.has_value())
@@ -2439,7 +2439,7 @@ ThrowCompletionOr<TriState> is_less_than(VM& vm, Value lhs, Value rhs, bool left
     // b. If px is a String and py is a BigInt, then
     if (x_primitive.is_string() && y_primitive.is_bigint()) {
         // i. Let nx be StringToBigInt(px).
-        auto x_bigint = string_to_bigint(vm, x_primitive.as_string().byte_string());
+        auto x_bigint = string_to_bigint(vm, x_primitive.as_string().utf8_string_view());
 
         // ii. If nx is undefined, return undefined.
         if (!x_bigint.has_value())

+ 2 - 4
Libraries/LibJS/Runtime/ValueTraits.h

@@ -17,10 +17,8 @@ struct ValueTraits : public Traits<Value> {
     static unsigned hash(Value value)
     {
         VERIFY(!value.is_empty());
-        if (value.is_string()) {
-            // FIXME: Propagate this error.
-            return value.as_string().byte_string().hash();
-        }
+        if (value.is_string())
+            return value.as_string().utf8_string().hash();
 
         if (value.is_bigint())
             return value.as_bigint().big_integer().hash();

+ 1 - 1
Libraries/LibWeb/HTML/Scripting/Fetching.cpp

@@ -238,7 +238,7 @@ WebIDL::ExceptionOr<Optional<URL::URL>> resolve_imports_match(ByteString const&
 }
 
 // https://html.spec.whatwg.org/multipage/webappapis.html#resolving-a-url-like-module-specifier
-Optional<URL::URL> resolve_url_like_module_specifier(ByteString const& specifier, URL::URL const& base_url)
+Optional<URL::URL> resolve_url_like_module_specifier(StringView specifier, URL::URL const& base_url)
 {
     // 1. If specifier starts with "/", "./", or "../", then:
     if (specifier.starts_with("/"sv) || specifier.starts_with("./"sv) || specifier.starts_with("../"sv)) {

+ 1 - 1
Libraries/LibWeb/HTML/Scripting/Fetching.h

@@ -85,7 +85,7 @@ private:
 String module_type_from_module_request(JS::ModuleRequest const&);
 WebIDL::ExceptionOr<URL::URL> resolve_module_specifier(Optional<Script&> referring_script, String const& specifier);
 WebIDL::ExceptionOr<Optional<URL::URL>> resolve_imports_match(ByteString const& normalized_specifier, Optional<URL::URL> as_url, ModuleSpecifierMap const&);
-Optional<URL::URL> resolve_url_like_module_specifier(ByteString const& specifier, URL::URL const& base_url);
+Optional<URL::URL> resolve_url_like_module_specifier(StringView specifier, URL::URL const& base_url);
 ScriptFetchOptions get_descendant_script_fetch_options(ScriptFetchOptions const& original_options, URL::URL const& url, EnvironmentSettingsObject& settings_object);
 String resolve_a_module_integrity_metadata(URL::URL const& url, EnvironmentSettingsObject& settings_object);
 WebIDL::ExceptionOr<void> fetch_classic_script(GC::Ref<HTMLScriptElement>, URL::URL const&, EnvironmentSettingsObject& settings_object, ScriptFetchOptions options, CORSSettingAttribute cors_setting, String character_encoding, OnFetchScriptComplete on_complete);

+ 1 - 1
Libraries/LibWeb/HTML/Scripting/ImportMap.cpp

@@ -147,7 +147,7 @@ WebIDL::ExceptionOr<ModuleSpecifierMap> sort_and_normalise_module_specifier_map(
         }
 
         // 4. Let addressURL be the result of resolving a URL-like module specifier given value and baseURL.
-        auto address_url = resolve_url_like_module_specifier(value.as_string().byte_string(), base_url);
+        auto address_url = resolve_url_like_module_specifier(value.as_string().utf8_string_view(), base_url);
 
         // 5. If addressURL is null, then:
         if (!address_url.has_value()) {

+ 2 - 2
Tests/LibJS/test-js.cpp

@@ -69,14 +69,14 @@ TESTJS_GLOBAL_FUNCTION(mark_as_garbage, markAsGarbage)
         return execution_context->lexical_environment != nullptr;
     });
     if (!outer_environment.has_value())
-        return vm.throw_completion<JS::ReferenceError>(JS::ErrorType::UnknownIdentifier, variable_name.byte_string());
+        return vm.throw_completion<JS::ReferenceError>(JS::ErrorType::UnknownIdentifier, variable_name.utf8_string_view());
 
     auto reference = TRY(vm.resolve_binding(variable_name.utf8_string(), outer_environment.value()->lexical_environment));
 
     auto value = TRY(reference.get_value(vm));
 
     if (!can_be_held_weakly(value))
-        return vm.throw_completion<JS::TypeError>(JS::ErrorType::CannotBeHeldWeakly, ByteString::formatted("Variable with name {}", variable_name.byte_string()));
+        return vm.throw_completion<JS::TypeError>(JS::ErrorType::CannotBeHeldWeakly, ByteString::formatted("Variable with name {}", variable_name.utf8_string_view()));
 
     vm.heap().uproot_cell(&value.as_cell());
     TRY(reference.delete_(vm));