123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505 |
- /*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "LayoutUtils.h"
- #include <gtest/gtest.h>
- #include "UnicodeUtils.h"
- namespace minikin {
- // Checks getNextWordBreakForCache() against an annotated test string.
- //
- // |query_str| is handed to ParseUnicode(), which fills a UTF-16 buffer and
- // reports both the text length and an offset (presumably the position of the
- // '|' marker in the string -- confirm against UnicodeUtils). That offset is the
- // break position expected when searching forward from |offset_in|.
- void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
-     const size_t kCapacity = 256U;
-     uint16_t text[kCapacity];
-     size_t length = 0U;
-     size_t wanted = 0U;
-     ParseUnicode(text, kCapacity, query_str, &length, &wanted);
-     const auto actual = getNextWordBreakForCache(U16StringPiece(text, length), offset_in);
-     // The streamed message is only shown when the expectation fails.
-     EXPECT_EQ(wanted, actual)
-             << "Expected position is [" << query_str << "] from offset " << offset_in;
- }
- // Checks getPrevWordBreakForCache() against an annotated test string.
- //
- // |query_str| is handed to ParseUnicode(), which fills a UTF-16 buffer and
- // reports both the text length and an offset (presumably the position of the
- // '|' marker in the string -- confirm against UnicodeUtils). That offset is the
- // break position expected when searching backward from |offset_in|.
- void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
-     const size_t kCapacity = 256U;
-     uint16_t text[kCapacity];
-     size_t length = 0U;
-     size_t wanted = 0U;
-     ParseUnicode(text, kCapacity, query_str, &length, &wanted);
-     const auto actual = getPrevWordBreakForCache(U16StringPiece(text, length), offset_in);
-     // The streamed message is only shown when the expectation fails.
-     EXPECT_EQ(wanted, actual)
-             << "Expected position is [" << query_str << "] from offset " << offset_in;
- }
- // Table-driven coverage of getNextWordBreakForCache(). Every row is run through
- // ExpectNextWordBreakForCache(offset, text) in the order listed; the '|' in the
- // text marks the break position expected for that starting offset.
- TEST(WordBreakTest, goNextWordBreakTest) {
-     struct NextCase {
-         size_t offset;
-         const char* text;
-     };
-     const NextCase kNextCases[] = {
-             // Text consisting of the marker only.
-             {0, "|"},
-             // A run of letters without any space has no break before the end.
-             {0, "'a' 'b' 'c' 'd' |"},
-             {1, "'a' 'b' 'c' 'd' |"},
-             {2, "'a' 'b' 'c' 'd' |"},
-             {3, "'a' 'b' 'c' 'd' |"},
-             {4, "'a' 'b' 'c' 'd' |"},
-             {1000, "'a' 'b' 'c' 'd' |"},
-             // A space (U+0020) introduces a word break.
-             {0, "'a' 'b' | U+0020 'c' 'd'"},
-             {1, "'a' 'b' | U+0020 'c' 'd'"},
-             {2, "'a' 'b' U+0020 | 'c' 'd'"},
-             {3, "'a' 'b' U+0020 'c' 'd' |"},
-             {4, "'a' 'b' U+0020 'c' 'd' |"},
-             {5, "'a' 'b' U+0020 'c' 'd' |"},
-             {1000, "'a' 'b' U+0020 'c' 'd' |"},
-             // Same with U+2000.
-             {0, "'a' 'b' | U+2000 'c' 'd'"},
-             {1, "'a' 'b' | U+2000 'c' 'd'"},
-             {2, "'a' 'b' U+2000 | 'c' 'd'"},
-             {3, "'a' 'b' U+2000 'c' 'd' |"},
-             {4, "'a' 'b' U+2000 'c' 'd' |"},
-             {5, "'a' 'b' U+2000 'c' 'd' |"},
-             {1000, "'a' 'b' U+2000 'c' 'd' |"},
-             // Two consecutive U+2000.
-             {0, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
-             {1, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
-             {2, "'a' 'b' U+2000 | U+2000 'c' 'd'"},
-             {3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
-             {4, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
-             {5, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
-             {6, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
-             {1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
-             // Each CJK ideograph introduces a word break.
-             {0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"},
-             {1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"},
-             {2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"},
-             {3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},
-             {4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},
-             {5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},
-             {1000, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},
-             {0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"},
-             {1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"},
-             {2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"},
-             {3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},
-             {4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},
-             {5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},
-             {1000, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},
-             // Mixed ideographs, letters and U+2000.
-             {0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
-             {1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
-             {2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
-             {3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"},
-             {4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},
-             {5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"},
-             {1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"},
-             // A trailing Unicode combining character stays with its base.
-             {0, "U+4E00 U+0332 | U+4E00"},
-             {1, "U+4E00 U+0332 | U+4E00"},
-             {2, "U+4E00 U+0332 U+4E00 |"},
-             {3, "U+4E00 U+0332 U+4E00 |"},
-             {1000, "U+4E00 U+0332 U+4E00 |"},
-             // Surrogate pairs.
-             {0, "U+1F60D U+1F618 |"},
-             {1, "U+1F60D U+1F618 |"},
-             {2, "U+1F60D U+1F618 |"},
-             {3, "U+1F60D U+1F618 |"},
-             {4, "U+1F60D U+1F618 |"},
-             {1000, "U+1F60D U+1F618 |"},
-             // Broken surrogate pairs.
-             // U+D84D is a leading surrogate with no trailing surrogate.
-             {0, "U+D84D U+1F618 |"},
-             {1, "U+D84D U+1F618 |"},
-             {2, "U+D84D U+1F618 |"},
-             {3, "U+D84D U+1F618 |"},
-             {1000, "U+D84D U+1F618 |"},
-             {0, "U+1F618 U+D84D |"},
-             {1, "U+1F618 U+D84D |"},
-             {2, "U+1F618 U+D84D |"},
-             {3, "U+1F618 U+D84D |"},
-             {1000, "U+1F618 U+D84D |"},
-             // U+DE0D is a trailing surrogate with no leading surrogate.
-             {0, "U+DE0D U+1F618 |"},
-             {1, "U+DE0D U+1F618 |"},
-             {2, "U+DE0D U+1F618 |"},
-             {3, "U+DE0D U+1F618 |"},
-             {1000, "U+DE0D U+1F618 |"},
-             {0, "U+1F618 U+DE0D |"},
-             {1, "U+1F618 U+DE0D |"},
-             {2, "U+1F618 U+DE0D |"},
-             {3, "U+1F618 U+DE0D |"},
-             {1000, "U+1F618 U+DE0D |"},
-             // Regional indicator pair: U+1F1FA U+1F1F8 is the US national flag.
-             {0, "U+1F1FA U+1F1F8 |"},
-             {1, "U+1F1FA U+1F1F8 |"},
-             {2, "U+1F1FA U+1F1F8 |"},
-             {1000, "U+1F1FA U+1F1F8 |"},
-             // Tone marks.
-             // CJK ideograph + tone mark + CJK ideograph.
-             {0, "U+4444 U+302D | U+4444"},
-             {1, "U+4444 U+302D | U+4444"},
-             {2, "U+4444 U+302D U+4444 |"},
-             {3, "U+4444 U+302D U+4444 |"},
-             {1000, "U+4444 U+302D U+4444 |"},
-             // Variation selectors.
-             // CJK ideograph + VS1 + CJK ideograph.
-             {0, "U+845B U+FE00 | U+845B"},
-             {1, "U+845B U+FE00 | U+845B"},
-             {2, "U+845B U+FE00 U+845B |"},
-             {3, "U+845B U+FE00 U+845B |"},
-             {1000, "U+845B U+FE00 U+845B |"},
-             // CJK ideograph + VS17 + CJK ideograph.
-             {0, "U+845B U+E0100 | U+845B"},
-             {1, "U+845B U+E0100 | U+845B"},
-             {2, "U+845B U+E0100 | U+845B"},
-             {3, "U+845B U+E0100 U+845B |"},
-             {4, "U+845B U+E0100 U+845B |"},
-             {5, "U+845B U+E0100 U+845B |"},
-             {1000, "U+845B U+E0100 U+845B |"},
-             // CJK ideograph + tone mark + VS1.
-             {0, "U+4444 U+302D U+FE00 | U+4444"},
-             {1, "U+4444 U+302D U+FE00 | U+4444"},
-             {2, "U+4444 U+302D U+FE00 | U+4444"},
-             {3, "U+4444 U+302D U+FE00 U+4444 |"},
-             {4, "U+4444 U+302D U+FE00 U+4444 |"},
-             {1000, "U+4444 U+302D U+FE00 U+4444 |"},
-             // CJK ideograph + tone mark + VS17.
-             {0, "U+4444 U+302D U+E0100 | U+4444"},
-             {1, "U+4444 U+302D U+E0100 | U+4444"},
-             {2, "U+4444 U+302D U+E0100 | U+4444"},
-             {3, "U+4444 U+302D U+E0100 | U+4444"},
-             {4, "U+4444 U+302D U+E0100 U+4444 |"},
-             {5, "U+4444 U+302D U+E0100 U+4444 |"},
-             {1000, "U+4444 U+302D U+E0100 U+4444 |"},
-             // CJK ideograph + VS1 + tone mark.
-             {0, "U+4444 U+FE00 U+302D | U+4444"},
-             {1, "U+4444 U+FE00 U+302D | U+4444"},
-             {2, "U+4444 U+FE00 U+302D | U+4444"},
-             {3, "U+4444 U+FE00 U+302D U+4444 |"},
-             {4, "U+4444 U+FE00 U+302D U+4444 |"},
-             {1000, "U+4444 U+FE00 U+302D U+4444 |"},
-             // CJK ideograph + VS17 + tone mark.
-             {0, "U+4444 U+E0100 U+302D | U+4444"},
-             {1, "U+4444 U+E0100 U+302D | U+4444"},
-             {2, "U+4444 U+E0100 U+302D | U+4444"},
-             {3, "U+4444 U+E0100 U+302D | U+4444"},
-             {4, "U+4444 U+E0100 U+302D U+4444 |"},
-             {5, "U+4444 U+E0100 U+302D U+4444 |"},
-             {1000, "U+4444 U+E0100 U+302D U+4444 |"},
-             // The remaining rows use variation selectors and tone marks in
-             // unusual ways. They exist to catch future behavior changes such as
-             // out-of-bounds indexing or crashes. Feel free to update the
-             // expectations if a behavior change makes sense to you.
-             // Isolated tone marks and variation selectors.
-             {0, "U+FE00 |"},
-             {1, "U+FE00 |"},
-             {1000, "U+FE00 |"},
-             {0, "U+E0100 |"},
-             {1000, "U+E0100 |"},
-             {0, "U+302D |"},
-             {1000, "U+302D |"},
-             // CJK ideograph + VS1 + VS1.
-             {0, "U+845B U+FE00 U+FE00 | U+845B"},
-             {1, "U+845B U+FE00 U+FE00 | U+845B"},
-             {2, "U+845B U+FE00 U+FE00 | U+845B"},
-             {3, "U+845B U+FE00 U+FE00 U+845B |"},
-             {4, "U+845B U+FE00 U+FE00 U+845B |"},
-             {1000, "U+845B U+FE00 U+FE00 U+845B |"},
-             // CJK ideograph + VS17 + VS17.
-             {0, "U+845B U+E0100 U+E0100 | U+845B"},
-             {1, "U+845B U+E0100 U+E0100 | U+845B"},
-             {2, "U+845B U+E0100 U+E0100 | U+845B"},
-             {3, "U+845B U+E0100 U+E0100 | U+845B"},
-             {4, "U+845B U+E0100 U+E0100 | U+845B"},
-             {5, "U+845B U+E0100 U+E0100 U+845B |"},
-             {6, "U+845B U+E0100 U+E0100 U+845B |"},
-             {1000, "U+845B U+E0100 U+E0100 U+845B |"},
-             // CJK ideograph + VS1 + VS17.
-             {0, "U+845B U+FE00 U+E0100 | U+845B"},
-             {1, "U+845B U+FE00 U+E0100 | U+845B"},
-             {2, "U+845B U+FE00 U+E0100 | U+845B"},
-             {3, "U+845B U+FE00 U+E0100 | U+845B"},
-             {4, "U+845B U+FE00 U+E0100 U+845B |"},
-             {5, "U+845B U+FE00 U+E0100 U+845B |"},
-             {1000, "U+845B U+FE00 U+E0100 U+845B |"},
-             // CJK ideograph + VS17 + VS1.
-             {0, "U+845B U+E0100 U+FE00 | U+845B"},
-             {1, "U+845B U+E0100 U+FE00 | U+845B"},
-             {2, "U+845B U+E0100 U+FE00 | U+845B"},
-             {3, "U+845B U+E0100 U+FE00 | U+845B"},
-             {4, "U+845B U+E0100 U+FE00 U+845B |"},
-             {5, "U+845B U+E0100 U+FE00 U+845B |"},
-             {1000, "U+845B U+E0100 U+FE00 U+845B |"},
-             // Tone mark + tone mark.
-             {0, "U+4444 U+302D U+302D | U+4444"},
-             {1, "U+4444 U+302D U+302D | U+4444"},
-             {2, "U+4444 U+302D U+302D | U+4444"},
-             {3, "U+4444 U+302D U+302D U+4444 |"},
-             {4, "U+4444 U+302D U+302D U+4444 |"},
-             {1000, "U+4444 U+302D U+302D U+4444 |"},
-     };
-     for (const NextCase& entry : kNextCases) {
-         ExpectNextWordBreakForCache(entry.offset, entry.text);
-     }
- }
- // Table-driven coverage of getPrevWordBreakForCache(). Every row is run through
- // ExpectPrevWordBreakForCache(offset, text) in the order listed; the '|' in the
- // text marks the break position expected for that starting offset.
- TEST(WordBreakTest, goPrevWordBreakTest) {
-     struct PrevCase {
-         size_t offset;
-         const char* text;
-     };
-     const PrevCase kPrevCases[] = {
-             // Text consisting of the marker only.
-             {0, "|"},
-             // A run of letters without any space has no break after the start.
-             {0, "| 'a' 'b' 'c' 'd'"},
-             {1, "| 'a' 'b' 'c' 'd'"},
-             {2, "| 'a' 'b' 'c' 'd'"},
-             {3, "| 'a' 'b' 'c' 'd'"},
-             {4, "| 'a' 'b' 'c' 'd'"},
-             {1000, "| 'a' 'b' 'c' 'd'"},
-             // A space (U+0020) introduces a word break.
-             {0, "| 'a' 'b' U+0020 'c' 'd'"},
-             {1, "| 'a' 'b' U+0020 'c' 'd'"},
-             {2, "| 'a' 'b' U+0020 'c' 'd'"},
-             {3, "'a' 'b' | U+0020 'c' 'd'"},
-             {4, "'a' 'b' U+0020 | 'c' 'd'"},
-             {5, "'a' 'b' U+0020 | 'c' 'd'"},
-             {1000, "'a' 'b' U+0020 | 'c' 'd'"},
-             // Same with U+2000.
-             {0, "| 'a' 'b' U+2000 'c' 'd'"},
-             {1, "| 'a' 'b' U+2000 'c' 'd'"},
-             {2, "| 'a' 'b' U+2000 'c' 'd'"},
-             {3, "'a' 'b' | U+2000 'c' 'd'"},
-             {4, "'a' 'b' U+2000 | 'c' 'd'"},
-             {5, "'a' 'b' U+2000 | 'c' 'd'"},
-             {1000, "'a' 'b' U+2000 | 'c' 'd'"},
-             // Two consecutive U+2000.
-             {0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
-             {1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
-             {2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
-             {3, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
-             {4, "'a' 'b' U+2000 | U+2000 'c' 'd'"},
-             {5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
-             {6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
-             {1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
-             // Each CJK ideograph introduces a word break.
-             {0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"},
-             {1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"},
-             {2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"},
-             {3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"},
-             {4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"},
-             {5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},
-             {1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},
-             {0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"},
-             {1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"},
-             {2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"},
-             {3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"},
-             {4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"},
-             {5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},
-             {1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},
-             // Mixed ideographs, letters and U+2000.
-             {0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
-             {1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
-             {2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
-             {3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
-             {4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
-             {5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"},
-             {6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},
-             {1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},
-             // A trailing Unicode combining character stays with its base.
-             {0, "| U+4E00 U+0332 U+4E00"},
-             {1, "| U+4E00 U+0332 U+4E00"},
-             {2, "| U+4E00 U+0332 U+4E00"},
-             {3, "U+4E00 U+0332 | U+4E00"},
-             {1000, "U+4E00 U+0332 | U+4E00"},
-             // Surrogate pairs.
-             {0, "| U+1F60D U+1F618"},
-             {1, "| U+1F60D U+1F618"},
-             {2, "| U+1F60D U+1F618"},
-             {3, "| U+1F60D U+1F618"},
-             {4, "| U+1F60D U+1F618"},
-             {1000, "| U+1F60D U+1F618"},
-             // Broken surrogate pairs.
-             // U+D84D is a leading surrogate with no trailing surrogate.
-             {0, "| U+D84D U+1F618"},
-             {1, "| U+D84D U+1F618"},
-             {2, "| U+D84D U+1F618"},
-             {3, "| U+D84D U+1F618"},
-             {1000, "| U+D84D U+1F618"},
-             {0, "| U+1F618 U+D84D"},
-             {1, "| U+1F618 U+D84D"},
-             {2, "| U+1F618 U+D84D"},
-             {3, "| U+1F618 U+D84D"},
-             {1000, "| U+1F618 U+D84D"},
-             // U+DE0D is a trailing surrogate with no leading surrogate.
-             {0, "| U+DE0D U+1F618"},
-             {1, "| U+DE0D U+1F618"},
-             {2, "| U+DE0D U+1F618"},
-             {3, "| U+DE0D U+1F618"},
-             {1000, "| U+DE0D U+1F618"},
-             {0, "| U+1F618 U+DE0D"},
-             {1, "| U+1F618 U+DE0D"},
-             {2, "| U+1F618 U+DE0D"},
-             {3, "| U+1F618 U+DE0D"},
-             {1000, "| U+1F618 U+DE0D"},
-             // Regional indicator pair: U+1F1FA U+1F1F8 is the US national flag.
-             {0, "| U+1F1FA U+1F1F8"},
-             {1, "| U+1F1FA U+1F1F8"},
-             {2, "| U+1F1FA U+1F1F8"},
-             {1000, "| U+1F1FA U+1F1F8"},
-             // Tone marks.
-             // CJK ideograph + tone mark + CJK ideograph.
-             {0, "| U+4444 U+302D U+4444"},
-             {1, "| U+4444 U+302D U+4444"},
-             {2, "| U+4444 U+302D U+4444"},
-             {3, "U+4444 U+302D | U+4444"},
-             {1000, "U+4444 U+302D | U+4444"},
-             // Variation selectors.
-             // CJK ideograph + VS1 + CJK ideograph.
-             {0, "| U+845B U+FE00 U+845B"},
-             {1, "| U+845B U+FE00 U+845B"},
-             {2, "| U+845B U+FE00 U+845B"},
-             {3, "U+845B U+FE00 | U+845B"},
-             {1000, "U+845B U+FE00 | U+845B"},
-             // CJK ideograph + VS17 + CJK ideograph.
-             {0, "| U+845B U+E0100 U+845B"},
-             {1, "| U+845B U+E0100 U+845B"},
-             {2, "| U+845B U+E0100 U+845B"},
-             {3, "| U+845B U+E0100 U+845B"},
-             {4, "U+845B U+E0100 | U+845B"},
-             {5, "U+845B U+E0100 | U+845B"},
-             {1000, "U+845B U+E0100 | U+845B"},
-             // CJK ideograph + tone mark + VS1.
-             {0, "| U+4444 U+302D U+FE00 U+4444"},
-             {1, "| U+4444 U+302D U+FE00 U+4444"},
-             {2, "| U+4444 U+302D U+FE00 U+4444"},
-             {3, "| U+4444 U+302D U+FE00 U+4444"},
-             {4, "U+4444 U+302D U+FE00 | U+4444"},
-             {1000, "U+4444 U+302D U+FE00 | U+4444"},
-             // CJK ideograph + tone mark + VS17.
-             {0, "| U+4444 U+302D U+E0100 U+4444"},
-             {1, "| U+4444 U+302D U+E0100 U+4444"},
-             {2, "| U+4444 U+302D U+E0100 U+4444"},
-             {3, "| U+4444 U+302D U+E0100 U+4444"},
-             {4, "| U+4444 U+302D U+E0100 U+4444"},
-             {5, "U+4444 U+302D U+E0100 | U+4444"},
-             {1000, "U+4444 U+302D U+E0100 | U+4444"},
-             // CJK ideograph + VS1 + tone mark.
-             {0, "| U+4444 U+FE00 U+302D U+4444"},
-             {1, "| U+4444 U+FE00 U+302D U+4444"},
-             {2, "| U+4444 U+FE00 U+302D U+4444"},
-             {3, "| U+4444 U+FE00 U+302D U+4444"},
-             {4, "U+4444 U+FE00 U+302D | U+4444"},
-             {1000, "U+4444 U+FE00 U+302D | U+4444"},
-             // CJK ideograph + VS17 + tone mark.
-             {0, "| U+4444 U+E0100 U+302D U+4444"},
-             {1, "| U+4444 U+E0100 U+302D U+4444"},
-             {2, "| U+4444 U+E0100 U+302D U+4444"},
-             {3, "| U+4444 U+E0100 U+302D U+4444"},
-             {4, "| U+4444 U+E0100 U+302D U+4444"},
-             {5, "U+4444 U+E0100 U+302D | U+4444"},
-             {1000, "U+4444 U+E0100 U+302D | U+4444"},
-             // The remaining rows use variation selectors and tone marks in
-             // unusual ways. They exist to catch future behavior changes such as
-             // out-of-bounds indexing or crashes. Feel free to update the
-             // expectations if a behavior change makes sense to you.
-             // Isolated tone marks and variation selectors.
-             {0, "| U+FE00"},
-             {1, "| U+FE00"},
-             {1000, "| U+FE00"},
-             {0, "| U+E0100"},
-             {1000, "| U+E0100"},
-             {0, "| U+302D"},
-             {1000, "| U+302D"},
-             // CJK ideograph + VS1 + VS1.
-             {0, "| U+845B U+FE00 U+FE00 U+845B"},
-             {1, "| U+845B U+FE00 U+FE00 U+845B"},
-             {2, "| U+845B U+FE00 U+FE00 U+845B"},
-             {3, "| U+845B U+FE00 U+FE00 U+845B"},
-             {4, "U+845B U+FE00 U+FE00 | U+845B"},
-             {1000, "U+845B U+FE00 U+FE00 | U+845B"},
-             // CJK ideograph + VS17 + VS17.
-             {0, "| U+845B U+E0100 U+E0100 U+845B"},
-             {1, "| U+845B U+E0100 U+E0100 U+845B"},
-             {2, "| U+845B U+E0100 U+E0100 U+845B"},
-             {3, "| U+845B U+E0100 U+E0100 U+845B"},
-             {4, "| U+845B U+E0100 U+E0100 U+845B"},
-             {5, "| U+845B U+E0100 U+E0100 U+845B"},
-             {6, "U+845B U+E0100 U+E0100 | U+845B"},
-             {1000, "U+845B U+E0100 U+E0100 | U+845B"},
-             // CJK ideograph + VS1 + VS17.
-             {0, "| U+845B U+FE00 U+E0100 U+845B"},
-             {1, "| U+845B U+FE00 U+E0100 U+845B"},
-             {2, "| U+845B U+FE00 U+E0100 U+845B"},
-             {3, "| U+845B U+FE00 U+E0100 U+845B"},
-             {4, "| U+845B U+FE00 U+E0100 U+845B"},
-             {5, "U+845B U+FE00 U+E0100 | U+845B"},
-             {1000, "U+845B U+FE00 U+E0100 | U+845B"},
-             // CJK ideograph + VS17 + VS1.
-             {0, "| U+845B U+E0100 U+FE00 U+845B"},
-             {1, "| U+845B U+E0100 U+FE00 U+845B"},
-             {2, "| U+845B U+E0100 U+FE00 U+845B"},
-             {3, "| U+845B U+E0100 U+FE00 U+845B"},
-             {4, "| U+845B U+E0100 U+FE00 U+845B"},
-             {5, "U+845B U+E0100 U+FE00 | U+845B"},
-             {1000, "U+845B U+E0100 U+FE00 | U+845B"},
-             // Tone mark + tone mark.
-             {0, "| U+4444 U+302D U+302D U+4444"},
-             {1, "| U+4444 U+302D U+302D U+4444"},
-             {2, "| U+4444 U+302D U+302D U+4444"},
-             {3, "| U+4444 U+302D U+302D U+4444"},
-             {4, "U+4444 U+302D U+302D | U+4444"},
-             {1000, "U+4444 U+302D U+302D | U+4444"},
-     };
-     for (const PrevCase& entry : kPrevCases) {
-         ExpectPrevWordBreakForCache(entry.offset, entry.text);
-     }
- }
- } // namespace minikin
|