Overview
Comment:Make sure to remove punctuation as well
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | stable
Files: files | file ages | folders
SHA3-256: 0b821fce4f579e4f77c5bab5ad49555c0c440c222d27ad6ff6a91f35b9ea80c4
User & Date: bohwaz on 2021-05-22 04:02:37
Other Links: manifest | tags
Context
2021-05-22
11:06
Fix: multiple values fields first value was not imported correctly check-in: 2506e46fdc user: bohwaz tags: trunk, stable
04:02
Make sure to remove punctuation as well check-in: 0b821fce4f user: bohwaz tags: trunk, stable
03:58
Fix: bring case-insensitive unicode comparison to LIKE operator in SQLite as it doesn't handle it by default check-in: d8061fc854 user: bohwaz tags: trunk, stable
Changes

Modified src/include/lib/Garradin/DB.php from [4b3fbe283f] to [e8db354fd5].

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
     * This is probably not the best way to do that, but we have to resort to that
     * as ICU extension is rarely available.
     *
     * @see https://www.sqlite.org/c3ref/strlike.html
     * @see https://sqlite.org/src/file?name=ext/icu/icu.c&ci=trunk
     */
    static public function unicodeLike($pattern, $value, $escape = null) {
        // Replacement for SQLite's LIKE operator: returns true when $value matches
        // the LIKE pattern $pattern ('%' = any run of characters, '_' = exactly one
        // character) after both went through Utils::unicodeCaseFold().
        // $escape is the optional ESCAPE character: the character following it in
        // the pattern is taken literally, even if it is a wildcard.
        //
        // NULL operands never match; folding them would raise a TypeError because
        // Utils::unicodeCaseFold() only accepts strings.
        if (null === $pattern || null === $value) {
            return false;
        }

        $pattern = (string) $pattern;
        $value = (string) $value;
        $escape = (string) $escape;

        // The NUL separator keeps ('a%', '') and ('a', '%') from sharing a cache entry
        $id = $pattern . "\0" . $escape;

        if (!array_key_exists($id, self::$unicode_patterns_cache)) {
            // Folds a run of literal characters and makes it safe inside the regexp
            $quote = static function ($text) {
                return $text === '' ? '' : preg_quote(Utils::unicodeCaseFold($text), '/');
            };

            // Walk the pattern one character (not one byte) at a time, so that the
            // escape character and the wildcards are recognised BEFORE any quoting
            $chars = preg_split('//u', $pattern, -1, PREG_SPLIT_NO_EMPTY);

            if (false === $chars) {
                // Pattern is not valid UTF-8: fall back to a byte-wise walk
                $chars = $pattern === '' ? [] : str_split($pattern);
            }

            $count = count($chars);
            $regexp = '';
            $literal = '';

            for ($i = 0; $i < $count; $i++) {
                $char = $chars[$i];

                if ($escape !== '' && $char === $escape && $i + 1 < $count) {
                    // Escaped character: drop the escape, keep the next one as-is
                    $literal .= $chars[++$i];
                }
                elseif ($char === '%' || $char === '_') {
                    $regexp .= $quote($literal) . ($char === '%' ? '.*' : '.');
                    $literal = '';
                }
                else {
                    $literal .= $char;
                }
            }

            $regexp .= $quote($literal);

            // u: '.' matches a whole UTF-8 character, s: wildcards also match
            // newlines, D: '$' only matches at the very end of the value
            self::$unicode_patterns_cache[$id] = '/^' . $regexp . '$/usD';
        }

        $value = Utils::unicodeCaseFold($value);

        return (bool) preg_match(self::$unicode_patterns_cache[$id], $value);
    }
}







|







|








200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
     * This is probably not the best way to do that, but we have to resort to that
     * as ICU extension is rarely available.
     *
     * @see https://www.sqlite.org/c3ref/strlike.html
     * @see https://sqlite.org/src/file?name=ext/icu/icu.c&ci=trunk
     */
    static public function unicodeLike($pattern, $value, $escape = null) {
        // Replacement for SQLite's LIKE operator: returns true when $value matches
        // the LIKE pattern $pattern ('%' = any run of characters, '_' = exactly one
        // character) after both went through Utils::unicodeCaseFold().
        // $escape is the optional ESCAPE character: the character following it in
        // the pattern is taken literally, even if it is a wildcard.
        //
        // NULL operands never match; folding them would raise a TypeError because
        // Utils::unicodeCaseFold() only accepts strings.
        if (null === $pattern || null === $value) {
            return false;
        }

        $pattern = (string) $pattern;
        $value = (string) $value;
        $escape = (string) $escape;

        // The NUL separator keeps ('a%', '') and ('a', '%') from sharing a cache entry
        $id = md5($pattern . "\0" . $escape);

        if (!array_key_exists($id, self::$unicode_patterns_cache)) {
            // Folds a run of literal characters and makes it safe inside the regexp
            $quote = static function ($text) {
                return $text === '' ? '' : preg_quote(Utils::unicodeCaseFold($text), '/');
            };

            // The pattern is split into wildcards and literal runs BEFORE folding:
            // folding the whole pattern first would let the fold strip '%', '_' and
            // the escape character whenever it removes punctuation.
            $chars = preg_split('//u', $pattern, -1, PREG_SPLIT_NO_EMPTY);

            if (false === $chars) {
                // Pattern is not valid UTF-8: fall back to a byte-wise walk
                $chars = $pattern === '' ? [] : str_split($pattern);
            }

            $count = count($chars);
            $regexp = '';
            $literal = '';

            for ($i = 0; $i < $count; $i++) {
                $char = $chars[$i];

                if ($escape !== '' && $char === $escape && $i + 1 < $count) {
                    // Escaped character: drop the escape, keep the next one as-is
                    $literal .= $chars[++$i];
                }
                elseif ($char === '%' || $char === '_') {
                    $regexp .= $quote($literal) . ($char === '%' ? '.*' : '.');
                    $literal = '';
                }
                else {
                    $literal .= $char;
                }
            }

            $regexp .= $quote($literal);

            // u: '.' matches a whole UTF-8 character, s: wildcards also match newlines.
            // The expression is left unanchored on purpose, as before: the pattern
            // may match anywhere inside the value.
            // NOTE(review): strict LIKE semantics would anchor with ^...$ — confirm
            // that the substring behaviour is what callers expect.
            self::$unicode_patterns_cache[$id] = '/' . $regexp . '/us';
        }

        $value = Utils::unicodeCaseFold($value);

        return (bool) preg_match(self::$unicode_patterns_cache[$id], $value);
    }
}

Modified src/include/lib/Garradin/Utils.php from [499d00eab1] to [616957930e].

922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
     * Transforms a unicode string to lowercase AND removes all diacritics
     *
     * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html
     */
    static public function unicodeCaseFold(string $str): string
    {
        if (!isset(self::$transliterator) && function_exists('transliterator_create')) {
            self::$transliterator = \Transliterator::create('NFD; [:Nonspacing Mark:] Remove; NFC; Lower');
        }

        if (isset(self::$transliterator)) {
            return self::$transliterator->transliterate($str);
        }

        return strtoupper(self::transliterateToAscii($str));







|







922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
     * Transforms a unicode string to lowercase AND removes all diacritics
     *
     * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html
     */
    static public function unicodeCaseFold(string $str): string
    {
        if (!isset(self::$transliterator) && function_exists('transliterator_create')) {
            self::$transliterator = \Transliterator::create('Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC; [:Punctuation:] Remove; Lower();');
        }

        if (isset(self::$transliterator)) {
            return self::$transliterator->transliterate($str);
        }

        return strtoupper(self::transliterateToAscii($str));