Overview
Comment:Fix: bring case-insensitive unicode comparison to LIKE operator in SQLite as it doesn't handle it by default
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | stable
Files: files | file ages | folders
SHA3-256: d8061fc8540dfbe69307c3a07495231441263d58f01e39757399542e2ea2f22c
User & Date: bohwaz on 2021-05-22 03:58:18
Other Links: manifest | tags
Context
2021-05-22
04:02
Make sure to remove punctuation as well check-in: 0b821fce4f user: bohwaz tags: trunk, stable
03:58
Fix: bring case-insensitive unicode comparison to LIKE operator in SQLite as it doesn't handle it by default check-in: d8061fc854 user: bohwaz tags: trunk, stable
02:40
Add notes textarea for transaction in activity subscription form check-in: db2b3c88c0 user: bohwaz tags: trunk, stable
Changes

Modified src/include/lib/Garradin/DB.php from [8dd3841b1b] to [4b3fbe283f].

11
12
13
14
15
16
17


18
19
20
21
22
23
24
..
47
48
49
50
51
52
53

54
55
56
57
58
59
60
...
184
185
186
187
188
189
190
191





























     * @link https://www.sqlite.org/pragma.html#pragma_application_id
     */
    const APPID = 0x5da2d811;

    static protected $_instance = null;

    protected $_version = -1;



    static public function getInstance($create = false, $readonly = false)
    {
        if (null === self::$_instance) {
            self::$_instance = new DB('sqlite', ['file' => DB_FILE]);
        }

................................................................................
        // Performance enhancement
        // see https://www.cs.utexas.edu/~jaya/slides/apsys17-sqlite-slides.pdf
        // https://ericdraken.com/sqlite-performance-testing/
        $this->exec(sprintf('PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA journal_size_limit = %d;', 32 * 1024 * 1024));

        $this->db->createFunction('dirname', [Utils::class, 'dirname']);
        $this->db->createFunction('basename', [Utils::class, 'basename']);

        $this->db->createCollation('NOCASE', [Utils::class, 'unicodeCaseComparison']);
    }

    public function version(): ?string
    {
        if (-1 === $this->_version) {
            $this->connect();
................................................................................
            $this->db->exec('PRAGMA foreign_keys = OFF;');
        }
        else {
            $this->db->exec('PRAGMA legacy_alter_table = OFF;');
            $this->db->exec('PRAGMA foreign_keys = ON;');
        }
    }
}




































>
>







 







>







 







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
..
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
...
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
     * @link https://www.sqlite.org/pragma.html#pragma_application_id
     */
    const APPID = 0x5da2d811;

    static protected $_instance = null;

    protected $_version = -1;

    static protected $unicode_patterns_cache = [];

    static public function getInstance($create = false, $readonly = false)
    {
        if (null === self::$_instance) {
            self::$_instance = new DB('sqlite', ['file' => DB_FILE]);
        }

................................................................................
        // Performance enhancement
        // see https://www.cs.utexas.edu/~jaya/slides/apsys17-sqlite-slides.pdf
        // https://ericdraken.com/sqlite-performance-testing/
        $this->exec(sprintf('PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA journal_size_limit = %d;', 32 * 1024 * 1024));

        $this->db->createFunction('dirname', [Utils::class, 'dirname']);
        $this->db->createFunction('basename', [Utils::class, 'basename']);
        $this->db->createFunction('like', [self::class, 'unicodeLike']);
        $this->db->createCollation('NOCASE', [Utils::class, 'unicodeCaseComparison']);
    }

    public function version(): ?string
    {
        if (-1 === $this->_version) {
            $this->connect();
................................................................................
            $this->db->exec('PRAGMA foreign_keys = OFF;');
        }
        else {
            $this->db->exec('PRAGMA legacy_alter_table = OFF;');
            $this->db->exec('PRAGMA foreign_keys = ON;');
        }
    }

    /**
     * This is a rewrite of SQLite LIKE function that is transforming
     * the pattern and the value to lowercase ascii, so that we can match
     * "émilie" with "emilie".
     *
     * This is probably not the best way to do that, but we have to resort to that
     * as ICU extension is rarely available.
     *
     * @see https://www.sqlite.org/c3ref/strlike.html
     * @see https://sqlite.org/src/file?name=ext/icu/icu.c&ci=trunk
     */
    static public function unicodeLike($pattern, $value, $escape = null) {
        $id = $pattern . $escape;

        if (!array_key_exists($id, self::$unicode_patterns_cache)) {
            $pattern = Utils::unicodeCaseFold($pattern);
            $escape = $escape ? '(?!' . preg_quote($escape, '/') . ')' : '';
            $pattern = preg_quote($pattern, '/');
            $pattern = preg_replace('/' . $escape . '%/', '.*', $pattern);
            $pattern = preg_replace('/' . $escape . '_/', '.', $pattern);
            $pattern = '/^' . $pattern . '$/';
            self::$unicode_patterns_cache[$id] = $pattern;
        }

        $value = Utils::unicodeCaseFold($value);

        return (bool) preg_match(self::$unicode_patterns_cache[$id], $value);
    }
}

Modified src/include/lib/Garradin/Utils.php from [7094cca199] to [499d00eab1].

11
12
13
14
15
16
17

18
19
20
21
22
23
24
...
912
913
914
915
916
917
918


















919
920
921
922
923
924
925
class Utils
{
    const EMAIL_CONTEXT_BULK = 'bulk';
    const EMAIL_CONTEXT_PRIVATE = 'private';
    const EMAIL_CONTEXT_SYSTEM = 'system';

    static protected $collator;


    const FRENCH_DATE_NAMES = [
        'January'=>'Janvier', 'February'=>'Février', 'March'=>'Mars', 'April'=>'Avril', 'May'=>'Mai',
        'June'=>'Juin', 'July'=>'Juillet', 'August'=>'Août', 'September'=>'Septembre', 'October'=>'Octobre',
        'November'=>'Novembre', 'December'=>'Décembre', 'Monday'=>'Lundi', 'Tuesday'=>'Mardi', 'Wednesday'=>'Mercredi',
        'Thursday'=>'Jeudi','Friday'=>'Vendredi','Saturday'=>'Samedi','Sunday'=>'Dimanche',
        'Feb'=>'Fév','Apr'=>'Avr','Jun'=>'Juin', 'Jul'=>'Juil','Aug'=>'Aout','Dec'=>'Déc',
................................................................................
        else {
            $a = strtoupper(self::transliterateToAscii($a));
            $b = strtoupper(self::transliterateToAscii($b));
        }

        return strcmp($a, $b);
    }



















    static public function knatcasesort(array $array)
    {
        uksort($array, [self::class, 'unicodeCaseComparison']);
        return $array;
    }
}







>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
...
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
class Utils
{
    const EMAIL_CONTEXT_BULK = 'bulk';
    const EMAIL_CONTEXT_PRIVATE = 'private';
    const EMAIL_CONTEXT_SYSTEM = 'system';

    static protected $collator;
    static protected $transliterator;

    const FRENCH_DATE_NAMES = [
        'January'=>'Janvier', 'February'=>'Février', 'March'=>'Mars', 'April'=>'Avril', 'May'=>'Mai',
        'June'=>'Juin', 'July'=>'Juillet', 'August'=>'Août', 'September'=>'Septembre', 'October'=>'Octobre',
        'November'=>'Novembre', 'December'=>'Décembre', 'Monday'=>'Lundi', 'Tuesday'=>'Mardi', 'Wednesday'=>'Mercredi',
        'Thursday'=>'Jeudi','Friday'=>'Vendredi','Saturday'=>'Samedi','Sunday'=>'Dimanche',
        'Feb'=>'Fév','Apr'=>'Avr','Jun'=>'Juin', 'Jul'=>'Juil','Aug'=>'Aout','Dec'=>'Déc',
................................................................................
        else {
            $a = strtoupper(self::transliterateToAscii($a));
            $b = strtoupper(self::transliterateToAscii($b));
        }

        return strcmp($a, $b);
    }

    /**
     * Transforms a unicode string to lowercase AND removes all diacritics
     *
     * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html
     */
    static public function unicodeCaseFold(string $str): string
    {
        if (!isset(self::$transliterator) && function_exists('transliterator_create')) {
            self::$transliterator = \Transliterator::create('NFD; [:Nonspacing Mark:] Remove; NFC; Lower');
        }

        if (isset(self::$transliterator)) {
            return self::$transliterator->transliterate($str);
        }

        return strtoupper(self::transliterateToAscii($str));
    }

    static public function knatcasesort(array $array)
    {
        uksort($array, [self::class, 'unicodeCaseComparison']);
        return $array;
    }
}