Overview
Comment: | Make sure data imported is in UTF-8 and that it's also valid UTF-8 for collator comparison, to avoid database corruption |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | stable |
Files: | files | file ages | folders |
SHA3-256: | 5b023687059a3418e4cc90604618019d |
User & Date: | bohwaz on 2022-01-11 20:45:36 |
Other Links: | manifest | tags |
Context
2022-01-12
| ||
00:11 | Rename custom unicode collation to U_NOCASE, don't replace the native NOCASE collation as it can lead to index errors and malformed database check-in: 5cd6df6ced user: bohwaz tags: trunk | |
2022-01-11
| ||
20:45 | Make sure data imported is in UTF-8 and that it's also valid UTF-8 for collator comparison, to avoid database corruption check-in: 5b02368705 user: bohwaz tags: trunk, stable | |
2022-01-10
| ||
17:15 | Add SECURITY policy file for Github check-in: 61572f78dc user: bohwaz tags: trunk | |
Changes
Modified src/include/lib/Garradin/CSV.php from [1989ff97c6] to [cfc298524f].
︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 | } if (count($row) != $nb_columns) { throw new UserException('Erreur sur la ligne ' . $line . ' : incohérence dans le nombre de colonnes avec la première ligne.'); } $out[$line] = $row; } fclose($fp); return $out; } | > > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | } if (count($row) != $nb_columns) { throw new UserException('Erreur sur la ligne ' . $line . ' : incohérence dans le nombre de colonnes avec la première ligne.'); } // Make sure the data is UTF-8 encoded $row = array_map($row, fn ($a) => Utils::utf8_encode(trim($a))); $out[$line] = $row; } fclose($fp); return $out; } |
︙ | ︙ | |||
231 232 233 234 235 236 237 | // Find the delimiter $delim = self::findDelimiter($fp); self::skipBOM($fp); $line = 0; $columns = fgetcsv($fp, 4096, $delim); | > > | | 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | // Find the delimiter $delim = self::findDelimiter($fp); self::skipBOM($fp); $line = 0; $columns = fgetcsv($fp, 4096, $delim); // Make sure the data is UTF-8 encoded $columns = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $columns); // Check for required columns foreach ($expected_columns as $column) { if (!in_array($column, $columns, true)) { throw new UserException(sprintf('La colonne "%s" est absente du fichier importé', $column)); } } |
︙ | ︙ | |||
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | } if (count($row) != count($columns)) { throw new UserException('Erreur sur la ligne ' . $line . ' : le nombre de colonnes est incorrect.'); } $row = array_combine($columns, $row); yield $line => $row; } fclose($fp); } } | > > > | 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | } if (count($row) != count($columns)) { throw new UserException('Erreur sur la ligne ' . $line . ' : le nombre de colonnes est incorrect.'); } // Make sure the data is UTF-8 encoded $row = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $row); $row = array_combine($columns, $row); yield $line => $row; } fclose($fp); } } |
Modified src/include/lib/Garradin/Membres/Import.php from [9fc2a43ba6] to [e73b1e22a1].
︙ | ︙ | |||
104 105 106 107 108 109 110 111 112 113 114 115 116 117 | $line++; if (empty($row)) { continue; } if ($line == 1) { if (empty($row[0]) || !is_string($row[0]) || is_numeric($row[0])) { $db->rollback(); throw new UserException('Erreur sur la ligne 1 : devrait contenir l\'en-tête des colonnes.'); } | > > > | 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | $line++; if (empty($row)) { continue; } // Make sure the data is UTF-8 encoded $row = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $row); if ($line == 1) { if (empty($row[0]) || !is_string($row[0]) || is_numeric($row[0])) { $db->rollback(); throw new UserException('Erreur sur la ligne 1 : devrait contenir l\'en-tête des colonnes.'); } |
︙ | ︙ |
Modified src/include/lib/Garradin/Utils.php from [f7279d34e4] to [8303b1eb7d].
︙ | ︙ | |||
936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 | self::$collator = \Collator::create('fr_FR'); // This is what makes the comparison case insensitive // https://www.php.net/manual/en/collator.setstrength.php self::$collator->setAttribute(\Collator::STRENGTH, \Collator::SECONDARY); // Don't use \Collator::NUMERIC_COLLATION here as it goes against what would feel logic // with NUMERIC_COLLATION: 1, 2, 10, 11, 101 // without: 1, 10, 101, 11, 2 } if (isset(self::$collator)) { return (int) self::$collator->compare($a, $b); } $a = strtoupper(self::transliterateToAscii($a)); $b = strtoupper(self::transliterateToAscii($b)); return strcmp($a, $b); } /** * Transforms a unicode string to lowercase AND removes all diacritics * * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html */ static public function unicodeCaseFold(?string $str): string | > > > > > > > > > > > > > | 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 | self::$collator = \Collator::create('fr_FR'); // This is what makes the comparison case insensitive // https://www.php.net/manual/en/collator.setstrength.php self::$collator->setAttribute(\Collator::STRENGTH, \Collator::SECONDARY); // Don't use \Collator::NUMERIC_COLLATION here as it goes against what would feel logic // for account ordering // with NUMERIC_COLLATION: 1, 2, 10, 11, 101 // without: 1, 10, 101, 11, 2 } // Make sure we have UTF-8 // If we don't, we may end up with malformed database, eg. 
"row X missing from index" errors // when doing an integrity check $a = self::utf8_encode($a); $b = self::utf8_encode($b); if (isset(self::$collator)) { return (int) self::$collator->compare($a, $b); } $a = strtoupper(self::transliterateToAscii($a)); $b = strtoupper(self::transliterateToAscii($b)); return strcmp($a, $b); } static public function utf8_encode($str) { // Check if string is already UTF-8 encoded or not return !preg_match('//u', $str) ? utf8_encode($str) : $str; } /** * Transforms a unicode string to lowercase AND removes all diacritics * * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html */ static public function unicodeCaseFold(?string $str): string |
︙ | ︙ |