Overview
Comment:Make sure data imported is in UTF-8 and that it's also valid UTF-8 for collator comparison, to avoid database corruption
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | stable
Files: files | file ages | folders
SHA3-256: 5b023687059a3418e4cc90604618019d648a4e94940071573250303df49e0683
User & Date: bohwaz on 2022-01-11 20:45:36
Other Links: manifest | tags
Context
2022-01-12
00:11
Rename custom unicode collation to U_NOCASE, don't replace the native NOCASE collation as it can lead to index errors and malformed database check-in: 5cd6df6ced user: bohwaz tags: trunk
2022-01-11
20:45
Make sure data imported is in UTF-8 and that it's also valid UTF-8 for collator comparison, to avoid database corruption check-in: 5b02368705 user: bohwaz tags: trunk, stable
2022-01-10
17:15
Add SECURITY policy file for Github check-in: 61572f78dc user: bohwaz tags: trunk
Changes

Modified src/include/lib/Garradin/CSV.php from [1989ff97c6] to [cfc298524f].

43
44
45
46
47
48
49



50
51
52
53
54
55
56
			}

			if (count($row) != $nb_columns)
			{
				throw new UserException('Erreur sur la ligne ' . $line . ' : incohérence dans le nombre de colonnes avec la première ligne.');
			}




			$out[$line] = $row;
		}

		fclose($fp);

		return $out;
	}







>
>
>







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
			}

			if (count($row) != $nb_columns)
			{
				throw new UserException('Erreur sur la ligne ' . $line . ' : incohérence dans le nombre de colonnes avec la première ligne.');
			}

			// Make sure the data is UTF-8 encoded
			// (array_map() expects the callback first, then the array to walk)
			$row = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $row);

			$out[$line] = $row;
		}

		fclose($fp);

		return $out;
	}
231
232
233
234
235
236
237


238
239
240
241
242
243
244
245
		// Find the delimiter
		$delim = self::findDelimiter($fp);
		self::skipBOM($fp);

		$line = 0;

		$columns = fgetcsv($fp, 4096, $delim);


		$columns = array_map('trim', $columns);

		// Check for required columns
		foreach ($expected_columns as $column) {
			if (!in_array($column, $columns, true)) {
				throw new UserException(sprintf('La colonne "%s" est absente du fichier importé', $column));
			}
		}







>
>
|







234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
		// Find the delimiter
		$delim = self::findDelimiter($fp);
		self::skipBOM($fp);

		$line = 0;

		$columns = fgetcsv($fp, 4096, $delim);

		// Make sure the data is UTF-8 encoded
		$columns = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $columns);

		// Check for required columns
		foreach ($expected_columns as $column) {
			if (!in_array($column, $columns, true)) {
				throw new UserException(sprintf('La colonne "%s" est absente du fichier importé', $column));
			}
		}
255
256
257
258
259
260
261



262
263
264
265
266
267
268
269
			}

			if (count($row) != count($columns))
			{
				throw new UserException('Erreur sur la ligne ' . $line . ' : le nombre de colonnes est incorrect.');
			}




			$row = array_combine($columns, $row);

			yield $line => $row;
		}

		fclose($fp);
	}
}







>
>
>








260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
			}

			if (count($row) != count($columns))
			{
				throw new UserException('Erreur sur la ligne ' . $line . ' : le nombre de colonnes est incorrect.');
			}

			// Make sure the data is UTF-8 encoded
			$row = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $row);

			$row = array_combine($columns, $row);

			yield $line => $row;
		}

		fclose($fp);
	}
}

Modified src/include/lib/Garradin/Membres/Import.php from [9fc2a43ba6] to [e73b1e22a1].

104
105
106
107
108
109
110



111
112
113
114
115
116
117
			$line++;

			if (empty($row))
			{
				continue;
			}




			if ($line == 1)
			{
				if (empty($row[0]) || !is_string($row[0]) || is_numeric($row[0]))
				{
					$db->rollback();
					throw new UserException('Erreur sur la ligne 1 : devrait contenir l\'en-tête des colonnes.');
				}







>
>
>







104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
			$line++;

			if (empty($row))
			{
				continue;
			}

			// Make sure the data is UTF-8 encoded
			$row = array_map(fn ($a) => Utils::utf8_encode(trim($a)), $row);

			if ($line == 1)
			{
				if (empty($row[0]) || !is_string($row[0]) || is_numeric($row[0]))
				{
					$db->rollback();
					throw new UserException('Erreur sur la ligne 1 : devrait contenir l\'en-tête des colonnes.');
				}

Modified src/include/lib/Garradin/Utils.php from [f7279d34e4] to [8303b1eb7d].

936
937
938
939
940
941
942

943
944
945
946






947
948
949
950
951
952
953
954
955






956
957
958
959
960
961
962
            self::$collator = \Collator::create('fr_FR');

            // This is what makes the comparison case insensitive
            // https://www.php.net/manual/en/collator.setstrength.php
            self::$collator->setAttribute(\Collator::STRENGTH, \Collator::SECONDARY);

            // Don't use \Collator::NUMERIC_COLLATION here as it goes against what would feel logic

            // with NUMERIC_COLLATION: 1, 2, 10, 11, 101
            // without: 1, 10, 101, 11, 2
        }







        if (isset(self::$collator)) {
            return (int) self::$collator->compare($a, $b);
        }

        $a = strtoupper(self::transliterateToAscii($a));
        $b = strtoupper(self::transliterateToAscii($b));

        return strcmp($a, $b);
    }







    /**
     * Transforms a unicode string to lowercase AND removes all diacritics
     *
     * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html
     */
    static public function unicodeCaseFold(?string $str): string







>




>
>
>
>
>
>









>
>
>
>
>
>







936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
            self::$collator = \Collator::create('fr_FR');

            // This is what makes the comparison case insensitive
            // https://www.php.net/manual/en/collator.setstrength.php
            self::$collator->setAttribute(\Collator::STRENGTH, \Collator::SECONDARY);

            // Don't use \Collator::NUMERIC_COLLATION here as it goes against what would feel logic
            // for account ordering
            // with NUMERIC_COLLATION: 1, 2, 10, 11, 101
            // without: 1, 10, 101, 11, 2
        }

        // Make sure we have UTF-8
        // If we don't, we may end up with malformed database, eg. "row X missing from index" errors
        // when doing an integrity check
        $a = self::utf8_encode($a);
        $b = self::utf8_encode($b);

        if (isset(self::$collator)) {
            return (int) self::$collator->compare($a, $b);
        }

        $a = strtoupper(self::transliterateToAscii($a));
        $b = strtoupper(self::transliterateToAscii($b));

        return strcmp($a, $b);
    }

    /**
     * Returns the supplied string as UTF-8.
     *
     * A string that already validates as UTF-8 is returned untouched. Anything
     * else is converted byte-for-byte as if it were ISO-8859-1, which is what
     * PHP's native utf8_encode() does.
     *
     * @param mixed $str Value to check; non-string values are returned as-is
     * @return mixed The UTF-8 string, or the original value if it was not a string
     */
    static public function utf8_encode($str)
    {
        // Nothing to convert. This also avoids handing null to preg_match(),
        // which is deprecated since PHP 8.1
        if (!is_string($str)) {
            return $str;
        }

        // An empty pattern with the /u modifier only matches when the subject
        // is valid UTF-8 (preg_match() returns false on malformed input)
        if (preg_match('//u', $str)) {
            return $str;
        }

        // utf8_encode() is deprecated as of PHP 8.2: prefer mbstring when it is
        // available, the ISO-8859-1 => UTF-8 conversion gives the same result
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        return utf8_encode($str);
    }

    /**
     * Transforms a unicode string to lowercase AND removes all diacritics
     *
     * @see https://www.matthecat.com/supprimer-les-accents-d-une-chaine-avec-php.html
     */
    static public function unicodeCaseFold(?string $str): string