KD2 Framework  Check-in [c5273f5793]

Overview
Comment:Add comments
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c5273f57935f9f4b47ae5d1d7f3179792f5033e7
User & Date: bohwaz on 2023-03-26 23:45:52
Other Links: manifest | tags
Context
2023-03-26
23:51
Add License check-in: 8997ff4276 user: bohwaz tags: trunk
23:45
Add comments check-in: c5273f5793 user: bohwaz tags: trunk
23:14
Implement HTML table to CSV check-in: 4a727c85d4 user: bohwaz tags: trunk
Changes

Modified src/lib/KD2/HTML/TableToCSV.php from [566bd21bad] to [1064f4e63c].

1
2
3
4
5
6
7
8











9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
<?php

namespace KD2\HTML;

use DOMDocument;
use DOMNode;
use DOMXPath;












class TableToCSV
{
	protected string $csv = '';

	/**
	 * Loads an HTML string and passes every <table> element found in it to add().
	 *
	 * The CSV buffer ($this->csv) is reset before the tables are processed.
	 *
	 * @param string $html HTML markup; it is wrapped in <body> tags when it does
	 *                     not contain the substring "<body" (case-insensitive)
	 */
	public function import(string $html): void
	{
		// Keep libxml parse errors in its internal buffer instead of raising PHP warnings.
		// NOTE(review): this setting is global and is not restored by this method.
		libxml_use_internal_errors(true);

		if (!stristr($html, '<body')) {
			$html = '<body>' . $html . '</body>';
		}

		$doc = new DOMDocument;
		// A UTF-8 <meta charset> tag is prepended to the markup before parsing
		$doc->loadHTML('<meta charset="utf-8" />' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

		$this->csv = '';

		// Each table is handed to add() together with its iteration key
		foreach ($this->xpath($doc, './/table') as $i => $table) {
			$this->add($table, $i);

		}

		unset($doc);
	}

	public function xpath(DOMNode $dom, string $query, int $item = null)
	{








>
>
>
>
>
>
>
>
>
>
>



















>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
<?php

namespace KD2\HTML;

use DOMDocument;
use DOMNode;
use DOMXPath;

/**
 * Converts the first HTML table of a document to CSV
 *
 * - only the first table is handled
 * - colspan is supported
 * - rowspan is *NOT* supported
 *
 * Usage: $csv = new TableToCSV; $csv->import('<table...</table>'); $csv->save('file.csv');
 *
 * @author bohwaz <https://bohwaz.net/>
 */
class TableToCSV
{
	protected string $csv = '';

	/**
	 * Loads an HTML string and passes its first <table> element to add().
	 *
	 * The CSV buffer ($this->csv) is reset before the table is processed.
	 * libxml's "internal errors" mode is enabled only for the duration of the
	 * call and restored to its previous state afterwards, so that parsing
	 * sloppy HTML here does not change error handling for the rest of the
	 * application.
	 *
	 * @param string $html HTML markup; it is wrapped in <body> tags when it does
	 *                     not contain the substring "<body" (case-insensitive)
	 */
	public function import(string $html): void
	{
		// Keep libxml parse errors in its internal buffer instead of raising
		// PHP warnings, remembering what the caller had configured
		$previous = libxml_use_internal_errors(true);

		try {
			if (stripos($html, '<body') === false) {
				$html = '<body>' . $html . '</body>';
			}

			$doc = new DOMDocument;
			// A UTF-8 <meta charset> tag is prepended to the markup before parsing
			$doc->loadHTML('<meta charset="utf-8" />' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

			$this->csv = '';

			foreach ($this->xpath($doc, './/table') as $i => $table) {
				$this->add($table, $i);
				break; // We only support the first table currently
			}
		}
		finally {
			// Only discard the buffered errors when the caller was not
			// collecting them itself, then restore the caller's setting
			if (!$previous) {
				libxml_clear_errors();
			}

			libxml_use_internal_errors($previous);
		}
	}

	public function xpath(DOMNode $dom, string $query, int $item = null)
	{

Modified src/lib/KD2/HTML/TableToODS.php from [8531a34f4f] to [e5cdf5fdfe].

1
2
3
4
5
6
7
8
9
10
11
12















13

14
15
16
17
18
19
20
21
22
23
24
25
26




27




28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46
47


48

49




50
51
52



53
54


55
56
57


58


59
60
61



62
63
64
65
66
67
68
<?php

namespace KD2\HTML;

use KD2\HTML\CSSParser;
use KD2\ZipWriter;

use DOMDocument;
use DOMNode;


/**















 * Supported CSS properties:

 * 'initial' value
 * font-style: italic
 * font-weight: bold
 * font-size: XXpt
 * color: #aabbcc
 * background-color: #aabbcc
 * text-align: left|center|right
 * vertical-align: top|middle|bottom
 * padding: XXmm
 * border[-left/right/bottom/top]: none|0.06pt solid #aabbcc
 * wrap: wrap|nowrap
 * hyphens: auto|none
 * transform: rotate(90deg)




 * -spreadsheet-cell-type: number|




 */
class TableToODS
{
	protected array $styles = [];
	public string $default_sheet_name = 'Sheet%d';

	const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';

	const DATA_TYPES = [
		'number',
		'date',
		'currency',
		'percentage',
		'string',
		'auto',
	];

	const CUSTOM_CSS_PROPERTIES = [

		// fr-BE, en_AU, etc.
		'-spreadsheet-locale',


		// auto, string, currency, date, number, percentage

		'-spreadsheet-cell-type',




		// see https://unicode-org.github.io/icu/userguide/format_parse/date/#date-field-symbol-table
		// or one of the strings in DATE_FORMATS array
		'-spreadsheet-date-format',



		// integer, float, percentage_integer, percentage_float
		'-spreadsheet-number-format',


		// EUR, GBP
		'-spreadsheet-currency',
		// €, $, etc.


		'-spreadsheet-currency-symbol',


		// 'prefix' (default) or 'suffix'
		'-spreadsheet-currency-position',
		// true or false



		'-spreadsheet-number-negative-color'
	];

	const DATE_FORMATS = [
		'short' => 'dd/MM/yyyy',
		'short_hours' => 'dd/MM/yyyy hh:mm',
		'hours' => 'hh:mm',












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
|
>
>
>
>


















>


>
>
|
>

>
>
>
>
|
<

>
>
>


>
>
|

|
>
>

>
>


|
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
<?php

namespace KD2\HTML;

use KD2\HTML\CSSParser;
use KD2\ZipWriter;

use DOMDocument;
use DOMNode;


/**
 * This class takes one or more HTML tables and converts them to a single ODS document.
 *
 * - a basic set of CSS properties is supported
 * - colspan (but not rowspan)
 * - automatic column width
 * - custom CSS properties
 * - each table is handled as a sheet, the <caption> will act as the name of the sheet
 * - detection of cell type, or force cell type using '-spreadsheet-cell-type'
 *
 * What is NOT supported:
 * - rowspan
 * - formulas
 *
 * Usage: $ods = new TableToODS; $ods->import('<table...</table>'); $ods->save('file.ods');
 *
 * Supported CSS properties:
 * - the following color names: black, white, red, green, blue, yellow, magenta, cyan
 * - 'initial' value to restore to default
 * - font-style: italic
 * - font-weight: bold
 * - font-size: XXpt
 * - color: #aabbcc|name
 * - background-color: #aabbcc|name
 * - text-align: left|center|right
 * - vertical-align: top|middle|bottom
 * - padding: XXmm
 * - border[-left|-right|-bottom|-top]: none|0.06pt solid #aabbcc|color
 * - wrap: wrap|nowrap
 * - hyphens: auto|none
 * - transform: rotate(90deg)
 *
 * Other properties, as well as other units (e.g. '2em', '99%', etc.), are not
 * supported and may produce unexpected results.
 *
 * This supports a number of custom CSS properties (note the leading dash '-').
 * See TableToODS::CUSTOM_CSS_PROPERTIES for details.
 * Note that those properties are also cascading.
 *
 * @author bohwaz <https://bohwaz.net/>
 */
class TableToODS
{
	protected array $styles = [];
	public string $default_sheet_name = 'Sheet%d';

	const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';

	const DATA_TYPES = [
		'number',
		'date',
		'currency',
		'percentage',
		'string',
		'auto',
	];

	const CUSTOM_CSS_PROPERTIES = [
		// Which language is the spreadsheet document in?
		// fr-BE, en_AU, etc.
		'-spreadsheet-locale',

		// Force the type of the cell
		// auto (default), string, currency, date, number, percentage
		// 'auto' means the type will be detected as best as we can (see ::getCellType)
		'-spreadsheet-cell-type',

		// Force the displayed date format of dates
		// Default is 'short'
		// The format may be one of the strings in ::DATE_FORMATS array, or any ICU format:
		// https://unicode-org.github.io/icu/userguide/format_parse/date/#date-field-symbol-table

		'-spreadsheet-date-format',

		// Force the display of the number
		// Default is 'float'
		// integer, float, percentage_integer, percentage_float
		'-spreadsheet-number-format',

		// Force the currency of the number
		// EUR (default), GBP, etc.
		'-spreadsheet-currency',

		// Force the currency symbol
		// € (default if currency is EUR), $, etc.
		'-spreadsheet-currency-symbol',

		// Force the position of the currency symbol, to place it before or after the number
		// 'prefix' (default) or 'suffix'
		'-spreadsheet-currency-position',

		// Name of the text color for a negative number,
		// or 'none' to disable coloring negative numbers
		// Supported color names: black, white, red, green, blue, yellow, magenta, cyan
		'-spreadsheet-number-negative-color'
	];

	const DATE_FORMATS = [
		'short' => 'dd/MM/yyyy',
		'short_hours' => 'dd/MM/yyyy hh:mm',
		'hours' => 'hh:mm',
269
270
271
272
273
274
275


276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

	/**
	 * Determines the cell type for a cell value.
	 *
	 * A non-empty $type other than 'auto' is returned unchanged. Otherwise the
	 * type is detected from the text and is one of 'number', 'date',
	 * 'percentage', 'currency' or 'string' (the fallback).
	 *
	 * @param string $value Cell value
	 * @param string|null $type Forced type, or null / 'auto' to detect it
	 * @return string
	 */
	protected function getCellType(string $value, ?string $type = null)
	{
		if ($type && $type != 'auto') {
			return $type;
		}



		// NOTE(review): $value is declared as string, so the object / int / float
		// checks below cannot match
		if (is_object($value) && $value instanceof \DateTimeInterface) {
			return 'date';
		}
		// Digits with an optional leading minus and an optional [,.] decimal part;
		// anything starting with '0' is excluded
		elseif (is_int($value) || is_float($value) || (substr((string) $value, 0, 1) != '0' && preg_match('/^-?\d+(?:[,.]\d+)?$/', (string) $value))) {
			return 'number';
		}
		// d/m/yy, d/m/yyyy or yyyy-mm-dd, optionally followed by a time.
		// NOTE(review): the seconds group has no '?' quantifier, so a time
		// without seconds (hh:mm) does not match
		elseif (preg_match('!^(?:\d\d?/\d\d?/\d\d(?:\d\d)?|\d{4}-\d{2}-\d{2})(?:\s+\d\d?[:\.]\d\d?(?:[:\.]\d\d?))?$!', $value)) {
			return 'date';
		}
		// Number followed by a percent sign
		elseif (preg_match('/^-?\d+(?:[,.]\d+)?\s*%$/', trim($value))) {
			return 'percentage';
		}
		// Number followed by one of the listed currency symbols / codes
		elseif (preg_match('/^-?\d+(?:[,.]\d+)?\s*(?:€|\$|EUR|CHF)$/', trim($value))) {
			return 'currency';
		}

		return 'string';
	}

	public function save(string $filename): void







>
>



|





|


|







312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340

	/**
	 * Determines the cell type for a textual cell value.
	 *
	 * A non-empty $type other than 'auto' is returned unchanged. Otherwise the
	 * type is detected from the text:
	 * - 'number': digits with an optional leading minus and an optional [,.]
	 *   decimal part, ignoring spaces and non-breaking spaces. Values starting
	 *   with '0' ('0123', but also '0' and '0.5') are not treated as numbers.
	 * - 'date': d/m/yy, d/m/yyyy or yyyy-mm-dd, optionally followed by a time
	 *   written as hh:mm or hh:mm:ss (':' or '.' as separator)
	 * - 'percentage': a number followed by '%'
	 * - 'currency': a number followed by €, $, EUR or CHF
	 * - 'string': anything else
	 *
	 * @param string $value Cell value
	 * @param string|null $type Forced type, or null / 'auto' to detect it
	 * @return string
	 */
	protected function getCellType(string $value, ?string $type = null)
	{
		if ($type && $type != 'auto') {
			return $type;
		}

		// Spaces and UTF-8 non-breaking spaces are removed before the numeric tests
		$number_value = str_replace([' ', "\xC2\xA0"], '', trim($value));

		// $value is declared as string, so no object / int / float checks are needed here
		if (substr($number_value, 0, 1) != '0' && preg_match('/^-?\d+(?:[,.]\d+)?$/', $number_value)) {
			return 'number';
		}

		// The seconds part of the time is optional, so that both
		// '2023-03-26 23:45' and '2023-03-26 23:45:52' are detected
		if (preg_match('!^(?:\d\d?/\d\d?/\d\d(?:\d\d)?|\d{4}-\d{2}-\d{2})(?:\s+\d\d?[:\.]\d\d?(?:[:\.]\d\d?)?)?$!', $value)) {
			return 'date';
		}

		if (preg_match('/^-?\d+(?:[,.]\d+)?\s*%$/', $number_value)) {
			return 'percentage';
		}

		if (preg_match('/^-?\d+(?:[,.]\d+)?\s*(?:€|\$|EUR|CHF)$/', $number_value)) {
			return 'currency';
		}

		return 'string';
	}

	public function save(string $filename): void