KD2 Framework  Check-in [2a35507858]

Overview
Comment:Mail_Message: convert HTML to Markdown now
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2a35507858947916b76ec2997fa48dd4aadc1c50
User & Date: bohwaz on 2023-05-15 11:23:59
Other Links: manifest | tags
Context
2023-05-23
11:35
Brindille: move else to a separate method check-in: 81ab1d5f60 user: bohwaz tags: trunk
2023-05-15
11:23
Mail_Message: convert HTML to Markdown now check-in: 2a35507858 user: bohwaz tags: trunk
11:23
Smartyer: Accept comma as separator in modifier arguments list check-in: 292dd1c813 user: bohwaz tags: trunk
Changes

Modified src/lib/KD2/Mail_Message.php from [c1939c1ae6] to [d4958f7739].

293
294
295
296
297
298
299










300
301
302

303
304
305
306
307

308
309



310




311
312
313
314
315
316
317
				return true;
			}
		}

		return $this->addPart('text/plain', $content);
	}











	public function getBody($html = false)
	{
		if ($html)

		{
			foreach ($this->parts as $part)
			{
				if ($part['type'] == 'text/html')
					return $part['content'];

			}




			return false;




		}

		foreach ($this->parts as $part)
		{
			if ($part['type'] == 'text/plain')
			{
				$part['content'] = trim($part['content']);







>
>
>
>
>
>
>
>
>
>
|

|
>
|
|
<
|
|
>
|
|
>
>
>
|
>
>
>
>







293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315

316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
				return true;
			}
		}

		return $this->addPart('text/plain', $content);
	}

	/**
	 * Return the message body as plain text.
	 *
	 * By default the HTML part is treated as the best source (some HTML
	 * e-mails ship a poor plain-text alternative) and is converted to
	 * Markdown-like text by getBody(). If the message has no HTML part,
	 * getBody() falls through to its text/plain lookup.
	 *
	 * @param bool $prefer_html Set to false to skip the HTML part entirely
	 */
	public function getBodyText(bool $prefer_html = true)
	{
		if (!$prefer_html) {
			return $this->getBody(false);
		}

		// getBody() returns raw HTML only for true / 1; any other truthy
		// value (2 here) asks for the HTML part converted to text.
		return $this->getBody(2);
	}

	public function getBody($html = null)
	{
		if ($html) {
			$body = null;

			foreach ($this->parts as $part) {

				if ($part['type'] == 'text/html') {
					$body = $part['content'];
					break;
				}
			}

			if (null !== $body) {
				if ($html === true || $html === 1) {
					return $body;
				}

				return $this->HTMLToText($body);
			}
		}

		foreach ($this->parts as $part)
		{
			if ($part['type'] == 'text/plain')
			{
				$part['content'] = trim($part['content']);
381
382
383
384
385
386
387
388
389
390


391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

413


414
415
416

417
418
419
420
421
422
423
424
425
426
427
428

429





430
431
432
433
434
435
436
	{
		return $this->parts[$id]['content'];
	}

	/**
	 * Convert an HTML string to plain text with lightweight markup
	 * (pre-change revision of this method).
	 *
	 * Emphasis tags become single marker characters, headings are wrapped in
	 * "=" signs, links are replaced by numbered references listed at the end
	 * of the text, and all remaining tags are stripped.
	 *
	 * @param string $str HTML source
	 * @return string Trimmed plain text
	 */
	public function HTMLToText($str)
	{
		// Collapse "<br>" followed by a newline into a bare <br />
		$str = preg_replace('!<br\s*/?>\n!i', '<br />', $str);
		// Opening and closing emphasis tags both map to the same marker
		$str = preg_replace('!</?(?:b|strong)(?:\s+[^>]*)?>!i', '*', $str);
		$str = preg_replace('!</?(?:i|em)(?:\s+[^>]*)?>!i', '/', $str);
		$str = preg_replace('!</?(?:u|ins)(?:\s+[^>]*)?>!i', '_', $str);


		// <hN> ... </hN> becomes N "=" signs on each side of the title
		$str = preg_replace_callback('!<h(\d)(?:\s+[^>]*)?>!i', function ($match) {
			return str_repeat('=', (int)$match[1]) . ' ';
		}, $str);
		$str = preg_replace_callback('!</h(\d)>!i', function ($match) {
			return ' ' . str_repeat('=', (int)$match[1]);
		}, $str);

		// Every carriage return becomes a newline; paragraph ends and line
		// breaks are then turned into newlines as well
		$str = str_replace("\r", "\n", $str);
		$str = preg_replace("!</p>\n*!i", "\n\n", $str);
		$str = preg_replace("!<br[^>]*>\n*!i", "\n", $str);

		// Images are replaced by their source URL
		// NOTE(review): inside a character class "\1" is not a backreference,
		// so [^\1] excludes a control character rather than the quote — confirm
		$str = preg_replace('!<img[^>]*src=([\'"])([^\1]*?)\1[^>]*>!i', 'Image : $2', $str);

		// Collect links: [0] whole tag, [2] URL, [3] link text
		// NOTE(review): "<a[^>]href" allows exactly one character between
		// "<a" and "href", so anchors with other leading attributes are missed
		preg_match_all('!<a[^>]href=([\'"])([^\1]*?)\1[^>]*>(.*?)</a>!i', $str, $match, PREG_SET_ORDER);

		if (!empty($match))
		{
			foreach ($match as $key=>$link)
			{
				// A link whose text is its own URL needs no reference
				if ($link[3] == $link[2])
				{
					unset($match[$key]);

				}


			}
		}


		if (!empty($match))
		{
			$i = 1;
			// Heading of the reference list (French for "Cited links")
			$str .= "\n\n== Liens cités ==\n";

			foreach ($match as $link)
			{
				// Replace the anchor by "text[n]" and append "n. URL" to the list
				$str = str_replace($link[0], $link[3] . '['.$i.']', $str);
				$str.= str_pad($i, 2, ' ', STR_PAD_LEFT).'. '.$link[2]."\n";
				$i++;
			}
		}







		// Drop every remaining tag, then decode entities
		$str = strip_tags($str);

		$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		// Remove leading horizontal whitespace on each line and cap blank-line runs
		$str = preg_replace('/^\h*/m', '', $str);
		$str = preg_replace("!\n{3,}!", "\n\n", $str);

		return trim($str);







|
|
|
>
>

|


|






|



<
<
|
<
|
<
<
>
|
>
>



>












>

>
>
>
>
>







399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425


426

427


428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
	{
		return $this->parts[$id]['content'];
	}

	/**
	 * Convert an HTML string to Markdown-flavoured plain text.
	 *
	 * Emphasis tags become Markdown markers, headings become "#" titles,
	 * paragraphs and line breaks become newlines, links are rewritten as
	 * "text <URL>" (or just "<URL>" when the text adds nothing), block quotes
	 * are prefixed with "> ", <script>/<style> blocks are removed together
	 * with their content, and every other tag is stripped.
	 *
	 * @param string $str HTML source
	 * @return string Trimmed text
	 */
	public function HTMLToText($str)
	{
		// Collapse "<br>" followed by a newline into a bare <br />
		$str = preg_replace('!<br\s*/?>\n!i', '<br />', $str);

		// Opening and closing emphasis tags both map to the same marker
		$str = preg_replace('!</?(?:b|strong)(?:\s+[^>]*)?>!i', '**', $str);
		$str = preg_replace('!</?(?:i|em)(?:\s+[^>]*)?>!i', '*', $str);
		$str = preg_replace('!</?(?:u|ins)(?:\s+[^>]*)?>!i', '__', $str);
		$str = preg_replace('!</?(?:s|del)(?:\s+[^>]*)?>!i', '~~', $str);

		// <hN> ... </hN> becomes N "#" signs on each side of the title
		$str = preg_replace_callback('!<h(\d)(?:\s+[^>]*)?>!i', static function ($match) {
			return str_repeat('#', (int)$match[1]) . ' ';
		}, $str);
		$str = preg_replace_callback('!</h(\d)>!i', static function ($match) {
			return ' ' . str_repeat('#', (int)$match[1]);
		}, $str);

		$str = str_replace("\r", "\n", $str);
		$str = preg_replace("!</p>\n*!i", "\n\n", $str);
		$str = preg_replace("!<br[^>]*>\n*!i", "\n", $str);

		// Images are not converted: <img> tags are dropped by strip_tags() below.

		// Rewrite links in place. The angle brackets are written as entities
		// so they survive strip_tags() and are restored by html_entity_decode().
		// "href" may be preceded by other attributes, and the link text may
		// span several lines (hence the "s" modifier).
		$str = preg_replace_callback(
			'!<a\s[^>]*?(?<=\s)href=([\'"])(.*?)\1[^>]*>(.*?)</a>!is',
			static function ($found) {
				$url = $found[2];
				$label = $found[3];
				$visible = trim(strip_tags($label));

				// No useful label (empty, markup only, or the URL itself)
				if ($visible === '' || $visible === $url) {
					return '&lt;' . $url . '&gt;';
				}

				return sprintf('%s &lt;%s&gt;', $label, $url);
			},
			$str
		) ?? $str; // keep the text if PCRE fails (e.g. backtrack limit)

		// Prefix every line of a block quote with "> ".
		// The match is greedy on purpose: nested quotes are treated as a
		// single quote, at the cost of merging sibling quotes.
		$str = preg_replace_callback('!<blockquote[^>]*>(.*)</blockquote>!is', static function ($match) {
			return preg_replace('!^!m', '> ', trim($match[1]));
		}, $str) ?? $str;

		// Remove scripts and stylesheets with their content. The match is
		// lazy so that text between two such blocks is preserved.
		$str = preg_replace('!<(script|style)\b.*?</\1\s*>!is', '', $str) ?? $str;
		$str = strip_tags($str);

		$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
		// Remove leading horizontal whitespace on each line and cap blank-line runs
		$str = preg_replace('/^\h*/m', '', $str);
		$str = preg_replace("!\n{3,}!", "\n\n", $str);

		return trim($str);