<?php

namespace Drupal\Core\Mail;

use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\Xss;
use Drupal\Core\Site\Settings;
/**
* Defines a class containing utility methods for formatting mail messages.
*/
class MailFormatHelper {
/**
 * URLs collected from link tags while converting HTML to text.
 *
 * htmlToMailUrls() appends one entry per matched <a> tag and empties the
 * list when called with $reset = TRUE; htmlToText() reads the list back to
 * build the numbered footnotes.
 *
 * @var array
 */
protected static $urls = array();
/**
 * Regular expression matching the base path at the start of a URL.
 *
 * Built on first use in htmlToMailUrls() from the preg_quote()'d global
 * $base_path and reused on later calls.
 *
 * @var string
 */
protected static $regexp;
/**
 * Names of the HTML tags that htmlToText() knows how to transform.
 *
 * Empty until the first call to htmlToText(), which fills it in.
 *
 * @var array
 */
protected static $supportedTags = array();
/**
* Performs format=flowed soft wrapping for mail (RFC 3676).
*
* We use delsp=yes wrapping, but only break non-spaced languages when
* absolutely necessary to avoid compatibility issues.
*
* We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
*
* @param string $text
* The plain text to process.
* @param string $indent
* (optional) A string to indent the text with. Only '>' characters are
* repeated on subsequent wrapped lines. Others are replaced by spaces.
*
* @return string
* The content of the email as a string with formatting applied.
*/
public static function wrapMail($text, $indent = '') {
  // Convert CRLF into LF (every carriage return is dropped).
  $text = str_replace("\r", '', $text);

  // See if soft-wrapping is allowed: it is only used when the indentation,
  // once reduced to '>' markers and spaces, contains no space at all.
  $clean_indent = static::htmlToTextClean($indent);
  $wrap_options = array(
    'soft' => strpos($clean_indent, ' ') === FALSE,
    'length' => strlen($indent),
  );

  // Check if the string has line breaks.
  if (strpos($text, "\n") !== FALSE) {
    // Remove trailing spaces to make existing breaks hard, but leave the
    // signature marker "-- " untouched (RFC 3676, Section 4.3). The
    // conditional reads: when the spaces are NOT preceded by a line that
    // consists of "--", strip any run of trailing spaces; when they are,
    // only strip runs of two or more so that the single space of the
    // signature separator survives. The quantifier is spelled out as {2,}
    // so the pattern does not depend on a literal run of spaces.
    $text = preg_replace('/(?(?<!^--) +\n| {2,}\n)/m', "\n", $text);
    // Wrap each line at the needed width.
    $lines = explode("\n", $text);
    array_walk($lines, '\Drupal\Core\Mail\MailFormatHelper::wrapMailLine', $wrap_options);
    $text = implode("\n", $lines);
  }
  else {
    // Wrap this line.
    static::wrapMailLine($text, 0, $wrap_options);
  }

  // Empty lines with nothing but spaces.
  $text = preg_replace('/^ +\n/m', "\n", $text);
  // Space-stuff special lines.
  $text = preg_replace('/^(>| |From)/m', ' $1', $text);
  // Apply indentation. Every line is prefixed with the cleaned indent, then
  // the first line's prefix is swapped for the original indent so that
  // non-'>' indentation only shows up on the first line.
  $indented = preg_replace('/^/m', $clean_indent, $text);
  return $indent . substr($indented, strlen($indent));
}
/**
* Transforms an HTML string into plain text, preserving its structure.
*
* The output will be suitable for use as 'format=flowed; delsp=yes' text
* (RFC 3676) and can be passed directly to MailManagerInterface::mail() for sending.
*
* We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
*
* This function provides suitable alternatives for the following tags:
* <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
* <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
*
* @param string $string
* The string to be transformed.
* @param array $allowed_tags
* (optional) If supplied, a list of tags that will be transformed. If
* omitted, all supported tags are transformed.
*
* @return string
* The transformed string.
*/
public static function htmlToText($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  if (empty(static::$supportedTags)) {
    static::$supportedTags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p',
      'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3',
      'h4', 'h5', 'h6', 'hr');
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect(static::$supportedTags, $allowed_tags) : static::$supportedTags;

  // Make sure tags, entities and attributes are well-formed and properly
  // nested.
  $string = Html::normalize(Xss::filter($string, $allowed_tags));

  // Apply inline styles.
  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="https://www.drupal.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  static::htmlToMailUrls(NULL, TRUE);
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  // An array callable is used here because "static::method" strings are
  // deprecated as callables since PHP 8.2.
  $string = preg_replace_callback($pattern, array(static::class, 'htmlToMailUrls'), $string);
  $urls = static::htmlToMailUrls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and
  // literals and begins and ends with a literal (inserting $null as
  // required).
  // Odd/even counter (tag or no tag).
  $tag = FALSE;
  // Case conversion function.
  $casing = NULL;
  $output = '';
  // All current indentation string chunks.
  $indent = array();
  // Array of counters for opened lists.
  $lists = array();
  foreach ($split as $value) {
    // Holds a string ready to be formatted and output.
    $chunk = NULL;

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      // NOTE(review): the widths of the whitespace inside the indentation
      // literals below are reproduced as found — confirm them against the
      // canonical copy of this file.
      switch ($tagname) {
        // List counters.
        case 'ul':
          array_unshift($lists, '*');
          break;

        case 'ol':
          array_unshift($lists, 1);
          break;

        case '/ul':
        case '/ol':
          array_shift($lists);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Quotation/list markers, non-fancy headers.
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;

        case 'li':
          $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
          break;

        case 'dd':
          $indent[] = ' ';
          break;

        case 'h3':
          $indent[] = '.... ';
          break;

        case 'h4':
          $indent[] = '.. ';
          break;

        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") . "\"\n";
            // Ensure blank new-line.
            $chunk = '';
          }
          // Fall-through.
        case '/li':
        case '/dd':
          array_pop($indent);
          break;

        case '/h3':
        case '/h4':
          array_pop($indent);
          // Fall-through.
        case '/h5':
        case '/h6':
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Fancy headers.
        case 'h1':
          $indent[] = '======== ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case 'h2':
          $indent[] = '-------- ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = static::htmlToTextPad($output, ($tagname == '/h1') ? '=' : '-', ' ');
          array_pop($indent);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Horizontal rulers.
        case 'hr':
          // Insert immediately.
          $output .= static::wrapMail('', implode('', $indent)) . "\n";
          $output = static::htmlToTextPad($output, '-');
          break;

        // Paragraphs and definition lists.
        case '/p':
        case '/dl':
          // Ensure blank new-line.
          $chunk = '';
          break;
      }
    }
    // Process blocks of text.
    else {
      // Convert inline HTML text to plain text; not removing line-breaks or
      // white-space, since that breaks newlines when sanitizing plain-text.
      $value = trim(Html::decodeEntities($value));
      if (Unicode::strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = call_user_func($casing, $chunk);
      }
      $line_endings = Settings::get('mail_line_endings', PHP_EOL);
      // Format it and apply the current indentation.
      $output .= static::wrapMail($chunk, implode('', $indent)) . $line_endings;
      // Remove non-quotation markers from indentation.
      $indent = array_map('\Drupal\Core\Mail\MailFormatHelper::htmlToTextClean', $indent);
    }

    // Entries alternate between literal text and tag contents.
    $tag = !$tag;
  }

  return $output . $footnotes;
}
/**
 * Wraps words on a single line.
 *
 * Callback for array_walk() within
 * \Drupal\Core\Mail\MailFormatHelper::wrapMail().
 *
 * Note that we are skipping MIME content header lines, because attached
 * files, especially applications, could have long MIME types or long
 * filenames which result in a line length longer than the 77-character
 * limit, and wrapping that line would break the email format. For instance,
 * the attached file hello_drupal.docx will produce the following
 * Content-Type:
 * @code
 * Content-Type:
 * application/vnd.openxmlformats-officedocument.wordprocessingml.document;
 * name="hello_drupal.docx"
 * @endcode
 *
 * @param string $line
 *   The line to wrap; it is modified in place.
 * @param mixed $key
 *   The array key supplied by array_walk(); not used.
 * @param array $values
 *   An array with the keys 'soft' (whether to break with a trailing space
 *   before the newline) and 'length' (the width already taken up by the
 *   indentation).
 */
protected static function wrapMailLine(&$line, $key, $values) {
  // Soft breaks keep a trailing space before the newline; hard breaks do not.
  $break = $values['soft'] ? " \n" : "\n";

  $line_is_mime_header = FALSE;
  $mime_headers = array(
    'Content-Type',
    'Content-Transfer-Encoding',
    'Content-Disposition',
    'Content-Description',
  );
  // Do not break MIME headers which could be longer than 77 characters.
  foreach ($mime_headers as $header) {
    if (strpos($line, $header . ': ') === 0) {
      $line_is_mime_header = TRUE;
      break;
    }
  }

  if (!$line_is_mime_header) {
    // Use soft-breaks only for purely quoted or unindented text. Words
    // longer than the width are left intact by this pass.
    $line = wordwrap($line, 77 - $values['length'], $break);
  }

  // Break really long words at the maximum width allowed. This pass also
  // applies to MIME header lines and cuts inside words when necessary.
  $line = wordwrap($line, 996 - $values['length'], $break, TRUE);
}
/**
* Keeps track of URLs and replaces them with placeholder tokens.
*
* Callback for preg_replace_callback() within
* \Drupal\Core\Mail\MailFormatHelper::htmlToText().
*/
protected static function htmlToMailUrls($match = NULL, $reset = FALSE) {
  // @todo Use request context instead.
  global $base_url, $base_path;

  // A reset request empties the collected list and hands it straight back.
  if ($reset) {
    static::$urls = array();
    return static::$urls;
  }

  // Build the base-path pattern the first time it is needed.
  if (empty(static::$regexp)) {
    static::$regexp = '@^' . preg_quote($base_path, '@') . '@';
  }

  // Without a match this is a plain read of the URLs gathered so far.
  if (!$match) {
    return static::$urls;
  }

  // Capture groups 2 and 3 of the pattern hold the href and the link text.
  list(, , $href, $text) = $match;

  // Ensure all URLs are absolute: anything without a scheme separator gets
  // its leading base path swapped for the base URL.
  if (strpos($href, '://')) {
    $absolute = $href;
  }
  else {
    $absolute = preg_replace(static::$regexp, $base_url . '/', $href);
  }
  static::$urls[] = $absolute;

  // The link is replaced by its text followed by a footnote number.
  return $text . ' [' . count(static::$urls) . ']';
}
/**
* Replaces non-quotation markers from a piece of indentation with spaces.
*
* Callback for array_map() within
* \Drupal\Core\Mail\MailFormatHelper::htmlToText().
*/
protected static function htmlToTextClean($indent) {
  // Anything that is not a '>' quotation marker is blanked out with a space,
  // so the result has the same length as the input.
  $blanked = preg_replace('/[^>]/', ' ', $indent);
  return $blanked;
}
/**
* Pads the last line with the given character.
*
* @param string $text
* The text to pad.
* @param string $pad
* The character to pad the end of the string with.
* @param string $prefix
* (optional) Prefix to add to the string.
*
* @return string
* The padded string.
*
* @see \Drupal\Core\Mail\MailFormatHelper::htmlToText()
*/
protected static function htmlToTextPad($text, $pad, $prefix = '') {
  // Drop the final character, which callers pass in as the line break.
  $trimmed = substr($text, 0, -1);

  // Locate where the last line begins; -1 stands in for "no earlier break".
  $last_break = strrpos($trimmed, "\n");
  $line_start = ($last_break === FALSE) ? -1 : $last_break;

  // Work out how much of the 79 columns is already taken by the last line
  // and the prefix, and fill whatever remains with the pad character.
  $used = (strlen($trimmed) - $line_start) + strlen($prefix);
  $fill = str_repeat($pad, max(0, 79 - $used));

  // Add prefix and padding, and restore linebreak.
  return $trimmed . $prefix . $fill . "\n";
}