1: <?php
2:
3: /**
4: * ProcessWire Sanitizer
5: *
6: * Sanitizer provides shared sanitization functions as commonly used throughout ProcessWire core and modules
7: *
* Modules may also add methods to the Sanitizer as needed, e.g. $this->sanitizer->addHook('myMethod', $myClass, 'myMethod');
9: * See the Wire class definition for more details about the addHook method.
10: *
11: * ProcessWire 2.x
12: * Copyright (C) 2010 by Ryan Cramer
13: * Licensed under GNU/GPL v2, see LICENSE.TXT
14: *
15: * http://www.processwire.com
16: * http://www.ryancramer.com
17: *
18: */
19:
class Sanitizer extends Wire {

    /**
     * Whether PHP's multibyte string (mb_*) functions are available
     *
     * Determined once in the constructor so the check isn't repeated on every call.
     *
     */
    protected $multibyteSupport = false;

    /**
     * Construct the sanitizer
     *
     */
    public function __construct() {
        $this->multibyteSupport = function_exists("mb_strlen");
    }

    /**
     * Internal filter used by other name filtering methods in this class
     *
     * The value is cast to a string and truncated to 128 bytes. If it consists only of
     * ASCII letters, digits and the allowed extra characters, it is returned as-is.
     * Otherwise quotes are removed, tags and bytes outside the 32-127 range are stripped,
     * and every remaining disallowed character is replaced with $replacementChar.
     *
     * @param string $value Value to filter
     * @param array $allowedExtras Additional characters that are allowed in the value
     * @param string $replacementChar 1 character replacement value for invalid characters
     * @return string
     *
     */
    protected function nameFilter($value, array $allowedExtras, $replacementChar) {

        if(!is_string($value)) $value = (string) $value;
        if(strlen($value) > 128) $value = substr($value, 0, 128);
        if(ctype_alnum($value)) return $value; // quick exit if possible

        // second quick exit: nothing left to filter once the allowed extras are ignored
        if(ctype_alnum(str_replace($allowedExtras, '', $value))) return $value;

        // blank out any quotes
        $value = str_replace(array("'", '"'), '', $value);

        // NOTE: FILTER_SANITIZE_STRING is deprecated as of PHP 8.1; kept to preserve existing behavior
        $value = filter_var($value, FILTER_SANITIZE_STRING, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_NO_ENCODE_QUOTES);

        // build the whitelist character class, escaping each extra so that characters
        // like '-', ']', '^', '\' or '/' are taken literally regardless of their order
        $chars = '';
        foreach($allowedExtras as $char) $chars .= preg_quote($char, '/');
        $chars .= 'a-zA-Z0-9';

        return preg_replace('/[^' . $chars . ']/', $replacementChar, $value);
    }

    /**
     * Standard alphanumeric and dash, underscore, dot name
     *
     * @param string $value Value to filter
     * @return string
     *
     */
    public function name($value) {
        return $this->nameFilter($value, array('-', '_', '.'), '_');
    }

    /**
     * Standard alphanumeric and dash, underscore, dot name plus multiple names may be separated by a delimiter
     *
     * Commas, pipes and spaces (and the given delimiter) in the value are all treated as
     * separators and are normalized to the given delimiter in the returned value.
     *
     * @param string $value Value to filter
     * @param string $delimeter Character that delimits values (optional)
     * @param array $allowedExtras Additional characters that are allowed in the value (optional)
     * @param string $replacementChar 1 character replacement value for invalid characters (optional)
     * @return string
     *
     */
    public function names($value, $delimeter = ' ', $allowedExtras = array('-', '_', '.'), $replacementChar = '_') {
        $replace = array(',', '|', ' ');
        if($delimeter != ' ' && !in_array($delimeter, $replace)) $replace[] = $delimeter;

        // normalize every separator to a space, which is temporarily an allowed character
        $value = str_replace($replace, ' ', $value);
        $allowedExtras[] = ' ';
        $value = $this->nameFilter($value, $allowedExtras, $replacementChar);

        // convert the spaces back to the requested delimiter
        if($delimeter != ' ') $value = str_replace(' ', $delimeter, $value);
        return $value;
    }


    /**
     * Standard alphanumeric and underscore, per class or variable names in PHP
     *
     * @param string $value Value to filter
     * @return string
     *
     */
    public function varName($value) {
        return $this->nameFilter($value, array('_'), '_');
    }

    /**
     * Name filter as used by ProcessWire Fields
     *
     * Note that dash and dot are excluded because they aren't allowed characters in PHP variables
     *
     * @param string $value Value to filter
     * @return string
     *
     */
    public function fieldName($value) {
        return $this->nameFilter($value, array('_'), '_');
    }

    /**
     * Name filter for ProcessWire Page names
     *
     * Because page names are often generated from a UTF-8 title, UTF-8 to ASCII conversion will take place when $beautify is on.
     * The returned name is always lowercase, with invalid characters replaced by dashes.
     *
     * @param string $value
     * @param bool $beautify Should be true when creating a Page's name for the first time. Default is false.
     * @return string
     *
     */
    public function pageName($value, $beautify = false) {

        if($beautify) {
            // keep the original value if transliteration fails or produces nothing
            $v = iconv("UTF-8", "ASCII//TRANSLIT//IGNORE", $value);
            if($v) $value = $v;
        }

        $value = strtolower($this->nameFilter($value, array('-', '_', '.'), '-'));

        if($beautify) {
            // replace any of '-_.' next to each other with a single dash
            // (this also takes care of runs of multiple dashes)
            $value = preg_replace('/[-_.]{2,}/', '-', $value);

            // trim after collapsing, since collapsing can leave a dash at either end
            $value = trim($value, '-');
        }

        return $value;
    }

    /**
     * Format required by ProcessWire user names
     *
     * Allows ASCII letters, digits, dash, underscore, dot and '@'. The value is trimmed and
     * limited to 128 bytes, and any other character is replaced with an underscore.
     *
     * @param string $value
     * @return string
     *
     */
    public function username($value) {
        $value = trim($value);
        if(strlen($value) > 128) $value = substr($value, 0, 128);
        if(ctype_alnum(str_replace(array('-', '_', '.', '@'), '', $value))) return $value;

        // trim again because truncation may have left whitespace at the end
        return preg_replace('/[^-_.@a-zA-Z0-9]/', '_', trim($value));
    }

    /**
     * Returns valid email address, or blank if it isn't valid
     *
     * @param string $value
     * @return string
     *
     */
    public function email($value) {
        $value = filter_var($value, FILTER_SANITIZE_EMAIL);
        if(filter_var($value, FILTER_VALIDATE_EMAIL)) return $value;
        return '';
    }

    /**
     * Sanitize input text and remove tags
     *
     * @param string $value
     * @param array $options See the $defaultOptions array in the method for options
     * @return string
     *
     */
    public function text($value, $options = array()) {

        $defaultOptions = array(
            'multiLine' => false,       // when false, newlines are converted to spaces
            'maxLength' => 255,         // maximum length in characters (bytes when mbstring is unavailable)
            'maxBytes' => 1024,         // maximum length in bytes
            'stripTags' => true,        // remove markup tags?
            'allowableTags' => '',      // tags to keep when stripTags is on, per PHP's strip_tags()
            'inCharset' => 'UTF-8',     // character set of the given value
            'outCharset' => 'UTF-8',    // character set of the returned value
            );

        $options = array_merge($defaultOptions, $options);

        if(!$options['multiLine']) $value = str_replace(array("\r", "\n"), " ", $value);

        if($options['stripTags']) $value = strip_tags($value, $options['allowableTags']);

        if($options['inCharset'] != $options['outCharset']) {
            $converted = iconv($options['inCharset'], $options['outCharset'], $value);
            // a failed conversion yields a blank value rather than a boolean false
            $value = $converted === false ? '' : $converted;
        }

        // enforce the maximum length in characters
        if($this->multibyteSupport) {
            if(mb_strlen($value, $options['outCharset']) > $options['maxLength']) $value = mb_substr($value, 0, $options['maxLength'], $options['outCharset']);
        } else {
            if(strlen($value) > $options['maxLength']) $value = substr($value, 0, $options['maxLength']);
        }

        // enforce the maximum length in bytes
        $maxBytes = (int) $options['maxBytes'];
        if(strlen($value) > $maxBytes) {
            if($maxBytes <= 0) {
                $value = '';
            } else if($this->multibyteSupport) {
                // mb_strcut cuts by bytes without splitting a multibyte character
                $value = mb_strcut($value, 0, $maxBytes, $options['outCharset']);
            } else {
                $value = substr($value, 0, $maxBytes);
            }
        }

        return trim($value);
    }

    /**
     * Sanitize input multiline text and remove tags
     *
     * Same as Sanitizer::text, except that newlines are kept and the maximum length defaults
     * to 16384, with a maximum byte length of 3 times the maximum length.
     *
     * @param string $value
     * @param array $options See Sanitizer::text and $defaultOptions array for an explanation of options
     * @return string
     *
     */
    public function textarea($value, $options = array()) {

        if(!isset($options['multiLine'])) $options['multiLine'] = true;
        if(!isset($options['maxLength'])) $options['maxLength'] = 16384;
        if(!isset($options['maxBytes'])) $options['maxBytes'] = $options['maxLength'] * 3;

        return $this->text($value, $options);
    }

    /**
     * Return the given path if valid, or blank if not.
     *
     * Path is validated per ProcessWire "name" convention of ascii only [-_./a-z0-9]
     * As a result, this function is primarily useful for validating ProcessWire paths,
     * and won't always work with paths outside ProcessWire.
     *
     * Paths containing '/./' or '//' are considered invalid. Note that '..' segments
     * are not rejected here.
     *
     * @param string $value Path
     * @return string
     *
     */
    public function path($value) {
        if(!preg_match('{^[-_./a-z0-9]+$}iD', $value)) return '';
        if(strpos($value, '/./') !== false || strpos($value, '//') !== false) return '';
        return $value;
    }

    /**
     * Returns a valid URL, or blank if it can't be made valid
     *
     * Performs some basic sanitization like adding a protocol to the front if it's missing, but leaves alone local/relative URLs.
     *
     * URL is not required to conform to ProcessWire conventions unless a relative path is given.
     *
     * Note that a URL that already includes a protocol ("://") only has invalid characters removed,
     * it is not otherwise validated.
     *
     * Please note that URLs should always be entity encoded in your output. <script> is technically allowed in a valid URL, so
     * your output should always entity encode any URLs that came from user input.
     *
     * @param string $value URL
     * @param bool $allowRelative Whether to allow relative URLs
     * @return string
     * @todo add TLD validation
     *
     */
    public function url($value, $allowRelative = true) {

        if(!strlen($value)) return '';

        // this filter_var sanitizer just removes invalid characters that don't appear in domains or paths
        $value = filter_var($value, FILTER_SANITIZE_URL);

        if(!strpos($value, ".") && $allowRelative) {
            // if there's no dot (or it's in position 0) and relative paths are allowed,
            // we can safely assume this is a relative path.
            // relative paths must follow ProcessWire convention of ascii-only,
            // so they are passed through the $sanitizer->path() function.
            return $this->path($value);
        }

        if(!strpos($value, '://')) {
            // URL is missing protocol, or is local/relative

            if(!$allowRelative) {
                // relative urls aren't allowed, so add the protocol and validate
                $value = filter_var("http://$value", FILTER_VALIDATE_URL);

            // determine if this is a domain name
            // regex legend:       (www.)?     company.           com         ( .uk or / or end)
            } else if(preg_match('{^([^\s_.]+\.)?[^-_\s.][^\s_.]+\.([a-z]{2,6})([./:#]|$)}i', $value)) {
                // most likely a domain name
                // TODO add TLD validation to confirm it's a domain name (the TLD is capture group 2)
                $value = filter_var("http://$value", FILTER_VALIDATE_URL);

            } else {
                // most likely a relative path
                $value = $this->path($value);
            }
        }

        // filter_var returns false when validation fails
        return $value ? $value : '';
    }

    /**
     * Field name filter as used by ProcessWire selectors
     *
     * Note that dash and dot are excluded because they aren't allowed characters in PHP variables
     *
     * @param string $value
     * @return string
     *
     */
    public function selectorField($value) {
        return $this->nameFilter($value, array('_'), '_');
    }


    /**
     * Sanitizes a string value that needs to go in a ProcessWire selector
     *
     * String value is assumed to be UTF-8. Replaces characters other than letters, digits,
     * spaces, apostrophes and slashes with a space. The value is wrapped in quotes when it
     * was given quoted, or when it contains a comma or an apostrophe.
     *
     * @param string $value
     * @return string
     *
     */
    public function selectorValue($value) {

        $value = trim($value);

        // determine if value is already quoted and set initial value of needsQuotes
        // also pick out the initial quote style
        $quoteChar = '"';
        $needsQuotes = false;

        if(strlen($value) && ($value[0] == "'" || $value[0] == '"')) {
            $quoteChar = $value[0];
            $value = trim($value, "\"'");
            $needsQuotes = true;
        }

        // selector value is limited to 100 chars
        if(strlen($value) > 100) {
            if($this->multibyteSupport) $value = mb_substr($value, 0, 100, 'UTF-8');
                else $value = substr($value, 0, 100);
        }

        // if commas are present, then the selector needs to be quoted
        if(strpos($value, ',') !== false) $needsQuotes = true;

        // if an apostrophe is present, then make sure the value isn't already quoted with the same character
        // if it is, then switch to double quotes
        if(strpos($value, "'") !== false) {
            $needsQuotes = true;
            if($quoteChar == "'") $quoteChar = '"';
        }

        // disallow double quotes -- remove any if they are present
        if(strpos($value, '"') !== false) $value = str_replace('"', '', $value);

        // see if we can avoid the preg_replaces and do a quick filter
        $test = str_replace(array(',', ' ', '-'), '', $value);

        if(!ctype_alnum($test)) {

            // value needs more filtering, replace all non-alphanumeric, non-single-quote and space chars
            $value = preg_replace('/[^[:alnum:]\pL \'\/]/u', ' ', $value);

            if($value === null) {
                // preg_replace returns null when the value isn't valid UTF-8
                $value = '';
            } else {
                // replace multiple space characters in sequence
                $value = preg_replace('/\s\s+/u', ' ', $value);
            }
        }

        $value = trim($value);
        if($needsQuotes) $value = $quoteChar . $value . $quoteChar;
        return $value;

    }

    /**
     * The string value of the Sanitizer is its class name
     *
     * @return string
     *
     */
    public function __toString() {
        return "Sanitizer";
    }

}
374:
375: