Source for file Input.php
Documentation is available at Input.php
<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
* An open source application development framework for PHP 4.3.2 or newer
* @copyright Copyright (c) 2006, pMachine, Inc.
* @license http://www.codeigniter.com/user_guide/license.html
* @link http://www.codeigniter.com
// ------------------------------------------------------------------------
* Pre-processes global input data for security
* @link http://www.codeigniter.com/user_guide/libraries/input.html
* Sets whether to globally enable the XSS processing
* and whether to allow the $_GET array
$this->use_xss_clean =
($CFG->item('global_xss_filtering') ===
TRUE) ?
TRUE :
FALSE;
$this->allow_get_array =
($CFG->item('enable_query_strings') ===
TRUE) ?
TRUE :
FALSE;
$this->_sanitize_globals();
// --------------------------------------------------------------------
* This function does the following:
* Unsets $_GET data (if query strings are not enabled)
* Unsets all globals if register_globals is enabled
* Standardizes newline characters to \n
function _sanitize_globals()
// Unset globals. This is effectively the same as register_globals = off
foreach (array($_GET, $_POST, $_COOKIE) as $global)
foreach ($global as $key =>
$val)
// Is $_GET data allowed?
foreach($_POST as $key =>
$val)
foreach($val as $k =>
$v)
$_POST[$this->_clean_input_keys($key)][$this->_clean_input_keys($k)] =
$this->_clean_input_data($v);
$_POST[$this->_clean_input_keys($key)] =
$this->_clean_input_data($val);
foreach($_COOKIE as $key =>
$val)
$_COOKIE[$this->_clean_input_keys($key)] =
$this->_clean_input_data($val);
log_message('debug', "Global POST and COOKIE data sanitized");
// END _sanitize_globals()
// --------------------------------------------------------------------
* This is a helper function. It escapes data and
* standardizes newline characters to \n
function _clean_input_data($str)
foreach ($str as $key =>
$val)
$new_array[$key] =
$this->_clean_input_data($val);
// END _clean_input_data()
// --------------------------------------------------------------------
* This is a helper function. To prevent malicious users
* from trying to exploit keys we make sure that keys are
* only named with alpha-numeric text and a few other items.
function _clean_input_keys($str)
exit('Disallowed Key Characters: '.
$str);
// END _clean_input_keys()
// --------------------------------------------------------------------
* Fetch an item from the POST array
function post($index =
'', $xss_clean =
FALSE)
if ( ! isset
($_POST[$index]))
// --------------------------------------------------------------------
* Fetch an item from the COOKIE array
function cookie($index =
'', $xss_clean =
FALSE)
if ( ! isset
($_COOKIE[$index]))
// --------------------------------------------------------------------
$cip =
(isset
($_SERVER['HTTP_CLIENT_IP']) AND $_SERVER['HTTP_CLIENT_IP'] !=
"") ?
$_SERVER['HTTP_CLIENT_IP'] :
FALSE;
$rip =
(isset
($_SERVER['REMOTE_ADDR']) AND $_SERVER['REMOTE_ADDR'] !=
"") ?
$_SERVER['REMOTE_ADDR'] :
FALSE;
$fip =
(isset
($_SERVER['HTTP_X_FORWARDED_FOR']) AND $_SERVER['HTTP_X_FORWARDED_FOR'] !=
"") ?
$_SERVER['HTTP_X_FORWARDED_FOR'] :
FALSE;
// --------------------------------------------------------------------
return ( ! preg_match( "/^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/", $ip)) ?
FALSE :
TRUE;
// --------------------------------------------------------------------
$this->user_agent =
( ! isset
($_SERVER['HTTP_USER_AGENT'])) ?
FALSE :
$_SERVER['HTTP_USER_AGENT'];
// --------------------------------------------------------------------
* Sanitizes data so that Cross Site Scripting Hacks can be
* prevented. This function does a fair amount of work but
* it is extremely thorough, designed to prevent even the
* most obscure XSS attempts. Nothing is ever 100% foolproof,
* of course, but I haven't been able to get anything past the filter.
* Note: This function should only be used to deal with data
* upon submission. It's not something that should
* be used for general runtime processing.
* This function was based in part on some code and ideas I
* got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
* To help develop this script I used this great list of
* vulnerabilities along with a few other hacks I've
* harvested from examining vulnerabilities in other programs:
* http://ha.ckers.org/xss.html
function xss_clean($str, $charset =
'ISO-8859-1')
* This prevents sandwiching null characters
* between ascii characters, like Java\0script.
* Validate standard character entities
* Add a semicolon if missing. We do this to enable
* the conversion of entities to ASCII later.
$str =
preg_replace('#(&\#*\w+)[\x00-\x20]+;#u',"\\1;",$str);
* Validate UTF16 two byte encoding (x00)
* Just as above, adds a semicolon if missing.
$str =
preg_replace('#(&\#x*)([0-9A-F]+);*#iu',"\\1\\2;",$str);
* Just in case stuff like this is submitted:
* <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
* Note: Normally urldecode() would be easier but it removes plus signs
$str =
preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
* Convert character entities to ASCII
* This permits our tests below to work reliably.
* We only convert entities that are within tags since
* these are the ones that will pose security problems.
for ($i =
0; $i <
count($matches['0']); $i++
)
$this->_html_entity_decode($matches['1'][$i], $charset),
* Convert all tabs to spaces
* This prevents strings like this: ja vascript
* Note: we deal with spaces between characters later.
* Note: XML tags are inadvertently replaced too:
* But it doesn't seem to pose a problem.
$str =
str_replace(array('<?php', '<?PHP', '<?', '?>'), array('<?php', '<?PHP', '<?', '?>'), $str);
* Compact any exploded words
* This corrects words like: j a v a s c r i p t
* These words are compacted back to their correct state.
$words =
array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
foreach ($words as $word)
for ($i =
0; $i <
strlen($word); $i++
)
$temp .=
substr($word, $i, 1).
"\s*";
* Remove disallowed Javascript in links or img tags
$str =
preg_replace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si", "", $str);
$str =
preg_replace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si", "", $str);
* Remove JavaScript Event Handlers
* Note: This code is a little blunt. It removes
* the event handler and anything up to the closing >,
* but it's unlikely to be a problem.
$str =
preg_replace('#(<[^>]+.*?)(onblur|onchange|onclick|onfocus|onload|onmouseover|onmouseup|onmousedown|onselect|onsubmit|onunload|onkeypress|onkeydown|onkeyup|onresize)[^>]*>#iU',"\\1>",$str);
* Sanitize naughty HTML elements
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
$str =
preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "<\\1\\2\\3>", $str);
* Sanitize naughty scripting elements
* Similar to above, only instead of looking for
* tags it looks for PHP and JavaScript commands
* that are disallowed. Rather than removing the
* code, it simply converts the parentheses to entities
* rendering the code unexecutable.
* For example: eval('some code')
* Becomes: eval&#40;'some code'&#41;
$str =
preg_replace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2(\\3)", $str);
* This adds a bit of extra precaution in case
* something got through the above filters
foreach ($bad as $key =>
$val)
* This function is a replacement for html_entity_decode()
* In some versions of PHP the native function does not work
* when UTF-8 is the specified character set, so this gives us
* a work-around. More info here:
* http://bugs.php.net/bug.php?id=25670
/* -------------------------------------------------
/* Replacement for html_entity_decode()
/* -------------------------------------------------*/
NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
character set, and the PHP developers said they were not back porting the
fix to versions other than PHP 5.x.
function _html_entity_decode($str, $charset=
'ISO-8859-1')
if (stristr($str, '&') ===
FALSE) return $str;
// The reason we are not using html_entity_decode() by itself is because
// while it is not technically correct to leave out the semicolon
// at the end of an entity most browsers will still interpret the entity
// correctly. html_entity_decode() does not convert entities without
// semicolons, so we are left with our own little solution here. Bummer.