*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace HtmlSanitizer;
use HtmlSanitizer\Parser\MastermindsParser;
use HtmlSanitizer\Parser\ParserInterface;
use Psr\Log\LoggerInterface;
/**
 * Sanitizes untrusted HTML strings.
 *
 * The input is truncated to a configured maximum length, rejected when it is
 * not valid UTF-8, stripped of NULL characters, parsed by the configured
 * parser and finally rendered back to a string through the DOM visitor.
 *
 * @author Titouan Galopin
 *
 * @final
 */
class Sanitizer implements SanitizerInterface
{
    /**
     * Visitor applied to the parsed input; its result is rendered as the output.
     *
     * @var DomVisitorInterface
     */
    private $domVisitor;

    /**
     * Maximum number of characters kept from the input before parsing.
     *
     * @var int
     */
    private $maxInputLength;

    /**
     * Parser turning the HTML string into the structure given to the visitor.
     *
     * @var ParserInterface
     */
    private $parser;

    /**
     * Optional logger; nothing is logged when it is null.
     *
     * @var LoggerInterface|null
     */
    private $logger;

    /**
     * @param DomVisitorInterface  $domVisitor     Visitor applied to the parsed input
     * @param int                  $maxInputLength Maximum number of characters kept from the input
     * @param ParserInterface|null $parser         Parser to use, a MastermindsParser is created when null
     * @param LoggerInterface|null $logger         Logger receiving sanitization messages, if any
     */
    public function __construct(DomVisitorInterface $domVisitor, int $maxInputLength, ?ParserInterface $parser = null, ?LoggerInterface $logger = null)
    {
        $this->domVisitor = $domVisitor;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser ?? new MastermindsParser();
        $this->logger = $logger;
    }

    /**
     * Quickly create an already configured sanitizer using the default builder.
     *
     * @param array $config Configuration forwarded to the default builder's build() method
     *
     * @return SanitizerInterface
     */
    public static function create(array $config): SanitizerInterface
    {
        return SanitizerBuilder::createDefault()->build($config);
    }

    /**
     * Sanitize the given HTML string and, when a logger is configured, log an
     * excerpt of the result at debug level.
     *
     * @param string $html Untrusted input
     *
     * @return string The sanitized output, or an empty string when the input
     *                is not valid UTF-8 or cannot be parsed
     */
    public function sanitize(string $html): string
    {
        $sanitized = $this->doSanitize($html);

        if (null !== $this->logger) {
            // Log at most the first 50 characters of the output, marking any truncation.
            $excerpt = mb_substr($sanitized, 0, 50, 'UTF-8');
            if (mb_strlen($sanitized, 'UTF-8') > 50) {
                $excerpt .= '...';
            }

            $this->logger->debug('Sanitized given input to "{output}".', [
                'output' => $excerpt,
            ]);
        }

        return $sanitized;
    }

    /**
     * Run the sanitization pipeline: truncate, validate, clean, parse, visit, render.
     *
     * @param string $html Untrusted input
     *
     * @return string The rendered result, or an empty string on invalid or unparsable input
     */
    private function doSanitize(string $html): string
    {
        // Prevent DOS attack induced by extremely long HTML strings.
        // The encoding is given explicitly so the limit is always counted in UTF-8
        // characters, whatever the configured mbstring internal encoding is.
        if (mb_strlen($html, 'UTF-8') > $this->maxInputLength) {
            $html = mb_substr($html, 0, $this->maxInputLength, 'UTF-8');
        }

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         */
        if (!$this->isValidUtf8($html)) {
            return '';
        }

        // Remove NULL character
        $html = str_replace(\chr(0), '', $html);

        try {
            $parsed = $this->parser->parse($html);
        } catch (\Exception $exception) {
            // Unparsable input still yields an empty output, but the failure is
            // reported instead of being swallowed silently.
            if (null !== $this->logger) {
                $this->logger->warning('Unable to parse the given input: {message}', [
                    'message' => $exception->getMessage(),
                    'exception' => $exception,
                ]);
            }

            return '';
        }

        return $this->domVisitor->visit($parsed)->render();
    }

    /**
     * Tell whether the given string is valid UTF-8.
     *
     * @param string $html
     *
     * @return bool
     */
    private function isValidUtf8(string $html): bool
    {
        // With the "u" modifier, preg_match() fails silently (it does not return 1)
        // on strings containing invalid UTF-8. The empty string is special-cased
        // because "." cannot match it even though it is valid.
        return '' === $html || 1 === preg_match('/^./us', $html);
    }
}