XmlScanner.php
4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
<?php
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
use PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Settings;
class XmlScanner
{
/**
* String used to identify risky xml elements.
*
* @var string
*/
private $pattern;
private $callback;
private static $libxmlDisableEntityLoaderValue;
/**
* @var bool
*/
private static $shutdownRegistered = false;
public function __construct($pattern = '<!DOCTYPE')
{
$this->pattern = $pattern;
$this->disableEntityLoaderCheck();
// A fatal error will bypass the destructor, so we register a shutdown here
if (!self::$shutdownRegistered) {
self::$shutdownRegistered = true;
register_shutdown_function([__CLASS__, 'shutdown']);
}
}
public static function getInstance(Reader\IReader $reader)
{
switch (true) {
case $reader instanceof Reader\Html:
return new self('<!ENTITY');
case $reader instanceof Reader\Xlsx:
case $reader instanceof Reader\Xml:
case $reader instanceof Reader\Ods:
case $reader instanceof Reader\Gnumeric:
return new self('<!DOCTYPE');
default:
return new self('<!DOCTYPE');
}
}
public static function threadSafeLibxmlDisableEntityLoaderAvailability()
{
if (PHP_MAJOR_VERSION == 7) {
switch (PHP_MINOR_VERSION) {
case 2:
return PHP_RELEASE_VERSION >= 1;
case 1:
return PHP_RELEASE_VERSION >= 13;
case 0:
return PHP_RELEASE_VERSION >= 27;
}
return true;
}
return false;
}
private function disableEntityLoaderCheck(): void
{
if (Settings::getLibXmlDisableEntityLoader() && \PHP_VERSION_ID < 80000) {
$libxmlDisableEntityLoaderValue = libxml_disable_entity_loader(true);
if (self::$libxmlDisableEntityLoaderValue === null) {
self::$libxmlDisableEntityLoaderValue = $libxmlDisableEntityLoaderValue;
}
}
}
public static function shutdown(): void
{
if (self::$libxmlDisableEntityLoaderValue !== null && \PHP_VERSION_ID < 80000) {
libxml_disable_entity_loader(self::$libxmlDisableEntityLoaderValue);
self::$libxmlDisableEntityLoaderValue = null;
}
}
public function __destruct()
{
self::shutdown();
}
public function setAdditionalCallback(callable $callback): void
{
$this->callback = $callback;
}
private function toUtf8($xml)
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
if ($charset !== 'UTF-8') {
$xml = mb_convert_encoding($xml, 'UTF-8', $charset);
$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
if ($charset !== 'UTF-8') {
throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
}
return $xml;
}
/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param mixed $xml
*
* @return string
*/
public function scan($xml)
{
$this->disableEntityLoaderCheck();
$xml = $this->toUtf8($xml);
// Don't rely purely on libxml_disable_entity_loader()
$pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/';
if (preg_match($pattern, $xml)) {
throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
if ($this->callback !== null && is_callable($this->callback)) {
$xml = call_user_func($this->callback, $xml);
}
return $xml;
}
/**
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $filestream
*
* @return string
*/
public function scanFile($filestream)
{
return $this->scan(file_get_contents($filestream));
}
}