]>
Commit | Line | Data |
---|---|---|
010efc9b AD |
1 | <?php |
2 | ||
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent encoder it uses to
     * normalize every URI before splitting it.
     */
    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI, or false if the
     *         URI could not be matched at all. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // separate out parts
        // The outer groups (1, 3, 6, 8) include the delimiter, so testing
        // them tells "component absent" (null) apart from "component
        // present but empty" (''). Trailing groups that did not take part
        // in the match are missing from $matches, which empty() tolerates.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Do not use empty() here: empty('0') is true, which would
            // silently turn the host "0" into an empty host.
            $host       = isset($matches[3]) ? $matches[3] : '';
            // Group 4 includes the ':' delimiter; a ':' with no digits
            // after it casts to port 0.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
69 | ||
70 | // vim: et sw=4 sts=4 |