80, 'https' => 443, 'ftp' => 21, 'gopher' => 70, 'nntp' => 119, 'news' => 119, 'telnet' => 23, 'tn3270' => 23, 'imap' => 143, 'pop' => 110, 'ldap' => 389, ]; /** * Unreserved characters for use in a regex. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 */ private const CHAR_UNRESERVED = 'a-zA-Z0-9_\-\.~'; /** * Sub-delims for use in a regex. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 */ private const CHAR_SUB_DELIMS = '!\$&\'\(\)\*\+,;='; private const QUERY_SEPARATORS_REPLACEMENT = ['=' => '%3D', '&' => '%26']; /** @var string Uri scheme. */ private $scheme = ''; /** @var string Uri user info. */ private $userInfo = ''; /** @var string Uri host. */ private $host = ''; /** @var int|null Uri port. */ private $port; /** @var string Uri path. */ private $path = ''; /** @var string Uri query string. */ private $query = ''; /** @var string Uri fragment. */ private $fragment = ''; /** @var string|null String representation */ private $composedComponents; public function __construct(string $uri = '') { if ($uri !== '') { $parts = self::parse($uri); if ($parts === false) { throw new MalformedUriException("Unable to parse URI: $uri"); } $this->applyParts($parts); } } /** * UTF-8 aware \parse_url() replacement. * * The internal function produces broken output for non ASCII domain names * (IDN) when used with locales other than "C". * * On the other hand, cURL understands IDN correctly only when UTF-8 locale * is configured ("C.UTF-8", "en_US.UTF-8", etc.). * * @see https://bugs.php.net/bug.php?id=52923 * @see https://www.php.net/manual/en/function.parse-url.php#114817 * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING * * @return array|false */ private static function parse(string $url) { // If IPv6 $prefix = ''; if (preg_match('%^(.*://\[[0-9:a-f]+\])(.*?)$%', $url, $matches)) { /** @var array{0:string, 1:string, 2:string} $matches */ $prefix = $matches[1]; $url = $matches[2]; } /** @var string */ $encodedUrl = preg_replace_callback( '%[^:/@?&=#]+%usD', static function ($matches) { return urlencode($matches[0]); }, $url ); $result = parse_url($prefix.$encodedUrl); if ($result === false) { return false; } return array_map('urldecode', $result); } public function __toString(): string { if ($this->composedComponents === null) { $this->composedComponents = self::composeComponents( $this->scheme, $this->getAuthority(), $this->path, $this->query, $this->fragment ); } return $this->composedComponents; } /** * Composes a URI reference string from its various components. * * Usually this method does not need to be called manually but instead is used indirectly via * `Psr\Http\Message\UriInterface::__toString`. * * PSR-7 UriInterface treats an empty component the same as a missing component as * getQuery(), getFragment() etc. always return a string. This explains the slight * difference to RFC 3986 Section 5.3. * * Another adjustment is that the authority separator is added even when the authority is missing/empty * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to * that format). * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5.3 */ public static function composeComponents(?string $scheme, ?string $authority, string $path, ?string $query, ?string $fragment): string { $uri = ''; // weak type checks to also accept null until we can add scalar type hints if ($scheme != '') { $uri .= $scheme.':'; } if ($authority != '' || $scheme === 'file') { $uri .= '//'.$authority; } if ($authority != '' && $path != '' && $path[0] != '/') { $path = '/'.$path; } $uri .= $path; if ($query != '') { $uri .= '?'.$query; } if ($fragment != '') { $uri .= '#'.$fragment; } return $uri; } /** * Whether the URI has the default port of the current scheme. * * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used * independently of the implementation. */ public static function isDefaultPort(UriInterface $uri): bool { return $uri->getPort() === null || (isset(self::DEFAULT_PORTS[$uri->getScheme()]) && $uri->getPort() === self::DEFAULT_PORTS[$uri->getScheme()]); } /** * Whether the URI is absolute, i.e. it has a scheme. * * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative * to another URI, the base URI. Relative references can be divided into several forms: * - network-path references, e.g. '//example.com/path' * - absolute-path references, e.g. '/path' * - relative-path references, e.g. 'subpath' * * @see Uri::isNetworkPathReference * @see Uri::isAbsolutePathReference * @see Uri::isRelativePathReference * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4 */ public static function isAbsolute(UriInterface $uri): bool { return $uri->getScheme() !== ''; } /** * Whether the URI is a network-path reference. * * A relative reference that begins with two slash characters is termed an network-path reference. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 */ public static function isNetworkPathReference(UriInterface $uri): bool { return $uri->getScheme() === '' && $uri->getAuthority() !== ''; } /** * Whether the URI is a absolute-path reference. * * A relative reference that begins with a single slash character is termed an absolute-path reference. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 */ public static function isAbsolutePathReference(UriInterface $uri): bool { return $uri->getScheme() === '' && $uri->getAuthority() === '' && isset($uri->getPath()[0]) && $uri->getPath()[0] === '/'; } /** * Whether the URI is a relative-path reference. * * A relative reference that does not begin with a slash character is termed a relative-path reference. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 */ public static function isRelativePathReference(UriInterface $uri): bool { return $uri->getScheme() === '' && $uri->getAuthority() === '' && (!isset($uri->getPath()[0]) || $uri->getPath()[0] !== '/'); } /** * Whether the URI is a same-document reference. * * A same-document reference refers to a URI that is, aside from its fragment * component, identical to the base URI. When no base URI is given, only an empty * URI reference (apart from its fragment) is considered a same-document reference. * * @param UriInterface $uri The URI to check * @param UriInterface|null $base An optional base URI to compare against * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.4 */ public static function isSameDocumentReference(UriInterface $uri, ?UriInterface $base = null): bool { if ($base !== null) { $uri = UriResolver::resolve($base, $uri); return ($uri->getScheme() === $base->getScheme()) && ($uri->getAuthority() === $base->getAuthority()) && ($uri->getPath() === $base->getPath()) && ($uri->getQuery() === $base->getQuery()); } return $uri->getScheme() === '' && $uri->getAuthority() === '' && $uri->getPath() === '' && $uri->getQuery() === ''; } /** * Creates a new URI with a specific query string value removed. * * Any existing query string values that exactly match the provided key are * removed. * * @param UriInterface $uri URI to use as a base. * @param string $key Query string key to remove. */ public static function withoutQueryValue(UriInterface $uri, string $key): UriInterface { $result = self::getFilteredQueryString($uri, [$key]); return $uri->withQuery(implode('&', $result)); } /** * Creates a new URI with a specific query string value. * * Any existing query string values that exactly match the provided key are * removed and replaced with the given key value pair. * * A value of null will set the query string key without a value, e.g. "key" * instead of "key=value". * * @param UriInterface $uri URI to use as a base. * @param string $key Key to set. * @param string|null $value Value to set */ public static function withQueryValue(UriInterface $uri, string $key, ?string $value): UriInterface { $result = self::getFilteredQueryString($uri, [$key]); $result[] = self::generateQueryString($key, $value); return $uri->withQuery(implode('&', $result)); } /** * Creates a new URI with multiple specific query string values. * * It has the same behavior as withQueryValue() but for an associative array of key => value. * * @param UriInterface $uri URI to use as a base. * @param (string|null)[] $keyValueArray Associative array of key and values */ public static function withQueryValues(UriInterface $uri, array $keyValueArray): UriInterface { $result = self::getFilteredQueryString($uri, array_keys($keyValueArray)); foreach ($keyValueArray as $key => $value) { $result[] = self::generateQueryString((string) $key, $value !== null ? (string) $value : null); } return $uri->withQuery(implode('&', $result)); } /** * Creates a URI from a hash of `parse_url` components. * * @see https://www.php.net/manual/en/function.parse-url.php * * @throws MalformedUriException If the components do not form a valid URI. */ public static function fromParts(array $parts): UriInterface { $uri = new self(); $uri->applyParts($parts); $uri->validateState(); return $uri; } public function getScheme(): string { return $this->scheme; } public function getAuthority(): string { $authority = $this->host; if ($this->userInfo !== '') { $authority = $this->userInfo.'@'.$authority; } if ($this->port !== null) { $authority .= ':'.$this->port; } return $authority; } public function getUserInfo(): string { return $this->userInfo; } public function getHost(): string { return $this->host; } public function getPort(): ?int { return $this->port; } public function getPath(): string { return $this->path; } public function getQuery(): string { return $this->query; } public function getFragment(): string { return $this->fragment; } public function withScheme($scheme): UriInterface { $scheme = $this->filterScheme($scheme); if ($this->scheme === $scheme) { return $this; } $new = clone $this; $new->scheme = $scheme; $new->composedComponents = null; $new->removeDefaultPort(); $new->validateState(); return $new; } public function withUserInfo($user, $password = null): UriInterface { $info = $this->filterUserInfoComponent($user); if ($password !== null) { $info .= ':'.$this->filterUserInfoComponent($password); } if ($this->userInfo === $info) { return $this; } $new = clone $this; $new->userInfo = $info; $new->composedComponents = null; $new->validateState(); return $new; } public function withHost($host): UriInterface { $host = $this->filterHost($host); if ($this->host === $host) { return $this; } $new = clone $this; $new->host = $host; $new->composedComponents = null; $new->validateState(); return $new; } public function withPort($port): UriInterface { $port = $this->filterPort($port); if ($this->port === $port) { return $this; } $new = clone $this; $new->port = $port; $new->composedComponents = null; $new->removeDefaultPort(); $new->validateState(); return $new; } public function withPath($path): UriInterface { $path = $this->filterPath($path); if ($this->path === $path) { return $this; } $new = clone $this; $new->path = $path; $new->composedComponents = null; $new->validateState(); return $new; } public function withQuery($query): UriInterface { $query = $this->filterQueryAndFragment($query); if ($this->query === $query) { return $this; } $new = clone $this; $new->query = $query; $new->composedComponents = null; return $new; } public function withFragment($fragment): UriInterface { $fragment = $this->filterQueryAndFragment($fragment); if ($this->fragment === $fragment) { return $this; } $new = clone $this; $new->fragment = $fragment; $new->composedComponents = null; return $new; } public function jsonSerialize(): string { return $this->__toString(); } /** * Apply parse_url parts to a URI. * * @param array $parts Array of parse_url parts to apply. */ private function applyParts(array $parts): void { $this->scheme = isset($parts['scheme']) ? $this->filterScheme($parts['scheme']) : ''; $this->userInfo = isset($parts['user']) ? $this->filterUserInfoComponent($parts['user']) : ''; $this->host = isset($parts['host']) ? $this->filterHost($parts['host']) : ''; $this->port = isset($parts['port']) ? $this->filterPort($parts['port']) : null; $this->path = isset($parts['path']) ? $this->filterPath($parts['path']) : ''; $this->query = isset($parts['query']) ? $this->filterQueryAndFragment($parts['query']) : ''; $this->fragment = isset($parts['fragment']) ? $this->filterQueryAndFragment($parts['fragment']) : ''; if (isset($parts['pass'])) { $this->userInfo .= ':'.$this->filterUserInfoComponent($parts['pass']); } $this->removeDefaultPort(); } /** * @param mixed $scheme * * @throws \InvalidArgumentException If the scheme is invalid. */ private function filterScheme($scheme): string { if (!is_string($scheme)) { throw new \InvalidArgumentException('Scheme must be a string'); } return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'); } /** * @param mixed $component * * @throws \InvalidArgumentException If the user info is invalid. */ private function filterUserInfoComponent($component): string { if (!is_string($component)) { throw new \InvalidArgumentException('User info must be a string'); } return preg_replace_callback( '/(?:[^%'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.']+|%(?![A-Fa-f0-9]{2}))/', [$this, 'rawurlencodeMatchZero'], $component ); } /** * @param mixed $host * * @throws \InvalidArgumentException If the host is invalid. */ private function filterHost($host): string { if (!is_string($host)) { throw new \InvalidArgumentException('Host must be a string'); } return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'); } /** * @param mixed $port * * @throws \InvalidArgumentException If the port is invalid. */ private function filterPort($port): ?int { if ($port === null) { return null; } $port = (int) $port; if (0 > $port || 0xFFFF < $port) { throw new \InvalidArgumentException( sprintf('Invalid port: %d. Must be between 0 and 65535', $port) ); } return $port; } /** * @param (string|int)[] $keys * * @return string[] */ private static function getFilteredQueryString(UriInterface $uri, array $keys): array { $current = $uri->getQuery(); if ($current === '') { return []; } $decodedKeys = array_map(function ($k): string { return rawurldecode((string) $k); }, $keys); return array_filter(explode('&', $current), function ($part) use ($decodedKeys) { return !in_array(rawurldecode(explode('=', $part)[0]), $decodedKeys, true); }); } private static function generateQueryString(string $key, ?string $value): string { // Query string separators ("=", "&") within the key or value need to be encoded // (while preventing double-encoding) before setting the query string. All other // chars that need percent-encoding will be encoded by withQuery(). $queryString = strtr($key, self::QUERY_SEPARATORS_REPLACEMENT); if ($value !== null) { $queryString .= '='.strtr($value, self::QUERY_SEPARATORS_REPLACEMENT); } return $queryString; } private function removeDefaultPort(): void { if ($this->port !== null && self::isDefaultPort($this)) { $this->port = null; } } /** * Filters the path of a URI * * @param mixed $path * * @throws \InvalidArgumentException If the path is invalid. */ private function filterPath($path): string { if (!is_string($path)) { throw new \InvalidArgumentException('Path must be a string'); } return preg_replace_callback( '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/]++|%(?![A-Fa-f0-9]{2}))/', [$this, 'rawurlencodeMatchZero'], $path ); } /** * Filters the query string or fragment of a URI. * * @param mixed $str * * @throws \InvalidArgumentException If the query or fragment is invalid. */ private function filterQueryAndFragment($str): string { if (!is_string($str)) { throw new \InvalidArgumentException('Query and fragment must be a string'); } return preg_replace_callback( '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/\?]++|%(?![A-Fa-f0-9]{2}))/', [$this, 'rawurlencodeMatchZero'], $str ); } private function rawurlencodeMatchZero(array $match): string { return rawurlencode($match[0]); } private function validateState(): void { if ($this->host === '' && ($this->scheme === 'http' || $this->scheme === 'https')) { $this->host = self::HTTP_DEFAULT_HOST; } if ($this->getAuthority() === '') { if (0 === strpos($this->path, '//')) { throw new MalformedUriException('The path of a URI without an authority must not start with two slashes "//"'); } if ($this->scheme === '' && false !== strpos(explode('/', $this->path, 2)[0], ':')) { throw new MalformedUriException('A relative URI must not have a path beginning with a segment containing a colon'); } } } } /** * Resolves a URI reference in the context of a base URI and the opposite way. * * @author Tobias Schultze * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5 */ final class UriResolver { /** * Removes dot segments from a path and returns the new path. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 */ public static function removeDotSegments(string $path): string { if ($path === '' || $path === '/') { return $path; } $results = []; $segments = explode('/', $path); foreach ($segments as $segment) { if ($segment === '..') { array_pop($results); } elseif ($segment !== '.') { $results[] = $segment; } } $newPath = implode('/', $results); if ($path[0] === '/' && (!isset($newPath[0]) || $newPath[0] !== '/')) { // Re-add the leading slash if necessary for cases like "/.." $newPath = '/'.$newPath; } elseif ($newPath !== '' && ($segment === '.' || $segment === '..')) { // Add the trailing slash if necessary // If newPath is not empty, then $segment must be set and is the last segment from the foreach $newPath .= '/'; } return $newPath; } /** * Converts the relative URI into a new URI that is resolved against the base URI. * * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5.2 */ public static function resolve(UriInterface $base, UriInterface $rel): UriInterface { if ((string) $rel === '') { // we can simply return the same base URI instance for this same-document reference return $base; } if ($rel->getScheme() != '') { return $rel->withPath(self::removeDotSegments($rel->getPath())); } if ($rel->getAuthority() != '') { $targetAuthority = $rel->getAuthority(); $targetPath = self::removeDotSegments($rel->getPath()); $targetQuery = $rel->getQuery(); } else { $targetAuthority = $base->getAuthority(); if ($rel->getPath() === '') { $targetPath = $base->getPath(); $targetQuery = $rel->getQuery() != '' ? $rel->getQuery() : $base->getQuery(); } else { if ($rel->getPath()[0] === '/') { $targetPath = $rel->getPath(); } else { if ($targetAuthority != '' && $base->getPath() === '') { $targetPath = '/'.$rel->getPath(); } else { $lastSlashPos = strrpos($base->getPath(), '/'); if ($lastSlashPos === false) { $targetPath = $rel->getPath(); } else { $targetPath = substr($base->getPath(), 0, $lastSlashPos + 1).$rel->getPath(); } } } $targetPath = self::removeDotSegments($targetPath); $targetQuery = $rel->getQuery(); } } return new StringURI(StringURI::composeComponents( $base->getScheme(), $targetAuthority, $targetPath, $targetQuery, $rel->getFragment() )); } /** * Returns the target URI as a relative reference from the base URI. * * This method is the counterpart to resolve(): * * (string) $target === (string) UriResolver::resolve($base, UriResolver::relativize($base, $target)) * * One use-case is to use the current request URI as base URI and then generate relative links in your documents * to reduce the document size or offer self-contained downloadable document archives. * * $base = new Uri('http://example.com/a/b/'); * echo UriResolver::relativize($base, new Uri('http://example.com/a/b/c')); // prints 'c'. * echo UriResolver::relativize($base, new Uri('http://example.com/a/x/y')); // prints '../x/y'. * echo UriResolver::relativize($base, new Uri('http://example.com/a/b/?q')); // prints '?q'. * echo UriResolver::relativize($base, new Uri('http://example.org/a/b/')); // prints '//example.org/a/b/'. * * This method also accepts a target that is already relative and will try to relativize it further. Only a * relative-path reference will be returned as-is. * * echo UriResolver::relativize($base, new Uri('/a/b/c')); // prints 'c' as well */ public static function relativize(UriInterface $base, UriInterface $target): UriInterface { if ($target->getScheme() !== '' && ($base->getScheme() !== $target->getScheme() || $target->getAuthority() === '' && $base->getAuthority() !== '') ) { return $target; } if (StringURI::isRelativePathReference($target)) { // As the target is already highly relative we return it as-is. It would be possible to resolve // the target with `$target = self::resolve($base, $target);` and then try make it more relative // by removing a duplicate query. But let's not do that automatically. return $target; } if ($target->getAuthority() !== '' && $base->getAuthority() !== $target->getAuthority()) { return $target->withScheme(''); } // We must remove the path before removing the authority because if the path starts with two slashes, the URI // would turn invalid. And we also cannot set a relative path before removing the authority, as that is also // invalid. $emptyPathUri = $target->withScheme('')->withPath('')->withUserInfo('')->withPort(null)->withHost(''); if ($base->getPath() !== $target->getPath()) { return $emptyPathUri->withPath(self::getRelativePath($base, $target)); } if ($base->getQuery() === $target->getQuery()) { // Only the target fragment is left. And it must be returned even if base and target fragment are the same. return $emptyPathUri->withQuery(''); } // If the base URI has a query but the target has none, we cannot return an empty path reference as it would // inherit the base query component when resolving. if ($target->getQuery() === '') { $segments = explode('/', $target->getPath()); /** @var string $lastSegment */ $lastSegment = end($segments); return $emptyPathUri->withPath($lastSegment === '' ? './' : $lastSegment); } return $emptyPathUri; } private static function getRelativePath(UriInterface $base, UriInterface $target): string { $sourceSegments = explode('/', $base->getPath()); $targetSegments = explode('/', $target->getPath()); array_pop($sourceSegments); $targetLastSegment = array_pop($targetSegments); foreach ($sourceSegments as $i => $segment) { if (isset($targetSegments[$i]) && $segment === $targetSegments[$i]) { unset($sourceSegments[$i], $targetSegments[$i]); } else { break; } } $targetSegments[] = $targetLastSegment; $relativePath = str_repeat('../', count($sourceSegments)).implode('/', $targetSegments); // A reference to am empty last segment or an empty first sub-segment must be prefixed with "./". // This also applies to a segment with a colon character (e.g., "file:colon") that cannot be used // as the first segment of a relative-path reference, as it would be mistaken for a scheme name. if ('' === $relativePath || false !== strpos(explode('/', $relativePath, 2)[0], ':')) { $relativePath = "./$relativePath"; } elseif ('/' === $relativePath[0]) { if ($base->getAuthority() != '' && $base->getPath() === '') { // In this case an extra slash is added by resolve() automatically. So we must not add one here. $relativePath = ".$relativePath"; } else { $relativePath = "./$relativePath"; } } return $relativePath; } private function __construct() { // cannot be instantiated } }