80, 'https' => 443]; if (isset($url['scheme'])) { $url['scheme'] = strtolower($url['scheme']); // Strip scheme default ports if ( isset($defaultSchemes[$url['scheme']]) && isset($url['port']) && $defaultSchemes[$url['scheme']] == $url['port'] ) { unset($url['port']); } $newUrl .= "{$url['scheme']}://"; } if (isset($url['host'])) { $url['host'] = mb_strtolower($url['host']); $newUrl .= $url['host']; } if (isset($url['port'])) { $newUrl .= ":{$url['port']}"; } if (isset($url['path'])) { // Case normalization $url['path'] = normalizer_normalize($url['path'], Normalizer::FORM_C); // Strip duplicate slashes while (preg_match("/\/\//", $url['path'])) { $url['path'] = preg_replace('/\/\//', '/', $url['path']); } /* * Decode unreserved characters, http://www.apps.ietf.org/rfc/rfc3986.html#sec-2.3 * Heavily rewritten version of urlDecodeUnreservedChars() in Glen Scott's url-normalizer. */ $u = []; for ($o = 65; $o <= 90; $o++) { $u[] = dechex($o); } for ($o = 97; $o <= 122; $o++) { $u[] = dechex($o); } for ($o = 48; $o <= 57; $o++) { $u[] = dechex($o); } $chrs = ['-', '.', '_', '~']; foreach ($chrs as $chr) { $u[] = dechex(ord($chr)); } $url['path'] = preg_replace_callback( array_map( function ($str) { return '/%' . strtoupper($str) . '/x'; }, $u ), function ($matches) { return chr(hexdec($matches[0])); }, $url['path'] ); // Remove directory index $defaultIndexes = ["/default\.aspx/" => 'default.aspx/', "/default\.asp/" => 'default.asp/', "/index\.html/" => 'index.html/', "/index\.htm/" => 'index.htm/', "/default\.html/" => 'default.html/', "/default\.htm/" => 'default.htm/', "/index\.php/" => 'index.php/', "/index\.jsp/" => 'index.jsp/', ]; foreach ($defaultIndexes as $index => $strip) { if (preg_match($index, $url['path'])) { $url['path'] = str_replace($strip, '', $url['path']); } } // here we only want to drop a slash for the root domain // e.g. 
http://example.com/ -> http://example.com // but http://example.com/path/ -/-> http://example.com/path if ($url['path'] == '/') { unset($url['path']); } /** * Path segment normalization, http://www.apps.ietf.org/rfc/rfc3986.html#sec-5.2.4 * Heavily rewritten version of removeDotSegments() in Glen Scott's url-normalizer. */ $new_path = ''; while (! empty($url['path'])) { if (preg_match('!^(\.\./|\./)!x', $url['path'])) { $url['path'] = preg_replace('!^(\.\./|\./)!x', '', $url['path']); } elseif ( preg_match('!^(/\./)!x', $url['path'], $matches) || preg_match('!^(/\.)$!x', $url['path'], $matches) ) { $url['path'] = preg_replace('!^' . $matches[1] . '!', '/', $url['path']); } elseif (preg_match('!^(/\.\./|/\.\.)!x', $url['path'], $matches)) { $url['path'] = preg_replace('!^' . preg_quote($matches[1], '!') . '!x', '/', $url['path']); $new_path = preg_replace('!/([^/]+)$!x', '', $new_path); } elseif (preg_match('!^(\.|\.\.)$!x', $url['path'])) { $url['path'] = preg_replace('!^(\.|\.\.)$!x', '', $url['path']); } else { if (preg_match('!(/*[^/]*)!x', $url['path'], $matches)) { $first_path_segment = $matches[1]; $url['path'] = preg_replace( '/^' . preg_quote($first_path_segment, '/') . '/', '', $url['path'], 1 ); $new_path .= $first_path_segment; } } } $newUrl .= $new_path; } if (isset($url['fragment'])) { unset($url['fragment']); } // Sort GET params alphabetically, not because the RFC requires it but because it's cool! 
if (isset($url['query'])) {
            $queries = explode('&', $url['query']);
            $url['query'] = '';
            sort($queries);

            foreach ($queries as $query) {
                // Drop query params we don't want.
                // stristr(..., true) yields the text before the first '=' and returns
                // false when the pair contains no '='.
                // NOTE(review): that false is handed to queryKeyIsBanned(string $key)
                // unchanged; confirm this is intended (it is a TypeError under strict_types).
                $key = stristr($query, '=', true);
                if (queryKeyIsBanned($key) === false) {
                    $url['query'] .= "{$query}&";
                }
            }

            // Remove the trailing '&' left behind by the loop above
            $url['query'] = preg_replace('/&\Z/', '', $url['query']);
            if ($url['query'] !== '') {
                $newUrl .= "?{$url['query']}";
            }
        }

        return $newUrl;
    }

    /**
     * Tell whether a query-string parameter name is on the list of names to drop.
     *
     * The comparison is case-sensitive and exact.
     *
     * @param  string  $key  Parameter name, i.e. the part of a `name=value` pair before the `=`.
     * @return bool True when `$key` is one of the banned names.
     */
    function queryKeyIsBanned(string $key): bool
    {
        $bannedKeys = [
            'ref_src',
        ];

        // Strict mode keeps this a plain string match; loose comparison would treat
        // numeric-looking strings such as '10' and '1e1' as equal.
        return in_array($key, $bannedKeys, true);
    }
}

// sourced from https://stackoverflow.com/a/9776726
if (! function_exists('prettyPrintJson')) {
    /**
     * Re-indent a JSON string for display.
     *
     * The input is scanned byte by byte and is never decoded or validated. Outside of
     * string literals: a line break plus indentation is emitted after `{`, `[` and `,`
     * and before `}` and `]`; a space is appended after `:`; spaces, tabs, CR and LF are
     * removed. Inside string literals, characters are copied through, with backslash
     * escapes tracked so that an escaped quote does not end the string.
     *
     * Indentation is built with one tab per nesting level, and every tab in the result is
     * then replaced by the string given to str_replace() at the end.
     * NOTE(review): that replacement string is a single space in the source as reviewed;
     * confirm the intended indent width.
     *
     * @param  string  $json  JSON text; malformed input is formatted on a best-effort basis.
     * @return string The re-indented text.
     */
    function prettyPrintJson(string $json): string
    {
        $result = '';
        $level = 0;
        $in_quotes = false;
        $in_escape = false;
        // Indent level for a line break that must precede the NEXT emitted character
        $ends_line_level = null;
        $json_length = strlen($json);

        for ($i = 0; $i < $json_length; $i++) {
            $char = $json[$i];
            // Indent level for a line break that must precede THIS character
            $new_line_level = null;
            $post = '';

            if ($ends_line_level !== null) {
                $new_line_level = $ends_line_level;
                $ends_line_level = null;
            }

            if ($in_escape) {
                // The character after a backslash is copied verbatim
                $in_escape = false;
            } elseif ($char === '"') {
                $in_quotes = ! $in_quotes;
            } elseif (! $in_quotes) {
                switch ($char) {
                    case '}':
                    case ']':
                        $level--;
                        $ends_line_level = null;
                        $new_line_level = $level;
                        break;

                    case '{':
                    case '[':
                        $level++;
                        // no break
                    case ',':
                        $ends_line_level = $level;
                        break;

                    case ':':
                        $post = ' ';
                        break;

                    case ' ':
                    case "\t":
                    case "\n":
                    case "\r":
                        // Swallow insignificant whitespace, carrying any pending line
                        // break forward to the next real character
                        $char = '';
                        $ends_line_level = $new_line_level;
                        $new_line_level = null;
                        break;
                }
            } elseif ($char === '\\') {
                $in_escape = true;
            }

            if ($new_line_level !== null) {
                // Clamp at zero: an unbalanced '}' or ']' in malformed input drives the
                // level negative, and str_repeat() rejects a negative count.
                $result .= "\n" . str_repeat("\t", max(0, $new_line_level));
            }

            $result .= $char . $post;
        }

        return str_replace("\t", ' ', $result);
    }
}

// sourced from https://twitter.com/jrubsc/status/907776591320764416/photo/1
if (! function_exists('carbon')) {
    /**
     * Shorthand for `new Carbon\Carbon(...)`.
     *
     * @param  mixed  ...$args  Forwarded unchanged to the Carbon\Carbon constructor.
     * @return Carbon\Carbon The newly constructed instance.
     */
    function carbon(...$args)
    {
        return new Carbon\Carbon(...$args);
    }
}