2017-05-18 15:15:53 +01:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
/*
|
2020-11-01 14:24:36 +00:00
|
|
|
|
* helpers.php
|
|
|
|
|
*/
|
2017-05-18 15:15:53 +01:00
|
|
|
|
|
|
|
|
|
// sourced from https://github.com/flattr/normalize-url/blob/master/normalize_url.php
|
|
|
|
|
if (! function_exists('normalize_url')) {
    /**
     * Normalise a URL so that equivalent URLs end up as the same string.
     *
     * The URL is split with parse_url() and rebuilt from its scheme, host,
     * port, path and query only; user-info and the fragment are not carried
     * over into the result.
     *
     * Steps applied:
     *  - scheme and host are lower-cased, default ports (http 80, https 443) dropped;
     *  - the path is NFC-normalised, duplicate slashes collapsed, percent-encoded
     *    unreserved characters decoded, directory indexes followed by a slash
     *    removed, a lone "/" dropped and dot-segments resolved;
     *  - query parameters are sorted and banned keys (see queryKeyIsBanned()) removed.
     *
     * @param  string|null  $url  The URL to normalise.
     * @return string|null  null when $url is null, '' when parse_url() cannot
     *                      parse the input, otherwise the normalised URL.
     */
    function normalize_url(?string $url): ?string
    {
        if ($url === null) {
            return null;
        }

        $parts = parse_url($url);
        if ($parts === false) {
            // parse_url() gives up on seriously malformed URLs; nothing to rebuild.
            return '';
        }

        $defaultPorts = ['http' => 80, 'https' => 443];
        $normalized = '';

        if (isset($parts['scheme'])) {
            $scheme = strtolower($parts['scheme']);

            // Strip scheme default ports
            if (isset($parts['port'], $defaultPorts[$scheme]) && $defaultPorts[$scheme] === $parts['port']) {
                unset($parts['port']);
            }

            $normalized .= "{$scheme}://";
        }

        if (isset($parts['host'])) {
            $normalized .= mb_strtolower($parts['host']);
        }

        if (isset($parts['port'])) {
            $normalized .= ":{$parts['port']}";
        }

        if (isset($parts['path'])) {
            $path = $parts['path'];

            // Unicode normalisation (NFC). normalizer_normalize() returns false
            // for input it cannot process (e.g. invalid UTF-8); keep the raw
            // path in that case instead of feeding false into preg_*().
            $nfc = normalizer_normalize($path, Normalizer::FORM_C);
            if (is_string($nfc)) {
                $path = $nfc;
            }

            // Strip duplicate slashes
            $path = preg_replace('#/{2,}#', '/', $path) ?? $path;

            /*
             * Decode percent-encoded unreserved characters (RFC 3986, section 2.3):
             * ALPHA / DIGIT / "-" / "." / "_" / "~". Hex digits are matched
             * case-insensitively and the path is scanned once, so a decoded
             * character can never combine with its neighbours into a new escape.
             * Every other escape sequence is left exactly as it was.
             */
            $path = preg_replace_callback(
                '/%([0-9A-Fa-f]{2})/',
                static function (array $match): string {
                    // Only the two hex digits go to hexdec(); passing the "%"
                    // as well is deprecated since PHP 7.4.
                    $char = chr((int) hexdec($match[1]));

                    return preg_match('/^[A-Za-z0-9\-._~]$/', $char) === 1 ? $char : $match[0];
                },
                $path
            ) ?? $path;

            // Remove directory index.
            // NOTE(review): every entry ends in "/", so an index file at the very
            // end of the path (e.g. "/index.html") is left untouched - confirm
            // that this is intended.
            $path = str_replace(
                [
                    'default.aspx/', 'default.asp/',
                    'index.html/', 'index.htm/',
                    'default.html/', 'default.htm/',
                    'index.php/', 'index.jsp/',
                ],
                '',
                $path
            );

            // here we only want to drop a slash for the root domain
            // e.g. http://example.com/ -> http://example.com
            // but http://example.com/path/ -/-> http://example.com/path
            if ($path === '/') {
                $path = '';
            }

            /*
             * Path segment normalization (remove_dot_segments, RFC 3986 section 5.2.4).
             * $path is the input buffer, $resolved the output buffer. The loop
             * compares against '' rather than using empty(), because empty()
             * would also stop on a remaining path of "0".
             */
            $resolved = '';
            while ($path !== '') {
                if (preg_match('!^\.{1,2}/!', $path, $match) === 1) {
                    // Leading "../" or "./": drop it.
                    $path = substr($path, strlen($match[0]));
                } elseif (preg_match('!^/\.(/|$)!', $path, $match) === 1) {
                    // "/./" or a final "/.": replace with "/".
                    $path = '/' . substr($path, strlen($match[0]));
                } elseif (preg_match('!^/\.\.(/|$)!', $path, $match) === 1) {
                    // "/../" or a final "/..": replace with "/" and remove the
                    // last segment already written. The end anchor keeps
                    // segments that merely start with ".." (e.g. "/..foo").
                    $path = '/' . substr($path, strlen($match[0]));
                    $resolved = preg_replace('!/[^/]+$!', '', $resolved) ?? $resolved;
                } elseif ($path === '.' || $path === '..') {
                    $path = '';
                } elseif (preg_match('!^/*[^/]*!', $path, $match) === 1) {
                    // Move the first segment (with its leading slash) to the output.
                    $resolved .= $match[0];
                    $path = substr($path, strlen($match[0]));
                }
            }

            $normalized .= $resolved;
        }

        // The fragment is deliberately not appended.

        // Sort GET params alphabetically, not because the RFC requires it but because it's cool!
        if (isset($parts['query'])) {
            $pairs = explode('&', $parts['query']);
            sort($pairs);

            $kept = [];
            foreach ($pairs as $pair) {
                if ($pair === '') {
                    // Produced by "&&" or a leading/trailing "&"; carries no data.
                    continue;
                }

                // lets drop query params we don't want.
                // A parameter without "=" is its own key; stristr() would
                // return false here and trip the string type of queryKeyIsBanned().
                $key = explode('=', $pair, 2)[0];
                if (! queryKeyIsBanned($key)) {
                    $kept[] = $pair;
                }
            }

            if ($kept !== []) {
                $normalized .= '?' . implode('&', $kept);
            }
        }

        return $normalized;
    }

    /**
     * Tell whether a query-string key must be removed during normalisation.
     *
     * @param  string  $key  The parameter name (the part before "=").
     * @return bool  true when the key is in the banned list.
     */
    function queryKeyIsBanned(string $key): bool
    {
        $bannedKeys = [
            'ref_src',
        ];

        return in_array($key, $bannedKeys, true);
    }
}
|
|
|
|
|
|
|
|
|
|
// sourced from https://stackoverflow.com/a/9776726
|
|
|
|
|
if (! function_exists('prettyPrintJson')) {
    /**
     * Re-indent a JSON string for display without decoding it.
     *
     * The input is walked one byte at a time. Outside of string literals:
     *  - "{", "[" and "," are followed by a line break;
     *  - "}" and "]" are preceded by a line break;
     *  - ":" is followed by a single space;
     *  - spaces, tabs, line feeds and carriage returns are dropped.
     * Indentation is built with one tab per nesting level and every tab in the
     * result is finally replaced by a single space.
     *
     * @param  string  $json  The JSON text to format.
     * @return string  The formatted text.
     */
    function prettyPrintJson(string $json): string
    {
        $output = '';
        $depth = 0;
        $insideString = false;
        $escaped = false;
        // Indent level of a line break owed before the next emitted character.
        $pendingBreak = null;

        $length = strlen($json);
        $position = 0;

        while ($position < $length) {
            $token = $json[$position];
            $position++;
            $suffix = '';

            // A break requested by the previous character is due now.
            $breakAt = $pendingBreak;
            $pendingBreak = null;

            if ($escaped) {
                // Character following a backslash: emit verbatim.
                $escaped = false;
            } elseif ($token === '"') {
                $insideString = ! $insideString;
            } elseif ($insideString) {
                if ($token === '\\') {
                    $escaped = true;
                }
            } elseif ($token === '}' || $token === ']') {
                $depth--;
                $breakAt = $depth;
            } elseif ($token === '{' || $token === '[') {
                $depth++;
                $pendingBreak = $depth;
            } elseif ($token === ',') {
                $pendingBreak = $depth;
            } elseif ($token === ':') {
                $suffix = ' ';
            } elseif (in_array($token, [' ', "\t", "\n", "\r"], true)) {
                // Swallow the whitespace and keep any owed break pending.
                $token = '';
                $pendingBreak = $breakAt;
                $breakAt = null;
            }

            if ($breakAt !== null) {
                $output .= "\n" . str_repeat("\t", $breakAt);
            }

            $output .= $token . $suffix;
        }

        return str_replace("\t", ' ', $output);
    }
}
|
2017-09-13 16:13:58 +01:00
|
|
|
|
|
|
|
|
|
// sourced from https://twitter.com/jrubsc/status/907776591320764416/photo/1
|
|
|
|
|
if (! function_exists('carbon')) {
    /**
     * Shorthand for creating a Carbon instance.
     *
     * All arguments are forwarded unchanged to the Carbon\Carbon constructor.
     *
     * @param  mixed  ...$args  Constructor arguments for Carbon\Carbon.
     * @return Carbon\Carbon
     */
    function carbon(...$args)
    {
        $instance = new Carbon\Carbon(...$args);

        return $instance;
    }
}
|