<scheme>://<netloc>/<path>;<params>?<query>#<fragment>

   Return an associative array with keys:
   scheme, netloc, path, params, query, fragment

   Note that we don't break the components up in smaller bits
   (e.g. netloc is a single string) and we don't expand % escapes.

   $url              the URL string to parse
   $scheme           scheme to assume when $url does not carry one
   $allow_fragments  when false, a '#' is left in place instead of being
                     split off into 'fragment'
*/
function urlparse($url, $scheme='', $allow_fragments=1)
{
    global $uses_params;

    $parsed = urlsplit($url, $scheme, $allow_fragments);
    $parsed['params'] = '';

    /* strpos() returns 0 for a ';' in first position, so the result has to
       be compared against false rather than used as a boolean. */
    if (in_array($parsed['scheme'], $uses_params) && strpos($parsed['path'], ';') !== false) {
        $splits = _splitparams($parsed['path']);
        $parsed['path'] = $splits['url'];
        $parsed['params'] = $splits['params'];
    }

    return $parsed;
}

/* Split the ";params" suffix off a path.

   Only a ';' located in the last path segment (at or after the last '/')
   introduces parameters; earlier ones are left in the path.

   Returns an associative array with keys: url, params
   ('params' is the empty string when there is nothing to split off).
*/
function _splitparams($url)
{
    /* Start looking at the last '/' when there is one, otherwise at the
       beginning of the string. */
    $slash = strrpos($url, '/');
    $i = strpos($url, ';', ($slash === false) ? 0 : $slash);

    if ($i === false) {
        return array('url' => $url, 'params' => '');
    }

    return array(
        'url' => (string) substr($url, 0, $i),
        'params' => (string) substr($url, $i + 1),
    );
}

/* Parse a URL into 5 components:
   <scheme>://<netloc>/<path>?<query>#<fragment>

   Return an associative array with keys:
   scheme, netloc, path, query, fragment

   Note that we don't break the components up in smaller bits
   (e.g. netloc is a single string) and we don't expand % escapes.
*/
/* urlsplit() arguments:
   $url              the URL string to split
   $scheme           scheme to assume when $url does not carry one
   $allow_fragments  when false, a '#' is never split off into 'fragment'

   A URL that literally starts with "http:" is handled without consulting
   the global scheme tables; every other scheme is looked up in
   $uses_netloc, $uses_fragment and $uses_query.
*/
function urlsplit($url, $scheme='', $allow_fragments=1)
{
    global $uses_netloc, $scheme_chars, $uses_fragment, $uses_query;

    $netloc = '';
    $query = '';
    $fragment = '';
    $is_http = false;

    $colon = strpos($url, ':');
    if ($colon !== false && $colon > 0) {
        $candidate = substr($url, 0, $colon);
        if ($candidate === 'http') {
            /* optimize the common case */
            $is_http = true;
            $scheme = $candidate;
            $url = (string) substr($url, $colon + 1);
        } elseif (strspn($candidate, $scheme_chars) == $colon) {
            /* every character before the ':' is a legal scheme character */
            $scheme = strtolower($candidate);
            $url = (string) substr($url, $colon + 1);
        }
    }

    if (($is_http || in_array($scheme, $uses_netloc)) && substr($url, 0, 2) == '//') {
        /* The network location runs from just after the "//" up to the
           next '/', '?' or '#', or to the end of the string. */
        $rest = (string) substr($url, 2);
        $len = strcspn($rest, '/?#');
        $netloc = (string) substr($rest, 0, $len);
        $url = (string) substr($rest, $len);
    }

    /* strpos() yields 0 for a delimiter in first position and false for
       "not found", so both tests below compare against false explicitly. */
    if ($allow_fragments
        && ($is_http || in_array($scheme, $uses_fragment))
        && strpos($url, '#') !== false) {
        $pieces = explode('#', $url, 2);
        $url = $pieces[0];
        $fragment = $pieces[1];
    }

    if (($is_http || in_array($scheme, $uses_query)) && strpos($url, '?') !== false) {
        $pieces = explode('?', $url, 2);
        $url = $pieces[0];
        $query = $pieces[1];
    }

    return array(
        'scheme' => $scheme,
        'netloc' => $netloc,
        'path' => $url,
        'query' => $query,
        'fragment' => $fragment,
    );
}

/* Put a parsed URL back together again. This may result in a
   slightly different, but equivalent URL, if the URL that was parsed
   originally had redundant delimiters, e.g. a ?
with an empty query (the draft states that these are equivalent).

   Expects an associative array with keys:
   scheme, netloc, path, params, query, fragment
*/
function urlunparse($parsed)
{
    /* Fold the parameters back into the path, then let urlunsplit() do
       the rest. Compared as a string so that a value of "0" is kept. */
    if (isset($parsed['params']) && (string) $parsed['params'] !== '') {
        $parsed['path'] = $parsed['path'] . ";" . $parsed['params'];
    }

    return urlunsplit($parsed);
}

/* Assemble a URL string from its five components.

   Expects an associative array with the keys:
   scheme, netloc, path, query, fragment

   Empty components (and their delimiters) are left out of the result.
*/
function urlunsplit($parsed)
{
    global $uses_netloc;

    /* Cast so that NULL or false components behave like empty strings,
       while a literal "0" still counts as present. */
    $scheme = (string) $parsed['scheme'];
    $netloc = (string) $parsed['netloc'];
    $url = (string) $parsed['path'];
    $query = (string) $parsed['query'];
    $fragment = (string) $parsed['fragment'];

    if ($netloc !== ''
        || ($scheme !== '' && in_array($scheme, $uses_netloc) && substr($url, 0, 2) != '//')) {
        /* A relative path needs a '/' to separate it from the netloc. */
        if ($url !== '' && substr($url, 0, 1) != '/') {
            $url = '/' . $url;
        }
        $url = '//' . $netloc . $url;
    }

    if ($scheme !== '') {
        $url = $scheme . ':' . $url;
    }
    if ($query !== '') {
        $url = $url . '?' . $query;
    }
    if ($fragment !== '') {
        $url = $url . '#' . $fragment;
    }

    return $url;
}

/* Join a base URL and a possibly relative URL to form an absolute
   interpretation of the latter.
*/
/* urljoin() arguments:
   $base             the URL that $url is interpreted against
   $url              an absolute or relative URL
   $allow_fragments  passed through to urlparse() for both URLs

   Returns $url unchanged when $base is empty, when $url names a scheme
   different from the base's, or when the scheme is not listed in
   $uses_relative. Returns $base when $url is empty.
*/
function urljoin($base, $url, $allow_fragments = 1)
{
    global $uses_relative, $uses_netloc;

    /* Compare as strings: "0" is a perfectly valid relative reference. */
    if ((string) $base === '') {
        return $url;
    }
    if ((string) $url === '') {
        return $base;
    }

    $bparsed = urlparse($base, '', $allow_fragments);
    $parsed = urlparse($url, $bparsed['scheme'], $allow_fragments);

    if ($parsed['scheme'] != $bparsed['scheme'] || !in_array($parsed['scheme'], $uses_relative)) {
        return $url;
    }

    if (in_array($parsed['scheme'], $uses_netloc)) {
        if ((string) $parsed['netloc'] !== '') {
            /* "//host/..." style reference: only the scheme is inherited */
            return urlunparse($parsed);
        }
        $parsed['netloc'] = $bparsed['netloc'];
    }

    if (substr($parsed['path'], 0, 1) == '/') {
        /* absolute path: nothing to merge */
        return urlunparse($parsed);
    }

    if ((string) $parsed['path'] === '') {
        /* Empty path: reuse the base path, and fall back to the base's
           params / query only when the reference has none of its own. */
        if ((string) $parsed['params'] === '') {
            $parsed['params'] = $bparsed['params'];
            if ((string) $parsed['query'] === '') {
                $parsed['query'] = $bparsed['query'];
            }
        }
        $parsed['path'] = $bparsed['path'];
        return urlunparse($parsed);
    }

    /* Replace the last segment of the base path by the relative path. */
    $segments = explode('/', $bparsed['path']);
    array_pop($segments);
    $segments = array_merge($segments, explode('/', $parsed['path']));

    /* XXX The stuff below is bogus in various ways... */

    /* A trailing "." means "this directory": keep the trailing slash. */
    $last = count($segments) - 1;
    if ($segments[$last] === '.') {
        $segments[$last] = '';
    }

    /* Drop every remaining "." segment. */
    $segments = array_values(array_diff($segments, array('.')));

    /* Repeatedly collapse "<name>/.." pairs. The final segment is left
       out of this scan; a trailing ".." is dealt with further down. */
    do {
        $collapsed = false;
        $n = count($segments) - 1;
        for ($i = 1; $i < $n; $i++) {
            if ($segments[$i] === '..'
                && $segments[$i - 1] !== ''
                && $segments[$i - 1] !== '..') {
                array_splice($segments, $i - 1, 2);
                $collapsed = true;
                break;
            }
        }
    } while ($collapsed);

    /* Check the count first so a one-element list is never indexed at 1. */
    $count = count($segments);
    if ($count == 2 && $segments[0] === '' && $segments[1] === '..') {
        $segments[1] = '';
    } elseif ($count >= 2 && $segments[$count - 1] === '..') {
        /* "<name>/.." at the very end becomes a trailing slash */
        array_splice($segments, -2, 2, array(''));
    }

    $parsed['path'] = implode('/', $segments);
    return urlunparse($parsed);
}

/* Removes any existing fragment from URL.

   Returns an associative array of the defragmented URL and the fragment
   with keys: url, fragment

   If the URL contained no fragments, the second element is the
   empty string.
*/
function urldefrag($url)
{
    /* strpos() returns 0 for a leading '#', so compare against false. */
    if (strpos($url, '#') === false) {
        return array('url' => $url, 'fragment' => '');
    }

    $parsed = urlparse($url);
    $fragment = $parsed['fragment'];
    $parsed['fragment'] = '';

    return array('url' => urlunparse($parsed), 'fragment' => $fragment);
}

/* Self-test data, one case per line: <relative URL> = [URL:<expected result>]
   Every case is resolved against the base URL used in test(). */
$test_input = "
      g:h           = [URL:g:h]
      http:g        = [URL:http://a/b/c/g]
      http:         = [URL:http://a/b/c/d]
      g             = [URL:http://a/b/c/g]
      ./g           = [URL:http://a/b/c/g]
      g/            = [URL:http://a/b/c/g/]
      /g            = [URL:http://a/g]
      //g           = [URL:http://g]
      ?y            = [URL:http://a/b/c/d?y]
      g?y           = [URL:http://a/b/c/g?y]
      g?y/./x       = [URL:http://a/b/c/g?y/./x]
      .             = [URL:http://a/b/c/]
      ./            = [URL:http://a/b/c/]
      ..            = [URL:http://a/b/]
      ../           = [URL:http://a/b/]
      ../g          = [URL:http://a/b/g]
      ../..         = [URL:http://a/]
      ../../g       = [URL:http://a/g]
      ../../../g    = [URL:http://a/../g]
      ./../g        = [URL:http://a/b/g]
      ./g/.         = [URL:http://a/b/c/g/]
      /./g          = [URL:http://a/./g]
      g/./h         = [URL:http://a/b/c/g/h]
      g/../h        = [URL:http://a/b/c/h]
      http:g        = [URL:http://a/b/c/g]
      http:         = [URL:http://a/b/c/d]
      http:?y       = [URL:http://a/b/c/d?y]
      http:g?y      = [URL:http://a/b/c/g?y]
      http:g?y/./x  = [URL:http://a/b/c/g?y/./x]
";

/* Run urljoin() over every case in $test_input and print the outcome.

   Each case prints "<url> = [URL:<result>]"; when the result differs from
   the expected value, " IS NOT" and the expected value follow.
*/
function test()
{
    global $test_input;

    $base = 'http://a/b/c/d';

    foreach (explode("\n", $test_input) as $line) {
        /* Split on any run of whitespace; blank lines yield no words and
           are skipped instead of ending the run. */
        $words = preg_split('/\s+/', trim($line), -1, PREG_SPLIT_NO_EMPTY);
        if (!$words) {
            continue;
        }

        $url = $words[0];
        $abs = urljoin($base, $url);
        if (!$base) {
            $base = $abs;
        }

        $wrapped = '[URL:' . $abs . "]";
        echo $url . " = " . $wrapped;

        if (count($words) == 3 && $words[1] == '=') {
            if ($wrapped != $words[2]) {
                echo " IS NOT\n " . $words[2] . "\n\n";
            } else {
                echo "\n\n";
            }
        }
    }
}

/* Convert relative links inside a chunk of HTML into absolute ones.

   Looks at href, src and action attributes that appear (preceded by a
   space) inside text starting with "<a", "<img" or "<form", and whose
   quoted value consists only of letters, digits and / + = % & _ . ~ ? -
   A value containing ':' (e.g. one that already carries a scheme) or '#'
   therefore never matches and is left untouched.

   $content  the HTML to rewrite
   $id       passed to get_permalink() to obtain the base URL; that
             function is not defined in this file

   Returns the rewritten HTML.
*/
function urlparse_external_links($content, $id)
{
    $tag_re = '/<(?:a|img|form)[^>]*/i';
    $attr_re = '/ (?:href|src|action)=["\']([[:alnum:]\/+=%&_.~?-]*)["\']/i';

    if (!preg_match_all($tag_re, $content, $tags, PREG_OFFSET_CAPTURE)) {
        return $content;
    }

    $base = null;

    /* Every match is located once and spliced by byte offset, so the link
       text is never used as a pattern and the scan always terminates.
       Walking backwards keeps the offsets of earlier matches valid. */
    for ($t = count($tags[0]) - 1; $t >= 0; $t--) {
        $tag = $tags[0][$t][0];
        $tag_pos = $tags[0][$t][1];

        if (!preg_match_all($attr_re, $tag, $attrs, PREG_OFFSET_CAPTURE)) {
            continue;
        }

        for ($a = count($attrs[1]) - 1; $a >= 0; $a--) {
            $frag = $attrs[1][$a][0];
            $frag_pos = $attrs[1][$a][1];

            /* Look the base URL up only once a link actually needs it. */
            if ($base === null) {
                $base = get_permalink($id);
            }

            $abs = urljoin($base, $frag);
            $content = substr_replace($content, $abs, $tag_pos + $frag_pos, strlen($frag));
        }
    }

    return $content;
}
?>