<?php
/*
Plugin Name: urlparse
Plugin URI: http://ryanlee.org/software/wp/urlparse/
Description: Provides functions for parsing absolute and relative URLs and other functions for manipulating, such as joining two URLs as if determining the absolute location of a relative URL.  Translated from Python 2.3.5's included urlparse.py into PHP.  See RFC 1808.  Licensed under the Python Software Foundation License, the same as that of Python 2.3.5.  See http://www.python.org/2.3.5/license.html for the full license text.  Caveat emptor, do not include this library in other software distributions unless you are willing to vouch for its compatibility with the PSF License's text.  Also includes work by Simon Ronnqvist's Smarty plugin found at http://ownmedia.net/products/ , many thanks to Simon for licensing it under BSD.  See RFC 3986, http://www.ietf.org/rfc/rfc3986.txt  I would be happy to stop development on this plugin and redirect it if someone writes the same functionality in PHP and releases it under BSD.
Version: 2.3.5
Author: Ryan Lee
Author URI: http://ryanlee.org/
*/

/*
 Functions: urlparse, urlunparse, urljoin, urldefrag, urlsplit, urlunsplit,
            urlparse_external_links
*/

/*
    A classification of schemes.  The empty string '' stands for "no scheme
    given", so a table containing '' applies its feature by default to
    scheme-less (relative) URLs.

    The tables are read by the functions below through `global`.  They are
    declared global here as well so that they land in the global symbol
    table even when this file is included from inside a function (in which
    case plain assignments would only create function-local variables).
*/
global $uses_relative, $uses_netloc, $uses_params, $uses_query,
       $uses_fragment, $scheme_chars;

/* Schemes whose URLs may be resolved relative to a base URL */
$uses_relative = array('ftp', 'http', 'gopher', 'nntp', 'imap',
                       'wais', 'file', 'https', 'shttp', 'mms',
                       'prospero', 'rtsp', 'rtspu', '');

/* Schemes that carry a //netloc (authority) part */
$uses_netloc = array('ftp', 'http', 'gopher', 'nntp', 'telnet',
                     'imap', 'wais', 'file', 'mms', 'https', 'shttp',
                     'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '');

/* Schemes whose path may carry ;params */
$uses_params = array('ftp', 'hdl', 'prospero', 'http', 'imap',
                     'https', 'shttp', 'rtsp', 'rtspu', 'sip',
                     'mms', '');

/* Schemes that carry a ?query part */
$uses_query = array('http', 'wais', 'imap', 'https', 'shttp', 'mms',
                    'gopher', 'rtsp', 'rtspu', 'sip', '');

/* Schemes that carry a #fragment part */
$uses_fragment = array('ftp', 'hdl', 'http', 'gopher', 'news',
                       'nntp', 'wais', 'https', 'shttp', 'snews',
                       'file', 'prospero', '');

# Characters valid in scheme names
$scheme_chars =  "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-.";

/*
    Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return an associative array with keys:
      scheme, netloc, path, query, fragment, params
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    $url             the URL (absolute or relative) to parse
    $scheme          scheme to assume when $url does not carry one
    $allow_fragments when false, '#' is not treated as a fragment delimiter

    'params' is always present; it is '' when the scheme does not use
    params or the path contains no ';'.
*/
function urlparse($url, $scheme='', $allow_fragments=1) {
    global $uses_params;
    $parsed = urlsplit($url, $scheme, $allow_fragments);
    $path = (string) $parsed['path'];
    $parsed['params'] = '';
    /* strpos() returns 0 for a path that *starts* with ';', so the result
       must be compared against false rather than used as a boolean. */
    if (in_array($parsed['scheme'], $uses_params, true) && strpos($path, ';') !== false) {
        $splits = _splitparams($path);
        $parsed['path'] = $splits['url'];
        $parsed['params'] = $splits['params'];
    }
    return $parsed;
}

/*
    Split the ;params off the last segment of a path.
    Only a ';' located at or after the last '/' starts the params, so
    ';' characters in earlier segments stay part of the path.
    Returns an associative array with keys:
      url (the path without params), params ('' when there are none)
*/
function _splitparams($url) {
    $url = (string) $url;
    /* Start looking for ';' at the last '/', or at the beginning when the
       path has no '/' at all.  Offsets are compared with false explicitly
       because 0 is a valid position. */
    $slash = strrpos($url, '/');
    $i = strpos($url, ';', $slash === false ? 0 : $slash);
    if ($i === false)
        return array('url' => $url, 'params' => '');
    /* The casts normalise substr()'s false (returned by PHP < 8 when the
       ';' is the last character) to ''. */
    return array('url' => (string) substr($url, 0, $i),
                 'params' => (string) substr($url, $i + 1));
}

/*
    Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return an associative array with keys:
      scheme, netloc, path, query, fragment
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    $url             the URL (absolute or relative) to split
    $scheme          scheme to assume when $url does not carry one
    $allow_fragments when false, '#' is not treated as a fragment delimiter

    Every component is returned as a string; absent components are ''.
    The scheme is lower-cased.  Whether netloc, query and fragment are
    recognised depends on the scheme tables ($uses_netloc, $uses_query,
    $uses_fragment); 'http' is special-cased so that it never needs a
    table lookup.
*/
function urlsplit($url, $scheme='', $allow_fragments=1) {
    global $uses_netloc, $scheme_chars, $uses_fragment, $uses_query;
    $url = (string) $url;
    $scheme = (string) $scheme;
    $netloc = '';
    $query = '';
    $fragment = '';
    /* True for a literal "http:" prefix (the common case): all features
       apply and the scheme tables are not consulted. */
    $is_http = false;

    $colon = strpos($url, ':');
    if ($colon !== false && $colon > 0) {
        $candidate = substr($url, 0, $colon);
        if ($candidate === 'http') {
            $is_http = true;
            $scheme = 'http';
            $url = (string) substr($url, $colon + 1);
        } else if (strspn($candidate, $scheme_chars) === strlen($candidate)) {
            /* Everything before the ':' consists of scheme characters,
               so it is a scheme and not part of a path. */
            $scheme = strtolower($candidate);
            $url = (string) substr($url, $colon + 1);
        }
    }

    if (($is_http || in_array($scheme, $uses_netloc, true)) && substr($url, 0, 2) === '//') {
        $split = _splitnetloc($url);
        $netloc = $split['netloc'];
        $url = $split['rest'];
    }

    /* A delimiter at offset 0 is still a delimiter, hence "!== false". */
    if ($allow_fragments
        && ($is_http || in_array($scheme, $uses_fragment, true))
        && strpos($url, '#') !== false) {
        $exploded = explode('#', $url, 2);
        $url = $exploded[0];
        $fragment = $exploded[1];
    }
    if (($is_http || in_array($scheme, $uses_query, true)) && strpos($url, '?') !== false) {
        $exploded = explode('?', $url, 2);
        $url = $exploded[0];
        $query = $exploded[1];
    }

    return array('scheme' => $scheme,
                 'netloc' => $netloc,
                 'path' => $url,
                 'query' => $query,
                 'fragment' => $fragment);
}

/*
    Split "//netloc<rest>" into its netloc and the remainder.
    The netloc ends at the first '/', '?' or '#' (RFC 3986, section 3.2)
    or at the end of the string.  Expects $url to start with '//'.
    Returns an associative array with keys: netloc, rest
*/
function _splitnetloc($url) {
    $after = (string) substr($url, 2);
    $len = strcspn($after, '/?#');
    return array('netloc' => (string) substr($after, 0, $len),
                 'rest' => (string) substr($after, $len));
}

/*
    Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).
    Expects associative array with keys:
      scheme, netloc, path, params, query, fragment
    ('params' may be missing or empty, in which case nothing is appended
    to the path).
*/
function urlunparse($parsed) {
    $params = isset($parsed['params']) ? (string) $parsed['params'] : '';
    /* Compare against '' instead of relying on truthiness: "0" is a
       legitimate params value but evaluates to false in PHP. */
    if ($params !== '')
        $parsed['path'] = $parsed['path'] . ";" . $params;
    return urlunsplit($parsed);
}

/*
    Combine the components produced by urlsplit() back into a URL string.
    Expects associative array with the keys:
      scheme, netloc, path, query, fragment
    Missing or null components are treated as ''; extra keys are ignored.

    A "//netloc" part is written when the netloc is non-empty, or when the
    scheme is one that uses a netloc and the path does not already start
    with '//' (this is what turns scheme 'file', path '/tmp' back into
    'file:///tmp').
*/
function urlunsplit($parsed) {
    global $uses_netloc;
    $defaults = array('scheme' => '', 'netloc' => '', 'path' => '',
                      'query' => '', 'fragment' => '');
    /* Normalise every component to a string so that '' comparisons below
       are reliable (and "0" is not mistaken for an empty component). */
    $parts = array_map('strval', array_merge($defaults, $parsed));

    /* Start from the path: it must survive even when no netloc is written. */
    $url = $parts['path'];
    if ($parts['netloc'] !== ''
        || ($parts['scheme'] !== ''
            && in_array($parts['scheme'], $uses_netloc, true)
            && substr($url, 0, 2) !== '//')) {
        /* A relative path cannot follow a netloc directly. */
        if ($url !== '' && substr($url, 0, 1) !== '/')
            $url = '/' . $url;
        $url = '//' . $parts['netloc'] . $url;
    }
    if ($parts['scheme'] !== '')
        $url = $parts['scheme'] . ':' . $url;
    if ($parts['query'] !== '')
        $url = $url . '?' . $parts['query'];
    if ($parts['fragment'] !== '')
        $url = $url . '#' . $parts['fragment'];
    return $url;
}

/*
    Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    $base            the URL that $url is relative to
    $url             the (possibly relative) URL to resolve
    $allow_fragments passed through to urlparse() for both URLs

    Returns $url unchanged when $base is empty, when $url has a scheme
    different from the base's, or when that scheme does not support
    relative resolution ($uses_relative).  Returns $base when $url is
    empty.
*/
function urljoin($base, $url, $allow_fragments = 1) {
    global $uses_relative, $uses_netloc;
    /* Only a genuinely empty string counts as "missing"; "0" is a valid
       relative reference. */
    if ((string) $base === '') {
        return $url;
    }
    if ((string) $url === '') {
        return $base;
    }
    /* Force every component to a string so the strict comparisons below
       behave the same whatever urlparse() used for "absent". */
    $bparsed = array_map('strval', urlparse($base, '', $allow_fragments));
    $parsed = array_map('strval', urlparse($url, $bparsed['scheme'], $allow_fragments));

    if ($parsed['scheme'] !== $bparsed['scheme'] || !in_array($parsed['scheme'], $uses_relative, true))
        return $url;
    if (in_array($parsed['scheme'], $uses_netloc, true)) {
        /* A reference carrying its own netloc is already absolute. */
        if ($parsed['netloc'] !== '')
            return urlunparse($parsed);
        $parsed['netloc'] = $bparsed['netloc'];
    }
    if (substr($parsed['path'], 0, 1) === '/')
        return urlunparse($parsed);
    if ($parsed['path'] === '') {
        /* No path: reuse the base path, and inherit params/query only as
           far as the reference does not supply its own. */
        if ($parsed['params'] === '') {
            $parsed['params'] = $bparsed['params'];
            if ($parsed['query'] === '')
                $parsed['query'] = $bparsed['query'];
        }
        $parsed['path'] = $bparsed['path'];
        return urlunparse($parsed);
    }

    /* Replace the last segment of the base path with the relative path. */
    $segments = explode('/', $bparsed['path']);
    array_pop($segments);
    $segments = array_merge($segments, explode('/', $parsed['path']));

    /* XXX The stuff below is bogus in various ways... */
    /* A trailing '.' means "this directory": keep the trailing slash. */
    $last = count($segments) - 1;
    if ($segments[$last] === '.')
        $segments[$last] = '';
    /* Drop all remaining '.' segments. */
    $segments = array_values(array_diff($segments, array('.')));

    /* Repeatedly collapse the first "<name>/.." pair found.  The first and
       last segments are never treated as the '..' of a pair. */
    do {
        $collapsed = false;
        $n = count($segments) - 1;
        for ($i = 1; $i < $n; $i++) {
            if ($segments[$i] === '..'
                && $segments[$i-1] !== '' && $segments[$i-1] !== '..') {
                array_splice($segments, $i - 1, 2);
                $collapsed = true;
                break;
            }
        }
    } while ($collapsed);

    /* Handle a '..' left in final position. */
    $count = count($segments);
    if ($count == 2 && $segments[0] === '' && $segments[1] === '..') {
        $segments[1] = '';
    } else if ($count >= 2 && $segments[$count - 1] === '..') {
        /* Remove the '..' and blank the segment before it, which leaves
           the parent directory with a trailing slash. */
        array_pop($segments);
        $segments[$count - 2] = '';
    }
    $parsed['path'] = implode('/', $segments);
    return urlunparse($parsed);
}

/*
    Removes any existing fragment from URL.

    Returns an associative array of the defragmented URL and the fragment with
    keys:
       url, fragment
    If the URL contained no fragments, the second element is the
    empty string.
*/
function urldefrag($url) {
    /* "=== false", not truthiness: a URL that consists only of a fragment
       ("#top") has its '#' at offset 0. */
    if (strpos($url, '#') === false) {
        return array('url' => $url, 'fragment' => '');
    }
    $parsed = urlparse($url);
    $frag = $parsed['fragment'];
    $parsed['fragment'] = '';
    return array('url' => urlunparse($parsed), 'fragment' => $frag);
}

/*
    Fixture for test(): one case per line, in the form
      <relative URL> = [URL:<expected result>]
    test() resolves each relative URL against the base 'http://a/b/c/d'
    and compares the wrapped result with the right-hand side.  Columns are
    separated by runs of spaces; every line starts with spaces, so the
    relative URL is the second space-separated word of a line.
*/
$test_input = "      g:h        = [URL:g:h]
      http:g     = [URL:http://a/b/c/g]
      http:      = [URL:http://a/b/c/d]
      g          = [URL:http://a/b/c/g]
      ./g        = [URL:http://a/b/c/g]
      g/         = [URL:http://a/b/c/g/]
      /g         = [URL:http://a/g]
      //g        = [URL:http://g]
      ?y         = [URL:http://a/b/c/d?y]
      g?y        = [URL:http://a/b/c/g?y]
      g?y/./x    = [URL:http://a/b/c/g?y/./x]
      .          = [URL:http://a/b/c/]
      ./         = [URL:http://a/b/c/]
      ..         = [URL:http://a/b/]
      ../        = [URL:http://a/b/]
      ../g       = [URL:http://a/b/g]
      ../..      = [URL:http://a/]
      ../../g    = [URL:http://a/g]
      ../../../g = [URL:http://a/../g]
      ./../g     = [URL:http://a/b/g]
      ./g/.      = [URL:http://a/b/c/g/]
      /./g       = [URL:http://a/./g]
      g/./h      = [URL:http://a/b/c/g/h]
      g/../h     = [URL:http://a/b/c/h]
      http:g     = [URL:http://a/b/c/g]
      http:      = [URL:http://a/b/c/d]
      http:?y         = [URL:http://a/b/c/d?y]
      http:g?y        = [URL:http://a/b/c/g?y]
      http:g?y/./x    = [URL:http://a/b/c/g?y/./x]";

/*
    Self-test for urljoin(): resolves every relative URL listed in
    $test_input against 'http://a/b/c/d' and echoes
      <url> = [URL:<result>]
    followed by " IS NOT" and the expected value when the result differs
    from the expectation given on the same line.
    Processing stops at the first empty line.
*/
function test() {
    global $test_input;
    $base = 'http://a/b/c/d';
    /* explode()/preg_split() replace split(), which no longer exists in
       PHP 7 and later. */
    foreach (explode("\n", $test_input) as $line) {
        /* Tolerate CRLF line endings in this source file. */
        $line = rtrim($line, "\r");
        if ($line === '') break;
        /* Lines start with spaces, so $words[0] is '' and the relative
           URL is $words[1]. */
        $words = preg_split('/ +/', $line);
        if (count($words) < 2)
            continue;
        $url = $words[1];

        $abs = urljoin($base, $url);
        $wrapped = '[URL:' . $abs . "]";
        echo $url . " = " . $wrapped;
        if (count($words) == 4 and $words[2] == '=') {
            if ($wrapped != $words[3]) {
                echo " IS NOT\n " . $words[3] . "\n\n";
            } else {
                echo "\n\n";
            }
        }
    }
}

/*
    Rewrite relative links in a chunk of HTML as absolute URLs.

    $content  HTML to process
    $id       passed to get_permalink(); the returned permalink is the
              base URL that relative links are resolved against

    Looks at href/src/action attributes of tags whose name starts with
    a, img or form.  Only quoted attribute values made up of the
    characters [[:alnum:]/+=%&_.~?-] are touched; since ':' is not among
    them, values that already carry a scheme (http://..., mailto:...) are
    left alone.  Returns the rewritten HTML.

    The content is processed in a single pass using byte offsets, so the
    function always terminates and attribute values are never interpreted
    as regular expressions.
*/
function urlparse_external_links($content, $id) {
    $base = get_permalink($id);
    /* No trailing '>' is required, so an unterminated tag at the very end
       of the content is processed as well. */
    if (!preg_match_all('/<(?:a|img|form)[^>]*/i', $content, $tags, PREG_OFFSET_CAPTURE))
        return $content;
    $out = '';
    $cursor = 0;
    foreach ($tags[0] as $tag) {
        $html = $tag[0];
        $start = $tag[1];
        /* Copy the text between the previous tag and this one verbatim. */
        $out .= substr($content, $cursor, $start - $cursor);
        $out .= _urlparse_absolutize_attrs($html, $base);
        $cursor = $start + strlen($html);
    }
    return $out . substr($content, $cursor);
}

/*
    Resolve the href/src/action values inside one tag against $base.
    $tag is the tag text from '<' up to (not including) '>'.
    Returns the tag text with each matching value replaced by
    urljoin($base, value).
*/
function _urlparse_absolutize_attrs($tag, $base) {
    $attr_re = '/(\s(?:href|src|action)=["\'])([[:alnum:]\/+=%&_.~?-]*)(["\'])/i';
    if (!preg_match_all($attr_re, $tag, $hits, PREG_SET_ORDER | PREG_OFFSET_CAPTURE))
        return $tag;
    $out = '';
    $cursor = 0;
    foreach ($hits as $hit) {
        $value = $hit[2][0];
        $at = $hit[2][1];
        $out .= substr($tag, $cursor, $at - $cursor) . urljoin($base, $value);
        $cursor = $at + strlen($value);
    }
    return $out . substr($tag, $cursor);
}
?>
