Minimal Page Viewer

Strip all of the "useless" bits from a web page for use in low-bandwidth environments, optionally loading images. [link]

The Code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">

<head>
	<title>Minimal Page Display</title>
	<style type="text/css">
	body {
		margin: 0;
		padding: 0;
	}
	a {
		color: blue;
		font-weight: bold;
		text-decoration: none;
	}
	a:hover {
		color: orange;
		text-decoration: underline;
	}
	form {
		margin: 0;
		padding: 0;
	}
	#header {
		border-bottom: 6px double #000;
	}
	#title {
		font-size: 28pt;
		margin: 0.1em;
		text-align: center;
	}
	#subtitle {
		font-size: 12pt;
		margin-bottom: 0.1em;
		text-align: center;
	}
	</style>
</head>

<body>
<?php

/**
 * Split an address into the two prefixes used to make links absolute.
 *
 * @param string $url Address of the page being viewed.
 * @return array array($domain, $path). $domain is the scheme and host with a
 *               trailing slash (prefix for root-relative links). $path is the
 *               directory of the document with a trailing slash (prefix for
 *               relative links), or "" when the address contains no slash.
 */
function mpv_split_url($url)
{
	if (preg_match('~^(https?://[^/?#]+)~i', $url, $match)) {
		$domain = $match[1] . "/";
		$host_len = strlen($match[1]);
	} else {
		// Not an http(s) address: fall back to the address itself, slash-terminated.
		$domain = (substr($url, -1) === "/") ? $url : $url . "/";
		$host_len = 0;
	}

	// Slashes inside the query string or fragment are not path separators.
	$without_query = preg_replace('~[?#].*$~s', '', $url);
	$slash_loc = strrpos($without_query, '/');

	if ($slash_loc === false)
		$path = "";
	elseif ($slash_loc < $host_len)
		// The only slashes belong to "scheme://": the document sits at the site root.
		$path = $domain;
	else
		$path = substr($without_query, 0, $slash_loc + 1);

	return array($domain, $path);
}

// Requested address; a missing, empty or non-string value shows the local landing page.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : "";
if ($url === "")
	$url = "goahead.html";

// Let users type "example.com" without a scheme.
if ($url !== "goahead.html" && strpos($url, "://") === false)
	$url = "http://" . $url;

list($domain, $path) = mpv_split_url($url);

// Images are loaded only when the checkbox was ticked (images=true).
$images = isset($_GET['images']) && $_GET['images'] === "true";

?>
<?php /* Page banner and address bar. The address comes from the query string, so it is HTML-escaped before being echoed back into the form. */ ?>
<div id="header">
	<div id="title">Minimal Page Viewer</div>
	<div id="subtitle">brought to you by <a href="http://www.celtrenicdesigns.com">Celtrenic Designs</a></div>
	<form action="index.php" name="address_bar" method="get">
	<table style="width:100%">
	<tr>
		<td>address:</td>
		<td style="width:98%"><input type="text" name="url" style="width: 100%"
				value="<?php echo $url == "goahead.html" ? "" : htmlspecialchars($url, ENT_QUOTES, 'UTF-8'); ?>" /></td>
		<td style="padding-left: 0.5em; text-align:center;"><input type="checkbox"
				name="images" value="true" <?php echo $images ? 'checked="checked" ' : ""; ?>/> images</td>
		<td><input type="submit" value="Go!" /></td>
	</tr>
	</table>
	</form>
</div>
<div style="padding:1em;">
<?php

/**
 * preg_replace() that never loses the document: when PCRE fails and returns
 * NULL, the unmodified subject is handed back instead.
 */
function mpv_replace($pattern, $replacement, $subject)
{
	$result = preg_replace($pattern, $replacement, $subject);
	return $result === null ? $subject : $result;
}

/**
 * Turn one href value from the remote page into a link back through this viewer.
 *
 * @param string $target Attribute value exactly as it appears in the remote markup.
 * @param string $domain Scheme and host of the remote page, with trailing slash.
 * @param string $path   Directory of the remote page, with trailing slash.
 * @param bool   $images Whether image loading stays enabled on the linked page.
 * @return string|null New attribute value (not yet HTML-escaped), or null when the
 *                     link must be left exactly as it is.
 */
function mpv_rewrite_link($target, $domain, $path, $images)
{
	// Attribute values are HTML-encoded (&amp; etc.); work on the real URL.
	$target = trim(html_entity_decode($target, ENT_QUOTES, 'UTF-8'));

	// In-page anchors keep working only if they are not sent through the viewer.
	if ($target === "" || $target[0] === "#")
		return null;

	if (substr($target, 0, 2) === "//") {
		// Protocol-relative link: reuse the scheme of the page being viewed.
		$scheme = (stripos($domain, "https://") === 0) ? "https:" : "http:";
		$absolute = $scheme . $target;
	} elseif ($target[0] === "/") {
		// Root-relative link ($domain already ends in a slash).
		$absolute = $domain . substr($target, 1);
	} elseif (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $target)) {
		// Already has a scheme; only http(s) pages can be shown by the viewer,
		// so mailto:, ftp: and similar links are left alone.
		if (!preg_match('~^https?://~i', $target))
			return null;
		$absolute = $target;
	} else {
		// Plain relative link.
		$absolute = $path . $target;
	}

	// Encode the target so its own query string survives as part of ?url=.
	$link = "?url=" . rawurlencode($absolute);
	if ($images)
		$link .= "&images=true";

	return $link;
}

/**
 * Reduce a fetched document to the contents of its body, with presentation
 * attributes, style sheets, scripts and objects removed and every link routed
 * back through the viewer.
 *
 * @param string $html   Complete remote document.
 * @param string $domain Scheme and host of the remote page, with trailing slash.
 * @param string $path   Directory of the remote page, with trailing slash.
 * @param bool   $images When false, src attributes are stripped so no images load.
 * @return string Markup ready to be echoed inside the viewer page.
 */
function mpv_minimize_html($html, $domain, $path, $images)
{
	//strip everything up to and including the opening body tag (any letter case);
	//a document without one is processed as a whole
	if (preg_match('~<body\b[^>]*>~i', $html, $open, PREG_OFFSET_CAPTURE))
		$html = (string) substr($html, $open[0][1] + strlen($open[0][0]));

	//strip everything from the closing body tag onwards
	$close = stripos($html, "</body");
	if ($close !== false)
		$html = substr($html, 0, $close);

	//strip image sources (unless requested), inline style, id, class and mouse handlers
	$attributes = array('style', 'id', 'class', 'onMouseOver', 'onMouseDown', 'onMouseOut');
	if (!$images)
		array_unshift($attributes, 'src');
	foreach ($attributes as $attribute) {
		// \b keeps e.g. id= from matching the tail of valid=
		$html = mpv_replace('/\b' . $attribute . '="(.*?)"/si', '', $html);
		$html = mpv_replace('/\b' . $attribute . "='(.*?)'/si", '', $html);
	}

	//fix arstechnica silliness
	$html = mpv_replace('/feeds\.arstechnica\.com/si', 'arstechnica.com', $html);

	//internal redirection: make every link absolute and point it back at the viewer
	$rewritten = preg_replace_callback(
		'/\bhref=(["\'])(.*?)\1/si',
		function ($match) use ($domain, $path, $images) {
			$link = mpv_rewrite_link($match[2], $domain, $path, $images);
			if ($link === null)
				return $match[0];
			return 'href="' . htmlspecialchars($link, ENT_QUOTES, 'UTF-8') . '"';
		},
		$html
	);
	if ($rewritten !== null)
		$html = $rewritten;

	//disable style
	$html = mpv_replace('/<style(.*?)\/style>/si', '<!-- no style, thank you -->', $html);
	//disable scripts
	$html = mpv_replace('/<script(.*?)\/script>/si', '<!-- no scripts, thank you -->', $html);
	//strip objects
	$html = mpv_replace('/<object(.*?)\/object>/si', '<!-- no objects, thank you -->', $html);

	return $html;
}

// The address is user input: apart from the bundled landing page, only http(s)
// may be fetched, otherwise file:// and other stream wrappers would expose
// local files.
// NOTE(review): internal hosts (localhost, private ranges) are still reachable
// over http; restrict them here if this runs on a network with private services.
if ($url != "goahead.html" && !preg_match('~^https?://~i', $url)) {
	echo '<p>Only http:// and https:// addresses can be displayed.</p>';
} else {
	//retrieve the remote file
	$html = file_get_contents($url);

	if ($html === false)
		echo '<p>Unable to retrieve ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '.</p>';
	else
		echo mpv_minimize_html($html, $domain, $path, $images);
}

?>
</div>
</body>

</html>