Minimal Page Viewer
Strips all of the "useless" bits from a web page for use in low-bandwidth environments, optionally loading images. [link] The code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>Minimal Page Display</title>
    <style type="text/css">
      /* Page and form share the same zero-spacing reset. */
      body,
      form {
        padding: 0;
        margin: 0;
      }

      /* Links: bold blue without underline; orange and underlined on hover. */
      a {
        text-decoration: none;
        font-weight: bold;
        color: blue;
      }
      a:hover {
        text-decoration: underline;
        color: orange;
      }

      /* Viewer banner: a double rule separates it from what follows. */
      #header {
        border-bottom: 6px double #000;
      }
      #title {
        text-align: center;
        margin: 0.1em;
        font-size: 28pt;
      }
      #subtitle {
        text-align: center;
        margin-bottom: 0.1em;
        font-size: 12pt;
      }
    </style>
  </head>
  <body>
<?php
/**
 * Normalise the address typed into the viewer and derive the two prefixes
 * used to make the fetched page's links absolute.
 *
 * @param mixed $raw Raw "url" request parameter; anything that is not a
 *                   non-empty string selects the local start page.
 * @return array     array($url, $domain, $path) where
 *                   - $url    is the address to fetch ("goahead.html" or an
 *                             address containing "://"),
 *                   - $domain is "scheme://host/" (prefix for "/..." links),
 *                   - $path   is the directory of $url with a trailing slash
 *                             (prefix for relative links).
 *                   $domain and $path are "" when no scheme://host part can
 *                   be recognised, e.g. for the local start page.
 */
function mpv_resolve_target($raw)
{
    $start_page = "goahead.html";

    $url = is_string($raw) ? trim($raw) : "";
    if ($url == "" || $url == $start_page) {
        // The start page is a local file, so there is no remote base to prepend.
        return array($start_page, "", "");
    }

    // A bare "example.com/page" is treated as an http address.
    if (strpos($url, "://") === false) {
        $url = "http://" . $url;
    }

    // scheme://authority for any scheme, so https pages get a proper domain
    // instead of the whole URL being mistaken for one.
    if (!preg_match('~^[a-z][a-z0-9+.\-]*://[^/?#]*~i', $url, $m)) {
        return array($url, "", "");
    }
    $domain = $m[0] . "/";

    // Slashes inside the query string or fragment must not influence the
    // directory, so cut those off before looking for the last "/".
    $base = preg_replace('~[?#].*$~s', '', $url);
    if (strlen($base) <= strlen($m[0])) {
        // No path component at all ("http://example.com"): the directory is
        // the site root. Comparing lengths avoids a hard-coded scheme offset.
        $path = $domain;
    } else {
        $path = substr($base, 0, strrpos($base, '/') + 1);
    }

    return array($url, $domain, $path);
}

// isset() guards keep a first visit (no query string) free of notices.
list($url, $domain, $path) = mpv_resolve_target(isset($_GET['url']) ? $_GET['url'] : "");
$images = isset($_GET['images']) && $_GET['images'] == "true";
?>
<div id="header">
  <div id="title">Minimal Page Viewer</div>
  <div id="subtitle">brought to you by <a href="http://www.celtrenicdesigns.com">Celtrenic Designs</a></div>
  <!--
    Address bar. Submits "url" (and "images=true" when the box is ticked) to
    index.php via GET. The field is pre-filled with the current address, left
    blank while the built-in start page is showing.
  -->
  <form action="index.php" name="address_bar" method="get">
    <table style="width:100%">
      <tr>
        <td><label for="address_url">address:</label></td>
        <td style="width:98%"><input type="text" id="address_url" name="url" style="width: 100%"
          value="<?php
            // $url comes from the query string: escape it so quotes or angle
            // brackets cannot break out of the attribute.
            echo $url == "goahead.html" ? "" : htmlspecialchars($url, ENT_QUOTES);
          ?>" /></td>
        <td style="padding-left: 0.5em; text-align:center;"><input type="checkbox"
          id="address_images" name="images" value="true" <?php echo $images ? 'checked="checked" ' : ""; ?>/> <label for="address_images">images</label></td>
        <td><input type="submit" value="Go!" /></td>
      </tr>
    </table>
  </form>
</div>
<div style="padding:1em;">
<?php
/**
 * preg_replace_callback() handler that points one absolute http(s) link back
 * at this viewer.
 *
 * @param array $m Match array: $m[1] is the quote character, $m[2] the link
 *                 target exactly as it appears in the fetched markup.
 * @return string  Replacement attribute of the form href="?url=<encoded>".
 */
function mpv_proxy_href($m)
{
    // Markup-level escapes (&amp; etc.) are undone first, then the whole
    // target is URL-encoded so its own "?a=b&c=d" stays inside our url= value.
    $target = htmlspecialchars_decode($m[2], ENT_QUOTES);
    return 'href="?url=' . rawurlencode($target) . '"';
}

// Retrieve the page. Only the bundled start page and http(s) addresses are
// passed to file_get_contents(); other wrappers (file://, php://, ...) would
// let a visitor read files from this server.
// NOTE(review): any http(s) host reachable from this server can still be
// requested, including internal ones; restrict further if that matters.
$html = false;
if ($url === "goahead.html" || preg_match('~^https?://~i', $url)) {
    $html = @file_get_contents($url);
}

if ($html === false) {
    echo '<p>Unable to load ' . htmlspecialchars($url, ENT_QUOTES) . '</p>';
} else {
    // NOTE: everything below is best-effort regex filtering for bandwidth
    // reduction; it is not a complete HTML sanitiser.

    // Keep only what is inside <body ...> ... </body> (any letter case).
    // A document without a body tag is used as-is.
    $parts = preg_split('/<body\b/i', $html, 2);
    if (count($parts) == 2) {
        $tag_end = strpos($parts[1], ">");
        $html = ($tag_end === false) ? $parts[1] : substr($parts[1], $tag_end + 1);
    }
    $parts = preg_split('/<\/body\s*>/i', $html, 2);
    $html = $parts[0];

    // Attributes to drop: presentation hooks and every inline event handler.
    // Image sources are dropped too unless the visitor asked for images.
    $unwanted = array('style', 'id', 'class', 'on[a-z]+');
    if (!$images) {
        $unwanted[] = 'src';
        $unwanted[] = 'srcset';
    }
    // The look-behind keeps names that merely end in one of these (valid=,
    // data-id=) intact; \1 makes the closing quote match the opening one.
    $html = preg_replace(
        '/(?<![\w\-])(?:' . implode('|', $unwanted) . ')=(["\'])(.*?)\1/si',
        '',
        $html
    );

    // $domain and $path go into preg_replace() replacement strings, where
    // "\" and "$" would otherwise be read as back-references.
    $escape = array('\\' => '\\\\', '$' => '\\$');
    $domain_repl = strtr($domain, $escape);
    $path_repl = strtr($path, $escape);

    // Protocol-relative links (href="//host/...") inherit the page's scheme.
    $scheme = preg_match('~^https://~i', $url) ? 'https' : 'http';
    $html = preg_replace('/href=(["\'])\/\//i', 'href=${1}' . $scheme . '://', $html);

    // Root-relative links (href="/...") get the site prefix.
    $html = preg_replace('/href=(["\'])\//i', 'href=${1}' . $domain_repl, $html);

    // Remaining relative links get the directory prefix. Anything that
    // already has a scheme (https:, mailto:, ...) or is a same-page
    // "#fragment" is left untouched.
    $html = preg_replace(
        '/href=(["\'])(?![a-z][a-z0-9+.\-]*:|#)/i',
        'href=${1}' . $path_repl,
        $html
    );

    // fix arstechnica silliness
    $html = preg_replace('/feeds\.arstechnica\.com/si', 'arstechnica.com', $html);

    // Internal redirection: route every http(s) link through this viewer.
    $html = preg_replace_callback(
        '/href=(["\'])(https?:\/\/.*?)\1/si',
        'mpv_proxy_href',
        $html
    );

    // Carry the "images" choice along on every routed link. A plain
    // back-reference replaces the old /e (eval) modifier, which PHP 7 removed
    // and which executed text taken from the remote page.
    if ($images) {
        $html = preg_replace('/href="(\?url=[^"]*)"/', 'href="$1&amp;images=true"', $html);
    }

    // disable style
    $html = preg_replace('/\<style(.*?)\/style>/si', '<!-- no style, thank you -->', $html);
    // disable scripts
    $html = preg_replace('/\<script(.*?)\/script>/si', '<!-- no scripts, thank you -->', $html);
    // strip objects
    $html = preg_replace('/\<object(.*?)\/object>/si', '<!-- no objects, thank you -->', $html);

    echo $html;
}
?>
</div>
</body>
</html>