Minimal Page Viewer
Strip all of the "useless" bits from a web page for use in low-bandwidth environments, optionally loading images. [link] The Code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Minimal Page Display</title>
  <style type="text/css">
    body { margin: 0; padding: 0; }
    a { color: blue; font-weight: bold; text-decoration: none; }
    a:hover { color: orange; text-decoration: underline; }
    form { margin: 0; padding: 0; }
    #header { border-bottom: 6px double #000; }
    #title { font-size: 28pt; margin: 0.1em; text-align: center; }
    #subtitle { font-size: 12pt; margin-bottom: 0.1em; text-align: center; }
  </style>
</head>
<body>
<?php
/*
 * Minimal Page Viewer
 *
 * Fetches the page named by the "url" query parameter, keeps only the
 * contents of its <body>, removes styling, scripting and (unless the
 * "images" parameter equals "true") image sources, and rewrites every link
 * so that following it loads the target through this viewer again.
 *
 * Query parameters:
 *   url    - address to display; "http://" is assumed when no scheme is
 *            given; empty or missing shows the bundled start page.
 *   images - the literal string "true" keeps image sources.
 */

// Bundled start page shown when no address has been entered.
$default_page = "goahead.html";

// Read the request defensively: parameters may be missing or arrays.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : "";
if ($url === "") $url = $default_page;
$is_default = ($url === $default_page);

if (!$is_default && strpos($url, "://") === false) $url = "http://" . $url;

// Only plain web addresses may be fetched. Without this check a request such
// as ?url=file:///etc/passwd or ?url=php://... would read local resources.
$fetch_allowed = $is_default || preg_match('/^https?:\/\/[^\/?#]/i', $url);

$images = isset($_GET['images']) && $_GET['images'] == "true";

// Work out the bases used to turn the page's relative links into absolute
// ones:
//   $domain - scheme and host with a trailing slash ("http://host/")
//   $path   - directory of the current document ("http://host/dir/")
// Both stay null for the local start page, whose relative links belong to
// this site and therefore cannot be routed through the viewer.
$link_scheme = "http";
$domain = null;
$path = null;
if (!$is_default && preg_match('/^(https?):\/\/[^\/?#]+/i', $url, $parts)) {
    $link_scheme = strtolower($parts[1]);
    $domain = $parts[0] . "/";

    // Path portion of the address, without query string or fragment.
    $dir = preg_replace('/[?#].*$/s', '', (string) substr($url, strlen($parts[0])));
    $slash_loc = strrpos($dir, '/');
    $path = ($slash_loc === false)
        ? $domain
        : $parts[0] . substr($dir, 0, $slash_loc + 1);
}

// Shared with the preg_replace_callback() handlers defined further down.
$GLOBALS['mpv_link_context'] = array(
    'scheme' => $link_scheme,
    'domain' => $domain,
    'path'   => $path,
    'images' => $images,
);
?>
<div id="header">
  <div id="title">Minimal Page Viewer</div>
  <div id="subtitle">brought to you by <a href="http://www.celtrenicdesigns.com">Celtrenic Designs</a></div>
  <form action="index.php" name="address_bar" method="get">
    <table style="width:100%">
      <tr>
        <td><label for="url">address:</label></td>
        <td style="width:98%"><input type="text" id="url" name="url" style="width: 100%" value="<?php echo $is_default ? "" : htmlspecialchars($url, ENT_QUOTES); ?>" /></td>
        <td style="padding-left: 0.5em; text-align:center;"><label><input type="checkbox" name="images" value="true" <?php echo $images ? 'checked="checked" ' : ""; ?>/> images</label></td>
        <td><input type="submit" value="Go!" /></td>
      </tr>
    </table>
  </form>
</div>
<div style="padding:1em;">
<?php
/**
 * Turns an attribute value taken from the fetched page into an absolute URL.
 *
 * Values that already carry a scheme and in-page anchors ("#...") are
 * returned unchanged (entity-decoded). Relative values are returned
 * unchanged as well when the current page is the local start page.
 *
 * @param string $raw attribute value exactly as it appears in the markup
 * @return string entity-decoded, resolved URL
 */
function mpv_resolve_url($raw)
{
    $ctx = $GLOBALS['mpv_link_context'];
    $target = trim(htmlspecialchars_decode($raw, ENT_QUOTES));

    if ($target !== "" && $target[0] == "#") {
        return $target;
    }

    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $target)) {
        // Already absolute (http:, https:, mailto:, data:, ...).
    } elseif (substr($target, 0, 2) == "//") {
        // Protocol-relative link: reuse the scheme of the current page.
        $target = $ctx['scheme'] . ":" . $target;
    } elseif ($ctx['domain'] === null) {
        return $target;
    } elseif (substr($target, 0, 1) == "/") {
        $target = $ctx['domain'] . substr($target, 1);
    } else {
        $target = $ctx['path'] . $target;
    }

    // Ars Technica links point at a feeds host; use the main site instead.
    return preg_replace('/feeds\.arstechnica\.com/si', 'arstechnica.com', $target);
}

/**
 * preg_replace_callback() handler that routes one href through the viewer.
 *
 * @param array $match [0] whole attribute, [1] quote character, [2] value
 * @return string replacement attribute
 */
function mpv_proxy_href($match)
{
    $target = mpv_resolve_url($match[2]);

    // Anchors, mailto:, javascript: and the like cannot be proxied.
    if (!preg_match('/^https?:\/\//i', $target)) {
        return $match[0];
    }

    // The target must be URL-encoded, otherwise its own query string
    // ("a=1&b=2") is split into parameters of the viewer.
    $link = "?url=" . urlencode($target);
    if ($GLOBALS['mpv_link_context']['images']) {
        $link .= "&amp;images=true";
    }
    return 'href="' . $link . '"';
}

/**
 * preg_replace_callback() handler that makes one src attribute absolute so
 * that images still load when the page is shown from this server.
 *
 * @param array $match [0] whole attribute, [1] quote character, [2] value
 * @return string replacement attribute
 */
function mpv_absolute_src($match)
{
    return 'src="' . htmlspecialchars(mpv_resolve_url($match[2]), ENT_QUOTES) . '"';
}

// Retrieve the remote file.
// NOTE(review): any http(s) host reachable from this server can be requested,
// including internal ones; add a host allow/deny list before deploying this
// on a network with private services.
$html = false;
if ($fetch_allowed) {
    $html = @file_get_contents($url);
}

if ($html === false) {
    echo '<p>Unable to load <strong>' . htmlspecialchars($url, ENT_QUOTES) . '</strong>. ';
    echo $fetch_allowed
        ? 'The page could not be retrieved.'
        : 'Only http:// and https:// addresses can be viewed.';
    echo '</p>';
} else {
    // Keep only what lies between <body ...> and </body>. A document without
    // a body tag (a fragment) is shown as-is.
    $pieces = preg_split('/<body\b[^>]*>/i', $html, 2);
    if (count($pieces) == 2) $html = $pieces[1];
    $pieces = preg_split('/<\/body\s*>/i', $html, 2);
    $html = $pieces[0];

    // Remove style, script and object blocks first so that the attribute
    // patterns below never match across their contents.
    $html = preg_replace('/\<style(.*?)\/style>/si', '<!-- no style, thank you -->', $html);
    $html = preg_replace('/\<script(.*?)\/script>/si', '<!-- no scripts, thank you -->', $html);
    $html = preg_replace('/\<object(.*?)\/object>/si', '<!-- no objects, thank you -->', $html);

    // Strip inline style, id, class and event-handler attributes, plus image
    // sources unless images were requested. The lookbehind keeps the pattern
    // from matching inside longer names such as data-id or subclass.
    $unwanted = array('style', 'id', 'class', 'on[a-z]+');
    if (!$images) {
        $unwanted[] = 'src';
        $unwanted[] = 'srcset';
    }
    $html = preg_replace(
        '/(?<![\w\-])(?:' . implode('|', $unwanted) . ')\s*=\s*(?:"[^"]*"|\'[^\']*\')/i',
        '',
        $html
    );

    // Use the main Ars Technica host everywhere in the page.
    $html = preg_replace('/feeds\.arstechnica\.com/si', 'arstechnica.com', $html);

    // Relative image addresses must be made absolute or they would be
    // requested from this server.
    if ($images) {
        $html = preg_replace_callback('/(?<![\w\-])src\s*=\s*(["\'])(.*?)\1/si', 'mpv_absolute_src', $html);
    }

    // Make every link absolute and send it back through the viewer,
    // carrying the images setting along.
    $html = preg_replace_callback('/(?<![\w\-])href\s*=\s*(["\'])(.*?)\1/si', 'mpv_proxy_href', $html);

    echo $html;
}
?>
</div>
</body>
</html>