aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/anubis.php100
-rw-r--r--lib/backend.php178
-rw-r--r--lib/bingcache-todo-fix.php144
-rw-r--r--lib/bot_protection.php281
-rw-r--r--lib/curlproxy.php660
-rw-r--r--lib/favicon404.pngbin0 -> 744 bytes
-rw-r--r--lib/frontend.php1356
-rw-r--r--lib/fuckhtml.php622
-rw-r--r--lib/img404.pngbin0 -> 216 bytes
-rw-r--r--lib/type-todo.php132
10 files changed, 3473 insertions, 0 deletions
diff --git a/lib/anubis.php b/lib/anubis.php
new file mode 100644
index 0000000..2bd6d90
--- /dev/null
+++ b/lib/anubis.php
@@ -0,0 +1,100 @@
+<?php
+
+//
+// Reference
+// https://github.com/TecharoHQ/anubis/blob/ecc716940e34ebe7249974f2789a99a2c7115e4e/web/js/proof-of-work.mjs
+//
+
+class anubis{
+
+ /**
+  * Loads the bundled HTML parser and keeps an instance of it for
+  * scrape() to use.
+  */
+ public function __construct(){
+
+     include_once "fuckhtml.php";
+
+     $parser = new fuckhtml();
+     $this->fuckhtml = $parser;
+ }
+
+ /**
+  * Extracts the Anubis challenge embedded in a page and solves it.
+  *
+  * The challenge is read from the <script id="anubis_challenge"> element,
+  * decoded as JSON, validated, and handed to rape() for the proof of work.
+  *
+  * @param string $html Raw HTML of the challenge page
+  * @return array Result of rape(): "response" (hex hash) and "nonce"
+  * @throws Exception When the element is missing, the JSON cannot be
+  *                   decoded, or the decoded data has the wrong shape
+  */
+ public function scrape($html){
+
+     $this->fuckhtml->load($html);
+
+     $element = $this->fuckhtml->getElementById("anubis_challenge", "script");
+
+     if($element === false){
+
+         throw new Exception("Failed to scrape anubis challenge data");
+     }
+
+     $json = $this->fuckhtml->getTextContent($element);
+     $challenge = json_decode($json, true);
+
+     if($challenge === null){
+
+         throw new Exception("Failed to decode anubis challenge data");
+     }
+
+     // both fields must exist and carry the expected scalar types
+     $is_valid =
+         isset($challenge["challenge"], $challenge["rules"]["difficulty"]) &&
+         is_int($challenge["rules"]["difficulty"]) &&
+         is_string($challenge["challenge"]);
+
+     if(!$is_valid){
+
+         throw new Exception("Found invalid challenge data");
+     }
+
+     return $this->rape(
+         $challenge["challenge"],
+         $challenge["rules"]["difficulty"]
+     );
+ }
+
+ /**
+  * Tells whether the first $difficulty hex digits (nibbles) of a raw
+  * binary hash are all zero.
+  *
+  * @param string $hash Raw (binary) hash bytes
+  * @param int $difficulty Number of leading nibbles that must be zero
+  * @return bool
+  */
+ private function is_valid_hash($hash, $difficulty){
+
+     for($position = 0; $position < $difficulty; $position++){
+
+         // two nibbles per byte: even positions are the high half
+         $byte = ord($hash[intdiv($position, 2)]);
+
+         $value =
+             $position % 2 === 0 ?
+             ($byte >> 4) & 0x0f :
+             $byte & 0x0f;
+
+         if($value !== 0){
+
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Brute-forces the proof of work: finds the smallest non-negative nonce
+  * for which sha256($data . $nonce) starts with $difficulty zero nibbles.
+  *
+  * The loop has no upper bound; it only ends when a matching nonce is found.
+  *
+  * @param string $data Challenge string
+  * @param int $difficulty Required number of leading zero nibbles
+  * @return array ["response" => hex encoded hash, "nonce" => int]
+  */
+ public function rape($data, $difficulty = 5){
+
+     for($nonce = 0; ; $nonce++){
+
+         $digest = hash("sha256", $data . $nonce, true);
+
+         if(!$this->is_valid_hash($digest, $difficulty)){
+
+             continue;
+         }
+
+         return [
+             "response" => bin2hex($digest),
+             "nonce" => $nonce
+         ];
+     }
+ }
+}
diff --git a/lib/backend.php b/lib/backend.php
new file mode 100644
index 0000000..66e78a1
--- /dev/null
+++ b/lib/backend.php
@@ -0,0 +1,178 @@
+<?php
+class backend{
+
+ /**
+  * @param string $scraper Scraper name; used to pick the proxy pool
+  *                        constant and to namespace APCu keys
+  */
+ public function __construct($scraper){
+
+     $this->scraper = $scraper;
+ }
+
+ /*
+ Proxy stuff
+ */
+ /**
+  * Picks the proxy to use for the next request of this scraper.
+  *
+  * The pool name comes from config::PROXY_<SCRAPER>. When it is false no
+  * proxy is wanted and the placeholder line 'raw_ip::::' is returned.
+  * Otherwise data/proxies/<pool>.txt is read and its usable lines are
+  * rotated through using a per-scraper APCu counter.
+  *
+  * @return string Proxy line in "type:address:port:username:password" form
+  * @throws Exception When the proxy list cannot be read or has no usable entry
+  */
+ public function get_ip(){
+
+     $pool = constant("config::PROXY_" . strtoupper($this->scraper));
+
+     if($pool === false){
+
+         // no proxy configured for this scraper
+         return 'raw_ip::::';
+     }
+
+     // round-robin counter, incremented on every call
+     $proxy_index_raw = apcu_inc("p." . $this->scraper);
+
+     $proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
+
+     if($proxylist === false){
+
+         throw new Exception("Could not read proxy list \"" . $pool . "\"");
+     }
+
+     // trim every line (also strips the \r of CRLF files), then ignore
+     // empty or commented lines
+     $proxylist =
+         array_values(
+             array_filter(
+                 array_map("trim", explode("\n", $proxylist)),
+                 function($entry){
+
+                     return $entry !== "" && $entry[0] !== "#";
+                 }
+             )
+         );
+
+     $total = count($proxylist);
+
+     if($total === 0){
+
+         // would otherwise be a modulo by zero below
+         throw new Exception("Proxy list \"" . $pool . "\" contains no proxies");
+     }
+
+     // apcu_inc() returns false on failure; the cast makes that index 0
+     return $proxylist[(int)$proxy_index_raw % $total];
+ }
+
+ // this function is also called directly on nextpage
+ /**
+  * Applies a proxy line to a curl handle.
+  *
+  * The line has the form "type:address:port:username:password"; the
+  * trailing fields may be omitted. The type "raw_ip" means no proxy.
+  * Unknown types leave the proxy options untouched.
+  *
+  * This function is also called directly on nextpage.
+  *
+  * @param resource|CurlHandle $curlproc Curl handle to configure
+  * @param string $ip Proxy line, as returned by get_ip()
+  * @return void
+  */
+ public function assign_proxy(&$curlproc, string $ip){
+
+     // parse proxy line; pad so that lines without credentials do not
+     // trigger undefined offset warnings
+     [
+         $type,
+         $address,
+         $port,
+         $username,
+         $password
+     ] = array_pad(explode(":", $ip, 5), 5, "");
+
+     switch($type){
+
+         case "raw_ip":
+             return;
+
+         case "http":
+         case "https":
+             curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
+             curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
+             break;
+
+         case "socks4":
+             curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
+             curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+             break;
+
+         case "socks5":
+             curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
+             curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+             break;
+
+         case "socks4a":
+             curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
+             curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+             break;
+
+         case "socks5_hostname":
+         case "socks5h":
+         case "socks5a":
+             curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
+             curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+             break;
+     }
+
+     if($username != ""){
+
+         curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
+     }
+ }
+
+
+
+ /*
+ Next page stuff
+ */
+ /**
+  * Stores a next-page payload in APCu and returns the token that
+  * unlocks it.
+  *
+  * The payload is deflated and encrypted with a freshly generated
+  * secretbox key. Only the nonce, the proxy line and the ciphertext are
+  * cached (for 15 minutes); the key exists solely inside the returned
+  * token.
+  *
+  * @param string $payload Data needed to fetch the next page
+  * @param string $page Page name; only its first character is used
+  * @param string $proxy Proxy line to reuse for the next page
+  * @return string Token "<scraper><requestid>.<base64url key>"
+  */
+ public function store(string $payload, string $page, string $proxy){
+
+     $key = sodium_crypto_secretbox_keygen();
+     $nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
+
+     $requestid = apcu_inc("requestid");
+
+     // <first letter of page name>.<scraper name><request id>
+     $cache_key = $page[0] . "." . $this->scraper . $requestid;
+
+     // compress, then encrypt
+     $ciphertext =
+         sodium_crypto_secretbox(
+             gzdeflate($payload),
+             $nonce,
+             $key
+         );
+
+     apcu_store(
+         $cache_key,
+         [$nonce, $proxy, $ciphertext],
+         900 // 15 minutes
+     );
+
+     // url-safe base64 without padding
+     $encoded_key = rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
+
+     return $this->scraper . $requestid . "." . $encoded_key;
+ }
+
+ /**
+  * Resolves a next-page token created by store().
+  *
+  * The part of the token before the first "." selects the APCu entry,
+  * the part after it is the base64url encoded secretbox key. The entry
+  * is deleted once it has been decrypted successfully.
+  *
+  * @param string $npt Next page token
+  * @param string $page Page name; only its first character is used
+  * @return array [decrypted payload, proxy line]
+  * @throws Exception When the token is malformed, unknown, expired or
+  *                   cannot be decrypted
+  */
+ public function get(string $npt, string $page){
+
+     $page = $page[0];
+     $explode = explode(".", $npt, 2);
+
+     if(count($explode) !== 2){
+
+         throw new Exception("Malformed nextPageToken!");
+     }
+
+     $apcu = $page . "." . $explode[0];
+
+     $payload = apcu_fetch($apcu);
+
+     if(
+         !is_array($payload) ||
+         count($payload) < 3
+     ){
+
+         throw new Exception("The next page token is invalid or has expired!");
+     }
+
+     // restore the "=" padding stripped by store(). str_pad() takes the
+     // target length, not the number of characters to add, so the amount
+     // of padding is computed explicitly
+     $key = strtr($explode[1], '-_', '+/');
+     $key = base64_decode($key . str_repeat("=", (4 - strlen($key) % 4) % 4));
+
+     if(
+         $key === false ||
+         strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
+     ){
+
+         // sodium throws on a key of the wrong size; report it like
+         // any other bad token instead
+         throw new Exception("The next page token is invalid or has expired!");
+     }
+
+     // decrypt data
+     $plaintext =
+         sodium_crypto_secretbox_open(
+             $payload[2], // data
+             $payload[0], // nonce
+             $key
+         );
+
+     if($plaintext === false){
+
+         throw new Exception("The next page token is invalid or has expired!");
+     }
+
+     // decompress data
+     $data = gzinflate($plaintext);
+
+     if($data === false){
+
+         throw new Exception("The next page token is invalid or has expired!");
+     }
+
+     // remove the key after using successfully
+     apcu_delete($apcu);
+
+     return [
+         $data, // data
+         $payload[1] // proxy
+     ];
+ }
+}
diff --git a/lib/bingcache-todo-fix.php b/lib/bingcache-todo-fix.php
new file mode 100644
index 0000000..a4acb5b
--- /dev/null
+++ b/lib/bingcache-todo-fix.php
@@ -0,0 +1,144 @@
+<?php
+
+// https://www.bing.com/search?q=url%3Ahttps%3A%2F%2Flolcat.ca
+// https://cc.bingj.com/cache.aspx?q=url%3ahttps%3a%2f%2flolcat.ca&d=4769685974291356&mkt=en-CA&setlang=en-US&w=tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-
+// <div class="b_attribution" u="0N|5119|4769685974291356|tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-" tabindex="0">
+
+// Handle the request: all of the work happens in the bingcache constructor.
+new bingcache();
+
+class bingcache{
+
+ /**
+  * Looks up the Bing cache entry for the URL given in the "s" query
+  * parameter and prints the cache location.
+  *
+  * Any failure is reported through do404(), which ends the script.
+  *
+  * NOTE(review): print_r() and the echo of the "Location:" line look like
+  * debug output (the header() call is commented out and the file name is
+  * marked todo-fix); they are kept as found.
+  */
+ public function __construct(){
+
+     if(
+         !isset($_GET["s"]) ||
+         !is_string($_GET["s"]) ||
+         $this->validate_url($_GET["s"]) === false
+     ){
+
+         $this->do404("Please provide a valid URL.");
+     }
+
+     $url = $_GET["s"];
+
+     $curlproc = curl_init();
+
+     curl_setopt(
+         $curlproc,
+         CURLOPT_URL,
+         "https://www.bing.com/search?q=url%3A" .
+         urlencode($url)
+     );
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt(
+         $curlproc,
+         CURLOPT_HTTPHEADER,
+         ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"]
+     );
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         $this->do404("Failed to connect to bing servers. Please try again later.");
+     }
+
+     curl_close($curlproc);
+
+     // [^"]* keeps the capture inside the u="" attribute even when
+     // several attribution blocks share one line
+     $found =
+         preg_match(
+             '/<div class="b_attribution" u="([^"]*)" tabindex="0">/',
+             $data,
+             $keys
+         );
+
+     print_r($keys);
+
+     if($found !== 1){
+
+         $this->do404("Bing has not archived this URL.");
+     }
+
+     // u="0N|5119|<d>|<w>": the last two fields identify the cached copy
+     $keys = explode("|", $keys[1]);
+     $count = count($keys);
+
+     if($count < 2){
+
+         $this->do404("Bing has not archived this URL.");
+     }
+
+     //header("Location: https://cc.bingj.com/cache.aspx?d=" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
+     echo("Location: https://cc.bingj.com/cache.aspx?d=" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
+ }
+
+ /**
+  * Prints the error template with the given message and ends the script.
+  *
+  * @param string $text Message shown on the error page
+  */
+ public function do404($text){
+
+     include "lib/frontend.php";
+
+     $page =
+         (new frontend())->load(
+             "error.html",
+             [
+                 "title" => "Shit",
+                 "text" => $text
+             ]
+         );
+
+     echo $page;
+
+     die();
+ }
+
+ /**
+  * Checks that a URL is http(s), well formed, and points at an address
+  * outside the private and reserved IP ranges.
+  *
+  * @param string $url URL to check
+  * @return string|false The literal or resolved IP address when the URL
+  *                      is acceptable, false otherwise
+  */
+ public function validate_url($url){
+
+     $parts = parse_url($url);
+
+     // scheme and host are mandatory, and only http(s) is accepted
+     if(!isset($parts["scheme"])){
+
+         return false;
+     }
+
+     if(!in_array($parts["scheme"], ["http", "https"], true)){
+
+         return false;
+     }
+
+     if(!isset($parts["host"])){
+
+         return false;
+     }
+
+     // reject anything filter_var does not accept as a URL
+     if(!filter_var($url, FILTER_VALIDATE_URL)){
+
+         return false;
+     }
+
+     // strip the brackets of ipv6 literals
+     $address = str_replace(["[", "]"], "", $parts["host"]);
+
+     if(!filter_var($address, FILTER_VALIDATE_IP)){
+
+         // not an IP literal: resolve the domain instead
+         $address = gethostbyname($parts["host"] . ".");
+     }
+
+     // refuse private and reserved ranges (localhost, LAN, ...)
+     return
+         filter_var(
+             $address,
+             FILTER_VALIDATE_IP,
+             FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
+         );
+ }
+}
diff --git a/lib/bot_protection.php b/lib/bot_protection.php
new file mode 100644
index 0000000..e3d51a8
--- /dev/null
+++ b/lib/bot_protection.php
@@ -0,0 +1,281 @@
+<?php
+
+class bot_protection{
+
+ /**
+  * Gatekeeper run before a search is served.
+  *
+  * Flow:
+  *  1. If config::BOT_PROTECTION is not 1, count the request, print the
+  *     page header (when $output is true) and return.
+  *  2. If the "pass" cookie names a live APCu counter that is still below
+  *     the search limit, do the same.
+  *  3. If $output is false, answer with a 401 JSON error and stop.
+  *  4. Otherwise parse the text/plain form body. A correct captcha answer
+  *     issues a new "pass" cookie and returns; anything else prints a
+  *     fresh captcha and stops the script.
+  *
+  * @param frontend $frontend Template renderer
+  * @param array $get Request parameters, forwarded to loadheader()
+  * @param array $filters Filter definitions, forwarded to loadheader()
+  * @param string $page Page name, forwarded to loadheader()
+  * @param bool $output true when the page header should be printed;
+  *                     false makes failures a JSON 401 response
+  */
+ public function __construct($frontend, $get, $filters, $page, $output){
+
+     // check if we want captcha
+     if(config::BOT_PROTECTION !== 1){
+
+         apcu_inc("real_requests");
+         if($output === true){
+             $frontend->loadheader(
+                 $get,
+                 $filters,
+                 $page
+             );
+         }
+         return;
+     }
+
+     /*
+         Validate cookie, if it exists
+     */
+     if(
+         isset($_COOKIE["pass"]) &&
+         is_string($_COOKIE["pass"]) && // pass[]=x would reach preg_match as an array
+         // check if key is not malformed
+         preg_match(
+             '/^k[0-9]+\.[A-Za-z0-9_]{20}$/',
+             $_COOKIE["pass"]
+         ) &&
+         // does key exist
+         apcu_exists($_COOKIE["pass"])
+     ){
+
+         // exists, increment counter
+         $inc = apcu_inc($_COOKIE["pass"]);
+
+         // we start counting from 1
+         // when it has been incremented to 102, it has reached
+         // 100 reqs
+         if($inc >= config::MAX_SEARCHES + 2){
+
+             // reached limit, delete and give captcha
+             apcu_delete($_COOKIE["pass"]);
+         }else{
+
+             // the cookie is OK! dont die() and give results
+             apcu_inc("real_requests");
+
+             if($output === true){
+                 $frontend->loadheader(
+                     $get,
+                     $filters,
+                     $page
+                 );
+             }
+             return;
+         }
+     }
+
+     if($output === false){
+
+         http_response_code(401); // unauthorized
+         echo json_encode([
+             "status" => "The \"pass\" token in your cookies is missing or has expired!!"
+         ]);
+         die();
+     }
+
+     /*
+         Validate form data
+     */
+     $lines =
+         explode(
+             "\r\n",
+             file_get_contents("php://input")
+         );
+
+     $invalid = false;
+     $answers = [];
+     $key = false;
+     $error = "";
+
+     foreach($lines as $line){
+
+         $line = explode("=", $line, 2);
+
+         if(count($line) !== 2){
+
+             $invalid = true;
+             break;
+         }
+
+         if(
+             $line[1] != "on" ||
+             !preg_match(
+                 '/^c\[([0-9]+)\]$/',
+                 $line[0],
+                 $regex
+             )
+         ){
+
+             // not a checkbox: check if its the v key. Either way this
+             // ends the parsing, the v field is the last one of the form
+             if(
+                 $line[0] == "v" &&
+                 preg_match(
+                     '/^c[0-9]+\.[A-Za-z0-9_]{20}$/',
+                     $line[1]
+                 )
+             ){
+
+                 // a captcha can only be answered once
+                 $key = apcu_fetch($line[1]);
+                 apcu_delete($line[1]);
+             }
+             break;
+         }
+
+         $regex = (int)$regex[1];
+
+         if(
+             $regex >= 16 ||
+             $regex <= -1
+         ){
+
+             $invalid = true;
+             break;
+         }
+
+         $answers[] = $regex;
+     }
+
+     if(
+         !$invalid &&
+         is_array($key) // has captcha been gen'd?
+     ){
+
+         // a cell may only count once: without this, repeating a single
+         // correct cell count($key) times would pass the check below
+         $answers = array_unique($answers);
+
+         $check = count($key);
+
+         // validate answer
+         // NOTE(review): loose comparison is kept because the type of
+         // the values stored by the captcha generator is not visible here
+         foreach($answers as $answer){
+
+             if(in_array($answer, $key)){
+
+                 $check--;
+             }else{
+
+                 $check = -1;
+                 break;
+             }
+         }
+
+         if($check === 0){
+
+             // we passed the captcha
+             // set cookie
+             $inc = apcu_inc("cookie");
+
+             $key = "k" . $inc . "." . $this->randomchars();
+
+             // creates the per-cookie request counter, valid for 24 hours
+             apcu_inc($key, 1, $stupid, 86400);
+
+             apcu_inc("real_requests");
+
+             setcookie(
+                 "pass",
+                 $key,
+                 [
+                     "expires" => time() + 86400, // expires in 24 hours
+                     "samesite" => "Lax",
+                     "path" => "/"
+                 ]
+             );
+
+             $frontend->loadheader(
+                 $get,
+                 $filters,
+                 $page
+             );
+             return;
+
+         }else{
+
+             $error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>";
+         }
+     }
+
+     /*
+         Generate a new captcha
+     */
+     $key = "c" . apcu_inc("captcha_gen", 1) . "." . $this->randomchars();
+
+     // one checkbox + label per cell of the 16 cell grid
+     $checkboxes = "";
+
+     for($i=0; $i<16; $i++){
+
+         $checkboxes .=
+             '<input type="checkbox" name="c[' . $i . ']" id="c' . $i . '">' .
+             '<label for="c' . $i . '"></label>';
+     }
+
+     $payload = [
+         "timetaken" => microtime(true),
+         "class" => "",
+         "right-left" => "",
+         "right-right" => "",
+         "left" =>
+             '<div class="infobox">' .
+                 '<h1>IQ test</h1>' .
+                 'IQ test has been enabled due to bot abuse on the network.<br>' .
+                 'Solving this IQ test will let you make 100 searches today. I will add an invite system to bypass this soon...' .
+                 $error .
+                 '<form method="POST" enctype="text/plain" autocomplete="off">' .
+                     '<div class="captcha-wrapper">' .
+                         '<div class="captcha">' .
+                             '<img src="captcha?v=' . $key . '" alt="Captcha image">' .
+                             '<div class="captcha-controls">' .
+                                 $checkboxes .
+                             '</div>' .
+                         '</div>' .
+                     '</div>' .
+                     '<input type="hidden" name="v" value="' . $key . '">' .
+                     '<input type="submit" value="Check IQ" class="captcha-submit">' .
+                 '</form>' .
+             '</div>'
+     ];
+
+     $frontend->loadheader(
+         $get,
+         $filters,
+         $page
+     );
+
+     echo $frontend->load("search.html", $payload);
+     die();
+ }
+
+ /**
+  * Builds a random 20 character string out of A-Z, a-z, 0-9 and "_",
+  * using random_int() for every character.
+  *
+  * @return string
+  */
+ private function randomchars(){
+
+     $alphabet =
+         "ABCDEFGHIJKLMNOPQRSTUVWXYZ" .
+         "abcdefghijklmnopqrstuvwxyz" .
+         "0123456789" .
+         "_";
+
+     $last = strlen($alphabet) - 1;
+
+     $token = "";
+
+     while(strlen($token) < 20){
+
+         $token .= $alphabet[random_int(0, $last)];
+     }
+
+     return $token;
+ }
+}
diff --git a/lib/curlproxy.php b/lib/curlproxy.php
new file mode 100644
index 0000000..313ab01
--- /dev/null
+++ b/lib/curlproxy.php
@@ -0,0 +1,660 @@
+<?php
+
+class proxy{
+
+ public const req_web = 0;
+ public const req_image = 1;
+
+ /**
+  * @param bool $cache When false, clientcache() sends no caching header
+  *                    and never answers with 304
+  */
+ public function __construct($cache = true){
+
+     $this->cache = $cache;
+ }
+
+ /**
+  * Answers with HTTP 404 and the placeholder image, then ends the script.
+  */
+ public function do404(){
+
+     $placeholder = "lib/img404.png";
+
+     http_response_code(404);
+     header("Content-Type: image/png");
+
+     $file = fopen($placeholder, "r");
+     $image = fread($file, filesize($placeholder));
+     fclose($file);
+
+     echo $image;
+
+     die();
+ }
+
+ /**
+  * Resolves a redirect/link target against the URL it was found on.
+  *
+  *  - a target that passes validateurl() is returned untouched
+  *  - "//host/path" is completed with "https:"
+  *  - "/path" is resolved against the root of the base URL's host
+  *  - anything else is resolved against the directory of the base URL
+  *
+  * @param string $path Target, absolute or relative
+  * @param string $relative Absolute URL the target is relative to
+  * @return string Absolute URL
+  */
+ public function getabsoluteurl($path, $relative){
+
+     if($this->validateurl($path)){
+
+         return $path;
+     }
+
+     if(substr($path, 0, 2) == "//"){
+
+         return "https:" . $path;
+     }
+
+     $relative = parse_url($relative);
+     $url = $relative["scheme"] . "://";
+
+     if(
+         isset($relative["user"]) &&
+         isset($relative["pass"])
+     ){
+
+         $url .= $relative["user"] . ":" . $relative["pass"] . "@";
+     }
+
+     $url .= $relative["host"];
+
+     if(isset($relative["port"])){
+
+         // keep non-default ports of the base URL
+         $url .= ":" . $relative["port"];
+     }
+
+     if(
+         strlen($path) !== 0 &&
+         $path[0] === "/"
+     ){
+
+         // root-relative target: the directory of the base URL
+         // must not be prepended
+         return $url . $path;
+     }
+
+     if(isset($relative["path"])){
+
+         // drop the last segment (the "file") of the base path
+         $segments = explode("/", $relative["path"]);
+         array_pop($segments);
+
+         $url .= implode("/", $segments);
+     }
+
+     if(strlen($path) !== 0){
+
+         $url .= "/";
+     }
+
+     return $url . $path;
+ }
+
+ /**
+  * Checks that a URL is http(s) and points at an address outside the
+  * private and reserved IP ranges.
+  *
+  * Host names are resolved with gethostbyname() before the range check.
+  *
+  * @param string $url URL to check
+  * @return bool true when the URL may be fetched
+  */
+ public function validateurl($url){
+
+     $parts = parse_url($url);
+
+     // scheme and host are mandatory, and only http(s) is accepted
+     $scheme_ok =
+         isset($parts["scheme"]) &&
+         (
+             $parts["scheme"] == "http" ||
+             $parts["scheme"] == "https"
+         );
+
+     if(
+         !$scheme_ok ||
+         !isset($parts["host"])
+     ){
+
+         return false;
+     }
+
+     $host = $parts["host"];
+
+     // strip the brackets of ipv6 literals
+     $address = str_replace(["[", "]"], "", $host);
+
+     if(!filter_var($address, FILTER_VALIDATE_IP)){
+
+         // not an IP literal: resolve the domain instead
+         $address = gethostbyname($host . ".");
+     }
+
+     // refuse private and reserved ranges (localhost, LAN, ...)
+     $public =
+         filter_var(
+             $address,
+             FILTER_VALIDATE_IP,
+             FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
+         );
+
+     return $public !== false;
+ }
+
+ /**
+  * Fetches a URL into memory, following redirects by hand so that every
+  * hop is checked with validateurl().
+  *
+  * @param string $url URL to fetch
+  * @param int $reqtype self::req_web or self::req_image; selects the
+  *                     request headers and the shape of the result
+  * @param bool $acceptallcodes When false, status codes above 300 that
+  *                             are not redirects raise an exception
+  * @param string|null $referer Referer for image requests; derived from
+  *                              $url (scheme + host) when null
+  * @param int $redirectcount Number of redirects followed so far
+  * @return array "http" (version, code), "headers" (lowercased names),
+  *               "body", plus "format" for image requests (image
+  *               subtype from Content-Type, or false)
+  * @throws Exception On invalid URLs, transfer errors, too many or broken
+  *                   redirects, error status codes, or html returned for
+  *                   an image request
+  */
+ public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){
+
+     if($redirectcount === 5){
+
+         throw new Exception("Too many redirects");
+     }
+
+     if($url == "https://i.imgur.com/removed.png"){
+
+         throw new Exception("Encountered imgur 404");
+     }
+
+     // sanitize URL
+     if($this->validateurl($url) === false){
+
+         throw new Exception("Invalid URL");
+     }
+
+     $this->clientcache();
+
+     $curl = curl_init();
+
+     curl_setopt($curl, CURLOPT_URL, $url);
+     curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curl, CURLOPT_HEADER, 1);
+
+     switch($reqtype){
+         case self::req_web:
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: " . config::USER_AGENT,
+                     "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Upgrade-Insecure-Requests: 1",
+                     "Sec-Fetch-Dest: document",
+                     "Sec-Fetch-Mode: navigate",
+                     "Sec-Fetch-Site: none",
+                     "Sec-Fetch-User: ?1"
+                 ]
+             );
+             break;
+
+         case self::req_image:
+
+             if($referer === null){
+
+                 // scheme://host of the image itself
+                 $referer = explode("/", $url, 4);
+
+                 if(count($referer) === 4){
+
+                     // only drop the path; without this check a URL
+                     // that has no path would lose its host instead
+                     array_pop($referer);
+                 }
+
+                 $referer = implode("/", $referer);
+             }
+
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: " . config::USER_AGENT,
+                     "Accept: image/avif,image/webp,*/*",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+     }
+
+     curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curl, CURLOPT_TIMEOUT, 30);
+
+     // limit size of payloads
+     curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
+     curl_setopt($curl, CURLOPT_NOPROGRESS, false);
+     curl_setopt(
+         $curl,
+         CURLOPT_PROGRESSFUNCTION,
+         // the curl handle is passed as first argument; leaving it out
+         // shifts every value by one and makes $downloaded the expected
+         // total size instead of the bytes received so far
+         function($handle, $downloadsize, $downloaded, $uploadsize, $uploaded){
+
+             // abort the transfer once more than 100MB were received
+             return ($downloaded > 100000000) ? 1 : 0;
+         }
+     );
+
+     $body = curl_exec($curl);
+
+     if(curl_errno($curl)){
+
+         throw new Exception(curl_error($curl));
+     }
+
+     curl_close($curl);
+
+     // CURLOPT_HEADER prepends the response headers to the body:
+     // peel them off line by line
+     $headers = [];
+     $http = null;
+
+     while(true){
+
+         $header = explode("\n", $body, 2);
+         $body = $header[1];
+
+         if($http === null){
+
+             // http/1.1 200 ok
+             $header = explode("/", $header[0], 2);
+             $header = explode(" ", $header[1], 3);
+
+             $http = [
+                 "version" => (float)$header[0],
+                 "code" => (int)$header[1]
+             ];
+
+             continue;
+         }
+
+         if(trim($header[0]) == ""){
+
+             // reached end of headers
+             break;
+         }
+
+         $header = explode(":", $header[0], 2);
+
+         // malformed headers
+         if(count($header) !== 2){ continue; }
+
+         $headers[strtolower(trim($header[0]))] = trim($header[1]);
+     }
+
+     // check http code
+     if(
+         $http["code"] >= 300 &&
+         $http["code"] <= 309
+     ){
+
+         // redirect
+         if(!isset($headers["location"])){
+
+             throw new Exception("Broken redirect");
+         }
+
+         $redirectcount++;
+
+         return $this->get($this->getabsoluteurl($headers["location"], $url), $reqtype, $acceptallcodes, $referer, $redirectcount);
+     }else{
+         if(
+             $acceptallcodes === false &&
+             $http["code"] > 300
+         ){
+
+             throw new Exception("Remote server returned an error code! ({$http["code"]})");
+         }
+     }
+
+     // check if data is okay
+     switch($reqtype){
+
+         case self::req_image:
+
+             $format = false;
+
+             if(isset($headers["content-type"])){
+
+                 if(stripos($headers["content-type"], "text/html") !== false){
+
+                     throw new Exception("Server returned html");
+                 }
+
+                 if(
+                     preg_match(
+                         '/image\/([^ ]+)/i',
+                         $headers["content-type"],
+                         $match
+                     )
+                 ){
+
+                     $format = strtolower($match[1]);
+
+                     // image/x-icon => icon
+                     if(substr(strtolower($format), 0, 2) == "x-"){
+
+                         $format = substr($format, 2);
+                     }
+                 }
+             }
+
+             return [
+                 "http" => $http,
+                 "format" => $format,
+                 "headers" => $headers,
+                 "body" => $body
+             ];
+
+         default:
+
+             return [
+                 "http" => $http,
+                 "headers" => $headers,
+                 "body" => $body
+             ];
+     }
+ }
+
+ /**
+  * Streams a remote image straight to the client.
+  *
+  * @param string $url Image URL
+  * @param string|null $referer Referer to send; derived from $url when null
+  * @throws Exception See stream()
+  */
+ public function stream_linear_image($url, $referer = null){
+
+     $format = "image";
+
+     $this->stream($url, $referer, $format);
+ }
+
+ /**
+  * Streams a remote audio file straight to the client.
+  *
+  * @param string $url Audio URL
+  * @param string|null $referer Referer to send; derived from $url when null
+  * @throws Exception See stream()
+  */
+ public function stream_linear_audio($url, $referer = null){
+
+     $format = "audio";
+
+     $this->stream($url, $referer, $format);
+ }
+
+ /**
+  * Streams a remote resource to the client while it downloads.
+  *
+  * The response headers of the final (HTTP 200) response are inspected
+  * first: the Content-Type must contain $format (or be an octet-stream),
+  * then Content-Type, Content-Length and Content-Disposition are sent to
+  * the client. Body chunks are echoed as they arrive.
+  *
+  * NOTE(review): redirects are followed by curl itself, so unlike get()
+  * the redirect targets are not re-checked with validateurl(). Only the
+  * allowed protocols are restricted here.
+  *
+  * @param string $url Resource URL
+  * @param string|null $referer Referer to send; derived from $url when null
+  * @param string $format "image" or "audio"
+  * @throws Exception On invalid URLs, transfer errors, non-200 responses
+  *                   or an unexpected Content-Type
+  */
+ private function stream($url, $referer, $format){
+
+     $this->clientcache();
+
+     $this->url = $url;
+     $this->format = $format;
+
+     // sanitize URL
+     if($this->validateurl($url) === false){
+
+         throw new Exception("Invalid URL");
+     }
+
+     $curl = curl_init();
+
+     // set headers
+     if($referer === null){
+
+         // scheme://host of the resource itself
+         $referer = explode("/", $url, 4);
+
+         if(count($referer) === 4){
+
+             // only drop the path; without this check a URL that has
+             // no path would lose its host instead
+             array_pop($referer);
+         }
+
+         $referer = implode("/", $referer);
+     }
+
+     switch($format){
+
+         case "image":
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: " . config::USER_AGENT,
+                     "Accept: image/avif,image/webp,*/*",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate, br",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+
+         case "audio":
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: " . config::USER_AGENT,
+                     "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate, br",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+     }
+
+     // follow redirects, http(s) only
+     curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+     curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
+     curl_setopt($curl, CURLOPT_AUTOREFERER, true);
+     curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
+
+     // set url
+     curl_setopt($curl, CURLOPT_URL, $url);
+     curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
+
+     // timeouts + ssl verification
+     curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
+     curl_setopt($curl, CURLOPT_TIMEOUT, 30);
+
+     curl_setopt(
+         $curl,
+         CURLOPT_WRITEFUNCTION,
+         function($c, $data){
+
+             if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
+
+                 throw new Exception("Serber returned a non-200 code");
+             }
+
+             echo $data;
+             return strlen($data);
+         }
+     );
+
+     $this->empty_header = false;
+     $this->cont = false;
+     $this->headers_tmp = [];
+     $this->headers = [];
+     curl_setopt(
+         $curl,
+         CURLOPT_HEADERFUNCTION,
+         // called once per header line, including the status line and
+         // the empty line that ends each response's header block
+         function($c, $header){
+
+             $head = trim($header);
+             $len = strlen($head);
+
+             if($len === 0){
+
+                 // end of a header block
+                 $this->empty_header = true;
+                 $this->headers_tmp = [];
+             }else{
+
+                 if(count($this->headers_tmp) === 0){
+
+                     // first line of a new response: forget the headers
+                     // of the previous hop (eg. a redirect) so they
+                     // cannot leak into the final response
+                     $this->headers = [];
+                 }
+
+                 $this->empty_header = false;
+                 $this->headers_tmp[] = $head;
+             }
+
+             foreach($this->headers_tmp as $h){
+
+                 // parse headers
+                 $h = explode(":", $h, 2);
+
+                 if(count($h) !== 2){
+
+                     // no colon: status line. Remember whether this
+                     // response is the final one
+                     if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
+
+                         // not HTTP 200, probably a redirect
+                         $this->cont = false;
+                     }else{
+
+                         $this->cont = true;
+                     }
+
+                     continue;
+                 }
+
+                 $this->headers[strtolower(trim($h[0]))] = trim($h[1]);
+             }
+
+             if(
+                 $this->cont &&
+                 $this->empty_header
+             ){
+
+                 // all headers of the HTTP 200 response were received
+
+                 // get content type
+                 if(isset($this->headers["content-type"])){
+
+                     $octet_check = stripos($this->headers["content-type"], "octet-stream");
+
+                     if(
+                         stripos($this->headers["content-type"], $this->format) === false &&
+                         $octet_check === false
+                     ){
+
+                         throw new Exception("Resource reported invalid Content-Type");
+                     }
+
+                 }else{
+
+                     throw new Exception("Resource is not an {$this->format} (no Content-Type)");
+                 }
+
+                 $filetype = explode("/", $this->headers["content-type"]);
+
+                 if(!isset($filetype[1])){
+
+                     throw new Exception("Malformed Content-Type header");
+                 }
+
+                 if($octet_check !== false){
+
+                     $filetype[1] = "jpeg";
+                 }
+
+                 header("Content-Type: {$this->format}/{$filetype[1]}");
+
+                 // give payload size, unless the body arrives compressed:
+                 // curl decodes it, so the upstream length would not
+                 // match what is echoed to the client
+                 if(
+                     isset($this->headers["content-length"]) &&
+                     !isset($this->headers["content-encoding"])
+                 ){
+
+                     header("Content-Length: {$this->headers["content-length"]}");
+                 }
+
+                 // give filename
+                 $this->getfilenameheader($this->headers, $this->url, $filetype[1]);
+             }
+
+             return strlen($header);
+         }
+     );
+
+     curl_exec($curl);
+
+     if(curl_errno($curl)){
+
+         throw new Exception(curl_error($curl));
+     }
+
+     curl_close($curl);
+ }
+
+ /**
+  * Sends a Content-Disposition header that suggests a file name.
+  *
+  * The name is taken, in order of preference, from the upstream
+  * Content-Disposition header, the last path segment of the URL
+  * (extension removed), or the host name. "." . $filetype is appended
+  * in every case.
+  *
+  * @param array $headers Upstream response headers, lowercased names
+  * @param string $url URL the resource was fetched from
+  * @param string $filetype Extension to append
+  * @return void
+  */
+ public function getfilenameheader($headers, $url, $filetype = "jpg"){
+
+     // characters that would end or escape the quoted file name
+     $unsafe = ["\"", "\\"];
+
+     // get filename from content-disposition header
+     if(isset($headers["content-disposition"])){
+
+         preg_match(
+             '/filename=([^;]+)/',
+             $headers["content-disposition"],
+             $filename
+         );
+
+         if(isset($filename[1])){
+
+             $filename = str_replace($unsafe, "", trim($filename[1], "\"'"));
+
+             header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\"");
+             return;
+         }
+     }
+
+     // get filename from URL
+     $filename = parse_url($url, PHP_URL_PATH);
+
+     if(
+         $filename === null ||
+         $filename === false
+     ){
+
+         // everything failed! rename file to domain name
+         $host = str_replace($unsafe, "", (string)parse_url($url, PHP_URL_HOST));
+
+         header("Content-Disposition: inline; filename=\"" . $host . "." . $filetype . "\"");
+         return;
+     }
+
+     // remove extension from filename
+     $filename =
+         explode(
+             ".",
+             basename($filename)
+         );
+
+     if(count($filename) > 1){
+         array_pop($filename);
+     }
+
+     $filename = str_replace($unsafe, "", implode(".", $filename));
+
+     header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\"");
+     return;
+ }
+
+ /**
+  * Works out the image format of a payload returned by get() and creates
+  * the Imagick object that will process it.
+  *
+  * The format is sniffed from the body with finfo; when that fails the
+  * format taken from the Content-Type header is used instead.
+  *
+  * NOTE(review): favicon404() is not declared in the part of this class
+  * visible here; presumably another class provides it - confirm.
+  *
+  * @param array $payload Result of get(): uses "body" and "format"
+  * @param Imagick $imagick Receives a new Imagick instance
+  * @return string|false Format name known to ImageMagick, or false
+  */
+ public function getimageformat($payload, &$imagick){
+
+     $detector = new finfo(FILEINFO_MIME_TYPE);
+     $mime = $detector->buffer($payload["body"]);
+
+     if($mime !== false){
+
+         $format = $mime;
+         $pieces = explode("/", $mime, 2);
+
+         if($pieces[0] == "image"){
+
+             // image/x-icon => icon
+             $format = strtolower($pieces[1]);
+
+             if(substr($format, 0, 2) == "x-"){
+
+                 $format = substr($format, 2);
+             }
+         }
+     }else{
+
+         // sniffing failed, rely on what the server told us
+         if($payload["format"] === false){
+
+             header("X-Error: Could not parse format");
+             $this->favicon404();
+         }
+
+         $format = $payload["format"];
+     }
+
+     // rename a few formats before the lookup below
+     $aliases = [
+         "tiff" => "gif",
+         "vnd.microsoft.icon" => "ico",
+         "icon" => "ico",
+         "svg+xml" => "svg"
+     ];
+
+     if(
+         is_string($format) &&
+         isset($aliases[$format])
+     ){
+
+         $format = $aliases[$format];
+     }
+
+     $imagick = new Imagick();
+
+     $supported = array_map("strtolower", $imagick->queryFormats());
+
+     if(!in_array($format, $supported)){
+
+         // format could not be found, but imagemagick can
+         // sometimes detect it?
+         $format = false;
+     }
+
+     return $format;
+ }
+
+ /**
+  * Lets browsers reuse what they already have: sends a fixed
+  * Last-Modified date, and answers any conditional request with
+  * 304 Not Modified before ending the script.
+  *
+  * Does nothing when caching was disabled in the constructor.
+  */
+ public function clientcache(){
+
+     if($this->cache === false){
+
+         return;
+     }
+
+     header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");
+
+     $request = getallheaders();
+
+     $conditional =
+         isset($request["If-Modified-Since"]) ||
+         isset($request["If-Unmodified-Since"]);
+
+     if(!$conditional){
+
+         return;
+     }
+
+     http_response_code(304); // 304: Not Modified
+     die();
+ }
+}
diff --git a/lib/favicon404.png b/lib/favicon404.png
new file mode 100644
index 0000000..fa8f4d1
--- /dev/null
+++ b/lib/favicon404.png
Binary files differ
diff --git a/lib/frontend.php b/lib/frontend.php
new file mode 100644
index 0000000..9f819ba
--- /dev/null
+++ b/lib/frontend.php
@@ -0,0 +1,1356 @@
+<?php
+
+class frontend{
+
+ /**
+  * Render a file from template/ by substituting {%key%} placeholders.
+  *
+  * server_name, version, style and ac are always filled in here; a
+  * non-null "timetaken" start time is turned into a "Took Ns" box.
+  * Every template line is trimmed and the lines are joined without
+  * newlines before substitution.
+  *
+  * @param string $template File name inside template/
+  * @param array $replacements Placeholder name => replacement HTML
+  * @return string Rendered HTML
+  * @throws Exception When the template cannot be read
+  */
+ public function load($template, $replacements = []){
+
+     $replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
+     $replacements["version"] = config::VERSION;
+
+     $theme = config::DEFAULT_THEME;
+
+     if(
+         isset($_COOKIE["theme"]) &&
+         is_string($_COOKIE["theme"])
+     ){
+
+         // drop path characters one by one so the cookie cannot
+         // escape static/themes/ (the needles must be separate
+         // array items, not one concatenated string)
+         $cookie_theme =
+             str_replace(
+                 ["/", "\\", ".", "\0"],
+                 "",
+                 $_COOKIE["theme"]
+             );
+
+         if(
+             $cookie_theme == "Dark" ||
+             is_file("static/themes/" . $cookie_theme . ".css")
+         ){
+
+             $theme = $cookie_theme;
+         }
+     }
+
+     if($theme != "Dark"){
+
+         $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . rawurlencode($theme) . '.css?v' . config::VERSION . '">';
+     }else{
+
+         $replacements["style"] = "";
+     }
+
+     if(
+         isset($_COOKIE["scraper_ac"]) &&
+         is_string($_COOKIE["scraper_ac"])
+     ){
+
+         $replacements["ac"] = '?ac=' . htmlspecialchars($_COOKIE["scraper_ac"]);
+     }else{
+
+         $replacements["ac"] = '';
+     }
+
+     if(
+         isset($replacements["timetaken"]) &&
+         $replacements["timetaken"] !== null
+     ){
+
+         $replacements["timetaken"] = '<div class="timetaken">Took ' . number_format(microtime(true) - $replacements["timetaken"], 2) . 's</div>';
+     }
+
+     // file_get_contents also copes with an empty template, which
+     // fread() with a zero length does not
+     $data = file_get_contents("template/{$template}");
+
+     if($data === false){
+
+         throw new Exception("Failed to read template {$template}");
+     }
+
+     $html = implode("", array_map("trim", explode("\n", $data)));
+
+     foreach($replacements as $key => $value){
+
+         $html =
+             str_replace(
+                 "{%{$key}%}",
+                 (string)$value, // null replacements become ""
+                 $html
+             );
+     }
+
+     return trim($html);
+ }
+
+ /**
+  * Print the header of a search page, then stop the request with an
+  * error box when it looks automated.
+  *
+  * A request is blocked when it sends a header listed in
+  * config::FILTERED_HEADER_KEYS, fails the SSL cipher check, sends no
+  * user agent, or sends one matching config::HEADER_REGEX.
+  *
+  * @param array $get Sanitized parameters; "s" is the search string
+  * @param array $filters Filter definitions for generatehtmlfilters()
+  * @param string $page Current tab name
+  * @return void
+  */
+ public function loadheader(array $get, array $filters, string $page){
+
+     echo
+         $this->load("header.html", [
+             "title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
+             "description" => ucfirst($page) . ' search results for &quot;' . htmlspecialchars($get["s"]) . '&quot;',
+             "index" => "no",
+             "search" => htmlspecialchars($get["s"]),
+             "tabs" => $this->generatehtmltabs($page, $get["s"]),
+             "filters" => $this->generatehtmlfilters($filters, $get)
+         ]);
+
+     $user_agent = "";
+     $bad_header = false;
+
+     // block bots that present X-Forwarded-For, Via, etc
+     foreach(getallheaders() as $headerkey => $headervalue){
+
+         $headerkey = strtolower($headerkey);
+
+         if($headerkey == "user-agent"){
+
+             $user_agent = $headervalue;
+             continue;
+         }
+
+         if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){
+
+             $bad_header = true;
+             break;
+         }
+     }
+
+     // SSL check. The $_SERVER key is the uppercase "HTTPS"; a
+     // lowercase key is never set, which left this check dead.
+     // NOTE(review): the cipher is compared against the header key
+     // list - confirm that is the intended constant.
+     $bad_ssl =
+         isset($_SERVER["HTTPS"]) &&
+         $_SERVER["HTTPS"] == "on" &&
+         isset($_SERVER["SSL_CIPHER"]) &&
+         in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS);
+
+     if(
+         $bad_header === true ||
+         $bad_ssl === true ||
+         $user_agent == "" ||
+         // user agent check
+         preg_match(
+             config::HEADER_REGEX,
+             $user_agent
+         )
+     ){
+
+         // bot detected !!
+         apcu_inc("captcha_gen");
+
+         $this->drawerror(
+             "Tshh, blocked!",
+             'Your browser, IP or IP range has been blocked from this 4get instance. If this is an error, please <a href="/about">contact the administrator</a>.'
+         );
+         die();
+     }
+ }
+
+ /**
+  * Print an error box inside the search template and end the script.
+  *
+  * @param string $title Heading, escaped here
+  * @param string $error Body HTML, inserted as-is
+  * @param float|null $timetaken Start time; defaults to now
+  * @return void Never returns, the script dies
+  */
+ public function drawerror($title, $error, $timetaken = null){
+
+     $timetaken = $timetaken ?? microtime(true);
+
+     $infobox =
+         '<div class="infobox">' .
+         '<h1>' . htmlspecialchars($title) . '</h1>' .
+         $error .
+         '</div>';
+
+     $page = [
+         "timetaken" => $timetaken,
+         "class" => "",
+         "right-left" => "",
+         "right-right" => "",
+         "left" => $infobox
+     ];
+
+     echo $this->load("search.html", $page);
+     die();
+ }
+
+ /**
+  * Show a scraper failure together with a few suggestions, through
+  * drawerror().
+  *
+  * @param string $error Scraper error message, escaped here
+  * @param array $get Current parameters, forwarded to the instances link
+  * @param string $target Value of the "target" parameter of that link
+  * @param float|null $timetaken Start time; defaults to now
+  * @return void
+  */
+ public function drawscrapererror($error, $get, $target, $timetaken = null){
+
+     if($timetaken === null){
+
+         $timetaken = microtime(true);
+     }
+
+     // the link lands in an href attribute: encode the target as a
+     // query value and escape the whole URL, separators included
+     $instances_url =
+         "/instances?target=" . urlencode($target) .
+         "&" . $this->buildquery($get, false);
+
+     $this->drawerror(
+         "Shit",
+         'This scraper returned an error:' .
+         '<div class="code">' . htmlspecialchars($error) . '</div>' .
+         'Things you can try:' .
+         '<ul>' .
+             '<li>Use a different scraper</li>' .
+             '<li>Remove keywords that could cause errors</li>' .
+             '<li><a href="' . htmlspecialchars($instances_url) . '">Try your search on another 4get instance</a></li>' .
+         '</ul><br>' .
+         'If the error persists, please <a href="/about">contact the administrator</a>.',
+         $timetaken
+     );
+ }
+
+ /**
+  * Build the HTML of one text result: URL row, optional thumbnail,
+  * title, green text, description, sublinks and info table.
+  *
+  * @param array $site Result data; reads url, thumb, title,
+  *                    description and optionally type, sublink, table
+  * @param string|null $greentext Extra line under the title
+  * @param string|null $duration Label drawn over the thumbnail
+  * @param string $keywords Search string used for highlighting
+  * @param bool $tabindex false takes the main link out of tab order
+  * @param string|null $customhtml Raw HTML appended inside the link
+  * @return string
+  */
+ public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){
+
+     $payload =
+         '<div class="text-result">';
+
+     // add favicon, link and archive links
+     $payload .= $this->drawlink($site["url"]);
+
+     /*
+         Draw title + description + filetype
+     */
+     $payload .=
+         '<a href="' . htmlspecialchars($site["url"]) . '" class="hover" rel="noreferrer nofollow"';
+
+     if($tabindex === false){
+
+         $payload .= ' tabindex="-1"';
+     }
+
+     $payload .= '>';
+
+     if($site["thumb"]["url"] !== null){
+
+         $payload .=
+             '<div class="thumb-wrap';
+
+         // 16:9 and anything unrecognised are drawn as landscape, so
+         // $size is always defined
+         $size = "landscape";
+
+         switch($site["thumb"]["ratio"]){
+
+             case "9:16":
+                 $payload .= " portrait";
+                 $size = "portrait";
+                 break;
+
+             case "1:1":
+                 $payload .= " square";
+                 $size = "square";
+                 break;
+         }
+
+         $payload .=
+             '">' .
+             '<img class="thumb" src="' . $this->htmlimage($site["thumb"]["url"], $size) . '" alt="thumb">';
+
+         if($duration !== null){
+
+             $payload .=
+                 '<div class="duration">' .
+                 htmlspecialchars($duration) .
+                 '</div>';
+         }
+
+         $payload .=
+             '</div>';
+     }
+
+     $payload .=
+         '<div class="title">';
+
+     if(
+         isset($site["type"]) &&
+         $site["type"] != "web"
+     ){
+
+         $payload .= '<div class="type">' . htmlspecialchars(strtoupper($site["type"])) . '</div>';
+     }
+
+     $payload .=
+         $this->highlighttext($keywords, $site["title"]) .
+         '</div>';
+
+     if($greentext !== null){
+
+         $payload .=
+             '<div class="greentext">' .
+             htmlspecialchars($greentext) .
+             '</div>';
+     }
+
+     if($site["description"] !== null){
+
+         $payload .=
+             '<div class="description">' .
+             $this->highlighttext($keywords, $site["description"]) .
+             '</div>';
+     }
+
+     $payload .= $customhtml;
+
+     $payload .= '</a>';
+
+     /*
+         Sublinks
+     */
+     if(
+         isset($site["sublink"]) &&
+         !empty($site["sublink"])
+     ){
+
+         // shortest description first; the comparator has to return
+         // an integer, a boolean is deprecated
+         usort($site["sublink"], function($a, $b){
+
+             return
+                 strlen((string)($a["description"] ?? "")) <=>
+                 strlen((string)($b["description"] ?? ""));
+         });
+
+         $payload .=
+             '<div class="sublinks">' .
+             '<table>';
+
+         // two sublinks per row, a lone last one gets an empty cell
+         foreach(array_chunk($site["sublink"], 2) as $row){
+
+             $payload .= '<tr>';
+
+             foreach($row as $sublink){
+
+                 $payload .=
+                     '<td>' .
+                     '<a href="' . htmlspecialchars($sublink["url"]) . '" rel="noreferrer nofollow">' .
+                     '<div class="title">' .
+                     htmlspecialchars($sublink["title"]) .
+                     '</div>';
+
+                 if(!empty($sublink["date"])){
+
+                     $payload .=
+                         '<div class="greentext">' .
+                         date("jS M y @ g:ia", $sublink["date"]) .
+                         '</div>';
+                 }
+
+                 if(!empty($sublink["description"])){
+
+                     $payload .=
+                         '<div class="description">' .
+                         $this->highlighttext($keywords, $sublink["description"]) .
+                         '</div>';
+                 }
+
+                 $payload .= '</a></td>';
+             }
+
+             if(count($row) === 1){
+
+                 $payload .= '<td></td>';
+             }
+
+             $payload .= '</tr>';
+         }
+
+         $payload .= '</table></div>';
+     }
+
+     if(
+         isset($site["table"]) &&
+         !empty($site["table"])
+     ){
+
+         $payload .= '<table class="info-table">';
+
+         foreach($site["table"] as $title => $value){
+
+             $payload .=
+                 '<tr>' .
+                 '<td>' . htmlspecialchars($title) . '</td>' .
+                 '<td>' . htmlspecialchars($value) . '</td>' .
+                 '</tr>';
+         }
+
+         $payload .= '</table>';
+     }
+
+     return $payload . '</div>';
+ }
+
+ /**
+  * Escape $text for HTML and wrap whole-word, case-insensitive
+  * occurrences of the search keywords in <b> tags.
+  *
+  * The text is split on the keywords before it is escaped, so a
+  * keyword can never match inside an entity such as &amp; and
+  * corrupt it. Empty keywords are ignored.
+  *
+  * @param string $keywords Space separated search words
+  * @param string $text Raw, unescaped text
+  * @return string HTML-safe text with highlights
+  */
+ public function highlighttext($keywords, $text){
+
+     $text = (string)$text;
+     $regex = [];
+
+     foreach(explode(" ", (string)$keywords) as $word){
+
+         // an empty word would turn into \b\b, which matches at
+         // every word boundary
+         if($word === ""){
+
+             continue;
+         }
+
+         $regex[] = "\b" . preg_quote($word, "/") . "\b";
+     }
+
+     if(count($regex) === 0){
+
+         return htmlspecialchars($text);
+     }
+
+     // with one capture group around the pattern, odd indexes of the
+     // result hold the matched keywords
+     $parts =
+         preg_split(
+             "/(" . implode("|", $regex) . ")/i",
+             $text,
+             -1,
+             PREG_SPLIT_DELIM_CAPTURE
+         );
+
+     if($parts === false){
+
+         return htmlspecialchars($text);
+     }
+
+     $html = "";
+
+     foreach($parts as $index => $part){
+
+         $part = htmlspecialchars($part);
+
+         $html .= ($index % 2 === 1) ? '<b>' . $part . '</b>' : $part;
+     }
+
+     return $html;
+ }
+
+ /**
+  * Syntax-highlight a PHP snippet with highlight_string() and turn
+  * its inline colors into c-* CSS classes.
+  *
+  * The highlight.* ini colors are set to class names first, so the
+  * generated style attributes can be swapped for class attributes.
+  *
+  * @param string $text Code without the opening PHP tag
+  * @return string Highlighted HTML
+  */
+ function highlightcode($text){
+
+     // https://www.php.net/highlight_string
+     $settings = [
+         "highlight.comment" => "c-comment",
+         "highlight.default" => "c-default",
+         "highlight.html" => "c-default",
+         "highlight.keyword" => "c-keyword",
+         "highlight.string" => "c-string"
+     ];
+
+     foreach($settings as $setting => $class){
+
+         ini_set($setting, $class);
+     }
+
+     // an opening tag is prepended for the highlighter, then
+     // everything up to and including it is thrown away
+     $rendered = highlight_string("<?php " . $text, true);
+     $code = explode("&lt;?php", $rendered, 2)[1];
+
+     $code =
+         str_replace(
+             ["<br />", "&nbsp;", "<pre>", "</pre>", "</code>"],
+             ["\n", " ", "", "", ""],
+             $code
+         );
+
+     $code = trim(preg_replace('/<code [^>]+>/', "", $code));
+
+     // replace colors
+     $search = [];
+     $replace = [];
+
+     foreach(["c-comment", "c-default", "c-keyword", "c-string"] as $class){
+
+         $search[] = '<span style="color: ' . $class . '">';
+         $replace[] = '<span class="' . $class . '">';
+     }
+
+     return str_replace($search, $replace, $code);
+ }
+
+ /**
+  * Build the URL row of a result: favicon button, dropdown with
+  * archive links, and the link split into clickable path parts.
+  *
+  * Links to boards.4chan.org / boards.4channel.org additionally get
+  * the imageboard archives known for their board.
+  *
+  * @param string $link Absolute URL of the result
+  * @return string
+  */
+ public function drawlink($link){
+
+     /*
+         Add favicon
+     */
+     $host = parse_url($link);
+
+     if(!is_array($host)){
+
+         // parse_url() returns false for seriously malformed URLs
+         $host = [];
+     }
+
+     $hostname = $host["host"] ?? "";
+     $scheme = $host["scheme"] ?? "";
+
+     $esc =
+         explode(
+             ".",
+             $hostname,
+             2
+         );
+
+     if(
+         count($esc) === 2 &&
+         $esc[0] == "www"
+     ){
+
+         $esc = $esc[1];
+     }else{
+
+         $esc = $esc[0];
+     }
+
+     // two letter fallback shown when the favicon fails to load
+     $esc = substr($esc, 0, 2);
+
+     $urlencode = urlencode($link);
+
+     $payload =
+         '<div class="url">' .
+         '<button class="favicon" tabindex="-1">' .
+             '<img src="/favicon?s=' . htmlspecialchars($scheme . "://" . $hostname) . '" alt="' . htmlspecialchars($esc) . '">' .
+         '</button>' .
+         '<div class="favicon-dropdown">';
+
+     /*
+         Add archive links
+     */
+     if(
+         $hostname == "boards.4chan.org" ||
+         $hostname == "boards.4channel.org"
+     ){
+
+         // /pol/thread/417568063/post-shitty-memes-if-you-want-to
+         $path = explode("/", $host["path"] ?? "");
+         $board = $path[1] ?? "";
+
+         $archives = $this->chanarchives($board);
+         $isboard = $archives !== null;
+
+         if($isboard){
+
+             $archives[] = "archived.moe";
+         }else{
+
+             $archives = [];
+         }
+
+         $trail = "";
+
+         if(
+             isset($path[2]) &&
+             isset($path[3]) &&
+             $path[2] == "thread"
+         ){
+
+             $trail = "/" . $board . "/thread/" . $path[3];
+         }elseif($isboard){
+
+             $trail = "/" . $board . "/";
+         }
+
+         foreach($archives as $archive){
+
+             // $trail is taken from the result URL, escape it
+             $payload .=
+                 '<a href="' . htmlspecialchars("https://" . $archive . $trail) . '" class="list" target="_BLANK">' .
+                     '<img src="/favicon?s=https://' . $archive . '" alt="' . $archive[0] . $archive[1] . '">' .
+                     $archive .
+                 '</a>';
+         }
+     }
+
+     $payload .=
+         '<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' .
+         '<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' .
+         '<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' .
+         '<a href="https://arquivo.pt/wayback/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://arquivo.pt" alt="ar">Arquivo.pt</a>' .
+         '<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' .
+         '<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' .
+         '</div>';
+
+     /*
+         Draw link
+     */
+     $parts = explode("/", $link);
+     $clickurl = "";
+
+     // remove trailing /
+     $c = count($parts) - 1;
+     if($parts[$c] == ""){
+
+         $parts[$c - 1] = $parts[$c - 1] . "/";
+         unset($parts[$c]);
+     }
+
+     // merge https://site together
+     $parts = [
+         $parts[0] . $parts[1] . '//' . $parts[2],
+         ...array_slice($parts, 3, count($parts) - 1)
+     ];
+
+     $c = count($parts);
+     for($i=0; $i<$c; $i++){
+
+         if($i !== 0){ $clickurl .= "/"; }
+
+         $clickurl .= $parts[$i];
+
+         if($i === $c - 1){
+
+             // the href keeps the trailing slash, the label does not
+             $parts[$i] = rtrim($parts[$i], "/");
+         }
+
+         $payload .=
+             '<a class="part" href="' . htmlspecialchars($clickurl) . '" rel="noreferrer nofollow" tabindex="-1">' .
+                 htmlspecialchars(urldecode($parts[$i])) .
+             '</a>';
+
+         if($i !== $c - 1){
+
+             $payload .= '<span class="separator"></span>';
+         }
+     }
+
+     return $payload . '</div>';
+ }
+
+ /**
+  * List the archive sites to offer for a 4chan board, in display order.
+  *
+  * @param string $board Board name without slashes
+  * @return array|null Hostnames (possibly none), or null when $board
+  *                    is not in the table
+  */
+ private function chanarchives($board){
+
+     static $table = [
+         "con" => [], "w" => [], "n" => [], "pw" => [], "out" => [],
+         "toy" => [], "po" => [], "p" => [], "wg" => [], "gd" => [],
+         "qst" => [], "news" => [], "wsr" => [], "vip" => [], "e" => [],
+         "gif" => [],
+
+         "q" => ["desuarchive.org"], "qa" => ["desuarchive.org"],
+         "trash" => ["desuarchive.org"], "a" => ["desuarchive.org"],
+         "c" => ["desuarchive.org"], "m" => ["desuarchive.org"],
+         "co" => ["desuarchive.org"], "k" => ["desuarchive.org"],
+         "an" => ["desuarchive.org"], "his" => ["desuarchive.org"],
+         "int" => ["desuarchive.org"], "mu" => ["desuarchive.org"],
+         "wsg" => ["desuarchive.org"], "fit" => ["desuarchive.org"],
+         "r9k" => ["desuarchive.org"], "d" => ["desuarchive.org"],
+         "aco" => ["desuarchive.org"],
+
+         "qb" => ["arch.b4k.co"], "v" => ["arch.b4k.co"],
+         "vg" => ["arch.b4k.co"], "vm" => ["arch.b4k.co"],
+         "vmg" => ["arch.b4k.co"], "vp" => ["arch.b4k.co"],
+         "vrpg" => ["arch.b4k.co"], "vst" => ["arch.b4k.co"],
+
+         "jp" => ["warosu.org"], "vt" => ["warosu.org"],
+         "ck" => ["warosu.org"], "ic" => ["warosu.org"],
+         "lit" => ["warosu.org"], "fa" => ["warosu.org"],
+         "diy" => ["warosu.org"], "biz" => ["warosu.org"],
+
+         "f" => ["archive.4plebs.org"], "tv" => ["archive.4plebs.org"],
+         "o" => ["archive.4plebs.org"], "sp" => ["archive.4plebs.org"],
+         "trv" => ["archive.4plebs.org"], "x" => ["archive.4plebs.org"],
+         "adv" => ["archive.4plebs.org"], "pol" => ["archive.4plebs.org"],
+         "s4s" => ["archive.4plebs.org"], "hr" => ["archive.4plebs.org"],
+
+         "lgbt" => ["archiveofsins.com"], "soc" => ["archiveofsins.com"],
+         "s" => ["archiveofsins.com"], "hc" => ["archiveofsins.com"],
+         "hm" => ["archiveofsins.com"], "h" => ["archiveofsins.com"],
+         "u" => ["archiveofsins.com"], "t" => ["archiveofsins.com"],
+         "r" => ["archiveofsins.com"],
+
+         "b" => ["thebarchive.com"], "bant" => ["thebarchive.com"],
+
+         "xs" => ["eientei.xyz"],
+
+         "cgl" => ["desuarchive.org", "warosu.org"],
+         "vr" => ["desuarchive.org", "warosu.org"],
+         "g" => ["desuarchive.org", "arch.b4k.co"],
+         "mlp" => ["desuarchive.org", "arch.b4k.co"],
+         "tg" => ["desuarchive.org", "archive.4plebs.org"],
+         "sci" => ["warosu.org", "eientei.xyz"],
+         "3" => ["warosu.org", "eientei.xyz"],
+         "i" => ["archiveofsins.com", "eientei.xyz"]
+     ];
+
+     if(
+         !is_string($board) ||
+         !isset($table[$board])
+     ){
+
+         return null;
+     }
+
+     return $table[$board];
+ }
+
+ /**
+  * Pick the scraper for a page, load it and return it with the
+  * merged filter definitions.
+  *
+  * The scraper name comes from the "scraper" GET parameter, else from
+  * the prefix of the "npt" token, else from the scraper_<page>
+  * cookie; anything not offered for the page falls back to the
+  * first scraper of the list. $_GET["scraper"] is overwritten with
+  * the final choice and $_GET["nsfw"] is seeded from the nsfw cookie.
+  *
+  * @param string $page web, images, videos, news or music
+  * @return array [scraper instance, filter definitions]
+  * @throws Exception When $page has no scraper list
+  */
+ public function getscraperfilters($page){
+
+     $get_scraper = isset($_COOKIE["scraper_$page"]) ? $_COOKIE["scraper_$page"] : null;
+
+     if(
+         isset($_GET["scraper"]) &&
+         is_string($_GET["scraper"])
+     ){
+
+         $get_scraper = $_GET["scraper"];
+     }elseif(
+         isset($_GET["npt"]) &&
+         is_string($_GET["npt"])
+     ){
+
+         // the token starts with the scraper name followed by
+         // digits, then a dot
+         $get_scraper =
+             preg_replace(
+                 '/[0-9]+$/',
+                 "",
+                 explode(".", $_GET["npt"], 2)[0]
+             );
+     }
+
+     // scrapers offered per page, the first one is the default
+     $scrapers = [
+         "web" => [
+             "ddg" => "DuckDuckGo",
+             "brave" => "Brave",
+             "yandex" => "Yandex",
+             "google" => "Google",
+             //"google_api" => "Google API",
+             "google_cse" => "Google CSE",
+             "startpage" => "Startpage",
+             "qwant" => "Qwant",
+             "ghostery" => "Ghostery",
+             "yep" => "Yep",
+             "greppr" => "Greppr",
+             "crowdview" => "Crowdview",
+             "mwmbl" => "Mwmbl",
+             "mojeek" => "Mojeek",
+             "baidu" => "Baidu",
+             "coccoc" => "Cốc Cốc",
+             //"solofield" => "Solofield",
+             "marginalia" => "Marginalia",
+             "wiby" => "wiby",
+             "curlie" => "Curlie"
+         ],
+         "images" => [
+             "ddg" => "DuckDuckGo",
+             "yandex" => "Yandex",
+             "brave" => "Brave",
+             "google" => "Google",
+             "google_cse" => "Google CSE",
+             "startpage" => "Startpage",
+             "qwant" => "Qwant",
+             "yep" => "Yep",
+             "baidu" => "Baidu",
+             //"solofield" => "Solofield",
+             "pinterest" => "Pinterest",
+             "cara" => "Cara",
+             "flickr" => "Flickr",
+             "fivehpx" => "500px",
+             "vsco" => "VSCO",
+             "imgur" => "Imgur",
+             "ftm" => "FindThatMeme",
+             //"sankakucomplex" => "SankakuComplex"
+         ],
+         "videos" => [
+             "yt" => "YouTube",
+             "vimeo" => "Vimeo",
+             //"odysee" => "Odysee",
+             "sepiasearch" => "Sepia Search",
+             //"fb" => "Facebook videos",
+             "ddg" => "DuckDuckGo",
+             "brave" => "Brave",
+             "yandex" => "Yandex",
+             "google" => "Google",
+             "startpage" => "Startpage",
+             "qwant" => "Qwant",
+             "baidu" => "Baidu",
+             "coccoc" => "Cốc Cốc"
+             //"solofield" => "Solofield"
+         ],
+         "news" => [
+             "ddg" => "DuckDuckGo",
+             "brave" => "Brave",
+             "google" => "Google",
+             "startpage" => "Startpage",
+             "qwant" => "Qwant",
+             "yep" => "Yep",
+             "mojeek" => "Mojeek",
+             "baidu" => "Baidu"
+         ],
+         "music" => [
+             "sc" => "SoundCloud"
+             //"spotify" => "Spotify"
+         ]
+     ];
+
+     if(
+         !is_string($page) ||
+         !isset($scrapers[$page])
+     ){
+
+         // without a scraper list there is no default to include
+         throw new Exception("Unknown search page");
+     }
+
+     $options = $scrapers[$page];
+
+     $filters =
+         [
+             // search field
+             "s" => [
+                 "option" => "_SEARCH"
+             ],
+             "scraper" => [
+                 "display" => "Scraper",
+                 "option" => $options
+             ]
+         ];
+
+     // only names from the list above ever reach the include
+     if(
+         is_string($get_scraper) &&
+         isset($options[$get_scraper])
+     ){
+
+         $scraper_out = $get_scraper;
+     }else{
+
+         $scraper_out = array_key_first($options);
+     }
+
+     include "scraper/$scraper_out.php";
+     $lib = new $scraper_out();
+
+     // set scraper on $_GET
+     $_GET["scraper"] = $scraper_out;
+
+     // set nsfw on $_GET
+     if(
+         isset($_COOKIE["nsfw"]) &&
+         !isset($_GET["nsfw"])
+     ){
+
+         $_GET["nsfw"] = $_COOKIE["nsfw"];
+     }
+
+     return
+         [
+             $lib,
+             array_merge_recursive(
+                 $filters,
+                 $lib->getfilters($page)
+             )
+         ];
+ }
+
+ /**
+  * Reduce raw request parameters to the ones described by $whitelist
+  * and give each a valid value.
+  *
+  * Only whitelisted names (plus "npt") end up in the result. List
+  * options fall back to their first key, _DATE options become a
+  * timestamp or false, and the _SEARCH option is trimmed into "s".
+  * Non-string input for a whitelisted name yields null. When both
+  * "older" and "newer" are set and reversed, they are swapped.
+  *
+  * @param array $parameters Raw parameters, e.g. $_GET
+  * @param array $whitelist Filter definitions keyed by parameter name
+  * @return array
+  */
+ public function parsegetfilters($parameters, $whitelist){
+
+     $clean = [];
+
+     // next page token
+     $has_npt = isset($parameters["npt"]) && is_string($parameters["npt"]);
+     $clean["npt"] = $has_npt ? $parameters["npt"] : false;
+
+     // looping over the whitelist keeps unknown parameters out
+     foreach($whitelist as $name => $definition){
+
+         $option = $definition["option"];
+
+         if(isset($parameters[$name])){
+
+             if(!is_string($parameters[$name])){
+
+                 $clean[$name] = null;
+                 continue;
+             }
+
+             // value supplied by the user
+             $clean[$name] = $parameters[$name];
+
+         }elseif(!is_string($option)){
+
+             // list of choices: default to the first one
+             $clean[$name] = array_keys($option)[0];
+
+         }elseif($option == "_DATE"){
+
+             // no date set
+             $clean[$name] = false;
+
+         }elseif($option == "_SEARCH"){
+
+             // no search set
+             $clean[$name] = "";
+         }
+
+         // sanitize input
+         if(is_array($option)){
+
+             $allowed = array_keys($option);
+
+             if(!in_array($clean[$name], $allowed)){
+
+                 $clean[$name] = $allowed[0];
+             }
+
+             continue;
+         }
+
+         if($option == "_DATE"){
+
+             if($clean[$name] !== false){
+
+                 $stamp = strtotime($clean[$name]);
+                 $clean[$name] = ($stamp <= 0) ? false : $stamp;
+             }
+
+         }elseif($option == "_SEARCH"){
+
+             // the search string always lands in "s"
+             $clean["s"] = trim($clean[$name]);
+         }
+     }
+
+     // invert dates if needed
+     if(
+         isset($clean["older"]) &&
+         isset($clean["newer"]) &&
+         $clean["newer"] !== false &&
+         $clean["older"] !== false &&
+         $clean["newer"] > $clean["older"]
+     ){
+
+         $swap = $clean["older"];
+         $clean["older"] = $clean["newer"];
+         $clean["newer"] = $swap;
+     }
+
+     return $clean;
+ }
+
+ /**
+  * Format a duration in seconds as m:ss or h:mm:ss.
+  *
+  * Any string input is reported as "LIVE". Hours are computed
+  * directly, so durations of a day or more no longer wrap around
+  * the way a clock-time formatter does.
+  *
+  * @param int|float|string $seconds
+  * @return string
+  */
+ public function s_to_timestamp($seconds){
+
+     if(is_string($seconds)){
+
+         return "LIVE";
+     }
+
+     $seconds = (int)$seconds;
+
+     $hours = intdiv($seconds, 3600);
+     $minutes = intdiv($seconds % 3600, 60);
+     $secs = $seconds % 60;
+
+     if($hours > 0){
+
+         return sprintf("%d:%02d:%02d", $hours, $minutes, $secs);
+     }
+
+     return sprintf("%d:%02d", $minutes, $secs);
+ }
+
+ /**
+  * Build the web / images / videos / news / music tab links for a
+  * query, marking the current page as selected.
+  *
+  * @param string $page Current tab name
+  * @param string $query Search string, url-encoded here
+  * @return string
+  */
+ public function generatehtmltabs($page, $query){
+
+     $html = "";
+     $search = urlencode($query);
+
+     foreach(["web", "images", "videos", "news", "music"] as $type){
+
+         $html .= '<a href="/' . $type . '?s=' . $search . '" class="tab';
+
+         if($type == $page){
+
+             $html .= ' selected';
+         }
+
+         $html .= '">' . ucfirst($type) . '</a>';
+     }
+
+     return $html;
+ }
+
+ /**
+  * Build the filter form controls shown in the page header.
+  *
+  * Definitions without a "display" entry are skipped. List options
+  * become a <select>, "_DATE" becomes a date input, and any other
+  * option type produces nothing.
+  *
+  * @param array $filters Filter definitions keyed by parameter name
+  * @param array $params Current values keyed by parameter name
+  * @return string|null null when nothing was generated
+  */
+ public function generatehtmlfilters($filters, $params){
+
+     $html = null;
+
+     foreach($filters as $name => $definition){
+
+         if(!isset($definition["display"])){
+
+             continue;
+         }
+
+         $block =
+             '<div class="filter">' .
+             '<div class="title">' . htmlspecialchars($definition["display"]) . '</div>';
+
+         if(is_array($definition["option"])){
+
+             $block .= '<select name="' . $name . '">';
+
+             foreach($definition["option"] as $value => $label){
+
+                 $selected = ($params[$name] == $value) ? ' selected' : '';
+
+                 $block .=
+                     '<option value="' . $value . '"' . $selected . '>' .
+                     htmlspecialchars($label) .
+                     '</option>';
+             }
+
+             $block .= '</select>';
+
+         }elseif($definition["option"] == "_DATE"){
+
+             $block .= '<input type="date" name="' . $name . '"';
+
+             if($params[$name] !== false){
+
+                 $block .= ' value="' . date("Y-m-d", $params[$name]) . '"';
+             }
+
+             $block .= '>';
+
+         }else{
+
+             // no control exists for this kind of option
+             continue;
+         }
+
+         $html .= $block . '</div>';
+     }
+
+     return $html;
+ }
+
+ /**
+  * Turn sanitized parameters back into a query string.
+  *
+  * Falsy values, the values "any" and "all", and the npt,
+  * extendedsearch and spellcheck keys are left out. "older" and
+  * "newer" are written as Y-m-d dates.
+  *
+  * @param array $gets Parameter name => value
+  * @param bool $ommit true also leaves out the search string "s"
+  * @return string
+  */
+ public function buildquery($gets, $ommit = false){
+
+     $never_sent = ["npt", "extendedsearch", "spellcheck"];
+     $query = [];
+
+     foreach($gets as $key => $value){
+
+         $skip =
+             !$value ||
+             $value == "any" ||
+             $value == "all" ||
+             in_array($key, $never_sent) ||
+             ($ommit === true && $key == "s");
+
+         if($skip){
+
+             continue;
+         }
+
+         $is_date = ($key == "older" || $key == "newer");
+
+         $query[$key] = $is_date ? date("Y-m-d", (int)$value) : $value;
+     }
+
+     return http_build_query($query);
+ }
+
+ /**
+  * Return the src value for an image: data: URIs are escaped and
+  * used directly, everything else is routed through /proxy.
+  *
+  * @param string $image Image URL or data: URI
+  * @param string $format Value of the proxy "s" parameter
+  * @return string
+  */
+ public function htmlimage($image, $format){
+
+     $is_data_uri = preg_match('/^data:/', $image);
+
+     if(!$is_data_uri){
+
+         return "/proxy?i=" . urlencode($image) . "&s=" . $format;
+     }
+
+     return htmlspecialchars($image);
+ }
+
+ /**
+  * Build the relative URL of the next results page.
+  *
+  * @param array $gets Current parameters, passed through buildquery()
+  * @param string $npt Next page token, appended as-is
+  * @param string $page Page path the link points at
+  * @return string
+  */
+ public function htmlnextpage($gets, $npt, $page){
+
+     return $page . "?" . $this->buildquery($gets) . "&npt=" . $npt;
+ }
+}
diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php
new file mode 100644
index 0000000..3ea256f
--- /dev/null
+++ b/lib/fuckhtml.php
@@ -0,0 +1,622 @@
+<?php
+class fuckhtml{
+
+ /**
+  * Optionally load a document straight away.
+  *
+  * @param string|array|null $html Passed to load() unless null
+  * @param bool $isfile Passed to load()
+  */
+ public function __construct($html = null, $isfile = false){
+
+     if($html === null){
+
+         // nothing to parse yet, load() can be called later
+         return;
+     }
+
+     $this->load($html, $isfile);
+ }
+
+ /**
+  * Set the document that the getElement* methods search.
+  *
+  * @param string|array $html Markup, a file path when $isfile is
+  *                           true, or an element array whose
+  *                           "innerHTML" is used
+  * @param bool $isfile Treat $html as a path and read that file
+  * @return void
+  * @throws Exception When an array has no innerHTML or the file
+  *                   cannot be read
+  */
+ public function load($html, $isfile = false){
+
+     if(is_array($html)){
+
+         if(!isset($html["innerHTML"])){
+
+             throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
+         }
+         $html = $html["innerHTML"];
+     }
+
+     if($isfile){
+
+         // unlike fread() with filesize(), this also handles an
+         // empty file and reports a missing one
+         $fetch = file_get_contents($html);
+
+         if($fetch === false){
+
+             throw new Exception("(load) Failed to read file");
+         }
+
+         $this->html = $fetch;
+     }else{
+
+         $this->html = $html;
+     }
+
+     $this->strlen = strlen($this->html);
+ }
+
+ /**
+  * Return the document exactly as it was stored by load().
+  *
+  * @return string
+  */
+ public function getloadedhtml(){
+
+     $loaded = $this->html;
+
+     return $loaded;
+ }
+
+ /**
+  * Find every element with the given tag name in the loaded HTML.
+  *
+  * Opening and closing tags are located with regular expressions and
+  * paired through a per-tag nesting counter. Each element is an
+  * array with tagName, startPos, endPos, attributes, innerHTML and
+  * level; outerHTML is only present when a closing tag was found
+  * (otherwise innerHTML stays null and endPos stays 0).
+  *
+  * @param string $tagname Tag name, or "*" for any tag
+  * @return array List of elements in document order
+  */
+ public function getElementsByTagName(string $tagname){
+
+     $out = [];
+
+     /*
+         Scrape start of the tag. Example
+         <div class="mydiv"> ...
+     */
+
+     if($tagname == "*"){
+
+         $tagname = '[A-Za-z0-9._-]+';
+     }else{
+
+         // quote for use between the "/" delimiters below
+         $tagname = preg_quote(strtolower($tagname), "/");
+     }
+
+     preg_match_all(
+         '/<\s*(' . $tagname . ')(\s(?:[^>\'"]*|"[^"]*"|\'[^\']*\')+)?\s*>/i',
+         $this->html,
+         $starting_tags,
+         PREG_OFFSET_CAPTURE
+     );
+
+     for($i=0; $i<count($starting_tags[0]); $i++){
+
+         /*
+             Parse attributes
+         */
+         $attributes = [];
+
+         preg_match_all(
+             '/([^\/\s\\=]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s]*))?/i',
+             $starting_tags[2][$i][0],
+             $regex_attributes
+         );
+
+         for($k=0; $k<count($regex_attributes[0]); $k++){
+
+             if(trim($regex_attributes[2][$k]) == ""){
+
+                 // attribute without a value
+                 // NOTE(review): unlike valued attributes this key
+                 // is not lowercased - confirm that is intended
+                 $attributes[$regex_attributes[1][$k]] =
+                     "true";
+
+                 continue;
+             }
+
+             $attributes[strtolower($regex_attributes[1][$k])] =
+                 trim($regex_attributes[2][$k], "'\" \n\r\t\v\x00");
+         }
+
+         $out[] = [
+             "tagName" => strtolower($starting_tags[1][$i][0]),
+             "startPos" => $starting_tags[0][$i][1],
+             "endPos" => 0,
+             "startTag" => $starting_tags[0][$i][0],
+             "attributes" => $attributes,
+             "innerHTML" => null
+         ];
+     }
+
+     /*
+         Get innerHTML
+     */
+     // get closing tag positions
+     preg_match_all(
+         '/<\s*\/\s*(' . $tagname . ')\s*>/i',
+         $this->html,
+         $regex_closing_tags,
+         PREG_OFFSET_CAPTURE
+     );
+
+     // merge opening and closing tags together
+     for($i=0; $i<count($regex_closing_tags[1]); $i++){
+
+         $out[] = [
+             "tagName" => strtolower($regex_closing_tags[1][$i][0]),
+             "endTag" => $regex_closing_tags[0][$i][0],
+             "startPos" => $regex_closing_tags[0][$i][1]
+         ];
+     }
+
+     // document order; the comparator has to return an integer,
+     // a boolean is deprecated
+     usort(
+         $out,
+         function($a, $b){
+
+             return $a["startPos"] <=> $b["startPos"];
+         }
+     );
+
+     // compute the indent level for each element
+     $level = [];
+     $count = count($out);
+
+     for($i=0; $i<$count; $i++){
+
+         if(!isset($level[$out[$i]["tagName"]])){
+
+             $level[$out[$i]["tagName"]] = 0;
+         }
+
+         if(isset($out[$i]["startTag"])){
+
+             // encountered starting tag
+             $level[$out[$i]["tagName"]]++;
+             $out[$i]["level"] = $level[$out[$i]["tagName"]];
+         }else{
+
+             // encountered closing tag
+             $out[$i]["level"] = $level[$out[$i]["tagName"]];
+             $level[$out[$i]["tagName"]]--;
+         }
+     }
+
+     // if the indent level is the same for a div,
+     // we encountered _THE_ closing tag
+     for($i=0; $i<$count; $i++){
+
+         if(!isset($out[$i]["startTag"])){
+
+             continue;
+         }
+
+         for($k=$i; $k<$count; $k++){
+
+             if(
+                 isset($out[$k]["endTag"]) &&
+                 $out[$i]["tagName"] == $out[$k]["tagName"] &&
+                 $out[$i]["level"]
+                 === $out[$k]["level"]
+             ){
+
+                 $startlen = strlen($out[$i]["startTag"]);
+                 $endlen = strlen($out[$k]["endTag"]);
+
+                 $out[$i]["endPos"] = $out[$k]["startPos"] + $endlen;
+
+                 $out[$i]["innerHTML"] =
+                     substr(
+                         $this->html,
+                         $out[$i]["startPos"] + $startlen,
+                         $out[$k]["startPos"] - ($out[$i]["startPos"] + $startlen)
+                     );
+
+                 $out[$i]["outerHTML"] =
+                     substr(
+                         $this->html,
+                         $out[$i]["startPos"],
+                         $out[$k]["startPos"] - $out[$i]["startPos"] + $endlen
+                     );
+
+                 break;
+             }
+         }
+     }
+
+     // filter out ending divs
+     for($i=0; $i<$count; $i++){
+
+         if(isset($out[$i]["endTag"])){
+
+             unset($out[$i]);
+             continue;
+         }
+
+         unset($out[$i]["startTag"]);
+     }
+
+     return array_values($out);
+ }
+
+ /**
+  * Keep the elements that carry an attribute called $name.
+  *
+  * @param string $name Attribute name
+  * @param array|string|null $collection Elements to search, a tag
+  *                          name to fetch them by, or null for
+  *                          every element of the document
+  * @return array
+  */
+ public function getElementsByAttributeName(string $name, $collection = null){
+
+     if(is_string($collection)){
+
+         $collection = $this->getElementsByTagName($collection);
+     }elseif($collection === null){
+
+         $collection = $this->getElementsByTagName("*");
+     }
+
+     $return = [];
+
+     foreach($collection as $elem){
+
+         // loose comparison against the attribute names
+         if(in_array($name, array_keys($elem["attributes"]))){
+
+             $return[] = $elem;
+         }
+     }
+
+     return $return;
+ }
+
+ /**
+  * Keep the elements whose attribute $name contains every
+  * whitespace-separated word of $value, in any order.
+  *
+  * Only the attribute called $name is inspected, and tokens repeated
+  * inside the attribute do not prevent a match.
+  *
+  * @param string $name Attribute name, e.g. "class"
+  * @param string $value Words that must all be present
+  * @param array|string|null $collection See getElementsByAttributeName()
+  * @return array
+  */
+ public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
+
+     $elems = $this->getElementsByAttributeName($name, $collection);
+
+     $wanted =
+         explode(
+             " ",
+             trim(
+                 preg_replace(
+                     '/\s+/',
+                     " ",
+                     $value
+                 )
+             )
+         );
+
+     $return = [];
+
+     foreach($elems as $elem){
+
+         foreach($elem["attributes"] as $attrib_name => $attrib_value){
+
+             if($attrib_name != $name){
+
+                 // other attributes must not take part in the match
+                 continue;
+             }
+
+             $present =
+                 explode(
+                     " ",
+                     trim(
+                         preg_replace(
+                             '/\s+/',
+                             " ",
+                             $attrib_value
+                         )
+                     )
+                 );
+
+             // match when no wanted word is missing
+             if(count(array_diff($wanted, $present)) === 0){
+
+                 $return[] = $elem;
+                 continue 2;
+             }
+         }
+     }
+
+     return $return;
+ }
+
+ /**
+  * Keep the elements whose attribute $name is exactly $value.
+  *
+  * Only the attribute called $name is compared, so an element with
+  * the same value on a different attribute does not match.
+  *
+  * @param string $name Attribute name
+  * @param string $value Required value
+  * @param array|string|null $collection See getElementsByAttributeName()
+  * @return array
+  */
+ public function getElementsByAttributeValue(string $name, string $value, $collection = null){
+
+     $elems = $this->getElementsByAttributeName($name, $collection);
+
+     $return = [];
+
+     foreach($elems as $elem){
+
+         foreach($elem["attributes"] as $attrib_name => $attrib_value){
+
+             if(
+                 $attrib_name == $name &&
+                 (string)$attrib_value === $value
+             ){
+
+                 $return[] = $elem;
+                 continue 2;
+             }
+         }
+     }
+
+     return $return;
+ }
+
+ /**
+  * Return the first element found by
+  * getElementsByAttributeValue("id", $idname, $collection).
+  *
+  * @param string $idname
+  * @param array|string|null $collection Forwarded unchanged
+  * @return array|false The element, or false when there is none
+  */
+ public function getElementById(string $idname, $collection = null){
+
+     $matches = $this->getElementsByAttributeValue("id", $idname, $collection);
+
+     if(count($matches) === 0){
+
+         return false;
+     }
+
+     return $matches[0];
+ }
+
+ /**
+  * Shorthand for getElementsByFuzzyAttributeValue() on the "class"
+  * attribute.
+  *
+  * @param string $classname One or more space separated class names
+  * @param array|string|null $collection Forwarded unchanged
+  * @return array
+  */
+ public function getElementsByClassName(string $classname, $collection = null){
+
+     $matches = $this->getElementsByFuzzyAttributeValue("class", $classname, $collection);
+
+     return $matches;
+ }
+
+ /**
+  * Reduce markup to its text.
+  *
+  * The input is cut at newlines and <br> / </br> tags; each piece has
+  * its tags stripped and entities decoded, and is followed by a
+  * separator in the result.
+  *
+  * @param string|array $html Markup, or an element array whose
+  *                           "innerHTML" is used
+  * @param bool $whitespace true separates pieces with "\n",
+  *                         anything else with a space
+  * @param bool $trim Trim every piece and the final result
+  * @return string
+  * @throws Exception When an array has no innerHTML
+  */
+ public function getTextContent($html, $whitespace = false, $trim = true){
+
+     if(is_array($html)){
+
+         if(!isset($html["innerHTML"])){
+
+             throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
+         }
+
+         $html = $html["innerHTML"];
+     }
+
+     $separator = ($whitespace === true) ? "\n" : " ";
+     $text = "";
+
+     foreach(preg_split('/\n|<\/?br>/i', $html) as $piece){
+
+         $piece =
+             html_entity_decode(
+                 strip_tags($piece),
+                 ENT_QUOTES | ENT_XML1, "UTF-8"
+             );
+
+         // every piece is followed by the separator, the last too
+         $text .= ($trim ? trim($piece) : $piece) . $separator;
+     }
+
+     return $trim ? trim($text) : $text;
+ }
+
+ /*
+ Convert a JavaScript object literal into a PHP array.
+
+ The input is walked one character at a time and rewritten into
+ $json_out, which is finally handed to json_decode(..., true).
+ Text inside quotes is copied through unchanged; outside quotes,
+ double quotes are inserted around tokens that are not quoted.
+ Returns whatever json_decode() returns for the rewritten string.
+ */
+ public function parseJsObject(string $json){
+
+ // quote character of the string we are inside of, or false
+ $bracket = false;
+ // true only for the character that closed a quoted string
+ $is_close_bracket = false;
+ $escape = false;
+ // NOTE(review): $lastchar is never reassigned below ($last_char is the
+ // variable that gets updated), so the check that uses it cannot see
+ // the previous character - confirm whether $last_char was meant
+ $lastchar = false;
+ $json_out = null;
+ $last_char = null;
+
+ // unquoted text collected since the last ":" or "{"
+ $keyword_check = null;
+
+ for($i=0; $i<strlen($json); $i++){
+
+ // track whether we are entering or leaving a quoted string
+ switch($json[$i]){
+
+ case "\"":
+ case "'":
+ // a quote right after a backslash does not open or close anything
+ if($escape === true){
+
+ break;
+ }
+
+ if($json[$i] == $bracket){
+
+ $bracket = false;
+ $is_close_bracket = true;
+
+ }else{
+
+ if($bracket === false){
+
+ $bracket = $json[$i];
+ }
+ }
+ break;
+
+ default:
+ $is_close_bracket = false;
+ break;
+ }
+
+ // remember a backslash so the next character counts as escaped
+ if(
+ $json[$i] == "\\" &&
+ !(
+ $lastchar !== false &&
+ $lastchar . $json[$i] == "\\\\"
+ )
+ ){
+
+ $escape = true;
+ }else{
+
+ $escape = false;
+ }
+
+ if(
+ $bracket === false &&
+ $is_close_bracket === false
+ ){
+
+ // do keyword check
+ $keyword_check .= $json[$i];
+
+ if(in_array($json[$i], [":", "{"])){
+
+ $keyword_check = substr($keyword_check, 0, -1);
+
+ // text containing function, array or return is removed
+ // from the end of the output
+ if(
+ preg_match(
+ '/function|array|return/i',
+ $keyword_check
+ )
+ ){
+
+ $json_out =
+ preg_replace(
+ '/[{"]*' . preg_quote($keyword_check, "/") . '$/',
+ "",
+ $json_out
+ );
+ }
+
+ $keyword_check = null;
+ }
+
+ // here we know we're not iterating over a quoted string
+ switch($json[$i]){
+
+ case "[":
+ case "{":
+ $json_out .= $json[$i];
+ break;
+
+ case "]":
+ case "}":
+ case ",":
+ case ":":
+ // close the quote of an unquoted token that ends here
+ if(!in_array($last_char, ["[", "{", "}", "]", "\""])){
+
+ $json_out .= "\"";
+ }
+
+ $json_out .= $json[$i];
+ break;
+
+ default:
+ // open a quote in front of an unquoted token that starts here
+ if(in_array($last_char, ["{", "[", ",", ":"])){
+
+ $json_out .= "\"";
+ }
+
+ $json_out .= $json[$i];
+ break;
+ }
+ }else{
+
+ // inside quotes, or on the closing quote itself: copy as-is
+ $json_out .= $json[$i];
+ }
+
+ $last_char = $json[$i];
+ }
+
+ return json_decode($json_out, true);
+ }
+
+ /**
+  * Decode the escape sequences of a JavaScript string literal.
+  *
+  * \uXXXX is decoded through json_decode(), \xXX is read as a
+  * windows-1252 byte and converted to UTF-8, and the two-character
+  * sequences \n and \r are replaced by a space.
+  *
+  * @param string $string
+  * @return string
+  */
+ public function parseJsString($string){
+
+     $decode = function($match){
+
+         // character following the backslash
+         $kind = $match[0][1];
+
+         if($kind == "u"){
+
+             return json_decode('"' . $match[0] . '"');
+         }
+
+         if($kind == "x"){
+
+             return mb_convert_encoding(
+                 stripcslashes($match[0]),
+                 "utf-8",
+                 "windows-1252"
+             );
+         }
+
+         return " ";
+     };
+
+     return
+         preg_replace_callback(
+             '/\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}|\\\n|\\\r/',
+             $decode,
+             $string
+         );
+ }
+
+ /**
+  * Cut the first complete [...] or {...} value out of a string.
+  *
+  * Scanning starts at the first unquoted opening bracket and stops
+  * once both bracket counters are back at zero. Brackets inside
+  * quoted strings are not counted.
+  *
+  * @param string $json Text containing a JSON value somewhere
+  * @return string|null The extracted text, or null when no balanced
+  *                     value was found
+  */
+ public function extract_json($json){
+
+     $len = strlen($json);
+     $array_level = 0;
+     $object_level = 0;
+     $in_quote = null;
+     $start = null;
+
+     for($i=0; $i<$len; $i++){
+
+         switch($json[$i]){
+
+             case "[":
+                 if($in_quote === null){
+
+                     $array_level++;
+                     if($start === null){
+
+                         $start = $i;
+                     }
+                 }
+                 break;
+
+             case "]":
+                 if($in_quote === null){
+
+                     $array_level--;
+                 }
+                 break;
+
+             case "{":
+                 if($in_quote === null){
+
+                     $object_level++;
+                     if($start === null){
+
+                         $start = $i;
+                     }
+                 }
+                 break;
+
+             case "}":
+                 if($in_quote === null){
+
+                     $object_level--;
+                 }
+                 break;
+
+             case "\"":
+             case "'":
+                 // a quote is escaped only when an odd number of
+                 // backslashes precedes it: \" is escaped, \\" is an
+                 // escaped backslash followed by a real quote
+                 $backslashes = 0;
+
+                 for($k=$i-1; $k>=0 && $json[$k] === "\\"; $k--){
+
+                     $backslashes++;
+                 }
+
+                 if($backslashes % 2 === 0){
+
+                     if($in_quote === null){
+
+                         // open quote
+                         $in_quote = $json[$i];
+                     }elseif($in_quote === $json[$i]){
+
+                         // close quote
+                         $in_quote = null;
+                     }
+                 }
+                 break;
+         }
+
+         if(
+             $start !== null &&
+             $array_level === 0 &&
+             $object_level === 0
+         ){
+
+             return substr($json, $start, $i - $start + 1);
+         }
+     }
+
+     return null;
+ }
+}
diff --git a/lib/img404.png b/lib/img404.png
new file mode 100644
index 0000000..e8588cf
--- /dev/null
+++ b/lib/img404.png
Binary files differ
diff --git a/lib/type-todo.php b/lib/type-todo.php
new file mode 100644
index 0000000..f813543
--- /dev/null
+++ b/lib/type-todo.php
@@ -0,0 +1,132 @@
+
+ //
+ // Build autocomplete suggestions.
+ //
+ // - "s" and "bang" both empty: returns a static list of popular bangs
+ // - only "bang" given: returns up to 8 bangs from the database that start
+ //   with the given text
+ // - "s" given: returns up to 8 phrases from DuckDuckGo's /ac/ endpoint.
+ //   When "bang" matches a known bang, every phrase is prefixed with
+ //   "!bang " and carries the bang's name in "n"
+ //
+ // @param array $get request parameters, reads the "s" and "bang" keys
+ // @return array list of suggestions ("s", and optionally "n" / "u")
+ // @throws Exception when the database or the /ac/ request fails
+ //
+ public function type($get){
+
+     // missing or non-scalar parameters are treated as empty
+     $search =
+         (isset($get["s"]) && is_scalar($get["s"])) ?
+         (string)$get["s"] : "";
+
+     $bang =
+         (isset($get["bang"]) && is_scalar($get["bang"])) ?
+         (string)$get["bang"] : "";
+
+     // Run one parameterized query against the bang database.
+     // pg_query_params() is used instead of pg_prepare()/pg_execute():
+     // a named prepared statement cannot be declared twice on the same
+     // connection, which broke any second call within one request.
+     $bang_query = function($sql, $params){
+
+         // NOTE(review): credentials are hard-coded, move them to the config
+         $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");
+
+         if($conn === false){
+
+             throw new Exception("Failed to connect to the bang database");
+         }
+
+         $q = pg_query_params($conn, $sql, $params);
+
+         if($q === false){
+
+             throw new Exception("Failed to query the bang database");
+         }
+
+         return $q;
+     };
+
+     if(empty($search)){
+
+         if(!empty($bang)){
+
+             // !youtube
+             // escape LIKE metacharacters so the user input can only
+             // ever act as a literal prefix
+             $q =
+                 $bang_query(
+                     'SELECT bang,name FROM bangs WHERE bang LIKE $1 ORDER BY bang ASC LIMIT 8',
+                     [addcslashes($bang, '\\%_') . "%"]
+                 );
+
+             $results = [];
+             while($row = pg_fetch_array($q, null, PGSQL_ASSOC)){
+
+                 $results[] = [
+                     "s" => "!" . $row["bang"],
+                     "n" => $row["name"]
+                 ];
+             }
+
+             return $results;
+         }
+
+         // everything is empty
+         // lets just return a bang list
+         return [
+             [
+                 "s" => "!w",
+                 "n" => "Wikipedia",
+                 "u" => "https://en.wikipedia.org/wiki/Special:Search?search={%q%}"
+             ],
+             [
+                 "s" => "!4ch",
+                 "n" => "4chan Board",
+                 "u" => "https://find.4chan.org/?q={%q%}"
+             ],
+             [
+                 "s" => "!a",
+                 "n" => "Amazon",
+                 "u" => "https://www.amazon.com/s?k={%q%}"
+             ],
+             [
+                 "s" => "!e",
+                 "n" => "eBay",
+                 "u" => "https://www.ebay.com/sch/items/?_nkw={%q%}"
+             ],
+             [
+                 "s" => "!so",
+                 "n" => "Stack Overflow",
+                 "u" => "http://stackoverflow.com/search?q={%q%}"
+             ],
+             [
+                 "s" => "!gh",
+                 "n" => "GitHub",
+                 "u" => "https://github.com/search?utf8=%E2%9C%93&q={%q%}"
+             ],
+             [
+                 "s" => "!tw",
+                 "n" => "Twitter",
+                 "u" => "https://twitter.com/search?q={%q%}"
+             ],
+             [
+                 "s" => "!r",
+                 "n" => "Reddit",
+                 "u" => "https://www.reddit.com/search?q={%q%}"
+             ],
+         ];
+     }
+
+     // now we know search isnt empty
+     $bang_name = null;
+
+     if(!empty($bang)){
+
+         // check if the bang exists
+         $q =
+             $bang_query(
+                 'SELECT bang,name FROM bangs WHERE bang = $1 LIMIT 1',
+                 [$bang]
+             );
+
+         $row = pg_fetch_array($q, null, PGSQL_ASSOC);
+
+         if(
+             $row !== false &&
+             isset($row["bang"])
+         ){
+
+             $bang = "!$bang ";
+             $bang_name = $row["name"];
+         }else{
+
+             // unknown bang: fall back to plain suggestions
+             $bang = "";
+         }
+     }
+
+     try{
+         $res = $this->get(
+             "https://duckduckgo.com/ac/",
+             [
+                 "q" => strtolower($search)
+             ],
+             ddg::req_xhr
+         );
+
+     }catch(Exception $e){
+
+         // keep the original failure attached for debugging
+         throw new Exception("Failed to get /ac/", 0, $e);
+     }
+
+     $res = json_decode($res, true);
+
+     if(!is_array($res)){
+
+         throw new Exception("Failed to decode /ac/ response");
+     }
+
+     $arr = [];
+     foreach($res as $suggestion){
+
+         if(count($arr) === 8){break;}
+
+         // skip malformed entries instead of emitting null phrases
+         if(
+             !is_array($suggestion) ||
+             !isset($suggestion["phrase"])
+         ){
+
+             continue;
+         }
+
+         if(empty($bang)){
+
+             $arr[] = [
+                 "s" => $suggestion["phrase"]
+             ];
+         }else{
+
+             $arr[] = [
+                 "s" => $bang . $suggestion["phrase"],
+                 "n" => $bang_name
+             ];
+         }
+     }
+
+     return $arr;
+ }