From cdf958d29333d448f4521f4d2faa2592b58e9b27 Mon Sep 17 00:00:00 2001 From: lolcat Date: Sun, 10 Aug 2025 21:55:15 -0400 Subject: fix wikipedia crash --- api/v1/ac.php | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ api/v1/images.php | 39 +++++++++ api/v1/index.php | 10 +++ api/v1/music.php | 39 +++++++++ api/v1/news.php | 39 +++++++++ api/v1/videos.php | 39 +++++++++ api/v1/web.php | 52 ++++++++++++ 7 files changed, 461 insertions(+) create mode 100644 api/v1/ac.php create mode 100644 api/v1/images.php create mode 100644 api/v1/index.php create mode 100644 api/v1/music.php create mode 100644 api/v1/news.php create mode 100644 api/v1/videos.php create mode 100644 api/v1/web.php (limited to 'api/v1') diff --git a/api/v1/ac.php b/api/v1/ac.php new file mode 100644 index 0000000..50d3095 --- /dev/null +++ b/api/v1/ac.php @@ -0,0 +1,243 @@ +scrapers = [ + "brave" => "https://search.brave.com/api/suggest?q={searchTerms}", + "ddg" => "https://duckduckgo.com/ac/?q={searchTerms}&type=list", + "yandex" => "https://suggest.yandex.com/suggest-ff.cgi?part={searchTerms}&uil=en&v=3&sn=5&lr=21276&yu=4861394161661655015", + "google" => "https://www.google.com/complete/search?client=mobile-gws-lite&q={searchTerms}", + "qwant" => "https://api.qwant.com/v3/suggest/?q={searchTerms}&client=opensearch", + "yep" => "https://api.yep.com/ac/?query={searchTerms}", + "marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}", + "yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}", + "sc" => "", + "startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english", + "kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}", + "ghostery" => "https://ghosterysearch.com/suggest?q={searchTerms}" + ]; + + /* + Sanitize input + */ + if(!isset($_GET["s"])){ + + $this->do404("Missing search(s) parameter"); + } + + if(is_string($_GET["s"]) === false){ + + 
$this->do404("Invalid search(s) parameter"); + } + + if(strlen($_GET["s"]) > 500){ + + $this->do404("Search(s) exceeds the 500 char length"); + } + + /* + Get $scraper + */ + if(!isset($_GET["scraper"])){ + + if(isset($_COOKIE["scraper_ac"])){ + + $scraper = $_COOKIE["scraper_ac"]; + }else{ + + $scraper = "brave"; // default option + } + }else{ + + $scraper = $_GET["scraper"]; + } + + if($scraper == "disabled"){ + + // this shouldnt happen, but let's handle it anyways + $this->doempty(); + } + + // make sure it exists + if(!isset($this->scrapers[$scraper])){ + + $scraper = "brave"; // default option + } + + // return results + switch($scraper){ + + case "google": + case "yt": + // handle google cause they want to be a special snowflake :( + $js = $this->get($this->scrapers[$scraper], $_GET["s"]); + + preg_match( + '/\((\[.*\])\)/', + $js, + $js + ); + + if(!isset($js[1])){ + + $this->doempty(); + } + + $js = json_decode($js[1]); + $json = []; + + foreach($js[1] as $item){ + + $json[] = htmlspecialchars_decode(strip_tags($item[0])); + } + + echo json_encode( + [ + $_GET["s"], + $json + ], + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + break; + + case "sc": + // soundcloud + chdir("../../"); + include "scraper/sc.php"; + $sc = new sc(); + + $token = $sc->get_token("raw_ip::::"); + + $js = $this->get( + "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . $token . 
"&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en", + $_GET["s"] + ); + + $js = json_decode($js, true); + + if(!isset($js["collection"])){ + + $this->doempty(); + } + + $json = []; + foreach($js["collection"] as $item){ + + $json[] = $item["query"]; + } + + echo json_encode( + [ + $_GET["s"], + $json + ], + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + break; + + case "marginalia": + $json = $this->get($this->scrapers[$scraper], $_GET["s"]); + + $json = json_decode($json, true); + if($json === null){ + + + $this->doempty(); + } + + echo json_encode( + [ + $_GET["s"], + $json + ], + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + break; + + default: + // if it respects the openSearch protocol + $json = json_decode($this->get($this->scrapers[$scraper], $_GET["s"]), true); + + echo json_encode( + [ + $_GET["s"], + $json[1] // ensure it contains valid key 0 + ], + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + break; + } + } +
+	
+	/**
+	 * Fetch an autocomplete endpoint and return the raw response body.
+	 *
+	 * The "{searchTerms}" placeholder in $url is replaced with the
+	 * URL-encoded $query before the request is sent. If cURL cannot be
+	 * initialized or the transfer fails, a JSON error object is printed
+	 * through do404() and the script terminates, so a caller only ever
+	 * receives a string.
+	 *
+	 * @param string $url   Endpoint template containing "{searchTerms}"
+	 * @param string $query Raw (not yet URL-encoded) search string
+	 * @return string Response body exactly as returned by the remote server
+	 */
+	private function get($url, $query){
+		
+		$curlproc = curl_init();
+		
+		if($curlproc === false){
+			
+			// without a handle every curl_setopt() below would raise a TypeError
+			$this->do404("Curl error: failed to initialize curl");
+		}
+		
+		$url = str_replace("{searchTerms}", urlencode($query), $url);
+		
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+		
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+			"Accept: application/json, text/javascript, */*; q=0.01",
+			"Accept-Language: en-US,en;q=0.5",
+			"Accept-Encoding: gzip",
+			"DNT: 1",
+			"Connection: keep-alive",
+			"Sec-Fetch-Dest: empty",
+			"Sec-Fetch-Mode: cors",
+			"Sec-Fetch-Site: same-site"]
+		);
+		
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+		
+		$data = curl_exec($curlproc);
+		
+		// read the failure state before the handle is released, so the
+		// handle is closed on the error path as well as on success
+		$error = null;
+		if($data === false || curl_errno($curlproc)){
+			
+			$error = curl_error($curlproc);
+		}
+		
+		curl_close($curlproc);
+		
+		if($error !== null){
+			
+			// must be called on $this: a bare do404() is an undefined
+			// global function and turns every curl failure into a fatal error
+			$this->do404("Curl error: " . $error);
+		}
+		
+		return $data;
+	}
+	
+	/**
+	 * Print a JSON object of the form {"error": <message>} and stop the script.
+	 *
+	 * Despite the name, no HTTP status code is set here; only the
+	 * response body is written before die() is called.
+	 *
+	 * @param string $error Message placed under the "error" key
+	 * @return void Never returns; execution ends in die()
+	 */
+	private function do404($error){
+		
+		echo json_encode(
+			["error" => $error],
+			JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE
+		);
+		die();
+	}
+	
+	/**
+	 * Print an empty suggestion list for the current query and stop the script.
+	 *
+	 * The output is the two-element array [<query>, []], where <query>
+	 * is read directly from $_GET["s"].
+	 *
+	 * @return void Never returns; execution ends in die()
+	 */
+	private function doempty(){
+		
+		echo json_encode(
+			[
+				$_GET["s"],
+				[]
+			],
+			JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE
+		);
+		die();
+	}
+}
diff --git a/api/v1/images.php b/api/v1/images.php new file mode 100644 index 0000000..de2c5a9 --- /dev/null +++ b/api/v1/images.php @@ -0,0 +1,39 @@ + "The server administrator disabled the API!"]); + return; +} + +include "lib/frontend.php"; +$frontend = new frontend(); + +/* + Captcha +*/ +include "lib/bot_protection.php"; +$null = null; +new bot_protection($null, $null, $null, "images", false); + +[$scraper, $filters] = $frontend->getscraperfilters( + "images", + isset($_GET["scraper"]) ? 
$_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->image($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/api/v1/index.php b/api/v1/index.php new file mode 100644 index 0000000..dae86ab --- /dev/null +++ b/api/v1/index.php @@ -0,0 +1,10 @@ + "Unknown endpoint" + ] +); diff --git a/api/v1/music.php b/api/v1/music.php new file mode 100644 index 0000000..58985e3 --- /dev/null +++ b/api/v1/music.php @@ -0,0 +1,39 @@ + "The server administrator disabled the API!"]); + return; +} + +include "lib/frontend.php"; +$frontend = new frontend(); + +/* + Captcha +*/ +include "lib/bot_protection.php"; +$null = null; +new bot_protection($null, $null, $null, "music", false); + +[$scraper, $filters] = $frontend->getscraperfilters( + "music", + isset($_GET["scraper"]) ? $_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->music($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/api/v1/news.php b/api/v1/news.php new file mode 100644 index 0000000..ab38781 --- /dev/null +++ b/api/v1/news.php @@ -0,0 +1,39 @@ + "The server administrator disabled the API!"]); + return; +} + +include "lib/frontend.php"; +$frontend = new frontend(); + +/* + Captcha +*/ +include "lib/bot_protection.php"; +$null = null; +new bot_protection($null, $null, $null, "news", false); + +[$scraper, $filters] = $frontend->getscraperfilters( + "news", + isset($_GET["scraper"]) ? 
$_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->news($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/api/v1/videos.php b/api/v1/videos.php new file mode 100644 index 0000000..1d23780 --- /dev/null +++ b/api/v1/videos.php @@ -0,0 +1,39 @@ + "The server administrator disabled the API!"]); + return; +} + +include "lib/frontend.php"; +$frontend = new frontend(); + +/* + Captcha +*/ +include "lib/bot_protection.php"; +$null = null; +new bot_protection($null, $null, $null, "videos", false); + +[$scraper, $filters] = $frontend->getscraperfilters( + "videos", + isset($_GET["scraper"]) ? $_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->video($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/api/v1/web.php b/api/v1/web.php new file mode 100644 index 0000000..6a9c030 --- /dev/null +++ b/api/v1/web.php @@ -0,0 +1,52 @@ + "The server administrator disabled the API!"]); + return; +} + +include "lib/frontend.php"; +$frontend = new frontend(); + +/* + Captcha +*/ +include "lib/bot_protection.php"; +$null = null; +new bot_protection($null, $null, $null, "web", false); + +[$scraper, $filters] = $frontend->getscraperfilters( + "web", + isset($_GET["scraper"]) ? 
$_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +if( + isset($_GET["extendedsearch"]) && + $_GET["extendedsearch"] == "yes" +){ + + $get["extendedsearch"] = "yes"; +}else{ + + $get["extendedsearch"] = "no"; +} + +try{ + + echo + json_encode( + $scraper->web($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} -- cgit v1.2.3