aboutsummaryrefslogtreecommitdiffstats
path: root/scraper
diff options
context:
space:
mode:
Diffstat (limited to 'scraper')
-rw-r--r--scraper/baidu.php2229
-rw-r--r--scraper/brave.php1860
-rw-r--r--scraper/cara.php847
-rw-r--r--scraper/coccoc.php672
-rw-r--r--scraper/crowdview.php145
-rw-r--r--scraper/curlie.php309
-rw-r--r--scraper/ddg.php2246
-rw-r--r--scraper/facebook.php820
-rw-r--r--scraper/fivehpx.php262
-rw-r--r--scraper/flickr.php415
-rw-r--r--scraper/ftm.php161
-rw-r--r--scraper/ghostery.php320
-rw-r--r--scraper/google.php2989
-rw-r--r--scraper/google_cse.php1054
-rw-r--r--scraper/greppr.php435
-rw-r--r--scraper/imgur.php282
-rw-r--r--scraper/marginalia.php580
-rw-r--r--scraper/mojeek.php1194
-rw-r--r--scraper/mwmbl.php236
-rw-r--r--scraper/pinterest.php439
-rw-r--r--scraper/qwant.php993
-rw-r--r--scraper/sc.php512
-rw-r--r--scraper/sepiasearch.php541
-rw-r--r--scraper/solofield.php668
-rw-r--r--scraper/spotify.php726
-rw-r--r--scraper/startpage.php1584
-rw-r--r--scraper/vimeo.php754
-rw-r--r--scraper/vsco.php257
-rw-r--r--scraper/wiby.php246
-rw-r--r--scraper/yandex.php1248
-rw-r--r--scraper/yep.php741
-rw-r--r--scraper/yt.php1727
32 files changed, 27492 insertions, 0 deletions
diff --git a/scraper/baidu.php b/scraper/baidu.php
new file mode 100644
index 0000000..efb14ca
--- /dev/null
+++ b/scraper/baidu.php
@@ -0,0 +1,2229 @@
+<?php
+
+class baidu{
+
+ /**
+  * Load the backend and HTML-parsing helpers and reset the per-request state.
+  */
+ public function __construct(){
+
+     // bookkeeping used while redirect links are resolved through curl_multi
+     $this->proc = null;
+     $this->handles = [];
+     $this->handle_category = null;
+     $this->handle_increment = 0;
+     $this->sublink_increment = 0;
+
+     // cookie string replayed by get(); null until one is known
+     $this->cookie = null;
+
+     include "lib/backend.php";
+     $this->backend = new backend("baidu");
+
+     include "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters offered for a result page.
+  *
+  * Each filter maps to a "display" label and an "option" entry, which is
+  * either the string "_DATE" or a value => label list. The trailing
+  * comments show the Baidu query parameter each value maps to.
+  *
+  * @param string $page One of "web", "images", "videos" or "news"
+  * @return array|null Filter description, or null for an unknown page
+  */
+ public function getfilters($page){
+
+     $filters = null;
+
+     switch($page){
+
+         case "web":
+             $filters = [
+                 "newer" => [
+                     "display" => "Newer than",
+                     "option" => "_DATE"
+                 ],
+                 "older" => [
+                     "display" => "Older than",
+                     "option" => "_DATE"
+                 ]
+             ];
+             break;
+
+         case "images":
+             $filters = [
+                 "sort" => [
+                     "display" => "Sort",
+                     "option" => [
+                         "relevance" => "Relevance", // no param
+                         "latest" => "Latest", // &latest=1
+                         "hot" => "Hot" // &hot=1
+                     ]
+                 ],
+                 "size" => [
+                     "display" => "Size",
+                     "option" => [
+                         "any" => "Any size",
+                         "7" => "Extra large (1080px+)", // &z=7
+                         "6" => "Large (600px~1080px)", // &z=6
+                         "5" => "Medium (300px~600px)", // &z=5
+                         "4" => "Small (1px~300px)" // &z=4
+                     ]
+                 ],
+                 "ratio" => [
+                     "display" => "Ratio",
+                     "option" => [
+                         "any" => "Any ratio",
+                         "1" => "Tall vertical", // &imgratio=1
+                         "2" => "Vertical", // &imgratio=2
+                         "3" => "Square", // &imgratio=3
+                         "4" => "Horizontal", // &imgratio=4
+                         "5" => "Wide horizontal" // &imgratio=5
+                     ]
+                 ],
+                 "format" => [
+                     "display" => "Format",
+                     "option" => [
+                         "any" => "Any format",
+                         "3" => "JPG", // &imgformat=3
+                         "5" => "JPEG", // &imgformat=5
+                         "4" => "PNG", // &imgformat=4
+                         "2" => "BMP", // &imgformat=2
+                         "6" => "GIF (Animated)" // &imgformat=6
+                     ]
+                 ],
+                 "color" => [
+                     "display" => "Color",
+                     "option" => [
+                         "any" => "Any color",
+                         "1024" => "White", // &ic=1024
+                         "2048" => "Black & White",
+                         "512" => "Black",
+                         "64" => "Magenta",
+                         "16" => "Blue",
+                         "1" => "Red",
+                         "2" => "Yellow",
+                         "32" => "Purple",
+                         "4" => "Green",
+                         "8" => "Teal",
+                         "256" => "Orange",
+                         "128" => "Brown"
+                     ]
+                 ],
+                 "type" => [
+                     "display" => "Type",
+                     "option" => [
+                         "any" => "Any type",
+                         "hd" => "HD", // &hd=1
+                         "isImgSet" => "Photo album", // &isImgSet=1
+                         "copyright" => "Copyright" // &copyright=1
+                     ]
+                 ]
+             ];
+             break;
+
+         case "videos":
+             // no filters for video search
+             $filters = [];
+             break;
+
+         case "news":
+             $filters = [
+                 "category" => [
+                     "display" => "Category",
+                     "option" => [
+                         "any" => "All news",
+                         "media" => "Media websites", // &medium=1
+                         "baijiahao" => "Baidu Baijiahao" // &medium=2
+                     ]
+                 ]
+             ];
+             break;
+     }
+
+     return $filters;
+ }
+
+ /**
+  * Perform a GET request and return the response body.
+  *
+  * Cookies received through Set-Cookie headers are merged into
+  * $this->cookie (a "name=value; name=value" string, or null when no
+  * cookie is known) so that later requests replay them.
+  *
+  * @param mixed $proxy Proxy descriptor handed to backend::assign_proxy()
+  * @param string $url Endpoint URL without query string
+  * @param array $get Query parameters, appended with http_build_query()
+  * @param string|false $referer When set, XHR-style headers are sent together with this Referer
+  * @return string|bool Whatever curl_exec() returned for the transfer
+  * @throws Exception carrying the cURL error message when the transfer fails
+  */
+ private function get($proxy, $url, $get = [], $referer = false){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     // cookies set by this response, keyed by cookie name
+     $cookies_tmp = [];
+     curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+         $length = strlen($header);
+
+         $header = explode(":", $header, 2);
+
+         if(
+             count($header) === 2 &&
+             trim(strtolower($header[0])) == "set-cookie"
+         ){
+
+             $cookie_tmp = explode("=", trim($header[1]), 2);
+
+             // skip malformed cookies that carry no "=" separator
+             if(count($cookie_tmp) === 2){
+
+                 $cookies_tmp[trim($cookie_tmp[0])] =
+                     explode(";", $cookie_tmp[1], 2)[0];
+             }
+         }
+
+         // cURL aborts the transfer unless the full header length is returned
+         return $length;
+     });
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     //
+     // Build the header list once; the order mirrors the two browser
+     // profiles (page navigation vs. XHR) and is kept stable on purpose
+     //
+     $is_xhr = $referer !== false;
+
+     // an empty string must not produce an empty "Cookie:" header
+     $has_cookie =
+         is_string($this->cookie) &&
+         $this->cookie !== "";
+
+     $headers = ["User-Agent: " . config::USER_AGENT];
+
+     $headers[] =
+         $is_xhr ?
+         "Accept: application/json, text/plain, */*" :
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
+
+     $headers[] = "Accept-Language: en-US,en;q=0.5";
+     $headers[] = "Accept-Encoding: gzip, deflate, br, zstd";
+
+     if($is_xhr){
+
+         $headers[] = "Referer: {$referer}";
+     }
+
+     $headers[] = "DNT: 1";
+     $headers[] = "Sec-GPC: 1";
+     $headers[] = "Connection: keep-alive";
+
+     if($has_cookie){
+
+         $headers[] = "Cookie: {$this->cookie}";
+     }
+
+     $headers[] = "Upgrade-Insecure-Requests: 1";
+
+     if($is_xhr){
+
+         $headers[] = "Sec-Fetch-Dest: empty";
+         $headers[] = "Sec-Fetch-Mode: cors";
+         $headers[] = "Sec-Fetch-Site: same-origin";
+     }else{
+
+         $headers[] = "Sec-Fetch-Dest: document";
+         $headers[] = "Sec-Fetch-Mode: navigate";
+         $headers[] = "Sec-Fetch-Site: cross-site";
+         $headers[] = "Priority: u=0, i";
+     }
+
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message before releasing the handle
+         $error = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($error);
+     }
+
+     curl_close($curlproc);
+
+     //
+     // Store cookies: merge by name so a cookie that is set again
+     // replaces its previous value instead of being sent twice
+     //
+     $jar = [];
+
+     if(is_string($this->cookie)){
+
+         foreach(explode(";", $this->cookie) as $pair){
+
+             $pair = trim($pair);
+
+             if($pair === ""){
+
+                 continue;
+             }
+
+             $jar[explode("=", $pair, 2)[0]] = $pair;
+         }
+     }
+
+     foreach($cookies_tmp as $cookie_name => $cookie_value){
+
+         $jar[$cookie_name] = $cookie_name . "=" . $cookie_value;
+     }
+
+     // keep null (not "") when there is nothing to send
+     $this->cookie =
+         $jar === [] ?
+         null :
+         implode("; ", $jar);
+
+     return $data;
+ }
+
+ /**
+  * Queue a header-only request for a Baidu redirect link on the shared
+  * curl_multi handle ($this->proc). URLs that do not match the
+  * baidu.com/link? pattern are ignored.
+  *
+  * The handle is filed under the current category / item / sublink
+  * counters so the caller can map the response back to its result.
+  *
+  * @param mixed $proxy Proxy descriptor handed to backend::assign_proxy()
+  * @param string $url Candidate URL
+  */
+ private function redirect_add_url($proxy, $url){
+
+     $is_redirect =
+         preg_match(
+             '/^https?:\/\/(?:www\.)?baidu\.com\/link\?/',
+             $url
+         );
+
+     if($is_redirect === 0){
+
+         // not a baidu redirect
+         return;
+     }
+
+     $headers = [
+         "User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip, deflate, br, zstd",
+         "DNT: 1",
+         "Sec-GPC: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1",
+         "Priority: u=0, i"
+     ];
+
+     // applied one by one, in this order
+     $options = [
+         CURLOPT_URL => $url,
+         CURLOPT_ENCODING => "", // default encoding
+         CURLOPT_HTTPHEADER => $headers,
+         CURLOPT_RETURNTRANSFER => true,
+         CURLOPT_SSL_VERIFYHOST => 2,
+         CURLOPT_SSL_VERIFYPEER => true,
+         CURLOPT_CONNECTTIMEOUT => 30,
+         CURLOPT_TIMEOUT => 30,
+         // only the response headers are of interest
+         CURLOPT_HEADER => true,
+         CURLOPT_NOBODY => true
+     ];
+
+     $request = curl_init();
+
+     foreach($options as $option => $value){
+
+         curl_setopt($request, $option, $value);
+     }
+
+     $this->backend->assign_proxy($request, $proxy);
+
+     curl_multi_add_handle($this->proc, $request);
+     $this->handles[$this->handle_category][$this->handle_increment][$this->sublink_increment] = $request;
+ }
+
+ /**
+  * Replace Baidu redirect links in $collection with their real targets.
+  *
+  * Every item URL (and sublink URL) of the given categories is offered to
+  * redirect_add_url(), which queues a header-only request for the ones
+  * that are Baidu redirects. The requests run in parallel; whenever a
+  * response carries a Location header, the stored URL is overwritten.
+  *
+  * @param mixed $proxy Proxy descriptor forwarded to redirect_add_url()
+  * @param array $collection Result set, modified in place
+  * @param array $categories Keys of $collection to process
+  */
+ private function resolve_urls($proxy, &$collection, $categories){
+
+     // start clean so handles from an earlier call are never revisited
+     $this->handles = [];
+     $this->proc = curl_multi_init();
+
+     foreach($categories as $category){
+
+         if(!isset($collection[$category])){
+
+             continue;
+         }
+
+         $this->handle_category = $category;
+         $this->handle_increment = 0;
+
+         foreach($collection[$category] as $item){
+
+             // slot 0 is the item's own URL, slots 1..n are its sublinks
+             $this->sublink_increment = 0;
+             $this->redirect_add_url($proxy, $item["url"]);
+
+             if(isset($item["sublink"])){
+
+                 foreach($item["sublink"] as $sublink){
+
+                     $this->sublink_increment++;
+                     $this->redirect_add_url($proxy, $sublink["url"]);
+                 }
+             }
+
+             $this->handle_increment++;
+         }
+     }
+
+     do{
+         $status = curl_multi_exec($this->proc, $active);
+
+         if(
+             $active &&
+             $status === CURLM_OK
+         ){
+
+             // block until a transfer has activity instead of spinning;
+             // -1 means select failed, so back off briefly ourselves
+             if(curl_multi_select($this->proc, 1.0) === -1){
+
+                 usleep(1000);
+             }
+         }
+
+     }while($active && $status === CURLM_OK);
+
+     //
+     // if we reach this, we're done downloading garbage
+     //
+
+     foreach($this->handles as $category => $items){
+
+         foreach($items as $index => $slots){
+
+             foreach($slots as $sublinkindex => $handle){
+
+                 preg_match(
+                     '/location: ?(.*)$/im',
+                     (string)curl_multi_getcontent($handle),
+                     $location
+                 );
+
+                 if(isset($location[1])){
+
+                     if($sublinkindex === 0){
+
+                         $collection[$category][$index]["url"] = trim($location[1]);
+                     }else{
+
+                         $collection[$category][$index]["sublink"][$sublinkindex - 1]["url"] = trim($location[1]);
+                     }
+                 }
+
+                 curl_multi_remove_handle($this->proc, $handle);
+                 curl_close($handle);
+             }
+         }
+     }
+
+     curl_multi_close($this->proc);
+
+     // drop references to the released handles
+     $this->handles = [];
+     $this->proc = null;
+ }
+
+ /**
+  * Swap the scraped image carousel for the richer set exposed by Baidu's
+  * image viewer page.
+  *
+  * The viewer URL of the second image is fetched (for some reason this
+  * one does not trigger the captcha) and the JSON embedded in its
+  * "image-detail-data" script replaces $data["image"]. On any failure
+  * the already scraped images are left untouched.
+  *
+  * @param mixed $proxy Proxy descriptor forwarded to get()
+  * @param array $data Result set, modified in place
+  */
+ private function resolve_images($proxy, &$data){
+
+     $viewer_url = $data["image"][1]["url"] ?? null;
+
+     if(
+         $viewer_url === null ||
+         preg_match(
+             '/^https:\/\/image\.baidu\.com\/search\/detail/',
+             $viewer_url
+         ) === 0
+     ){
+
+         // we have an already resolved image link, do nothing
+         return;
+     }
+
+     try{
+
+         $html = $this->get($proxy, $viewer_url, []);
+
+     }catch(Exception $error){
+
+         // fallback to the limited dataset we have
+         return;
+     }
+
+     $this->fuckhtml->load($html);
+
+     $script =
+         $this->fuckhtml
+         ->getElementById(
+             "image-detail-data",
+             "script"
+         );
+
+     if(!$script){
+
+         return;
+     }
+
+     $json = json_decode($script["innerHTML"], true);
+
+     if(
+         !isset($json["data"]["images"]) ||
+         count($json["data"]["images"]) === 0
+     ){
+
+         // do nothing
+         return;
+     }
+
+     //
+     // Discard all previously scraped images and use data
+     // from the newly downloaded image carousel
+     // the imageset !!should!! be the same
+     //
+     $resolved = [];
+
+     foreach($json["data"]["images"] as $entry){
+
+         // thumbnail dimensions travel in the thumbnail URL's query string
+         parse_str(parse_url($entry["thumburl"], PHP_URL_QUERY), $thumb_size);
+
+         $full = [
+             "url" => $entry["objurl"],
+             "width" => (int)$entry["width"],
+             "height" => (int)$entry["height"]
+         ];
+
+         $thumbnail = [
+             "url" => $entry["thumburl"],
+             "width" => (int)$thumb_size["w"],
+             "height" => (int)$thumb_size["h"]
+         ];
+
+         $resolved[] = [
+             "title" => $this->fuckhtml->getTextContent($entry["titleShow"]),
+             "source" => [$full, $thumbnail],
+             "url" => $entry["fromUrl"]
+         ];
+     }
+
+     $data["image"] = $resolved;
+ }
+
+ /**
+  * Run a Baidu web search, or continue one from a next-page token.
+  *
+  * @param array $get Request parameters: "s" (query), "npt" (next-page token), "older" and "newer" (date filters, false when unset)
+  * @return array Parsed result set as produced by parse_search()
+  * @throws Exception "Failed to fetch search page" when the request fails
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // continuation: restore the stored request and cookie, advance the offset
+         [$stored, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $stored = json_decode($stored, true);
+         $this->cookie = $stored["cookie"];
+
+         $npt_data = $stored["req"];
+         $npt_data["pn"] = $npt_data["pn"] + 20;
+
+         $request = $npt_data;
+
+     }else{
+
+         //
+         // Get authentication token
+         //
+         $proxy = $this->backend->get_ip();
+
+         // running this will give us shit in $this->cookie
+         // @TODO probably not needed? I get blocked anyways ffs
+         //$this->get($proxy, "https://www.baidu.com", []);
+
+         $request = [
+             "wd" => $get["s"],
+             "rn" => 20
+         ];
+
+         // &gpc=stf%3D0%2C1752638400|stftype%3D2
+         $has_date_filter =
+             $get["older"] !== false ||
+             $get["newer"] !== false;
+
+         if($has_date_filter){
+
+             if($get["older"] === false){
+
+                 $get["older"] = 0;
+             }
+
+             // NOTE(review): when "newer" is false it interpolates as an empty string
+             $request["gpc"] = "stf={$get["older"]},{$get["newer"]}|stftype=2";
+         }
+
+         // the first page is requested without "pn"; the offset only
+         // enters the data kept for the next-page token
+         $npt_data = $request;
+         $npt_data["pn"] = 0;
+     }
+
+     try{
+
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://www.baidu.com/s",
+                 $request
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     return $this->parse_search($proxy, "web", $npt_data, $html);
+ }
+
+ private function parse_search($proxy, $pagetype, $npt_data, $html){
+ /*
+  * Turn a Baidu result page into the scraper's result array.
+  *
+  * Steps, in order:
+  *  - strip newlines from $html and load it into the HTML parser
+  *  - call detect_ass() (defined outside this excerpt; presumably the
+  *    captcha/block check -- confirm)
+  *  - store a next-page token when an <a class="n"> link is present
+  *  - collect related searches from the "rs_new" container
+  *  - walk every <div> whose id is purely numeric:
+  *      "cosc-card" cards   -> short videos and image carousel entries
+  *      class "result"      -> regular web result (needs the "mu" attribute)
+  *      class "result-op"   -> video carousel, or a special/rich web result
+  *  - resolve redirect URLs for "web" and "video", then try to upgrade
+  *    the image list through resolve_images()
+  *
+  * $proxy    proxy descriptor reused for follow-up requests and the token
+  * $pagetype page name the next-page token is stored under
+  * $npt_data request parameters saved inside the next-page token
+  * $html     raw page body
+  *
+  * Returns the $out array built below (status, spelling, npt, answer,
+  * web, image, video, news, related).
+  */
+ // @HACK
+ // remove newlines from the html, cause it fucks with fuckhtml
+ $html = str_replace(["\n", "\r"], "", $html);
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ $this->fuckhtml->load($html);
+
+ $this->detect_ass(); // defined outside this excerpt
+
+ $datafields =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "id",
+ "div"
+ );
+
+ //
+ // Get next page
+ //
+ $npt =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "n",
+ "a"
+ );
+
+ if(count($npt) !== 0){
+
+ $out["npt"] =
+ $this->backend->store(
+ json_encode([
+ "req" => $npt_data,
+ "cookie" => $this->cookie
+ ]),
+ $pagetype,
+ $proxy
+ );
+ }
+
+ //
+ // Get related searches
+ //
+ $related_container =
+ $this->fuckhtml
+ ->getElementById(
+ "rs_new",
+ $datafields
+ );
+
+ if($related_container){
+
+ $this->fuckhtml->load($related_container);
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-color-link",
+ "a"
+ );
+
+ foreach($as as $a){
+
+ $text =
+ explode(
+ ">",
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ ),
+ 2
+ );
+
+ $out["related"][] = $text[count($text) - 1]; // keep the part after the first ">" when there is one
+ }
+ }
+
+ foreach($datafields as $datafield){
+
+ if(
+ !isset($datafield["attributes"]["id"]) ||
+ preg_match(
+ '/^[0-9]+$/',
+ $datafield["attributes"]["id"]
+ ) === 0
+ ){
+
+ // not a search result
+ continue;
+ }
+
+ $this->fuckhtml->load($datafield);
+ $div =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "div"
+ );
+
+ //
+ // Don't parse as a search result if it's a card
+ //
+ $card =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cosc-card",
+ $div
+ );
+
+ if(count($card) !== 0){
+
+ //
+ // Parse chinese youtube shorts
+ //
+ $ytshorts_probe =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "tts-b-item",
+ $div
+ );
+
+ if(count($ytshorts_probe) !== 0){
+
+ $videos =
+ $this->fuckhtml
+ ->getElementsByAttributeValue(
+ "data-show",
+ "list",
+ $div
+ );
+
+ foreach($videos as $video){
+
+ $this->fuckhtml->load($video);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cosc-title-slot",
+ "span"
+ );
+
+ if(count($title) === 0){
+
+ continue;
+ }
+
+ $url =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($url) === 0){
+
+ continue;
+ }
+
+ $image =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cos-image-body",
+ "img"
+ );
+
+ if(count($image) === 0){
+
+ $image = [
+ "ratio" => null,
+ "url" => null
+ ];
+ }else{
+
+ $image = [
+ "ratio" => "1:1",
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $image[0]["attributes"]["src"]
+ )
+ ];
+ }
+
+ // get duration
+ $divs =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "class",
+ "div"
+ );
+
+ $duration = null;
+ foreach($divs as $probe){
+
+ if(strpos($probe["attributes"]["class"], "tag-bottom-right") !== false){
+
+ $duration =
+ $this->hms2int(
+ $this->fuckhtml
+ ->getTextContent(
+ $probe
+ )
+ );
+ break;
+ }
+ }
+
+ $out["video"][] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ ),
+ "description" => null,
+ "date" => null,
+ "duration" => $duration,
+ "views" => null,
+ "thumb" => $image,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $url[0]["attributes"]["href"]
+ )
+ ];
+ }
+ }
+
+ //
+ // Parse image carousel
+ //
+ $is_image_carousel = false;
+ foreach($div as $d){
+
+ if(
+ isset($d["attributes"]["class"]) &&
+ strpos($d["attributes"]["class"], "image-container") !== false
+ ){
+
+ $is_image_carousel = true;
+ break;
+ }
+ }
+
+ if($is_image_carousel){
+
+ preg_match(
+ '/<!--s-data:([\S\s]*)-->/U',
+ $datafield["innerHTML"],
+ $matches
+ );
+
+ if(isset($matches[1])){
+
+ // weird behavior with the smaller image carousel where --cos* CSS variables are escaped wrong
+ $json =
+ $this->fuckhtml
+ ->parseJsObject(
+ str_replace(
+ "-\-",
+ "--",
+ $matches[1]
+ )
+ );
+
+ if(
+ $json !== null &&
+ isset($json["imageList"][0]["images"])
+ ){
+
+ // parse image carousel
+ foreach($json["imageList"][0]["images"] as $image){
+
+ parse_str(parse_url($image["thumburl"], PHP_URL_QUERY), $thumb_size); // thumbnail size is read from the w/h query parameters
+
+ $out["image"][] = [
+ "title" => "image",
+ "source" => [
+ [
+ "url" => $image["objurl"],
+ "width" => (int)$image["width"],
+ "height" => (int)$image["height"]
+ ],
+ [ // thumbnail
+ "url" => $image["thumburl"],
+ "width" => (int)$thumb_size["w"],
+ "height" => (int)$thumb_size["h"]
+ ]
+ ],
+ "url" => $image["jumpUrl"]
+ ];
+ }
+ }
+ }
+ }
+ continue;
+ }
+
+ if(!isset($datafield["attributes"]["mu"])){
+
+ // dont scrape if we dont have the direct link
+ continue;
+ }
+
+ // class:FYB_RD -> News garbage, IGNORE
+
+ $result =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "result",
+ [$datafield]
+ );
+
+ if(count($result) !== 0){
+
+ //
+ // Parse normal search result
+ //
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "sc-link",
+ "a"
+ );
+
+ if(count($title) === 0){
+
+ // should not happen
+ continue;
+ }
+
+ $title =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ );
+
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-color",
+ $div
+ );
+
+ if(count($description) !== 0){
+
+ $this->fuckhtml->load($description[0]);
+
+ $description =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "class",
+ "span"
+ );
+
+ $found_desc = false;
+ foreach($description as $desc){
+
+ if(stripos($desc["attributes"]["class"], "summary-text") !== false){
+
+ $found_desc = true;
+ $description =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $desc
+ )
+ );
+ break;
+ }
+ }
+
+ if($found_desc === false){
+
+ $description = null;
+ }
+
+ $this->fuckhtml->load($datafield); // restore the parser to the whole result after narrowing it above
+ }else{
+
+ $description = null;
+ }
+
+ // parse date
+ $date_probe =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cos-color-text-minor",
+ "span"
+ );
+
+ if(count($date_probe) !== 0){
+
+ $date =
+ $this->parse_time(
+ $this->fuckhtml
+ ->getTextContent(
+ $date_probe[0]
+ )
+ );
+ }else{
+
+ $date = null;
+ }
+
+ // parse image
+ $img =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "img"
+ );
+
+ if(count($img) !== 0){
+
+ $image = [
+ "ratio" => "16:9",
+ "url" =>
+ $this->unfuckthumb(
+ $this->fuckhtml
+ ->getTextContent(
+ $img[0]["attributes"]["src"]
+ )
+ )
+ ];
+ }else{
+
+ $image = [
+ "ratio" => null,
+ "url" => null
+ ];
+ }
+
+ // get page type
+ $pagetype_probe =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "b"
+ );
+
+ $pagetype = "web"; // NOTE(review): overwrites the $pagetype parameter, which was last read when storing the next-page token above
+ foreach($pagetype_probe as $probe){
+
+ $pagetype =
+ strtolower(
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $probe
+ ),
+ " 【】"
+ )
+ );
+ }
+
+ // get extra links
+ $sublinks = [];
+
+ foreach($div as $d){
+
+ if(
+ isset($d["attributes"]["class"]) &&
+ strpos($d["attributes"]["class"], "exta-link") !== false
+ ){
+
+ $this->fuckhtml->load($d);
+
+ $links =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cos-space-mt-xs",
+ "div"
+ );
+
+ foreach($links as $link){
+
+ $this->fuckhtml->load($link);
+ $s_title =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "h3"
+ );
+
+ if(count($s_title) === 0){
+
+ // should not happen
+ continue;
+ }
+
+ $data2 =
+ json_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $s_title[0]["attributes"]["data-click"]
+ ),
+ true
+ );
+
+ if(!isset($data2["clk_info"])){
+
+ // wtf
+ continue;
+ }
+
+ $data2 =
+ json_decode(
+ $data2["clk_info"],
+ true
+ ); // clk_info is itself a JSON string, hence the second decode
+
+ if(!isset($data2["url"])){
+
+ // no link, fuck off
+ continue;
+ }
+
+ $url =
+ rawurldecode(
+ $data2["url"]
+ );
+
+ $data =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "p"
+ );
+
+ $s_description = null;
+
+ if(count($data) !== 0){
+
+ $data =
+ json_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $data[0]["attributes"]["sub-show-log"]
+ ),
+ true
+ );
+
+ if(isset($data["ext"]["content"])){
+
+ $s_description = $data["ext"]["content"];
+ }
+ }
+
+ $sublinks[] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $s_title[0]
+ ),
+ "description" => $s_description,
+ "url" => $url,
+ "date" => null
+ ];
+ }
+ break;
+ }
+ }
+
+ $out["web"][] = [
+ "title" => $title,
+ "description" => $description,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $datafield["attributes"]["mu"]
+ ),
+ "date" => $date,
+ "type" => $pagetype,
+ "thumb" => $image,
+ "sublink" => $sublinks,
+ "table" => []
+ ];
+
+ continue;
+ }
+
+ // parse special result
+ $result =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "result-op",
+ [$datafield]
+ );
+
+ if(count($result) !== 0){
+
+ //
+ // Parse video carousel
+ //
+ if(
+ isset($datafield["attributes"]["tpl"]) &&
+ stripos($datafield["attributes"]["tpl"], "video") !== false
+ ){
+
+ preg_match(
+ '/<!--s-data:([\S\s]*)-->/U',
+ $datafield["innerHTML"],
+ $matches
+ );
+
+ if(isset($matches[1])){
+
+ $json =
+ json_decode(
+ $matches[1],
+ true
+ );
+
+ if($json !== null){
+
+ foreach($json["videoList"] as $video){ // NOTE(review): "videoList" is read without an isset() check
+
+ $out["video"][] = [
+ "title" => $video["title"],
+ "description" =>
+ $this->titledots(
+ $video["desc"]
+ ),
+ "date" =>
+ $this->parse_time(
+ $video["pubTime"]
+ ),
+ "duration" =>
+ $this->hms2int(
+ $video["duration"]
+ ),
+ "views" =>
+ $this->parse_viewcount(
+ $video["playCount"]
+ ),
+ "thumb" => [
+ "ratio" => "16:9",
+ "url" => $video["poster"]
+ ],
+ "url" => $video["bindProps"]["link"]
+ ];
+ }
+ }
+ }
+ continue;
+ }
+
+ //
+ // Special result div (wiki entries, rich divs)
+ //
+ $title =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "h3"
+ );
+
+ if(count($title) === 0){
+
+ // should have a title somewhere
+ continue;
+ }
+
+ $title =
+ explode(
+ ">",
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ ),
+ 2
+ );
+
+ if(count($title) === 2){
+
+ $title = $title[1];
+ }else{
+
+ $title = $title[0];
+ }
+
+ // probe for wiki-like entry
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "sc-paragraph",
+ "p"
+ );
+
+ if(count($description) === 0){
+
+ // try and get grey description
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-color-gray2",
+ "p"
+ );
+
+ if(count($description) === 0){
+
+ // probe for special social media description
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-color-text",
+ "div"
+ );
+
+ if(isset($description[0]["attributes"]["aria-label"])){
+
+ $description =
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ ["attributes"]
+ ["aria-label"]
+ );
+ }else{
+
+ // check for news tab description
+ $span =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-font-normal",
+ "span"
+ );
+
+ $description = null;
+
+ foreach($span as $s){
+
+ if(isset($s["attributes"]["aria-label"])){
+
+ $description =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $span[count($span) - 1] // NOTE(review): takes the last span, not the matching $s -- confirm this is intended
+ )
+ );
+
+ break;
+ }
+ }
+ }
+ }else{
+
+ $description =
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ );
+ }
+
+ }else{
+
+ preg_match(
+ '/<!--s-text-->([\S\s]*)<!--\/s-text-->/U',
+ $description[count($description) - 1]["innerHTML"],
+ $matches
+ );
+
+ if(isset($matches[1])){
+
+ $description =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $matches[1]
+ )
+ );
+ }else{
+
+ $description = null;
+ }
+ }
+
+ // get thumbnail
+ $thumb =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "img"
+ );
+
+ if(count($thumb) !== 0){
+
+ $thumb = [
+ "ratio" => "1:1",
+ "url" =>
+ $this->unfuckthumb(
+ $this->fuckhtml
+ ->getTextContent(
+ $thumb[0]["attributes"]["src"]
+ )
+ )
+ ];
+ }else{
+
+ $thumb = [
+ "ratio" => null,
+ "url" => null
+ ];
+ }
+
+ // get sublinks
+ preg_match(
+ '/<!--s-data:([\S\s]*)-->/U',
+ $datafield["innerHTML"],
+ $matches
+ );
+
+ $sublinks = [];
+
+ if(isset($matches[1])){
+
+ $json =
+ json_decode(
+ $matches[1],
+ true
+ );
+
+ if($json !== null){
+
+ if(isset($json["buttons"])){
+
+ foreach($json["buttons"] as $button){
+
+ $sublinks[] = [
+ "title" => $button["text"],
+ "description" => null,
+ "date" => null,
+ "url" => $button["url"]
+ ];
+ }
+ }elseif(isset($json["mthreadList"])){
+
+ foreach($json["mthreadList"] as $thread){
+
+ $sublinks[] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $thread["title"]
+ ),
+ "description" => null,
+ "date" => null,
+ "url" => $thread["ttsInfo"]["titleUrl"]
+ ];
+ }
+ }
+ }
+ }
+
+ // get URL
+ // handle http://fakeurl.baidu.com bullshit
+ $url =
+ $this->fuckhtml
+ ->getTextContent(
+ $datafield["attributes"]["mu"]
+ );
+
+ if(
+ preg_match(
+ '/^https?:\/\/(?:fakeurl|nourl)(?:\.ubs)?\.baidu\.com/',
+ $url
+ )
+ ){
+
+ // we got some bullshit, get jumpUrl instead
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($as) !== 0){
+
+ $url =
+ $this->fuckhtml
+ ->getTextContent(
+ $as[0]["attributes"]["href"]
+ );
+ }
+ }
+
+ // get xueshu sublinks
+ // get list
+ $xueshu_list =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "op-xueshu-links-d20-list",
+ $div
+ );
+
+ if(count($xueshu_list) !== 0){
+
+ $this->fuckhtml->load($xueshu_list[0]);
+
+ $rows =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-row",
+ "div"
+ );
+
+ // remove "read more" bullshit
+ foreach($rows as $row){
+
+ if(strpos($row["attributes"]["class"], "op-xueshu-links-more") !== false){
+
+ $xueshu_list[0]["innerHTML"] =
+ str_replace(
+ $row["outerHTML"],
+ "",
+ $xueshu_list[0]["innerHTML"]
+ );
+ }
+ }
+
+ $this->fuckhtml->load($xueshu_list[0]);
+
+ foreach($rows as $row){
+
+ $this->fuckhtml->load($row);
+
+ if(strpos($row["attributes"]["class"], "op-xueshu-links-more") !== false){
+
+ continue;
+ }
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ foreach($as as $a){
+
+ $sublinks[] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ )
+ ),
+ "description" => null,
+ "date" => null,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ )
+ ];
+ }
+ }
+ }
+
+ $out["web"][] = [
+ "title" => $title,
+ "description" => $description,
+ "url" => $url,
+ "date" => null,
+ "type" => "web",
+ "thumb" => $thumb,
+ "sublink" => $sublinks,
+ "table" => []
+ ];
+ continue;
+ }
+ }
+
+ //
+ // Remove tracking URLs and fetch additional image resources
+ //
+ $this->resolve_urls($proxy, $out, ["web", "video"]);
+ $this->resolve_images($proxy, $out);
+
+ return $out;
+ }
+
+ /**
+  * Search Baidu Images through the acjson endpoint.
+  *
+  * @param array $get Request parameters: "s" (query), "npt" (next-page token) and the filters "sort", "size", "ratio", "format", "color", "type"
+  * @return array Result set with the keys "status", "npt" and "image"
+  * @throws Exception when the request fails, the body is not valid JSON, Baidu reports an error, or no image list is returned
+  */
+ public function image($get){
+
+     // https://image.baidu.com/search/acjson?word=asmr&rn=60&pn=0&newReq=1
+
+     if($get["npt"]){
+
+         [$params, $proxy] = $this->backend->get($get["npt"], "images");
+         $params = json_decode($params, true);
+
+         $params["pn"] = $params["pn"] + 60;
+
+     }else{
+
+         $proxy = $this->backend->get_ip();
+         $params = [
+             "word" => $get["s"],
+             "rn" => 60, // results/page
+             "pn" => 0, // item increment (0 * 60)
+             "newReq" => 1 // otherwise json is fucked up
+         ];
+
+         switch($get["sort"]){
+
+             case "latest": $params["latest"] = 1; break;
+             case "hot": $params["hot"] = 1; break;
+         }
+
+         if($get["size"] != "any"){
+
+             $params["z"] = $get["size"];
+         }
+
+         if($get["ratio"] != "any"){
+
+             $params["imgratio"] = $get["ratio"];
+         }
+
+         if($get["format"] != "any"){
+
+             $params["imgformat"] = $get["format"];
+         }
+
+         if($get["color"] != "any"){
+
+             $params["ic"] = $get["color"];
+         }
+
+         switch($get["type"]){
+
+             case "hd": $params["hd"] = 1; break;
+             case "isImgSet": $params["isImgSet"] = 1; break;
+             case "copyright": $params["copyright"] = 1; break;
+         }
+     }
+
+     try{
+
+         $response =
+             $this->get(
+                 $proxy,
+                 "https://image.baidu.com/search/acjson",
+                 $params,
+                 "https://image.baidu.com/search/index?tn=baiduimage&word=" . urlencode($get["s"])
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $json = json_decode($response, true);
+
+     if($json === null){
+
+         // detect captcha first: the check needs the raw body that
+         // failed to decode, not the null json_decode() handed back
+         $this->fuckhtml->load($response);
+         $this->detect_ass();
+
+         // fallback to json decode error
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(
+         isset($json["message"]) &&
+         $json["message"] != "success"
+     ){
+
+         throw new Exception("Baidu returned an error: {$json["message"]}");
+     }
+
+     if(!isset($json["data"]["images"])){
+
+         throw new Exception("Baidu did not return an image object");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     foreach($json["data"]["images"] as $image){
+
+         // thumbnail dimensions travel in the thumbnail URL's query string
+         parse_str(parse_url($image["thumburl"], PHP_URL_QUERY), $thumb_size);
+
+         $out["image"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $image["titleShow"]
+                 ),
+             "source" => [
+                 [
+                     "url" => $image["objurl"],
+                     "width" => (int)$image["width"],
+                     "height" => (int)$image["height"]
+                 ],
+                 [ // thumbnail
+                     "url" => $image["thumburl"],
+                     "width" => (int)$thumb_size["w"],
+                     "height" => (int)$thumb_size["h"]
+                 ]
+             ],
+             "url" => $image["fromUrl"]
+         ];
+     }
+
+     //
+     // Detect if there's a next page
+     //
+     if((int)$json["data"]["totalNum"] >= $params["pn"] + 60){
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($params),
+                 "images",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ public function video($get){
+ /*
+  * Search Baidu's video vertical (/sf/vsearch) and parse the result list.
+  *
+  * $get carries "s" (query) and "npt" (next-page token). With a token the
+  * stored parameters are reloaded and "pn" is advanced by 10.
+  *
+  * The response is split on "<script>"; each chunk that contains an HTML
+  * comment (read as the video link) and an <a class="video-title"> yields
+  * one entry in $out["video"]. A next-page token is stored when exactly
+  * 10 videos were parsed.
+  *
+  * Returns an array with status, npt, video, author, livestream,
+  * playlist and reel keys (only "video" is filled here).
+  * Throws Exception("Failed to get search page") when the request fails.
+  */
+ // https://www.baidu.com/sf/vsearch?pd=video&tn=vsearch&wd=jak%2Band%2Bdaxter&async=1&pn=0
+ // increase &pn +20 for pagination (NOTE(review): the code below advances pn by 10 -- confirm which is right)
+
+ //$html = file_get_contents("scraper/baidu_vid.html");
+
+ if($get["npt"]){
+
+ [$params, $proxy] = $this->backend->get($get["npt"], "videos");
+ $params = json_decode($params, true);
+
+ $params["pn"] = $params["pn"] + 10;
+ }else{
+
+ $proxy = $this->backend->get_ip();
+ $params = [
+ "pd" => "video",
+ "tn" => "vsearch",
+ "wd" => $get["s"],
+ "async" => 1,
+ "pn" => 0
+ ];
+ }
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.baidu.com/sf/vsearch",
+ $params
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get search page");
+ }
+
+ $html =
+ str_replace(
+ ["\r", "\n"],
+ "",
+ $html
+ );
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "video" => [],
+ "author" => [],
+ "livestream" => [],
+ "playlist" => [],
+ "reel" => []
+ ];
+
+ $html = explode("<script>", $html); // one chunk per result; chunks without a link comment are skipped below
+
+ foreach($html as $result){
+
+ $result = trim($result);
+
+ $this->fuckhtml->load($result);
+
+ // get URL
+ preg_match(
+ '/<!-- *([^ ]*) *-->/',
+ $result,
+ $matches
+ );
+
+ if(!isset($matches[1])){
+
+ // no link, give up
+ continue;
+ }
+
+ $link = $matches[1];
+
+ // get title
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "video-title",
+ "a"
+ );
+
+ if(count($title) === 0){
+
+ // should not happen
+ continue;
+ }
+
+ $title =
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ );
+
+ // get thumbnail
+ $img =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "border-radius",
+ "img"
+ );
+
+ if(count($img) !== 0){
+
+ $thumb = [
+ "url" =>
+ $this->unfuckthumb(
+ $this->fuckhtml
+ ->getTextContent(
+ $img[0]["attributes"]["src"]
+ )
+ ),
+ "ratio" => "16:9"
+ ];
+ }else{
+
+ $thumb = [
+ "url" => null,
+ "ratio" => null
+ ];
+ }
+
+ $span =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ // get duration
+ $duration =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "video_play_timer",
+ $span
+ );
+
+ if(count($duration) !== 0){
+
+ $duration =
+ $this->hms2int(
+ $this->fuckhtml
+ ->getTextContent(
+ $duration[0]
+ )
+ );
+ }else{
+
+ $duration = null;
+ }
+
+ // get author
+ // 来源:哔哩哔哩 (sample text: "Source: Bilibili")
+ $author =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "wetSource",
+ $span
+ );
+
+ if(count($author) !== 0){
+
+ $author =
+ explode(
+ ":",
+ $this->fuckhtml
+ ->getTextContent(
+ $author[0]
+ ),
+ 2
+ )[1]; // NOTE(review): index 1 does not exist when the text contains no ":"
+ }else{
+
+ $author = null;
+ }
+
+ // get date posted
+ //发布时间:2024-05-06 (sample text: "Published: 2024-05-06")
+
+ // AND get description
+ // 简介:Our first look (sample text: "Summary: Our first look")
+ $infospans =
+ array_merge(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-font-normal",
+ $span
+ ),
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "c-font-normal",
+ "div"
+ )
+ );
+
+ $date = null;
+ $description = null;
+
+ foreach($infospans as $infospan){
+
+ $infospan =
+ explode(
+ ":",
+ $this->fuckhtml
+ ->getTextContent(
+ $infospan
+ ),
+ 2
+ ); // split into [label, value]
+
+ if(count($infospan) !== 2){
+
+ // should not happen
+ continue;
+ }
+
+ $infospan[1] =
+ $this->fuckhtml
+ ->getTextContent(
+ $infospan[1]
+ );
+
+ switch($infospan[0]){
+
+ case "发布时间": // date posted
+ $date = $this->parse_time($infospan[1]);
+ break;
+
+ case "简介": // description
+ $description = $infospan[1];
+ break;
+ }
+ }
+
+ $out["video"][] = [
+ "title" => $this->titledots($title),
+ "description" => $this->titledots($description), // $description may still be null here; titledots() is defined outside this excerpt
+ "author" => [
+ "name" => $author,
+ "url" => null,
+ "avatar" => null
+ ],
+ "date" => $date,
+ "duration" => $duration,
+ "views" => null,
+ "thumb" => $thumb,
+ "url" => $link
+ ];
+ }
+
+ if(count($out["video"]) === 10){
+
+ // assume there's another page after this
+ $out["npt"] =
+ $this->backend->store(
+ json_encode($params),
+ "videos",
+ $proxy
+ );
+ }
+
+ return $out;
+ }
+
+ /**
+  * Search Baidu news.
+  *
+  * When $get["npt"] is set, the stored request and cookie are restored from
+  * the backend and the "pn" offset is advanced by 20; otherwise a fresh
+  * request is built from $get["s"].
+  *
+  * @param array $get request parameters (reads "npt" and "s")
+  * @return array with keys "status", "npt" and "news"
+  * @throws Exception when the search page cannot be fetched
+  */
+ public function news($get){
+
+     $is_first_page = !$get["npt"];
+
+     if($is_first_page){
+
+         // fresh search: pick a proxy and build the initial request
+         $proxy = $this->backend->get_ip();
+
+         $npt_data = [
+             "wd" => $get["s"],
+             "rn" => 20,
+             "tn" => "news"
+         ];
+
+         // @TODO add filters
+     }else{
+
+         // continuation: restore the request and cookie saved for this token
+         [$stored, $proxy] = $this->backend->get($get["npt"], "news");
+         $stored = json_decode($stored, true);
+
+         $this->cookie = $stored["cookie"];
+         $npt_data = $stored["req"];
+         $npt_data["pn"] = $npt_data["pn"] + 20;
+     }
+
+     try{
+
+         $html = $this->get($proxy, "https://www.baidu.com/s", $npt_data);
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     if($is_first_page){
+
+         // the first request is sent without "pn"; the offset is only added
+         // afterwards, before the data is handed to parse_search()
+         $npt_data["pn"] = 0;
+     }
+
+     $data = $this->parse_search($proxy, "news", $npt_data, $html);
+
+     $articles = [];
+
+     foreach($data["web"] as $article){
+
+         $thumb_url = $article["thumb"]["url"];
+
+         $articles[] = [
+             "title" => $article["title"],
+             "author" => null,
+             "description" => $article["description"],
+             "date" => $article["date"],
+             "thumb" => [
+                 "url" => $thumb_url,
+                 "ratio" => $thumb_url === null ? null : "16:9",
+             ],
+             "url" => $article["url"]
+         ];
+     }
+
+     return [
+         "status" => "ok",
+         "npt" => $data["npt"],
+         "news" => $articles
+     ];
+ }
+
+ /**
+  * Normalise a Baidu thumbnail URL.
+  *
+  * - URLs on gimg*.baidu.com carry the real image address URL-encoded in
+  *   a "src=" field; that address is extracted and returned.
+  * - For any other URL, everything after the first "&" is re-parsed as a
+  *   query string, "size" is dropped and an existing "q" is set to "90".
+  *
+  * The URL is returned unchanged when it does not have the expected shape.
+  *
+  * @param string $url thumbnail URL as found in the page
+  * @return string cleaned URL
+  */
+ private function unfuckthumb($url){
+
+     // probe for proxy URL
+     if(preg_match('/^https?:\/\/gimg(?:[0-9]+)?\.baidu\.com/', $url)){
+
+         $parts = explode("src=", $url);
+         if(count($parts) !== 2){
+
+             // zero or several "src=" markers: cannot tell which one is real
+             return $url;
+         }
+
+         return urldecode(explode("&", $parts[1])[0]);
+     }
+
+     $q = explode("&", $url, 2);
+
+     if(count($q) !== 2){
+
+         // no "&" at all, nothing to rewrite
+         return $url;
+     }
+
+     // Baidu sometimes emits a second "?" in the middle of the parameters:
+     // &fmt=auto?s=BB32F3A050471AEC72886934030090C4&sec=1753203600&t=0fb2194775d3bd3d1bb114b818479e0a
+     // so treat "?" as just another separator
+     parse_str(str_replace("?", "&", $q[1]), $query);
+
+     unset($query["size"]);
+     if(isset($query["q"])){ $query["q"] = "90"; }
+
+     // rebuild by concatenation: a str_replace() of the old tail would also
+     // rewrite any identical text that happens to appear before the first "&"
+     return $q[0] . "&" . http_build_query($query);
+ }
+
+ /**
+  * Strip leading/trailing spaces, dots, control whitespace, NUL bytes and
+  * the "…" ellipsis from a title or description.
+  *
+  * trim() works on bytes, so listing the three-byte "…" in its mask would
+  * also strip lone 0xE2/0x80/0xA6 bytes and cut multibyte characters in
+  * half (for example a title ending in "一"). A UTF-8 aware regex is used
+  * instead.
+  *
+  * @param string|null $title text to clean; null is treated as ""
+  * @return string cleaned text
+  */
+ private function titledots($title){
+
+     $title = (string)$title;
+
+     $clean =
+         preg_replace(
+             '/^[ .\t\n\r\x00\x0B…]+|[ .\t\n\r\x00\x0B…]+$/u',
+             "",
+             $title
+         );
+
+     if($clean === null){
+
+         // input is not valid UTF-8: fall back to a byte trim that only
+         // touches single-byte characters
+         return trim($title, " .\t\n\r\0\x0B");
+     }
+
+     return $clean;
+ }
+
+ /**
+  * Convert a "h:m:s", "m:s" or "s" duration string to seconds.
+  *
+  * The string is split into at most three colon-separated fields; each
+  * field is cast to int.
+  *
+  * @param string $time duration text
+  * @return int duration in seconds
+  */
+ private function hms2int($time){
+
+     $seconds = 0;
+
+     // every field to the left is worth 60x the one on its right
+     foreach(explode(":", $time, 3) as $field){
+
+         $seconds = ($seconds * 60) + (int)$field;
+     }
+
+     return $seconds;
+ }
+
+ /**
+  * Parse a Chinese view counter into an integer.
+  *
+  * Recognises "<number>万次" (units of ten thousand, decimals allowed,
+  * e.g. "1.5万次" => 15000) and "<number>次" (plain units).
+  *
+  * @param string $views counter text
+  * @return int|null view count, or null when no counter is found
+  */
+ private function parse_viewcount($views){
+
+     // 万 = 10k. The fractional part must be captured too, otherwise
+     // "1.5万次" would match only the "5" and yield 50000.
+     if(
+         preg_match(
+             '/([0-9]+(?:\.[0-9]+)?)万次/',
+             $views,
+             $matches
+         )
+     ){
+
+         return (int)round((float)$matches[1] * 10000);
+     }
+
+     // units
+     if(
+         preg_match(
+             '/([0-9]+)次/',
+             $views,
+             $matches
+         )
+     ){
+
+         return (int)$matches[1];
+     }
+
+     return null;
+ }
+
+ /**
+  * Convert a Baidu date string to a UNIX timestamp.
+  *
+  * Recognised forms:
+  *   2023年8月7日   => absolute date
+  *   昨天 / 昨天11:45 => yesterday (optionally at a time)
+  *   3天前          => 3 days ago
+  *   1个月前        => 1 month ago
+  *   3小时前        => 3 hours ago
+  *   5分钟前        => 5 minutes ago
+  * Anything else is handed to strtotime() unchanged.
+  *
+  * @param string $time date text
+  * @return int|null timestamp, or null when the text cannot be parsed
+  */
+ private function parse_time($time){
+
+     // default: attempt to parse as-is
+     $expression = $time;
+
+     if(preg_match('/([0-9]{4})年([0-9]{1,2})月([0-9]{1,2})日/', $time, $matches)){
+
+         // yyyy/m/d
+         $expression = "{$matches[1]}/{$matches[2]}/{$matches[3]}";
+
+     }elseif(preg_match('/昨天(.*)/', $time, $matches)){
+
+         $expression = "Yesterday {$matches[1]}";
+
+     }elseif(preg_match('/([0-9]{1,4})天前/', $time, $matches)){
+
+         $expression = "{$matches[1]} days ago";
+
+     }elseif(preg_match('/([0-9]{1,4})个月前/', $time, $matches)){
+
+         $expression = "{$matches[1]} months ago";
+
+     }elseif(preg_match('/([0-9]{1,4})小时前/', $time, $matches)){
+
+         $expression = "{$matches[1]} hours ago";
+
+     }elseif(preg_match('/([0-9]{1,4})分钟前/', $time, $matches)){
+
+         $expression = "{$matches[1]} minutes ago";
+     }
+
+     $stamp = strtotime($expression);
+
+     // strtotime() signals failure with false; every path reports it as
+     // null so callers never see a boolean in a date field
+     return $stamp === false ? null : $stamp;
+ }
+
+ /**
+  * Throw when the currently loaded document looks like a captcha page.
+  *
+  * The page is treated as a captcha when it contains no <a> element at
+  * all, or when the href of its first <a> points at the
+  * wappass.baidu.com captcha path.
+  *
+  * @throws Exception when a captcha page is detected
+  */
+ private function detect_ass(){
+
+     $anchors = $this->fuckhtml->getElementsByTagName("a");
+
+     $blocked = count($anchors) === 0;
+
+     if(!$blocked){
+
+         // only the first link is inspected
+         $first_href =
+             $this->fuckhtml->getTextContent(
+                 $anchors[0]["attributes"]["href"]
+             );
+
+         $blocked =
+             (bool)preg_match(
+                 '/^https?:\/\/wappass\.baidu\.com\/static\/captcha/',
+                 $first_href
+             );
+     }
+
+     if($blocked){
+
+         throw new Exception("Baidu returned a Captcha");
+     }
+ }
+}
diff --git a/scraper/brave.php b/scraper/brave.php
new file mode 100644
index 0000000..e6f5908
--- /dev/null
+++ b/scraper/brave.php
@@ -0,0 +1,1860 @@
+<?php
+
+class brave{
+
+ /**
+  * Load the HTML parser and the backend helper and attach an instance of
+  * each to the scraper.
+  *
+  * include_once is used so that constructing this class after the
+  * libraries were already loaded (by another scraper, or by a second
+  * instance) does not redeclare their classes.
+  */
+ public function __construct(){
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     include_once "lib/backend.php";
+     $this->backend = new backend("brave");
+ }
+
+ /**
+  * Describe the search filters available for a result page.
+  *
+  * "web" offers country, nsfw, newer, older and spellcheck;
+  * "images", "videos" and "news" offer country, nsfw and spellcheck.
+  * Nothing is returned for any other page name.
+  *
+  * @param string $page page identifier
+  * @return array|null filter definitions keyed by filter name
+  */
+ public function getfilters($page){
+
+     // country list shared by every page; the "all" entry is prepended
+     // per page because its label differs in capitalisation
+     $regions = [
+         "ar" => "Argentina",
+         "au" => "Australia",
+         "at" => "Austria",
+         "be" => "Belgium",
+         "br" => "Brazil",
+         "ca" => "Canada",
+         "cl" => "Chile",
+         "cn" => "China",
+         "dk" => "Denmark",
+         "fi" => "Finland",
+         "fr" => "France",
+         "de" => "Germany",
+         "hk" => "Hong Kong",
+         "in" => "India",
+         "id" => "Indonesia",
+         "it" => "Italy",
+         "jp" => "Japan",
+         "kr" => "Korea",
+         "my" => "Malaysia",
+         "mx" => "Mexico",
+         "nl" => "Netherlands",
+         "nz" => "New Zealand",
+         "no" => "Norway",
+         "pl" => "Poland",
+         "pt" => "Portugal",
+         "ph" => "Philippines",
+         "ru" => "Russia",
+         "sa" => "Saudi Arabia",
+         "za" => "South Africa",
+         "es" => "Spain",
+         "se" => "Sweden",
+         "ch" => "Switzerland",
+         "tw" => "Taiwan",
+         "tr" => "Turkey",
+         "gb" => "United Kingdom",
+         "us" => "United States"
+     ];
+
+     $nsfw = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes",
+             "maybe" => "Maybe",
+             "no" => "No"
+         ]
+     ];
+
+     $spellcheck = [
+         "display" => "Spellcheck",
+         "option" => [
+             "yes" => "Yes",
+             "no" => "No"
+         ]
+     ];
+
+     switch($page){
+
+         case "web":
+             return [
+                 "country" => [
+                     "display" => "Country",
+                     "option" => ["all" => "All Regions"] + $regions
+                 ],
+                 "nsfw" => $nsfw,
+                 "newer" => [
+                     "display" => "Newer than",
+                     "option" => "_DATE"
+                 ],
+                 "older" => [
+                     "display" => "Older than",
+                     "option" => "_DATE"
+                 ],
+                 "spellcheck" => $spellcheck
+             ];
+
+         case "images":
+         case "videos":
+         case "news":
+             return [
+                 "country" => [
+                     "display" => "Country",
+                     "option" => ["all" => "All regions"] + $regions
+                 ],
+                 "nsfw" => $nsfw,
+                 "spellcheck" => $spellcheck
+             ];
+     }
+ }
+
+ /**
+  * Perform a GET request against Brave through the given proxy.
+  *
+  * The nsfw filter value ("yes"/"maybe"/"no") is translated to Brave's
+  * safesearch cookie value ("off"/"moderate"/"strict"); any other value is
+  * sent as-is. A country of "any" is sent as "all".
+  *
+  * $get formerly declared a default of [] although required parameters
+  * follow it, so it could never actually be omitted; the unusable default
+  * (deprecated since PHP 8.0) has been dropped.
+  *
+  * @param mixed $proxy proxy handed to backend->assign_proxy()
+  * @param string $url endpoint URL without query string
+  * @param array $get query parameters; [] sends none
+  * @param string $nsfw nsfw filter value
+  * @param string $country country code
+  * @return string|bool value returned by curl_exec()
+  * @throws Exception carrying the cURL error message on transport failure
+  */
+ private function get($proxy, $url, $get, $nsfw, $country){
+
+     switch($nsfw){
+
+         case "yes": $nsfw = "off"; break;
+         case "maybe": $nsfw = "moderate"; break;
+         case "no": $nsfw = "strict"; break;
+     }
+
+     if($country == "any"){
+
+         $country = "all";
+     }
+
+     $headers = [
+         "User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Cookie: safesearch={$nsfw}; country={$country}; useLocation=0; summarizer=0",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message before releasing the handle, then release it
+         // on the failure path too
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /**
+  * Extract and decode the JavaScript data object embedded in the
+  * currently loaded page.
+  *
+  * The first <script> whose content contains "kit.start(" is taken;
+  * everything after its first "data:" is passed through extract_json()
+  * and parseJsObject().
+  *
+  * @return mixed decoded object as returned by parseJsObject()
+  * @throws Exception when no such script exists, it has no "data:" field,
+  *                   or decoding yields null
+  */
+ private function get_js(){
+
+     $payload = null;
+
+     foreach($this->fuckhtml->getElementsByTagName("script") as $script){
+
+         if(!preg_match('/kit\.start\(/', $script["innerHTML"])){
+
+             continue;
+         }
+
+         $pieces = explode("data:", $script["innerHTML"], 2);
+
+         if(count($pieces) !== 2){
+
+             throw new Exception("Failed to split up data field");
+         }
+
+         $payload = $pieces[1];
+         break;
+     }
+
+     if($payload === null){
+
+         throw new Exception("Could not grep JavaScript object");
+     }
+
+     $json = $this->fuckhtml->extract_json($payload);
+     $decoded = $this->fuckhtml->parseJsObject($json);
+
+     if($decoded === null){
+
+         throw new Exception("Failed to decode JavaScript object");
+     }
+
+     return $decoded;
+ }
+
+ /**
+  * Run a Brave web search and normalise the results.
+  *
+  * With $get["npt"] set, the stored query, nsfw and country values are
+  * restored from the backend and spellcheck is forced off. Otherwise the
+  * request is built from "s", "nsfw", "country", "older", "newer" and
+  * "spellcheck".
+  *
+  * Fixes over the previous revision:
+  *  - news thumbnails were read from the stale $video loop variable
+  *  - movie data was guarded by isset($result["video"])
+  *  - a "next" button without an offset stored a token for offset 0
+  *  - deep_results without "buttons" was iterated unguarded
+  *  - the spelling correction read $get["s"] instead of the restored query
+  *
+  * @param array $get request parameters
+  * @return array with keys status, spelling, npt, answer, web, image,
+  *               video, news, related
+  * @throws Exception on empty/oversized query, fetch failure, PoW captcha
+  *                   or when the page carries no result object
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // get next page data
+         [$q, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $q = json_decode($q, true);
+
+         $search = $q["q"];
+         $q["spellcheck"] = "0";
+
+         // nsfw and country travel in the cookie, not in the query string
+         $nsfw = $q["nsfw"];
+         unset($q["nsfw"]);
+
+         $country = $q["country"];
+         unset($q["country"]);
+
+     }else{
+
+         // get _GET data instead
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         if(strlen($search) > 2048){
+
+             throw new Exception("Search term is too long!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $country = $get["country"];
+         $older = $get["older"];
+         $newer = $get["newer"];
+         $spellcheck = $get["spellcheck"];
+
+         $q = [
+             "q" => $search
+         ];
+
+         /*
+             Pass older/newer filters to brave
+         */
+         if($newer !== false){
+
+             $newer = date("Y-m-d", $newer);
+
+             if($older === false){
+
+                 // open-ended range: stop at today
+                 $older = date("Y-m-d", time());
+             }
+         }
+
+         // $older is already a string when it was filled in just above
+         if(
+             is_string($older) === false &&
+             $older !== false
+         ){
+
+             $older = date("Y-m-d", $older);
+
+             if($newer === false){
+
+                 $newer = "1970-01-02";
+             }
+         }
+
+         if($older !== false){
+
+             $q["tf"] = "{$newer}to{$older}";
+         }
+
+         // spellcheck
+         if($spellcheck == "no"){
+
+             $q["spellcheck"] = "0";
+         }
+     }
+
+     try{
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://search.brave.com/search",
+                 $q,
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // load html
+     $this->fuckhtml->load($html);
+
+     /*
+         Get next page "token"
+     */
+     $pagination =
+         $this->fuckhtml
+         ->getElementById(
+             "pagination",
+             "div"
+         );
+
+     if($pagination){
+
+         $this->fuckhtml->load($pagination);
+
+         $buttons =
+             $this->fuckhtml
+             ->getElementsByClassName("button", "a");
+
+         if(count($buttons) !== 0){
+
+             // the "next" link, when present, is the last button
+             $last = $buttons[count($buttons) - 1];
+
+             if(
+                 strtolower(
+                     $this->fuckhtml->getTextContent($last)
+                 ) == "next" &&
+                 // only store a token when an offset was really found
+                 preg_match(
+                     '/offset=([0-9]+)/',
+                     $this->fuckhtml->getTextContent($last["attributes"]["href"]),
+                     $offset
+                 )
+             ){
+
+                 $q["offset"] = (int)$offset[1];
+                 $q["nsfw"] = $nsfw;
+                 $q["country"] = $country;
+
+                 $out["npt"] =
+                     $this->backend->store(
+                         json_encode($q),
+                         "web",
+                         $proxy
+                     );
+             }
+         }
+     }
+
+     // reload the full page (the parser was narrowed to the pagination div)
+     $this->fuckhtml->load($html);
+     $data = $this->get_js();
+
+     if(
+         isset($data[2]["data"]["title"]) &&
+         stripos($data[2]["data"]["title"], "PoW Captcha") !== false
+     ){
+
+         throw new Exception("Brave returned a PoW captcha");
+     }
+
+     if(!isset($data[1]["data"]["body"]["response"])){
+
+         throw new Exception("Brave did not return a result object");
+     }
+
+     $data = $data[1]["data"]["body"]["response"];
+
+     /*
+         Get web results
+     */
+     if(!isset($data["web"]["results"])){
+
+         return $out;
+     }
+
+     foreach($data["web"]["results"] as $result){
+
+         if(
+             isset($result["thumbnail"]) &&
+             is_array($result["thumbnail"])
+         ){
+
+             $thumb = [
+                 "ratio" => $result["thumbnail"]["logo"] == "false" ? "16:9" : "1:1",
+                 "url" => $result["thumbnail"]["original"]
+             ];
+         }else{
+
+             $thumb = [
+                 "ratio" => null,
+                 "url" => null
+             ];
+         }
+
+         // get sublinks
+         $sublink = [];
+         if(
+             isset($result["cluster"]) &&
+             is_array($result["cluster"])
+         ){
+
+             foreach($result["cluster"] as $cluster){
+
+                 $sublink[] = [
+                     "title" => $this->titledots($cluster["title"]),
+                     "description" =>
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $cluster["description"]
+                             )
+                         ),
+                     "url" => $cluster["url"],
+                     "date" => null
+                 ];
+             }
+         }
+
+         // more sublinks; "buttons" itself is checked so a deep_results
+         // object without it is skipped instead of iterated
+         if(
+             isset($result["deep_results"]["buttons"]) &&
+             is_array($result["deep_results"]["buttons"])
+         ){
+
+             foreach($result["deep_results"]["buttons"] as $r){
+
+                 $sublink[] = [
+                     "title" => $this->titledots($r["title"]),
+                     "description" => null,
+                     "url" => $r["url"],
+                     "date" => null
+                 ];
+             }
+         }
+
+         // parse table elements
+         $table = [];
+
+         /*
+             [locations] => void 0          Done
+             [video] => void 0              Done
+             [movie] => void 0              Done
+             [faq] => void 0
+             [recipe] => void 0
+             [qa] => void 0                 Not needed
+             [book] => void 0
+             [rating] => void 0
+             [article] => void 0
+             [product] => void 0            Done
+             [product_cluster] => void 0
+             [cluster_type] => void 0
+             [cluster] => void 0            Done
+             [creative_work] => void 0      Done
+             [music_recording] => void 0
+             [review] => void 0             Done
+             [software] => void 0           Done
+             [content_type] => void 0
+             [descriptionLength] => 271
+         */
+
+         // product takes precedence over creative_work; both are read the
+         // same way. A plain copy is enough, nothing is written back.
+         $ref = $result["product"] ?? $result["creative_work"] ?? null;
+
+         if($ref !== null){
+
+             if(isset($ref["offers"])){
+
+                 // every offer overwrites the previous one: the last wins
+                 foreach($ref["offers"] as $offer){
+
+                     $price = null;
+
+                     if(isset($offer["price"])){
+
+                         $price = (float)$offer["price"] == 0 ? "Free" : $offer["price"];
+                     }
+
+                     if(
+                         $price !== "Free" &&
+                         isset($offer["priceCurrency"])
+                     ){
+
+                         $price .= " " . $offer["priceCurrency"];
+                     }
+
+                     if($price !== null){
+
+                         $table["Price"] = trim($price);
+                     }
+                 }
+             }
+
+             if(isset($ref["rating"])){
+
+                 $rating = null;
+                 if(isset($ref["rating"]["ratingValue"])){
+
+                     $rating = $ref["rating"]["ratingValue"];
+
+                     if(isset($ref["rating"]["bestRating"])){
+
+                         $rating .= "/" . $ref["rating"]["bestRating"];
+                     }
+                 }
+
+                 if(isset($ref["rating"]["reviewCount"])){
+
+                     // the hit count goes in parentheses only when a
+                     // score precedes it
+                     $has_score = $rating !== null;
+                     $hits = number_format($ref["rating"]["reviewCount"]) . " hits";
+
+                     $rating .= $has_score ? " (" . $hits . ")" : $hits;
+                 }
+
+                 if($rating !== null){
+
+                     $table["Rating"] = $rating;
+                 }
+             }
+         }
+
+         // review
+         if(
+             isset($result["review"]) &&
+             is_array($result["review"])
+         ){
+
+             if(isset($result["review"]["rating"]["ratingValue"])){
+
+                 $table["Rating"] =
+                     $result["review"]["rating"]["ratingValue"] . "/" .
+                     $result["review"]["rating"]["bestRating"];
+             }
+         }
+
+         // software
+         if(
+             isset($result["software"]) &&
+             is_array($result["software"])
+         ){
+
+             if(isset($result["software"]["author"])){
+                 $table["Author"] = $result["software"]["author"];
+             }
+
+             if(isset($result["software"]["stars"])){
+                 $table["Stars"] = number_format($result["software"]["stars"]);
+             }
+
+             if(isset($result["software"]["forks"])){
+                 $table["Forks"] = number_format($result["software"]["forks"]);
+             }
+
+             if(
+                 isset($result["software"]["programmingLanguage"]) &&
+                 $result["software"]["programmingLanguage"] != ""
+             ){
+                 $table["Programming languages"] = $result["software"]["programmingLanguage"];
+             }
+         }
+
+         // location
+         if(
+             isset($result["location"]) &&
+             is_array($result["location"])
+         ){
+
+             if(isset($result["location"]["postal_address"]["displayAddress"])){
+
+                 $table["Address"] = $result["location"]["postal_address"]["displayAddress"];
+             }
+
+             if(
+                 isset($result["location"]["rating"]) &&
+                 $result["location"]["rating"] != "void 0"
+             ){
+
+                 $table["Rating"] =
+                     $result["location"]["rating"]["ratingValue"] . "/" .
+                     $result["location"]["rating"]["bestRating"] . " (" .
+                     number_format($result["location"]["rating"]["reviewCount"]) . " votes)";
+             }
+
+             if(
+                 isset($result["location"]["contact"]["telephone"]) &&
+                 $result["location"]["contact"]["telephone"] != "void 0"
+             ){
+
+                 $table["Phone number"] =
+                     $result["location"]["contact"]["telephone"];
+             }
+
+             if(
+                 isset($result["location"]["price_range"]) &&
+                 $result["location"]["price_range"] != "void 0"
+             ){
+
+                 $table["Price"] =
+                     $result["location"]["price_range"];
+             }
+         }
+
+         // video: copy every string field into the table
+         if(
+             isset($result["video"]) &&
+             is_array($result["video"])
+         ){
+
+             foreach($result["video"] as $key => $value){
+
+                 if(is_string($value) === false){
+
+                     continue;
+                 }
+
+                 $table[ucfirst($key)] = $value;
+             }
+         }
+
+         // movie (guard on the movie key itself, not on "video")
+         if(
+             isset($result["movie"]) &&
+             is_array($result["movie"])
+         ){
+
+             if(isset($result["movie"]["release"])){
+
+                 $table["Release date"] = $result["movie"]["release"];
+             }
+
+             if(isset($result["movie"]["directors"])){
+
+                 $directors = [];
+
+                 foreach($result["movie"]["directors"] as $director){
+
+                     $directors[] = $director["name"];
+                 }
+
+                 if(count($directors) !== 0){
+
+                     $table["Directors"] = implode(", ", $directors);
+                 }
+             }
+
+             if(isset($result["movie"]["actors"])){
+
+                 $actors = [];
+
+                 foreach($result["movie"]["actors"] as $actor){
+
+                     $actors[] = $actor["name"];
+                 }
+
+                 if(count($actors) !== 0){
+
+                     $table["Actors"] = implode(", ", $actors);
+                 }
+             }
+
+             if(isset($result["movie"]["rating"])){
+
+                 $table["Rating"] =
+                     $result["movie"]["rating"]["ratingValue"] . "/" .
+                     $result["movie"]["rating"]["bestRating"] . " (" .
+                     number_format($result["movie"]["rating"]["reviewCount"]) . " votes)";
+             }
+
+             if(isset($result["movie"]["duration"])){
+
+                 $table["Duration"] =
+                     $result["movie"]["duration"];
+             }
+
+             if(isset($result["movie"]["genre"])){
+
+                 $genres = [];
+
+                 foreach($result["movie"]["genre"] as $genre){
+
+                     $genres[] = $genre;
+                 }
+
+                 if(count($genres) !== 0){
+
+                     $table["Genre"] = implode(", ", $genres);
+                 }
+             }
+         }
+
+         if(
+             isset($result["age"]) &&
+             $result["age"] != "void 0" &&
+             $result["age"] != ""
+         ){
+
+             $date = strtotime($result["age"]);
+         }else{
+
+             $date = null;
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->titledots(
+                     $result["title"]
+                 ),
+             "description" =>
+                 isset($result["review"]["description"]) ?
+                 $this->limitstrlen(
+                     strip_tags(
+                         $result["review"]["description"]
+                     )
+                 ) :
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $result["description"]
+                     )
+                 ),
+             "url" => $result["url"],
+             "date" => $date,
+             "type" => "web",
+             "thumb" => $thumb,
+             "sublink" => $sublink,
+             "table" => $table
+         ];
+     }
+
+     /*
+         Get spelling autocorrect
+     */
+     if(
+         isset($data["query"]["bo_altered_diff"][0][0]) &&
+         $data["query"]["bo_altered_diff"][0][0] == "true"
+     ){
+         $using = [];
+
+         foreach($data["query"]["bo_altered_diff"] as $diff){
+
+             $using[] = $diff[1];
+         }
+
+         $out["spelling"] = [
+             "type" => "including",
+             "using" => implode(" ", $using),
+             // $search is the query that was actually sent, on first and
+             // on continued pages alike
+             "correction" => $search
+         ];
+     }
+
+     /*
+         Get wikipedia heads
+     */
+     if(isset($data["infobox"]["results"][0])){
+
+         foreach($data["infobox"]["results"] as $info){
+
+             if($info["subtype"] == "code"){
+
+                 $description =
+                     $this->stackoverflow_parse($info["data"]["answer"]["text"]);
+
+                 if(isset($info["data"]["answer"]["author"])){
+
+                     $description[] = [
+                         "type" => "quote",
+                         "value" => "Answer from " . $info["data"]["answer"]["author"]
+                     ];
+                 }
+             }else{
+
+                 $description = [];
+
+                 if(
+                     isset($info["description"]) &&
+                     $info["description"] != ""
+                 ){
+                     $description[] = [
+                         "type" => "quote",
+                         "value" => $info["description"]
+                     ];
+                 }
+
+                 if(
+                     isset($info["long_desc"]) &&
+                     $info["long_desc"] != ""
+                 ){
+                     $description[] = [
+                         "type" => "text",
+                         "value" => $this->titledots($info["long_desc"])
+                     ];
+                 }
+
+                 // parse ratings
+                 if(
+                     isset($info["ratings"]) &&
+                     $info["ratings"] != "void 0" &&
+                     is_array($info["ratings"]) &&
+                     count($info["ratings"]) !== 0
+                 ){
+
+                     $description[] = [
+                         "type" => "title",
+                         "value" => "Ratings"
+                     ];
+
+                     foreach($info["ratings"] as $rating){
+
+                         $description[] = [
+                             "type" => "link",
+                             "url" => $rating["profile"]["url"],
+                             "value" => $rating["profile"]["name"]
+                         ];
+
+                         $description[] = [
+                             "type" => "text",
+                             "value" => ": " . $rating["ratingValue"] . "/" . $rating["bestRating"] . "\n"
+                         ];
+                     }
+                 }
+             }
+
+             $table = [];
+             if(isset($info["attributes"])){
+
+                 foreach($info["attributes"] as $row){
+
+                     // leading "null" rows are skipped; the first "null"
+                     // row after real data ends the table
+                     if($row[1] == "null"){
+
+                         if(count($table) !== 0){
+
+                             break;
+                         }
+
+                         continue;
+                     }
+
+                     $table[
+                         $this->fuckhtml->getTextContent($row[0])
+                     ] =
+                         $this->fuckhtml->getTextContent($row[1]);
+                 }
+             }
+
+             $sublink = [];
+             if(isset($info["profiles"])){
+
+                 foreach($info["profiles"] as $row){
+
+                     $name = $this->fuckhtml->getTextContent($row["name"]);
+
+                     if(strtolower($name) == "steampowered"){
+
+                         $name = "Steam";
+                     }
+
+                     $sublink[
+                         $this->fuckhtml->getTextContent($name)
+                     ] =
+                         $this->fuckhtml->getTextContent($row["url"]);
+                 }
+             }
+
+             $out["answer"][] = [
+                 "title" => $this->fuckhtml->getTextContent($info["title"]),
+                 "description" => $description,
+                 "url" => $info["url"],
+                 "thumb" => isset($info["images"][0]["original"]) ? $info["images"][0]["original"] : null,
+                 "table" => $table,
+                 "sublink" => $sublink
+             ];
+
+             break; // only iterate once, we get garbage most of the time
+         }
+     }
+
+     /*
+         Get videos
+     */
+     if(isset($data["videos"]["results"])){
+
+         foreach($data["videos"]["results"] as $video){
+
+             $out["video"][] = [
+                 "title" => $this->titledots($video["title"]),
+                 "description" => $this->titledots($video["description"]),
+                 "date" => isset($video["age"]) && $video["age"] != "void 0" ? strtotime($video["age"]) : null,
+                 "duration" => isset($video["video"]["duration"]) && $video["video"]["duration"] != "void 0" ? $this->hms2int($video["video"]["duration"]) : null,
+                 "views" => isset($video["video"]["views"]) && $video["video"]["views"] != "void 0" ? (int)$video["video"]["views"] : null,
+                 "thumb" =>
+                     isset($video["thumbnail"]["src"]) ?
+                     [
+                         "ratio" => "16:9",
+                         "url" => $this->unshiturl($video["thumbnail"]["src"])
+                     ] :
+                     [
+                         "ratio" => null,
+                         "url" => null
+                     ],
+                 "url" => $video["url"]
+             ];
+         }
+     }
+
+     /*
+         Get news
+     */
+     if(isset($data["news"]["results"])){
+
+         foreach($data["news"]["results"] as $news){
+
+             $out["news"][] = [
+                 "title" => $this->titledots($news["title"]),
+                 "description" => $this->titledots($news["description"]),
+                 "date" => isset($news["age"]) ? strtotime($news["age"]) : null,
+                 // read the thumbnail of this news item, not of the last
+                 // video handled above
+                 "thumb" =>
+                     isset($news["thumbnail"]["src"]) ?
+                     [
+                         "ratio" => "16:9",
+                         "url" => $this->unshiturl($news["thumbnail"]["src"])
+                     ] :
+                     [
+                         "ratio" => null,
+                         "url" => null
+                     ],
+                 "url" => $news["url"]
+             ];
+         }
+     }
+
+     /*
+         Get discussions
+     */
+     $disc_out = [];
+
+     if(isset($data["discussions"]["results"])){
+
+         foreach($data["discussions"]["results"] as $disc){
+
+             $table = [];
+
+             if(isset($disc["data"]["num_votes"])){
+
+                 $table["Votes"] = number_format($disc["data"]["num_votes"]);
+             }
+
+             if(isset($disc["data"]["num_answers"])){
+
+                 $table["Comments"] = number_format($disc["data"]["num_answers"]);
+             }
+
+             $disc_out[] = [
+                 "title" =>
+                     $this->titledots(
+                         $disc["title"]
+                     ),
+                 "description" =>
+                     $this->limitstrlen(
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $disc["description"]
+                             )
+                         )
+                     ),
+                 "url" => $disc["url"],
+                 "date" => isset($disc["age"]) ? strtotime($disc["age"]) : null,
+                 "type" => "web",
+                 "thumb" => [
+                     "ratio" => null,
+                     "url" => null
+                 ],
+                 "sublink" => [],
+                 "table" => $table
+             ];
+         }
+     }
+
+     // append discussions at position 2
+     array_splice($out["web"], 1, 0, $disc_out);
+
+     return $out;
+ }
+
+ /**
+  * Run a Brave news search.
+  *
+  * With $get["npt"] set, the stored query, offset, spellcheck, nsfw and
+  * country values are restored from the backend; otherwise the request is
+  * built from "s", "nsfw", "country" and "spellcheck".
+  *
+  * @param array $get request parameters
+  * @return array with keys "status", "npt" and "news"
+  * @throws Exception on empty/oversized query, fetch failure, or when the
+  *                   page carries no result object
+  */
+ public function news($get){
+
+     if($get["npt"]){
+
+         [$req, $proxy] = $this->backend->get($get["npt"], "news");
+
+         $req = json_decode($req, true);
+
+         $search = $req["q"];
+         $country = $req["country"];
+         $nsfw = $req["nsfw"];
+         $spellcheck = $req["spellcheck"];
+
+         $params = [
+             "q" => $search,
+             "offset" => $req["offset"],
+             "spellcheck" => $spellcheck
+         ];
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         if(strlen($search) > 2048){
+
+             throw new Exception("Search term is too long!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $country = $get["country"];
+         $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
+
+         $params = [
+             "q" => $search,
+             "spellcheck" => $spellcheck
+         ];
+     }
+
+     // both branches issue the same request, only the parameters differ
+     try{
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://search.brave.com/news",
+                 $params,
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     // load html
+     $this->fuckhtml->load($html);
+
+     // get npt
+     $out["npt"] =
+         $this->generatenextpagetoken(
+             $search,
+             $nsfw,
+             $country,
+             $spellcheck,
+             "news",
+             $proxy
+         );
+
+     // load again before reading the embedded data object
+     $this->fuckhtml->load($html);
+     $json = $this->get_js();
+
+     // same check as web(): without a response object there is nothing
+     // to iterate
+     if(!isset($json[1]["data"]["body"]["response"])){
+
+         throw new Exception("Brave did not return a result object");
+     }
+
+     $results = $json[1]["data"]["body"]["response"]["news"]["results"] ?? [];
+
+     if(!is_array($results)){
+
+         // present but not a list: treat as no results
+         $results = [];
+     }
+
+     foreach($results as $news){
+
+         if(
+             !isset($news["thumbnail"]["src"]) ||
+             $news["thumbnail"]["src"] == "void 0"
+         ){
+
+             $thumb = [
+                 "url" => null,
+                 "ratio" => null
+             ];
+         }else{
+
+             $thumb = [
+                 "url" => $this->unshiturl($news["thumbnail"]["src"]),
+                 "ratio" => "16:9"
+             ];
+         }
+
+         $out["news"][] = [
+             "title" => $news["title"],
+             "author" => null,
+             "description" => $news["description"],
+             "date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]),
+             "thumb" => $thumb,
+             "url" => $news["url"]
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run a Brave image search.
+  *
+  * Reads "s", "country", "nsfw" and "spellcheck" from $get. No next-page
+  * token is produced; "npt" is always null.
+  *
+  * @param array $get request parameters
+  * @return array with keys "status", "npt" and "image"
+  * @throws Exception on empty/oversized query, fetch failure, or when the
+  *                   page carries no result object
+  */
+ public function image($get){
+
+     $search = $get["s"];
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     if(strlen($search) > 2048){
+
+         throw new Exception("Search term is too long!");
+     }
+
+     $country = $get["country"];
+     $nsfw = $get["nsfw"];
+     $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     try{
+         $html =
+             $this->get(
+                 $this->backend->get_ip(), // no nextpage right now, pass proxy directly
+                 "https://search.brave.com/images",
+                 [
+                     "q" => $search,
+                     "spellcheck" => $spellcheck
+                 ],
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch search page");
+     }
+
+     $this->fuckhtml->load($html);
+     $json = $this->get_js();
+
+     // same check as web(): without a response object there is nothing
+     // to iterate
+     if(!isset($json[1]["data"]["body"]["response"])){
+
+         throw new Exception("Brave did not return a result object");
+     }
+
+     $results = $json[1]["data"]["body"]["response"]["results"] ?? [];
+
+     if(!is_array($results)){
+
+         // present but not a list: treat as no results
+         $results = [];
+     }
+
+     foreach($results as $result){
+
+         $out["image"][] = [
+             "title" => $result["title"],
+             "source" => [
+                 // full-size image first, thumbnail second
+                 [
+                     "url" => $result["properties"]["url"],
+                     "width" => null,
+                     "height" => null
+                 ],
+                 [
+                     "url" => $result["thumbnail"]["src"],
+                     "width" => null,
+                     "height" => null
+                 ]
+             ],
+             "url" => $result["url"]
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run a Brave video search.
+  *
+  * With $get["npt"] set, the stored query, offset, spellcheck, country and
+  * nsfw values are restored from the backend; otherwise the request is
+  * built from "s", "country", "nsfw" and "spellcheck".
+  *
+  * @param array $get request parameters
+  * @return array with keys status, npt, video, author, livestream,
+  *               playlist, reel (only "video" is filled here)
+  * @throws Exception on empty/oversized query, fetch failure, or when the
+  *                   page carries no result object
+  */
+ public function video($get){
+
+     if($get["npt"]){
+
+         [$npt, $proxy] = $this->backend->get($get["npt"], "videos");
+
+         $npt = json_decode($npt, true);
+         $search = $npt["q"];
+         $spellcheck = $npt["spellcheck"];
+         $country = $npt["country"];
+         $nsfw = $npt["nsfw"];
+
+         $params = [
+             "q" => $search,
+             "offset" => $npt["offset"],
+             "spellcheck" => $spellcheck
+         ];
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         if(strlen($search) > 2048){
+
+             throw new Exception("Search term is too long!");
+         }
+
+         $country = $get["country"];
+         $nsfw = $get["nsfw"];
+         $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
+
+         $proxy = $this->backend->get_ip();
+
+         $params = [
+             "q" => $search,
+             "spellcheck" => $spellcheck
+         ];
+     }
+
+     // both branches issue the same request, only the parameters differ
+     try{
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://search.brave.com/videos",
+                 $params,
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch search page");
+     }
+
+     $this->fuckhtml->load($html);
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     // get npt
+     $out["npt"] =
+         $this->generatenextpagetoken(
+             $search,
+             $nsfw,
+             $country,
+             $spellcheck,
+             "videos",
+             $proxy
+         );
+
+     // load again before reading the embedded data object
+     $this->fuckhtml->load($html);
+     $json = $this->get_js();
+
+     // same check as web(): without a response object there is nothing
+     // to iterate
+     if(!isset($json[1]["data"]["body"]["response"])){
+
+         throw new Exception("Brave did not return a result object");
+     }
+
+     $results = $json[1]["data"]["body"]["response"]["results"] ?? [];
+
+     if(!is_array($results)){
+
+         // present but not a list: treat as no results
+         $results = [];
+     }
+
+     foreach($results as $result){
+
+         // absent values are compared against the literal string "null"
+         if($result["video"]["author"] != "null"){
+
+             $author = [
+                 "name" => $result["video"]["author"]["name"] == "null" ? null : $result["video"]["author"]["name"],
+                 "url" => $result["video"]["author"]["url"] == "null" ? null : $result["video"]["author"]["url"],
+                 "avatar" => $result["video"]["author"]["img"] == "null" ? null : $result["video"]["author"]["img"]
+             ];
+         }else{
+
+             $author = [
+                 "name" => null,
+                 "url" => null,
+                 "avatar" => null
+             ];
+         }
+
+         if($result["thumbnail"] != "null"){
+
+             $thumb = [
+                 "url" => $result["thumbnail"]["original"],
+                 "ratio" => "16:9"
+             ];
+         }else{
+
+             $thumb = [
+                 "url" => null,
+                 "ratio" => null
+             ];
+         }
+
+         $out["video"][] = [
+             "title" => $result["title"],
+             "description" => $result["description"] == "null" ? null : $this->titledots($result["description"]),
+             "author" => $author,
+             "date" => ($result["age"] == "null" || $result["age"] == "void 0") ? null : strtotime($result["age"]),
+             "duration" => $result["video"]["duration"] == "null" ? null : $this->hms2int($result["video"]["duration"]),
+             "views" => $result["video"]["views"] == "null" ? null : (int)$result["video"]["views"],
+             "thumb" => $thumb,
+             "url" => $result["url"]
+         ];
+     }
+
+     return $out;
+ }
+
+	/**
+	 * Flatten an HTML answer snippet into an ordered list of typed parts.
+	 *
+	 * Every element of the loaded HTML is visited; only <p>, <img>, <pre>
+	 * and <ol> elements produce output:
+	 *  - <p>   : text runs become "text" parts (merged by appendtext());
+	 *            child <code>/<em>/<blockquote> tags become
+	 *            "inline_code"/"italic"/"quote" parts, any other child tag
+	 *            becomes its own "text" part
+	 *  - <img> : ["type" => "image", "url" => <src attribute>]
+	 *  - <pre> : ["type" => "code", "value" => <text content>]
+	 *  - <ol>  : every <li> is appended as "N. <text>"
+	 *
+	 * Side effect: replaces the document currently loaded in $this->fuckhtml.
+	 *
+	 * @param string $html HTML to parse
+	 * @return array list of ["type" => ..., "value" => ...] parts
+	 *               (image parts carry "url" instead of "value")
+	 */
+	private function stackoverflow_parse($html){
+
+		// $i always equals count($answer); appendtext() keeps it in sync
+		$i = 0;
+		$answer = [];
+
+		$this->fuckhtml->load($html);
+
+		foreach(
+			$this->fuckhtml->getElementsByTagName("*")
+			as $snippet
+		){
+
+			switch($snippet["tagName"]){
+
+				case "p":
+					$this->fuckhtml->load($snippet["innerHTML"]);
+
+					$codetags =
+						$this->fuckhtml
+						->getElementsByTagName("*");
+
+					// remaining, not yet consumed HTML of the paragraph
+					$tmphtml = $snippet["innerHTML"];
+
+					foreach($codetags as $tag){
+
+						if(!isset($tag["outerHTML"])){
+
+							continue;
+						}
+
+						// split into [text before the tag, everything after it]
+						$tmphtml =
+							explode(
+								$tag["outerHTML"],
+								$tmphtml,
+								2
+							);
+
+						$value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
+						$this->appendtext($value, $answer, $i);
+
+						switch($tag["tagName"]){
+
+							case "code": $type = "inline_code"; break;
+							case "em": $type = "italic"; break;
+							case "blockquote": $type = "quote"; break;
+							default: $type = "text";
+						}
+
+						$value = $this->fuckhtml->getTextContent($tag, false, true);
+
+						if(trim($value) != ""){
+
+							$answer[] = [
+								"type" => $type,
+								"value" => $value
+							];
+							$i++;
+						}
+
+						if(count($tmphtml) === 2){
+
+							$tmphtml = $tmphtml[1];
+						}else{
+
+							// NOTE(review): the tag was not found in the remaining HTML
+							// (e.g. nested inside an already consumed tag). The remaining
+							// text was appended above and is appended again after the
+							// loop -- confirm whether this duplication is intended.
+							break;
+						}
+					}
+
+					if(is_array($tmphtml)){
+
+						$tmphtml = $tmphtml[0];
+					}
+
+					if(strlen($tmphtml) !== 0){
+
+						$value = $this->fuckhtml->getTextContent($tmphtml, false, false);
+						$this->appendtext($value, $answer, $i);
+					}
+					break;
+
+				case "img":
+					// read the attribute from the <img> element itself; $tag is
+					// only a leftover of the paragraph loop (or undefined)
+					if(isset($snippet["attributes"]["src"])){
+
+						$answer[] = [
+							"type" => "image",
+							"url" =>
+								$this->fuckhtml
+								->getTextContent(
+									$snippet["attributes"]["src"]
+								)
+						];
+						$i++;
+					}
+					break;
+
+				case "pre":
+
+					// strip trailing whitespace from the text right before the
+					// code block; nothing to strip when <pre> comes first
+					if($i !== 0){
+
+						switch($answer[$i - 1]["type"]){
+
+							case "text":
+							case "italic":
+								$answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
+								break;
+						}
+					}
+
+					$answer[] =
+						[
+							"type" => "code",
+							"value" =>
+								rtrim(
+									$this->fuckhtml
+									->getTextContent(
+										$snippet,
+										true,
+										false
+									)
+								)
+						];
+					$i++;
+
+					break;
+
+				case "ol":
+					$o = 0;
+
+					$this->fuckhtml->load($snippet);
+					$li =
+						$this->fuckhtml
+						->getElementsByTagName("li");
+
+					foreach($li as $elem){
+						$o++;
+
+						$this->appendtext(
+							$o . ". " .
+							$this->fuckhtml
+							->getTextContent(
+								$elem
+							),
+							$answer,
+							$i
+						);
+					}
+					break;
+			}
+		}
+
+		// trim trailing whitespace of a final text part
+		if(
+			$i !== 0 &&
+			$answer[$i - 1]["type"] == "text"
+		){
+
+			$answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
+		}
+
+		return $answer;
+	}
+
+	/**
+	 * Convert a "H:MM:SS", "M:SS" or "S" duration string into seconds.
+	 *
+	 * At most three colon-separated fields are considered; each field is
+	 * cast to int.
+	 *
+	 * @param string $time duration string
+	 * @return int duration in seconds
+	 */
+	private function hms2int($time){
+
+		$fields = explode(":", $time, 3);
+		$seconds = 0;
+
+		// walk from the rightmost field: seconds (x1), minutes (x60), hours (x3600)
+		foreach(array_reverse($fields) as $position => $field){
+
+			$seconds += (int)$field * (60 ** $position);
+		}
+
+		return $seconds;
+	}
+
+	/**
+	 * Append a text payload to a list of typed parts.
+	 *
+	 * Payloads that are empty after trim() are ignored. When the previous
+	 * part is of type "text" the payload is merged into it, separated by a
+	 * blank line; otherwise a new "text" part is pushed and $index grows by one.
+	 *
+	 * @param string $payload text to append
+	 * @param array  $text    list of parts, modified in place
+	 * @param int    $index   number of parts in $text, modified in place
+	 * @return void
+	 */
+	private function appendtext($payload, &$text, &$index){
+
+		if(trim($payload) == ""){
+
+			return;
+		}
+
+		// NOTE(review): as written this swaps a trailing space for a space;
+		// the pattern may originally have targeted another whitespace character
+		$chunk = preg_replace('/ $/', " ", $payload);
+		$previous = $index - 1;
+
+		if(
+			$index === 0 ||
+			$text[$previous]["type"] != "text"
+		){
+
+			// nothing to merge with: start a new text part
+			$text[] = [
+				"type" => "text",
+				"value" => $chunk
+			];
+			$index++;
+			return;
+		}
+
+		$text[$previous]["value"] .= "\n\n" . $chunk;
+	}
+
+	/**
+	 * Turn a collection of "Label: value" elements into $data["table"] rows.
+	 *
+	 * <style> blocks are removed from each element's innerHTML before its
+	 * text content is read. The text is split on the first ":"; when there
+	 * is no ":" the label defaults to "Rating".
+	 *
+	 * @param array $html_collection elements as returned by fuckhtml
+	 * @param array $data            result array, "table" is filled in place
+	 * @return void
+	 */
+	private function tablesublink($html_collection, &$data){
+
+		foreach($html_collection as $row){
+
+			// lazy quantifier: strip each <style> block on its own instead of
+			// everything between the first <style> and the last </style>
+			$row["innerHTML"] = preg_replace(
+				'/<style>[\S\s]*?<\/style>/i',
+				"",
+				$row["innerHTML"]
+			);
+
+			$parts =
+				explode(
+					":",
+					$this->fuckhtml->getTextContent($row),
+					2
+				);
+
+			if(count($parts) === 1){
+
+				$parts = ["Rating", $parts[0]];
+			}
+
+			$data["table"][trim($parts[0])] = trim($parts[1]);
+		}
+	}
+ /*
+ private function getimagelinkfromstyle($thumb){
+
+ $thumb =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $thumb,
+ "div"
+ );
+
+ if(count($thumb) === 0){
+
+ return [
+ "url" => null,
+ "ratio" => null
+ ];
+ }
+
+ $thumb = $thumb[0]["attributes"]["style"];
+
+ preg_match(
+ '/background-image: ?url\((\'[^\']+\'|"[^"]+"|[^\)]+)\)/',
+ $thumb,
+ $thumb
+ );
+
+ $url = $this->fuckhtml->getTextContent($this->unshiturl(trim($thumb[1], '"\' ')));
+
+ if(parse_url($url, PHP_URL_HOST) == "cdn.search.brave.com"){
+
+ return [
+ "url" => null,
+ "ratio" => null
+ ];
+ }
+
+ return [
+ "url" => $url,
+ "ratio" => "16:9"
+ ];
+ }*/
+
+	/**
+	 * Return the first line of $text after word-wrapping it at 300 characters.
+	 *
+	 * The result is therefore cut at the first line break, whether it was
+	 * inserted by wordwrap() or already present in $text.
+	 *
+	 * @param string $text input text
+	 * @return string first wrapped line
+	 */
+	private function limitstrlen($text){
+
+		$wrapped = wordwrap($text, 300, "\n");
+		$cut = strpos($wrapped, "\n");
+
+		if($cut === false){
+
+			// single line, nothing to cut
+			return $wrapped;
+		}
+
+		return substr($wrapped, 0, $cut);
+	}
+ /*
+ private function limitwhitespace($text){
+
+ return
+ preg_replace(
+ '/[\s]+/',
+ " ",
+ $text
+ );
+ }*/
+
+	/**
+	 * Strip a trailing ellipsis from a title and trim surrounding whitespace.
+	 *
+	 * The last three bytes are compared against "..." and against the
+	 * single-character ellipsis (three bytes when the file is UTF-8 encoded).
+	 *
+	 * @param string $title text that may end in an ellipsis
+	 * @return string trimmed text without the trailing ellipsis
+	 */
+	private function titledots($title){
+
+		$tail = substr($title, -3);
+
+		$body =
+			in_array($tail, ["...", "…"], true) ?
+			substr($title, 0, -3) :
+			$title;
+
+		return trim($body);
+	}
+
+	/**
+	 * Build and store a next-page token from the pagination of the loaded page.
+	 *
+	 * Looks up the div with id "pagination" in the document currently loaded
+	 * in $this->fuckhtml, takes its last "button" link and, when that link
+	 * reads "next", extracts the numeric "offset" from its href. The search
+	 * parameters plus that offset are JSON-encoded and handed to
+	 * $this->backend->store().
+	 *
+	 * Side effect: when a pagination element exists it replaces the document
+	 * loaded in $this->fuckhtml.
+	 *
+	 * @param string $q          search query
+	 * @param string $nsfw       nsfw filter value
+	 * @param string $country    country filter value
+	 * @param string $spellcheck spellcheck flag
+	 * @param string $page       page type passed to the backend store
+	 * @param mixed  $proxy      proxy passed to the backend store
+	 * @return mixed|null value returned by backend->store(), or null when
+	 *                    there is no usable next page link
+	 */
+	private function generatenextpagetoken($q, $nsfw, $country, $spellcheck, $page, $proxy){
+
+		$pagination =
+			$this->fuckhtml
+			->getElementById(
+				"pagination",
+				"div"
+			);
+
+		if(!$pagination){
+
+			return null;
+		}
+
+		$this->fuckhtml->load($pagination);
+
+		$buttons =
+			$this->fuckhtml
+			->getElementsByClassName(
+				"button",
+				"a"
+			);
+
+		if(count($buttons) === 0){
+
+			return null;
+		}
+
+		// the "next" link, when present, is the last button
+		$last = $buttons[count($buttons) - 1];
+
+		if(
+			strtolower(
+				$this->fuckhtml
+				->getTextContent(
+					$last
+				)
+			) != "next"
+		){
+
+			return null;
+		}
+
+		// without a parsable offset the token would point back at the
+		// first page, so treat that as "no next page"
+		if(
+			!isset($last["attributes"]["href"]) ||
+			preg_match(
+				'/offset=([0-9]+)/',
+				$this->fuckhtml->getTextContent($last["attributes"]["href"]),
+				$match
+			) !== 1
+		){
+
+			return null;
+		}
+
+		return
+			$this->backend->store(
+				json_encode(
+					[
+						"q" => $q,
+						"offset" => (int)$match[1],
+						"nsfw" => $nsfw,
+						"country" => $country,
+						"spellcheck" => $spellcheck
+					]
+				),
+				$page,
+				$proxy
+			);
+	}
+
+	/**
+	 * Recover the original image URL from a proxied image URL.
+	 *
+	 * The proxied URL embeds the original URL as URL-safe base64, split into
+	 * path segments by "/" and followed by a file extension, e.g.
+	 * https://imgs.search.brave.com/<sig>/rs:fit:844:225:1/g:ce/aHR0cHM6Ly90c2U0/Lm1t...QXBp.jpeg
+	 * "aHR0" is the base64 encoding of "htt" and marks where the payload starts.
+	 *
+	 * @param string $url proxied (or plain) URL
+	 * @return string decoded URL, or $url unchanged when it does not contain
+	 *                exactly one "aHR0" marker
+	 */
+	private function unshiturl($url){
+
+		$tmp = explode("aHR0", $url);
+
+		if(count($tmp) !== 2){
+
+			// nothing to do
+			return $url;
+		}
+
+		// drop the file extension that follows the payload
+		$payload = explode(".", $tmp[1])[0];
+
+		// remove the "/" segment separators first, then map the URL-safe
+		// alphabet back to standard base64 ("_" => "/", "-" => "+");
+		// base64_decode() would otherwise silently drop every "-"
+		return
+			base64_decode(
+				"aHR0" .
+				str_replace(
+					["/", "_", "-"],
+					["", "/", "+"],
+					$payload
+				)
+			);
+	}
+}
diff --git a/scraper/cara.php b/scraper/cara.php
new file mode 100644
index 0000000..ed3d0b5
--- /dev/null
+++ b/scraper/cara.php
@@ -0,0 +1,847 @@
+<?php
+
+class cara{
+
+ /**
+ * Load the backend library and create the backend instance used by this
+ * scraper, constructed with the name "cara".
+ */
+ public function __construct(){
+
+ // path is relative; resolved through PHP's include lookup rules
+ include "lib/backend.php";
+ $this->backend = new backend("cara");
+ }
+
+ /**
+ * Describe the search filters offered for cara.
+ *
+ * Each filter maps to a "display" label and an "option" list of
+ * value => label pairs. image() reads the selected values as
+ * $get["sort"], $get["type"], $get["fields"], $get["category"] and
+ * $get["software"]; the value "any" means that no filter is sent.
+ *
+ * @param string $page page type; not used, the same filters are always returned
+ * @return array filter definitions keyed by filter name
+ */
+ public function getfilters($page){
+
+ return [
+ "sort" => [
+ "display" => "Sort by",
+ "option" => [
+ "Top" => "Top",
+ "MostRecent" => "Most Recent"
+ ]
+ ],
+ "type" => [
+ "display" => "Post type",
+ "option" => [
+ "any" => "Any type",
+ "portfolio" => "Portfolio", // {"posts":["portfolio"]}
+ "timeline" => "Timeline" // {"posts":["timeline"]}
+ ]
+ ],
+ "fields" => [
+ "display" => "Field/Medium",
+ "option" => [
+ "any" => "Any field",
+ "2D" => "2D Work",
+ "3D" => "3D Work",
+ "3DPrinting" => "3D Printing",
+ "Acrylic" => "Acrylic",
+ "AlcoholMarkers" => "Alcohol Markers",
+ "Animation" => "Animation",
+ "Chalk" => "Chalk",
+ "Charcoal" => "Charcoal",
+ "Colored pencil" => "Colored pencil",
+ "Conte" => "Conte",
+ "Crayon" => "Crayon",
+ "Digital" => "Digital",
+ "Gouache" => "Gouache",
+ "Ink" => "Ink",
+ "MixedMedia" => "Mixed-Media",
+ "Oil" => "Oil",
+ "Oil-based Markers" => "Oil-based Markers",
+ "Other" => "Other",
+ "Pastels" => "Pastels",
+ "Photography" => "Photography",
+ "Sculpture" => "Sculpture",
+ "Sketches" => "Sketches",
+ "Tattoos" => "Tattoos",
+ "Traditional" => "Traditional",
+ "VFX" => "VFX",
+ "Watercolor" => "Watercolor"
+ ]
+ ],
+ "category" => [
+ "display" => "Category",
+ "option" => [
+ "any" => "Any category",
+ "3DScanning" => "3D Scanning",
+ "Abstract" => "Abstract",
+ "Adoptable" => "Adoptable",
+ "Anatomy" => "Anatomy",
+ "Animals" => "Animals",
+ "Anime" => "Anime",
+ "App" => "App",
+ "ArchitecturalConcepts" => "Architectural Concepts",
+ "ArchitecturalVisualization" => "Architectural Visualization",
+ "AugmentedReality" => "Augmented Reality",
+ "Automotive" => "Automotive",
+ "BoardGameArt" => "Board Game Art",
+ "BookIllustration" => "Book Illustration",
+ "CardGameArt" => "Card Game Art",
+ "CeramicsPottery" => "Ceramics/Pottery",
+ "CharacterAnimation" => "Character Animation",
+ "CharacterDesign" => "Character Design",
+ "CharacterModeling" => "Character Modeling",
+ "ChildrensArt" => "Children's Illustration",
+ "Collectibles" => "Collectibles",
+ "ColoringPage" => "Coloring Page",
+ "ComicArt" => "Comic Art",
+ "ConceptArt" => "Concept Art",
+ "Cosplay" => "Cosplay",
+ "CostumeDesign" => "Costume Design",
+ "CoverArt" => "Cover Art",
+ "Creatures" => "Creatures",
+ "Diorama" => "Diorama",
+ "EditorialIllustration" => "Editorial Illustration",
+ "EmbroiderySewing" => "Embroidery/Sewing",
+ "EnvironmentalConceptArt" => "Environmental Concept Art",
+ "EnvironmentalConceptDesign" => "Environmental Concept Design",
+ "FanArt" => "Fan Art",
+ "Fantasy" => "Fantasy",
+ "Fashion" => "Fashion",
+ "FashionStyling" => "Fashion Styling",
+ "FiberArts" => "Fiber Arts",
+ "Furry" => "Furry",
+ "GameArt" => "Game Art",
+ "GameplayDesign" => "Gameplay Design",
+ "GamesEnvironmentArt" => "Games Environment Art",
+ "Gem" => "Gem",
+ "GraphicDesign" => "Graphic Design",
+ "Handicraft" => "Handicraft",
+ "HairStyling" => "Hair Styling",
+ "HardSurface" => "Hard Surface",
+ "Horror" => "Horror",
+ "Illustration" => "Illustration",
+ "IllustrationVisualization" => "Illustration Visualization",
+ "IndustrialDesign" => "Industrial Design",
+ "Jewelry" => "Jewelry",
+ "KnittingCrochet" => "Knitting/Crochet",
+ "Landscape" => "Landscape",
+ "LevelDesign" => "Level Design",
+ "Lighting" => "Lighting",
+ "Makeup" => "Makeup",
+ "Manga" => "Manga",
+ "MapsCartography" => "Maps/Cartography",
+ "MattePainting" => "Matte Painting",
+ "Materials" => "Materials",
+ "MechanicalDesign" => "Mechanical Design",
+ "Medical" => "Medical",
+ "Mecha" => "Mecha",
+ "MiniatureArt" => "Miniature Art",
+ "MotionGraphics" => "Motion Graphics",
+ "FrescoMurals" => "Fresco/Murals",
+ "Natural" => "Natural",
+ "Original Character" => "Original Character",
+ "Overlay" => "Overlay",
+ "PleinAir" => "Plein Air",
+ "Photogrammetry" => "Photogrammetry",
+ "PixelArt" => "Pixel Art",
+ "Portraits" => "Portraits",
+ "Props" => "Props",
+ "ProductDesign" => "Product Design",
+ "PublicDomain" => "Public Domain or Royalty Free",
+ "Real-Time3DEnvironmentArt" => "Real-Time 3D Environment Art",
+ "Realism" => "Realism",
+ "ScienceFiction" => "Science Fiction",
+ "ScientificVisualization" => "Scientific Visualization",
+ "Scripts" => "Scripts",
+ "StillLife" => "Still Life",
+ "Storyboards" => "Storyboards",
+ "Stylized" => "Stylized",
+ "Surreal" => "Surreal",
+ "TechnicalArt" => "Technical Art",
+ "Textures" => "Textures",
+ "Tools" => "Tools",
+ "Toys" => "Toys",
+ "ToyPackaging" => "Toy Packaging",
+ "Tutorials" => "Tutorials",
+ "UIArt" => "User Interface (UI) Art",
+ "UrbanSketch" => "Urban Sketch",
+ "VFXforAnimation" => "VFX for Animation",
+ "VFXforFilm" => "VFX for Film",
+ "VFXforGames" => "VFX for Games",
+ "VFXforRealTime" => "VFX for Real-Time",
+ "VFXforTV" => "VFX for TV",
+ "Vehicles" => "Vehicles",
+ "VirtualReality" => "Virtual Reality",
+ "VisualDevelopment" => "Visual Development",
+ "VoxelArt" => "Voxel Art",
+ "Vtubers" => "Vtubers",
+ "WIP" => "WIP (Work in Progress)",
+ "Web" => "Web",
+ "Weapons" => "Weapons",
+ "Wildlife" => "Wildlife",
+ "Woodcutting" => "Woodcutting"
+ ]
+ ],
+ "software" => [
+ "display" => "Software",
+ "option" => [
+ "any" => "Any software",
+ "123D" => "123D",
+ "123DCatch" => "123D Catch",
+ "3DBee" => "3DBee",
+ "3DCoat" => "3DCoat",
+ "3DCoatPrint" => "3DCoatPrint",
+ "3DCoatTextura" => "3DCoatTextura",
+ "3DEqualizer" => "3DEqualizer",
+ "3DFZephyr" => "3DF Zephyr",
+ "3Delight" => "3Delight",
+ "3dpeople" => "3dpeople",
+ "3dsMax" => "3ds Max",
+ "3DSPaint" => "3DS Paint",
+ "ACDSeeCanvas" => "ACDSee Canvas",
+ "AbletonLive" => "Ableton Live",
+ "Acrobat" => "Acrobat",
+ "AdobeDraw" => "Adobe Draw",
+ "AdobeFlash" => "Adobe Flash",
+ "AdobeFresco" => "Adobe Fresco",
+ "AdobeSubstance3Dassets" => "Adobe Substance 3D assets",
+ "AdobeXD" => "Adobe XD",
+ "AffinityDesigner" => "Affinity Designer",
+ "AffinityPhoto" => "Affinity Photo",
+ "AfterEffects" => "After Effects",
+ "Akeytsu" => "Akeytsu",
+ "Alchemy" => "Alchemy",
+ "AliasDesign" => "Alias Design",
+ "AlightMotion" => "Alight Motion",
+ "Amadine" => "Amadine",
+ "Amberlight" => "Amberlight",
+ "Animate" => "Animate",
+ "AnimationMaster" => "Animation:Master",
+ "AnimeStudio" => "Anime Studio",
+ "Apophysis" => "Apophysis",
+ "ArchiCAD" => "ArchiCAD",
+ "Arion" => "Arion",
+ "ArionFX" => "ArionFX",
+ "Arnold" => "Arnold",
+ "ArtEngine" => "ArtEngine",
+ "ArtFlow" => "ArtFlow",
+ "ArtRage" => "ArtRage",
+ "ArtstudioPro" => "Artstudio Pro",
+ "Artweaver" => "Artweaver",
+ "Aseprite" => "Aseprite",
+ "Audition" => "Audition",
+ "AutoCAD" => "AutoCAD",
+ "AutodeskSketchBook" => "Autodesk SketchBook",
+ "AvidMediaComposer" => "Avid Media Composer",
+ "AzPainter" => "AzPainter",
+ "babylonjs" => "babylon.js",
+ "BalsamiqMockup" => "Balsamiq Mockup",
+ "Bforartists" => "Bforartists",
+ "BlackInk" => "Black Ink",
+ "BlackmagicDesignFusion" => "Blackmagic Design Fusion",
+ "Blender" => "Blender",
+ "Blender DeepPaint" => "Blender DeepPaint",
+ "BlenderGreasePencil" => "Blender Grease Pencil",
+ "Blockbench" => "Blockbench",
+ "BodyPaint" => "BodyPaint",
+ "Boxcutter" => "Boxcutter",
+ "BraidMaker" => "Braid Maker",
+ "BrickLinkStudio" => "BrickLink Studio",
+ "Bridge" => "Bridge",
+ "Brushifyio" => "Brushify.io",
+ "C" => "C",
+ "C#" => "C#",
+ "C++" => "C++",
+ "CACANi" => "CACANi",
+ "CLIPSTUDIOPAINT" => "CLIP STUDIO PAINT",
+ "CLO" => "CLO",
+ "CRYENGINE" => "CRYENGINE",
+ "Callipeg" => "Callipeg",
+ "Canva" => "Canva",
+ "CaptureOne" => "Capture One",
+ "CartoonAnimator" => "Cartoon Animator",
+ "Carveco" => "Carveco",
+ "Cavalry" => "Cavalry",
+ "Chaotica" => "Chaotica",
+ "CharacterAnimator" => "Character Animator",
+ "CharacterCreator" => "Character Creator",
+ "Cinema4D" => "Cinema 4D",
+ "ClarisseiFX" => "Clarisse iFX",
+ "Coiffure" => "Coiffure",
+ "ColorsLive" => "Colors Live",
+ "Combustion" => "Combustion",
+ "Construct2" => "Construct 2",
+ "Core" => "Core",
+ "CorelPainter" => "Corel Painter",
+ "CorelDRAWGraphicsSuite" => "CorelDRAW Graphics Suite",
+ "CoronaRenderer" => "Corona Renderer",
+ "ProMotionNG" => "Cosmigo Pro Motion NG",
+ "CrazyBump" => "CrazyBump",
+ "Crocotile3D" => "Crocotile 3D",
+ "Curvy3D" => "Curvy 3D",
+ "Cycles4D" => "Cycles 4D",
+ "Darkroom" => "Darkroom",
+ "DAZStudio" => "DAZ Studio",
+ "DDO" => "DDO",
+ "DECIMA" => "DECIMA",
+ "Darktable" => "Darktable",
+ "DaVinciResolve" => "DaVinci Resolve",
+ "Dimension" => "Dimension",
+ "DragonBones" => "DragonBones",
+ "Dragonframe" => "Dragonframe",
+ "Drawpile" => "Drawpile",
+ "Dreams" => "Dreams",
+ "Dreamweaver" => "Dreamweaver",
+ "DxOPhotoLab" => "DxO PhotoLab",
+ "ECycles" => "E-Cycles",
+ "EmberGen" => "EmberGen",
+ "Encore" => "Encore",
+ "Expresii" => "Expresii",
+ "FStorm" => "FStorm",
+ "FadeIn" => "FadeIn",
+ "Feather3D" => "Feather 3D",
+ "FiberShop" => "FiberShop",
+ "Figma" => "Figma",
+ "FilmoraWondershare" => "Filmora Wondershare",
+ "FilterForge" => "Filter Forge",
+ "FinalCutPro" => "Final Cut Pro",
+ "FinalDraft" => "Final Draft",
+ "finalRender" => "finalRender",
+ "FireAlpaca" => "FireAlpaca",
+ "Fireworks" => "Fireworks",
+ "FlamePainter" => "Flame Painter",
+ "Flash" => "Flash",
+ "FlipaClip" => "FlipaClip",
+ "FlipnoteStudio" => "Flipnote Studio",
+ "Fluent" => "Fluent",
+ "ForestPack" => "Forest Pack",
+ "FormZ" => "Form-Z",
+ "Fractorium" => "Fractorium",
+ "FreeCAD" => "FreeCAD",
+ "FreeHand" => "FreeHand",
+ "Forger" => "Forger",
+ "FrostbiteEngine" => "Frostbite Engine",
+ "fSpy" => "fSpy",
+ "FumeFX" => "FumeFX",
+ "Fusion360" => "Fusion 360",
+ "GIMP" => "GIMP",
+ "GSCurveTools" => "GS CurveTools",
+ "GSToolbox" => "GS Toolbox",
+ "Gaea" => "Gaea",
+ "GameTextures" => "Game Textures",
+ "GameMakerStudio" => "GameMaker: Studio",
+ "GarageFarmNET" => "GarageFarm.NET",
+ "GeoGlyph" => "GeoGlyph",
+ "GigapixelAl" => "Gigapixel Al",
+ "Glaxnimate" => "Glaxnimate",
+ "GnomePaint" => "Gnome Paint",
+ "Godot" => "Godot",
+ "Goxel" => "Goxel",
+ "Graphite" => "Graphite",
+ "Graswald" => "Graswald",
+ "GravitySketch" => "Gravity Sketch",
+ "GuerillaRender" => "GuerillaRender",
+ "HDRLightStudio" => "HDR Light Studio",
+ "HairStrandDesigner" => "Hair Strand Designer",
+ // NOTE(review): this label contains an HTML entity while the other labels are plain text -- confirm how labels are escaped on output
+ "HairTGHairFur" => "HairTG - Hair &amp; Fur",
+ "HairTGSurfaceFeatherEdition" => "HairTG - Surface, Feather Edition",
+ "HairTGSurfaceHairEdition" => "HairTG - Surface, Hair Edition",
+ "Handplane" => "Handplane",
+ "Hansoft" => "Hansoft",
+ "HardOps" => "Hard Ops",
+ "HardMesh" => "HardMesh",
+ "Harmony" => "Harmony",
+ "HeavypaintWebbypaint" => "Heavypaint/Webbypaint",
+ "HelloPaint" => "HelloPaint",
+ "HeliconFocus" => "Helicon Focus",
+ "Hexels" => "Hexels",
+ "HiPaint" => "HiPaint",
+ "Houdini" => "Houdini",
+ "HydraRenderer" => "Hydra Renderer",
+ "iArtbook" => "iArtbook",
+ "IbisPaint" => "ibisPaint",
+ "Ideas" => "Ideas",
+ "IllustStudio" => "Illust Studio",
+ "Illustrator" => "Illustrator",
+ "IllustratorDraw" => "Illustrator Draw",
+ "InDesign" => "InDesign",
+ "Inochi2D" => "Inochi2D",
+ "InVision" => "InVision",
+ "InVisionCraft" => "InVision Craft",
+ "InfinitePainter" => "Infinite Painter",
+ "Inkscape" => "Inkscape",
+ "Inspirit" => "Inspirit",
+ "InstaLOD" => "InstaLOD",
+ "InstaMAT" => "InstaMAT",
+ "InstantLightRealtimePBR" => "Instant Light Realtime PBR",
+ "InstantMeshes" => "Instant Meshes",
+ "InstantTerra" => "Instant Terra",
+ "Inventor" => "Inventor",
+ "Iray" => "Iray",
+ "JWildfire" => "JWildfire",
+ "Java" => "Java",
+ "Jira" => "Jira",
+ "JumpPaint" => "Jump Paint by MediBang",
+ "JSPaint" => "JS Paint",
+ "Katana" => "Katana",
+ "Keyshot" => "Keyshot",
+ "KidPix" => "Kid Pix",
+ "KitBash3D" => "KitBash3D",
+ "Knald" => "Knald",
+ "Kodon" => "Kodon",
+ "KolourPaint" => "KolourPaint",
+ "Krakatoa" => "Krakatoa",
+ "KRESKA" => "KRESKA",
+ "Krita" => "Krita",
+ "LensStudio" => "Lens Studio",
+ "LibreSprite" => "LibreSprite",
+ "LightWave3D" => "LightWave 3D",
+ "Lightroom" => "Lightroom",
+ "Linearity" => "Linearity",
+ "LiquiGen" => "LiquiGen",
+ "Live2DCubism" => "Live2D Cubism",
+ "LookatmyHair" => "Look at my Hair",
+ "Lotpixel" => "Lotpixel",
+ "Lumion" => "Lumion",
+ "LuxRender" => "LuxRender",
+ "MacPaint" => "MacPaint",
+ "MagicaCSG" => "MagicaCSG",
+ "MagicaVoxel" => "MagicaVoxel",
+ "Magma" => "Magma",
+ "MakeHuman" => "MakeHuman",
+ "Malmal" => "Malmal",
+ "Mandelbulb3D" => "Mandelbulb 3D",
+ "Mandelbulber" => "Mandelbulber",
+ "MangaStudio" => "Manga Studio",
+ "Mari" => "Mari",
+ "MarmosetToolbag" => "Marmoset Toolbag",
+ "MarvelousDesigner" => "Marvelous Designer",
+ "MasterpieceStudioPro" => "Masterpiece Studio Pro",
+ "MasterpieceVR" => "MasterpieceVR",
+ "Maverick" => "Maverick",
+ "MaxwellRender" => "Maxwell Render",
+ "Maya" => "Maya",
+ "MediBangPaint" => "MediBang Paint",
+ "MediumbyAdobe" => "Medium by Adobe",
+ "Megascans" => "Megascans",
+ "mentalray" => "mental ray",
+ "MeshLab" => "MeshLab",
+ "Meshroom" => "Meshroom",
+ "MetaHumanCreator" => "MetaHuman Creator",
+ "Metashape" => "Metashape",
+ "MightyBake" => "MightyBake",
+ "MikuMikuDance" => "MikuMikuDance",
+ "Minecraft" => "Minecraft",
+ "Mischief" => "Mischief",
+ "Mixamo" => "Mixamo",
+ "Mixer" => "Mixer",
+ "MoI3D" => "MoI3D",
+ "Mocha" => "Mocha",
+ "Modo" => "Modo",
+ "Moho" => "Moho",
+ "MotionBuilder" => "MotionBuilder",
+ "Mudbox" => "Mudbox",
+ "Muse" => "Muse",
+ "MSPaint" => "MS Paint",
+ "MyPaint" => "MyPaint",
+ "NDO" => "NDO",
+ "NX" => "NX",
+ "NdotCAD" => "NdotCAD",
+ "NintendoNotes" => "Nintendo Notes",
+ "NomadSculpt" => "Nomad Sculpt",
+ "Notability" => "Notability",
+ "Nuke" => "Nuke",
+ "Nvil" => "Nvil",
+ "OctaneRender" => "Octane Render",
+ "Omniverse" => "Omniverse",
+ "OmniverseCreate" => "Omniverse Create",
+ "ON1PhotoRAW" => "ON1 Photo RAW",
+ "Open3DEngine" => "Open 3D Engine",
+ "OpenCanvas" => "OpenCanvas",
+ "OpenGL" => "OpenGL",
+ "OpenToonz" => "OpenToonz",
+ "Ornatrix" => "Ornatrix",
+ "OsciRender" => "Osci-Render",
+ "OurPaint" => "Our Paint",
+ "PBRMAX" => "PBRMAX",
+ "PFTrack" => "PFTrack",
+ "PTGui" => "PTGui",
+ "Paintbrush" => "Paintbrush",
+ "PaintNET" => "Paint.NET",
+ "PaintShopPro" => "PaintShop Pro",
+ "PaintToolSAI" => "Paint Tool SAI",
+ "PaintstormStudio" => "Paintstorm Studio",
+ "Paper" => "Paper",
+ "Pencil2D" => "Pencil2D",
+ "Penpot" => "Penpot",
+ "PhoenixFD" => "Phoenix FD",
+ "Phonto" => "Phonto",
+ "PhotoLab2" => "PhotoLab 2",
+ "Photopea" => "Photopea",
+ "Photoscan" => "Photoscan",
+ "Photoshop" => "Photoshop",
+ "PhotoshopElements" => "Photoshop Elements",
+ "PicoCAD" => "picoCAD",
+ "PicoCAD2" => "picoCAD 2",
+ "Pinta" => "Pinta",
+ "Piskel" => "Piskel",
+ "Pixilart" => "Pixilart",
+ "Pixelitor" => "Pixelitor",
+ "Pixelmator" => "Pixelmator",
+ "Pixelorama" => "Pixelorama",
+ "PixivSketch" => "pixiv Sketch",
+ "Pixquare" => "Pixquare",
+ "PlantCatalog" => "PlantCatalog",
+ "PlantFactory" => "PlantFactory",
+ "Plasticity" => "Plasticity",
+ "PNGtuberPlus" => "PNGtuber Plus",
+ "Poliigon" => "Poliigon",
+ "Polybrush" => "Polybrush",
+ "PopcornFx" => "PopcornFx",
+ "Poser" => "Poser",
+ "Premiere" => "Premiere",
+ "PremiereElements" => "Premiere Elements",
+ "PresagisCreator" => "Presagis Creator",
+ "ProTools" => "Pro Tools",
+ "Procreate" => "Procreate",
+ "ProcreateDreams" => "Procreate Dreams",
+ "Producer" => "Producer",
+ "PrometheanAI" => "Promethean AI",
+ "PureRef" => "PureRef",
+ "Python" => "Python",
+ "PyxelEdit" => "PyxelEdit",
+ "QuadRemesher" => "Quad Remesher",
+ "QuarkXPress" => "QuarkXPress",
+ "Qubicle" => "Qubicle",
+ "Quill" => "Quill",
+ "QuixelBridge" => "Quixel Bridge",
+ "QuixelMegascans" => "Quixel Megascans",
+ "QuixelMixer" => "Quixel Mixer",
+ "QuixelSuite" => "Quixel Suite",
+ "R3DSWrap" => "R3DS Wrap",
+ "R3DSZWRAP" => "R3DS ZWRAP",
+ "RDTextures" => "RD-Textures",
+ "RailClone" => "RailClone",
+ "RealFlow" => "RealFlow",
+ "RealisticPaintStudio" => "Realistic Paint Studio",
+ "RealityCapture" => "RealityCapture",
+ "RealityScan" => "RealityScan",
+ "RealtimeBoard" => "Realtime Board",
+ "Rebelle" => "Rebelle",
+ "Redshift" => "Redshift",
+ "RenderMan" => "RenderMan",
+ "RenderNetwork" => "Render Network",
+ "Revit" => "Revit",
+ "Rhino" => "Rhino",
+ "Rhinoceros" => "Rhinoceros",
+ "RizomUV" => "RizomUV",
+ "RoughAnimator" => "Rough Animator",
+ "SamsungNotes" => "Samsung Notes",
+ "SamsungPENUP" => "Samsung PENUP",
+ "ScansLibrary" => "ScansLibrary",
+ "Scrivener" => "Scrivener",
+ "Sculpt+" => "Sculpt+",
+ "Sculptris" => "Sculptris",
+ "ShaveandaHaircut" => "Shave and a Haircut",
+ "ShiVa3D" => "ShiVa3D",
+ "Shotgun" => "Shotgun",
+ "Silo" => "Silo",
+ "Silugen" => "Silugen",
+ "Sketch" => "Sketch",
+ "SketchApp" => "Sketch App",
+ "SketchBookPro" => "SketchBook Pro",
+ "SketchClub" => "SketchClub",
+ "SketchUp" => "SketchUp",
+ "Sketchable" => "Sketchable",
+ "Sketchfab" => "Sketchfab",
+ "Skyshop" => "Skyshop",
+ "Snapseed" => "Snapseed",
+ "Snowdrop" => "Snowdrop",
+ "Softimage" => "Softimage",
+ "SolidWorks" => "SolidWorks",
+ "SonySketch" => "Sony Sketch",
+ "Soundbooth" => "Soundbooth",
+ "Source2" => "Source 2",
+ "SourceControl" => "Source Control",
+ "SourceFilmmaker" => "Source Filmmaker",
+ "SpeedTree" => "SpeedTree",
+ "Speedgrade" => "Speedgrade",
+ "SpeedyPainter" => "SpeedyPainter",
+ "Spine2D" => "Spine 2D",
+ "Spriter" => "Spriter",
+ "Stingray" => "Stingray",
+ "Storyboarder" => "Storyboarder",
+ "StoryboardPro" => "Storyboard Pro",
+ "SublimeText" => "Sublime Text",
+ "Substance3DDesigner" => "Substance 3D Designer",
+ "Substance3DModeler" => "Substance 3D Modeler",
+ "Substance3DPainter" => "Substance 3D Painter",
+ "Substance3DSampler" => "Substance 3D Sampler",
+ "Substance3DStager" => "Substance 3D Stager",
+ "SubstanceB2M" => "Substance B2M",
+ "SweetHome3D" => "Sweet Home 3D",
+ "SynthEyes" => "SynthEyes",
+ "TTools" => "TTools",
+ "TVPaint" => "TVPaint",
+ "TVPaintAnimation" => "TVPaint Animation",
+ "TayasuiSketches" => "Tayasui Sketches",
+ "TayasuiSketchesMobileApp" => "Tayasui Sketches Mobile App",
+ "TayasuiSketchesPro" => "Tayasui Sketches Pro",
+ "Terragen" => "Terragen",
+ "Texturescom" => "Textures.com",
+ "Texturingxyz" => "Texturingxyz",
+ "TeyaConceptor" => "Teya Conceptor",
+ "TheGrove3D" => "The Grove 3D",
+ "TheaRender" => "Thea Render",
+ "Threejs" => "Three.js",
+ "Tiled" => "Tiled",
+ "TiltBrush" => "Tilt Brush",
+ "Tooll3" => "Tooll3",
+ "ToonBoomHarmony" => "Toon Boom Harmony",
+ "ToonBoomStudio" => "Toon Boom Studio",
+ "ToonSquid" => "ToonSquid",
+ "TopoGun" => "TopoGun",
+ "TuxPaint" => "Tux Paint",
+ "Tvori" => "Tvori",
+ "Twinmotion" => "Twinmotion",
+ "UNIGINEEngine" => "UNIGINE Engine",
+ "UVLayout" => "UVLayout",
+ "UltraFractal" => "Ultra Fractal",
+ "uMake" => "uMake",
+ "Unfold3D" => "Unfold 3D",
+ "Unity" => "Unity",
+ "UnrealEngine" => "Unreal Engine",
+ "Vengi" => "vengi",
+ "VRay" => "V-Ray",
+ "VRED" => "VRED",
+ "VTubeStudio" => "VTube Studio",
+ "Vectary" => "Vectary",
+ "VectorayGen" => "VectorayGen",
+ "Vectorworks" => "Vectorworks",
+ "VegasPro" => "Vegas Pro",
+ "VisualDesigner3D" => "Visual Designer 3D",
+ "VisualStudio" => "Visual Studio",
+ "VRoidStudio" => "VRoid Studio",
+ "Vue" => "Vue",
+ "Vuforia" => "Vuforia",
+ "WebGL" => "WebGL",
+ "WhiteboardFox" => "Whiteboard Fox",
+ "WickEditor" => "Wick Editor",
+ "Wings3D" => "Wings 3D",
+ "Word" => "Word",
+ "WorldCreator" => "World Creator",
+ "WorldMachine" => "World Machine",
+ "XParticles" => "X-Particles",
+ "Xfrog" => "Xfrog",
+ "Xgen" => "Xgen",
+ "xNormal" => "xNormal",
+ "xTex" => "xTex",
+ "XoliulShader" => "Xoliul Shader",
+ "Yafaray" => "Yafaray",
+ "Yeti" => "Yeti",
+ "ZBrush" => "ZBrush",
+ "ZBrushCore" => "ZBrushCore",
+ "ZenBrush" => "Zen Brush"
+ ]
+ ]
+ ];
+ }
+
+	/**
+	 * Perform a GET request against the cara API and return the raw body.
+	 *
+	 * $search is only used to build the Referer header. It now has a default
+	 * value: a required parameter after the optional $get is deprecated
+	 * since PHP 8.0 and silently made $get required as well.
+	 *
+	 * @param mixed  $proxy  proxy handed to backend->assign_proxy()
+	 * @param string $url    endpoint URL without query string
+	 * @param array  $get    query parameters, appended when not empty
+	 * @param string $search search term placed in the Referer header
+	 * @return string|bool response body as returned by curl_exec()
+	 * @throws Exception with the curl error message when the transfer fails
+	 */
+	private function get($proxy, $url, $get = [], $search = ""){
+
+		$curlproc = curl_init();
+
+		if($get !== []){
+			$get = http_build_query($get);
+			$url .= "?" . $get;
+		}
+
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+		// sentry-trace, baggage and Cookie headers of the real client are
+		// intentionally not sent
+		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+			["User-Agent: " . config::USER_AGENT,
+			"Accept: application/json, text/plain, */*",
+			"Accept-Language: en-US,en;q=0.5",
+			"Accept-Encoding: gzip, deflate, br, zstd",
+			"DNT: 1",
+			"Sec-GPC: 1",
+			"Connection: keep-alive",
+			"Referer: https://cara.app/search?q=" . urlencode($search),
+			"Sec-Fetch-Dest: empty",
+			"Sec-Fetch-Mode: cors",
+			"Sec-Fetch-Site: same-origin",
+			"TE: trailers"]
+		);
+
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+		$this->backend->assign_proxy($curlproc, $proxy);
+
+		$data = curl_exec($curlproc);
+
+		if(curl_errno($curlproc)){
+
+			// read the message first, then release the handle on the
+			// failure path as well
+			$error = curl_error($curlproc);
+			curl_close($curlproc);
+
+			throw new Exception($error);
+		}
+
+		curl_close($curlproc);
+		return $data;
+	}
+
+	/**
+	 * Search cara posts and return their images.
+	 *
+	 * With $get["npt"] set, the stored request parameters and proxy are
+	 * restored from the backend; otherwise a new request is built from
+	 * $get["s"] and the sort/type/fields/category/software filters
+	 * ("any" means the filter is not sent).
+	 *
+	 * @param array $get request parameters
+	 * @return array ["status" => "ok", "npt" => token|null, "image" => [...]]
+	 * @throws Exception when the search term is empty, the request fails
+	 *                   or the response is not a JSON list
+	 */
+	public function image($get){
+
+		if($get["npt"]){
+
+			[$npt, $proxy] =
+				$this->backend->get(
+					$get["npt"],
+					"images"
+				);
+
+			$npt = json_decode($npt, true);
+		}else{
+
+			$search = $get["s"];
+			if(strlen($search) === 0){
+
+				throw new Exception("Search term is empty!");
+			}
+
+			$proxy = $this->backend->get_ip();
+
+			// parse filters
+			$filters = [];
+
+			if($get["type"] != "any"){
+
+				$filters["posts"] = [$get["type"]];
+			}
+
+			if($get["fields"] != "any"){
+
+				$filters["fields"] = [$get["fields"]];
+			}
+
+			if($get["category"] != "any"){
+
+				$filters["categories"] = [$get["category"]];
+			}
+
+			if($get["software"] != "any"){
+
+				$filters["softwares"] = [$get["software"]];
+			}
+
+			$npt = [
+				"q" => $search,
+				"sortBy" => $get["sort"],
+				"take" => 24,
+				"skip" => 0,
+				// an empty PHP array would encode as "[]", the API is sent "{}"
+				"filters" => $filters === [] ? "{}" : json_encode($filters)
+			];
+		}
+
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"image" => []
+		];
+
+		// https://cara.app/api/search/portfolio-posts?q=jak+and+daxter&sortBy=Top&take=24&skip=0&filters=%7B%7D
+		try{
+			$json =
+				$this->get(
+					$proxy,
+					"https://cara.app/api/search/posts",
+					$npt,
+					$npt["q"]
+				);
+
+		}catch(Exception $error){
+
+			throw new Exception("Failed to fetch JSON");
+		}
+
+		$json = json_decode($json, true);
+
+		// also rejects scalar payloads, which cannot be iterated as posts
+		if(!is_array($json)){
+
+			throw new Exception("Failed to decode JSON");
+		}
+
+		$pagesize = (int)$npt["take"];
+		$imagecount = 0;
+
+		foreach($json as $image){
+
+			// every returned entry counts towards the page size, even when
+			// it yields no image
+			$imagecount++;
+
+			if(
+				!is_array($image) ||
+				!isset($image["images"]) ||
+				!is_array($image["images"]) ||
+				count($image["images"]) === 0
+			){
+
+				// sometimes the api returns no images for an object
+				continue;
+			}
+
+			$cover = null;
+			$sources = [];
+
+			foreach($image["images"] as $source){
+
+				if(!isset($source["src"])){
+
+					continue;
+				}
+
+				if(!empty($source["isCoverImg"])){
+
+					$cover = [
+						"url" => "https://images.cara.app/" . $this->fix_url($source["src"]),
+						"width" => 500,
+						"height" => 500
+					];
+				}else{
+
+					$sources[] = [
+						"url" => "https://images.cara.app/" . $this->fix_url($source["src"]),
+						"width" => null,
+						"height" => null
+					];
+				}
+			}
+
+			// the cover image goes last
+			if($cover !== null){
+
+				$sources[] = $cover;
+			}
+
+			$out["image"][] = [
+				"title" => str_replace("\n", " ", (string)($image["content"] ?? "")),
+				"source" => $sources,
+				"url" => "https://cara.app/post/" . $image["id"]
+			];
+		}
+
+		// a full page means there may be more results
+		if($imagecount === $pagesize){
+
+			$npt["skip"] += $pagesize;
+
+			$out["npt"] =
+				$this->backend->store(
+					json_encode($npt),
+					"images",
+					$proxy
+				);
+		}
+
+		return $out;
+	}
+
+	/**
+	 * Percent-encode the spaces of an image path.
+	 *
+	 * @param string $url path or URL that may contain spaces
+	 * @return string $url with every space replaced by "%20"
+	 */
+	private function fix_url($url){
+
+		$encoded = str_replace(" ", "%20", $url);
+
+		return $encoded;
+	}
+}
diff --git a/scraper/coccoc.php b/scraper/coccoc.php
new file mode 100644
index 0000000..fd09556
--- /dev/null
+++ b/scraper/coccoc.php
@@ -0,0 +1,672 @@
+<?php
+
+class coccoc{
+
+ /*
+ Load the two helper libraries this scraper relies on and keep one
+ instance of each on the object.
+ */
+ public function __construct(){
+
+ // backend helper, created with this scraper's name; the methods below
+ // use it for get_ip(), assign_proxy(), store() and get()
+ include "lib/backend.php";
+ $this->backend = new backend("coccoc");
+
+ // HTML helper; the methods below use extract_json() and getTextContent()
+ // NOTE(review): plain include (not include_once) - presumably only one
+ // scraper is constructed per request; confirm against the caller
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+
+ /*
+     Perform a GET request through the given proxy and return the
+     response body.
+
+     $proxy  proxy descriptor handed to backend->assign_proxy()
+     $url    target URL without a query string
+     $get    query string parameters, appended to $url when not empty
+
+     Throws an Exception carrying the curl error message when curl
+     reports an error.
+ */
+ private function get($proxy, $url, $get = []){
+
+     $handle = curl_init();
+
+     if($get !== []){
+
+         $url = $url . "?" . http_build_query($get);
+     }
+
+     $request_headers = [
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip, deflate, br, zstd",
+         "DNT: 1",
+         "Sec-GPC: 1",
+         "Connection: keep-alive",
+         //"Cookie: _contentAB_15040_vi=V-06_01; split_test_search=new_search; uid=L_bauXyZBY1B; vid=uCVQJQSTgb9QGT3o; ls=1753742684; serp_version=29223843/7621a70; savedS=direct",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: cross-site",
+         "Priority: u=0, i"
+     ];
+
+     curl_setopt($handle, CURLOPT_URL, $url);
+
+     // http2 bypass
+     curl_setopt($handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     curl_setopt($handle, CURLOPT_HTTPHEADER, $request_headers);
+
+     $this->backend->assign_proxy($handle, $proxy);
+
+     // default encoding
+     curl_setopt($handle, CURLOPT_ENCODING, "");
+
+     // return the body instead of printing it, verify TLS, 30s limits
+     curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($handle, CURLOPT_TIMEOUT, 30);
+
+     $body = curl_exec($handle);
+
+     if(curl_errno($handle)){
+
+         throw new Exception(curl_error($handle));
+     }
+
+     curl_close($handle);
+
+     return $body;
+ }
+
+ /*
+     Describe the filters offered for Coc Coc searches. The same set is
+     returned for every page type; $pagetype is not consulted.
+
+     Each entry maps a filter name to its display label and its
+     option value => option label table.
+ */
+ public function getfilters($pagetype){
+
+     $nsfw = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes", // no parameter is sent for this choice
+             "no" => "No" // sent as &safe=1
+         ]
+     ];
+
+     $time = [
+         "display" => "Time posted",
+         "option" => [
+             "any" => "Any time",
+             "1w" => "1 week ago",
+             "2w" => "2 weeks ago",
+             "1m" => "1 month ago",
+             "3m" => "3 months ago",
+             "6m" => "6 months ago",
+             "1Y" => "1 year ago"
+         ]
+     ];
+
+     $duplicates = [
+         "display" => "Remove duplicates",
+         "option" => [
+             "no" => "No",
+             "yes" => "Yes" // sent as &filter=0
+         ]
+     ];
+
+     return [
+         "nsfw" => $nsfw,
+         "time" => $time,
+         "filter" => $duplicates
+     ];
+ }
+
+ /*
+     Run a Coc Coc web search and convert the window.composerResponse
+     JSON embedded in the result page into the scraper result array.
+
+     $get  request parameters. "npt" is always read; when it is empty
+           "s", "nsfw", "time" and "filter" are read as well.
+
+     Returns an array with the keys status, spelling, npt, answer, web,
+     image, video, news and related. "npt" is a token from
+     backend->store() when more pages exist, null otherwise.
+
+     Throws an Exception when the page cannot be fetched, when
+     window.composerResponse is missing or not decodable, or when it
+     holds no search.search_results entry.
+
+     Every optional field of the response is checked before use, so a
+     partial response produces fewer results instead of PHP warnings
+     or type errors.
+ */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // continuation: the token resolves to the JSON-encoded query
+         // parameters and the proxy that were stored with it
+         [$query, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "web"
+             );
+
+         $query = json_decode($query, true);
+     }else{
+
+         $proxy = $this->backend->get_ip();
+
+         $query = [
+             "query" => $get["s"]
+         ];
+
+         // add filters
+         if($get["nsfw"] == "no"){
+
+             $query["safe"] = 1;
+         }
+
+         if($get["time"] != "any"){
+
+             $query["tbs"] = $get["time"];
+         }
+
+         if($get["filter"] == "yes"){
+
+             $query["filter"] = 0;
+         }
+     }
+
+     try{
+
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://coccoc.com/search",
+                 $query
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get search page");
+     }
+     //$html = file_get_contents("scraper/coccoc.html");
+
+     // the results are a JS object assigned to window.composerResponse
+     $html = explode("window.composerResponse", $html, 2);
+
+     if(count($html) !== 2){
+
+         throw new Exception("Failed to grep window.composerResponse");
+     }
+
+     $html =
+         json_decode(
+             $this->fuckhtml
+             ->extract_json(
+                 ltrim($html[1], " =")
+             ),
+             true
+         );
+
+     if($html === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(!isset($html["search"]["search_results"])){
+
+         throw new Exception("Coc Coc did not return a search_results object");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // word correction
+     if(
+         isset($html["top"]) &&
+         is_array($html["top"])
+     ){
+
+         foreach($html["top"] as $element){
+
+             if(isset($element["spellChecker"][0]["query"])){
+
+                 $out["spelling"] = [
+                     "type" => "not_many",
+                     // fall back to the query we sent when the response does not echo it
+                     "using" => $html["search"]["query"] ?? $query["query"] ?? null,
+                     "correction" => $element["spellChecker"][0]["query"]
+                 ];
+             }
+         }
+     }
+
+     foreach($html["search"]["search_results"] as $result){
+
+         if(isset($result["type"])){
+
+             switch($result["type"]){
+
+                 //
+                 // Related searches
+                 //
+                 case "related_queries":
+                     if(
+                         isset($result["queries"]) &&
+                         is_array($result["queries"])
+                     ){
+
+                         $out["related"] = $result["queries"];
+                     }
+                     continue 2;
+
+                 //
+                 // Videos
+                 //
+                 case "video_hits":
+                     if(
+                         isset($result["results"]) &&
+                         is_array($result["results"])
+                     ){
+
+                         foreach($result["results"] as $video){
+
+                             if(
+                                 !isset($video["title"]) ||
+                                 !isset($video["url"])
+                             ){
+
+                                 // nothing to show or link to
+                                 continue;
+                             }
+
+                             if(!empty($video["image_url"])){
+
+                                 $thumb = [
+                                     "ratio" => "16:9",
+                                     "url" => $video["image_url"]
+                                 ];
+                             }else{
+
+                                 $thumb = [
+                                     "ratio" => null,
+                                     "url" => null
+                                 ];
+                             }
+
+                             $out["video"][] = [
+                                 "title" =>
+                                     $this->titledots(
+                                         $this->fuckhtml
+                                         ->getTextContent(
+                                             $video["title"]
+                                         )
+                                     ),
+                                 "description" => null,
+                                 "author" => [
+                                     "name" => $video["uploader"] ?? null,
+                                     "url" => null,
+                                     "avatar" => null
+                                 ],
+                                 "date" =>
+                                     isset($video["date"]) ?
+                                     (int)$video["date"] : null,
+                                 "duration" =>
+                                     isset($video["duration"]) ?
+                                     (int)$video["duration"] : null,
+                                 "views" => null,
+                                 "thumb" => $thumb,
+                                 "url" => $video["url"]
+                             ];
+                         }
+                     }
+                     continue 2;
+             }
+         }
+
+         if(
+             !isset($result["title"]) ||
+             !isset($result["url"])
+         ){
+
+             // should not happen
+             continue;
+         }
+
+         // optional rich-snippet data attached to the result
+         $rich =
+             (
+                 isset($result["rich"]["data"]) &&
+                 is_array($result["rich"]["data"])
+             ) ?
+             $result["rich"]["data"] : [];
+
+         if(isset($rich["image_url"])){
+
+             $thumb = [
+                 "url" => $rich["image_url"],
+                 "ratio" => "16:9"
+             ];
+         }else{
+
+             $thumb = [
+                 "url" => null,
+                 "ratio" => null
+             ];
+         }
+
+         $sublinks = [];
+
+         if(
+             isset($rich["linked_docs"]) &&
+             is_array($rich["linked_docs"])
+         ){
+
+             foreach($rich["linked_docs"] as $sub){
+
+                 if(
+                     !isset($sub["title"]) ||
+                     !isset($sub["url"])
+                 ){
+
+                     continue;
+                 }
+
+                 $sublinks[] = [
+                     "title" =>
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $sub["title"]
+                             )
+                         ),
+                     "description" =>
+                         isset($sub["content"]) ?
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $sub["content"]
+                             )
+                         ) : null,
+                     "date" => null,
+                     "url" => $sub["url"]
+                 ];
+             }
+         }
+
+         // get date
+         $date =
+             isset($result["date"]) ?
+             (int)$result["date"] : null;
+
+         // probe for metadata
+         $table = [];
+
+         if(isset($rich["rating"])){
+
+             $table["Rating"] = $rich["rating"];
+
+             if(isset($rich["num_rating"])){
+
+                 $table["Rating"] .= " (" . number_format($rich["num_rating"]) . " ratings)";
+             }
+         }
+
+         if(isset($rich["views"])){
+
+             $table["Views"] = number_format($rich["views"]);
+         }
+
+         if(isset($rich["duration"])){
+
+             $table["Duration"] = $this->int2hms($rich["duration"]);
+         }
+
+         if(isset($rich["channel_name"])){
+
+             $table["Author"] = $rich["channel_name"];
+         }
+
+         if(isset($rich["video_quality"])){
+
+             $table["Quality"] = $rich["video_quality"];
+         }
+
+         if(isset($rich["category"])){
+
+             $table["Category"] = $rich["category"];
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $result["title"]
+                     )
+                 ),
+             "description" =>
+                 isset($result["content"]) ?
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $result["content"]
+                     )
+                 ) : null,
+             "url" => $result["url"],
+             "date" => $date,
+             "type" => "web",
+             "thumb" => $thumb,
+             "sublink" => $sublinks,
+             "table" => $table
+         ];
+     }
+
+     //
+     // Get wikipedia head
+     //
+     if(
+         isset($html["right"]) &&
+         is_array($html["right"])
+     ){
+
+         foreach($html["right"] as $wiki){
+
+             if(!isset($wiki["title"])){
+
+                 // not an infobox we know how to render
+                 continue;
+             }
+
+             $description = [];
+
+             if(isset($wiki["short_intro"])){
+
+                 $description[] =
+                     [
+                         "type" => "quote",
+                         "value" => $wiki["short_intro"],
+                     ];
+             }
+
+             if(isset($wiki["intro"])){
+
+                 $description[] =
+                     [
+                         "type" => "text",
+                         "value" => $wiki["intro"],
+                     ];
+             }
+
+             // get table elements
+             $table = [];
+
+             if(
+                 isset($wiki["fields"]) &&
+                 is_array($wiki["fields"])
+             ){
+
+                 foreach($wiki["fields"] as $element){
+
+                     if(
+                         !isset($element["title"]) ||
+                         !isset($element["value"])
+                     ){
+
+                         continue;
+                     }
+
+                     // implode() only accepts arrays; scalars are used as-is
+                     $table[$element["title"]] =
+                         is_array($element["value"]) ?
+                         implode(", ", $element["value"]) :
+                         (string)$element["value"];
+                 }
+             }
+
+             // get sublinks
+             $sublinks = [];
+
+             if(isset($wiki["website"])){
+
+                 if(
+                     preg_match(
+                         '/^http/',
+                         $wiki["website"]
+                     ) === 0
+                 ){
+
+                     // bare hostname: make it a usable link
+                     $sublinks["Website"] = "https://" . $wiki["website"];
+                 }else{
+
+                     $sublinks["Website"] = $wiki["website"];
+                 }
+             }
+
+             if(
+                 isset($wiki["profiles"]) &&
+                 is_array($wiki["profiles"])
+             ){
+
+                 foreach($wiki["profiles"] as $sitename => $url){
+
+                     // label the link with the part after the last underscore
+                     $sitename = explode("_", $sitename);
+                     $sitename = ucfirst($sitename[count($sitename) - 1]);
+
+                     $sublinks[$sitename] = $url;
+                 }
+             }
+
+             $out["answer"][] = [
+                 "title" =>
+                     $this->titledots(
+                         $wiki["title"]
+                     ),
+                 "description" => $description,
+                 "url" => null,
+                 "thumb" => isset($wiki["image"]["contentUrl"]) ? $wiki["image"]["contentUrl"] : null,
+                 "table" => $table,
+                 "sublink" => $sublinks
+             ];
+         }
+     }
+
+     // get next page
+     if(
+         isset($html["search"]["page"]) &&
+         isset($html["search"]["max_page"]) &&
+         (int)$html["search"]["page"] < (int)$html["search"]["max_page"]
+     ){
+
+         // https://coccoc.com/composer?_=1754021153532&p=0&q=zbabduiqwhduwqhdnwq&reqid=bwcAs00q&s=direct&apiV=1
+         // ^json endpoint, but the HTML page accepts &page=N as well
+
+         if(!isset($query["page"])){
+
+             $query["page"] = 2;
+         }else{
+
+             $query["page"]++;
+         }
+
+         $out["npt"] =
+             $this->backend
+             ->store(
+                 json_encode($query),
+                 "web",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ /*
+     Run a Coc Coc video search (tbm=vid) and convert the embedded
+     window.composerResponse JSON into the scraper result array.
+
+     $get  request parameters. "npt" is always read; when it is empty
+           "s", "nsfw", "time" and "filter" are read as well.
+
+     Returns an array with the keys status, npt, video, author,
+     livestream, playlist and reel. Only "video" and "npt" are filled.
+
+     Throws an Exception when the page cannot be fetched or decoded,
+     or when Coc Coc reports an error other than "no results".
+ */
+ public function video($get){
+
+     //$html = file_get_contents("scraper/coccoc.html");
+     if($get["npt"]){
+
+         // continuation: the token resolves to the JSON-encoded query
+         // parameters and the proxy that were stored with it
+         [$query, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "videos"
+             );
+
+         $query = json_decode($query, true);
+     }else{
+
+         $proxy = $this->backend->get_ip();
+
+         $query = [
+             "query" => $get["s"],
+             "tbm" => "vid"
+         ];
+
+         // add filters
+         if($get["nsfw"] == "no"){
+
+             $query["safe"] = 1;
+         }
+
+         if($get["time"] != "any"){
+
+             $query["tbs"] = $get["time"];
+         }
+
+         if($get["filter"] == "yes"){
+
+             $query["filter"] = 0;
+         }
+     }
+
+     try{
+
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://coccoc.com/search",
+                 $query
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get search page");
+     }
+
+     // the results are a JS object assigned to window.composerResponse
+     $html = explode("window.composerResponse", $html, 2);
+
+     if(count($html) !== 2){
+
+         throw new Exception("Failed to grep window.composerResponse");
+     }
+
+     $html =
+         json_decode(
+             $this->fuckhtml
+             ->extract_json(
+                 ltrim($html[1], " =")
+             ),
+             true
+         );
+
+     if($html === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     if(!isset($html["search_video"]["search_results"])){
+
+         if(isset($html["search_video"]["error"]["title"])){
+
+             // this particular error title is treated as an empty result set
+             if($html["search_video"]["error"]["title"] == "Không tìm thấy kết quả nào"){
+
+                 return $out;
+             }
+
+             throw new Exception("Coc Coc returned an error: " . $html["search_video"]["error"]["title"]);
+         }
+
+         throw new Exception("Coc Coc did not supply a search_results object");
+     }
+
+     foreach($html["search_video"]["search_results"] as $video){
+
+         if(
+             !isset($video["title"]) ||
+             !isset($video["url"])
+         ){
+
+             // nothing to show or link to
+             continue;
+         }
+
+         if(isset($video["rich"]["data"]["image_url"])){
+
+             $thumb = [
+                 "ratio" => "16:9",
+                 "url" => $video["rich"]["data"]["image_url"]
+             ];
+         }else{
+
+             $thumb = [
+                 "ratio" => null,
+                 "url" => null
+             ];
+         }
+
+         $out["video"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $video["title"]
+                     )
+                 ),
+             "description" =>
+                 isset($video["content"]) ?
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $video["content"]
+                     )
+                 ) : null,
+             "author" => [
+                 "name" =>
+                     isset($video["rich"]["data"]["channel_name"]) ?
+                     $video["rich"]["data"]["channel_name"] : null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             // cast like web() does so both result types carry an int
+             "date" =>
+                 isset($video["date"]) ?
+                 (int)$video["date"] : null,
+             "duration" =>
+                 isset($video["rich"]["data"]["duration"]) ?
+                 (int)$video["rich"]["data"]["duration"] : null,
+             "views" => null,
+             "thumb" => $thumb,
+             "url" => $video["url"]
+         ];
+     }
+
+     // get next page
+     if(
+         isset($html["search_video"]["page"]) &&
+         isset($html["search_video"]["max_page"]) &&
+         (int)$html["search_video"]["page"] < (int)$html["search_video"]["max_page"]
+     ){
+
+         if(!isset($query["page"])){
+
+             $query["page"] = 2;
+         }else{
+
+             $query["page"]++;
+         }
+
+         $out["npt"] =
+             $this->backend
+             ->store(
+                 json_encode($query),
+                 "videos",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ /*
+     Strip spaces, dots, control whitespace, NUL bytes and the
+     ellipsis character from both ends of a title.
+
+     trim() works on bytes, so listing the multi-byte ellipsis there
+     would strip its three bytes (E2 80 A6) individually and could cut
+     other UTF-8 characters in half at either end of the title. The
+     trimming is therefore done with a UTF-8 aware pattern.
+
+     $title  text to clean; null is treated as an empty string
+
+     Returns the trimmed string.
+ */
+ private function titledots($title){
+
+     $title = (string)$title;
+
+     $trimmed =
+         preg_replace(
+             '/\A[ .\t\n\r\x00\x0B…]+|[ .\t\n\r\x00\x0B…]+\z/u',
+             "",
+             $title
+         );
+
+     if($trimmed === null){
+
+         // input is not valid UTF-8: only strip the single-byte characters
+         return trim($title, " .\t\n\r\0\x0B");
+     }
+
+     return $trimmed;
+ }
+
+ /*
+     Format a number of seconds as HH:MM:SS. Each part is zero-padded
+     to two digits; the hour part grows beyond two digits when needed.
+ */
+ private function int2hms($seconds){
+
+     $h = floor($seconds / 3600);
+
+     // seconds left once the whole hours are taken out
+     $leftover = $seconds % 3600;
+     $m = floor($leftover / 60);
+
+     $s = $seconds % 60;
+
+     return sprintf("%02d:%02d:%02d", $h, $m, $s);
+ }
+}
diff --git a/scraper/crowdview.php b/scraper/crowdview.php
new file mode 100644
index 0000000..8fb267b
--- /dev/null
+++ b/scraper/crowdview.php
@@ -0,0 +1,145 @@
+<?php
+
+class crowdview{
+
+ /*
+ Load the two helper libraries this scraper relies on and keep one
+ instance of each on the object.
+ */
+ public function __construct(){
+
+ // backend helper, created with this scraper's name; the methods below
+ // use it for get_ip() and assign_proxy()
+ include "lib/backend.php";
+ $this->backend = new backend("crowdview");
+
+ // HTML helper; sanitize() below uses its getTextContent()
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ /*
+     This scraper offers no filters: an empty list is returned for
+     every page type.
+ */
+ public function getfilters($page){
+
+     $filters = [];
+
+     return $filters;
+ }
+
+ /*
+     Perform a GET request through the given proxy and return the
+     response body.
+
+     $proxy  proxy descriptor handed to backend->assign_proxy()
+     $url    target URL without a query string
+     $get    query string parameters, appended to $url when not empty
+
+     Throws an Exception carrying the curl error message when curl
+     reports an error.
+ */
+ private function get($proxy, $url, $get = []){
+
+     $handle = curl_init();
+
+     if($get !== []){
+
+         $url = $url . "?" . http_build_query($get);
+     }
+
+     $request_headers = [
+         "User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     curl_setopt($handle, CURLOPT_URL, $url);
+
+     // default encoding
+     curl_setopt($handle, CURLOPT_ENCODING, "");
+     curl_setopt($handle, CURLOPT_HTTPHEADER, $request_headers);
+
+     // return the body instead of printing it, verify TLS, 30s limits
+     curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($handle, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($handle, $proxy);
+
+     $body = curl_exec($handle);
+
+     if(curl_errno($handle)){
+
+         throw new Exception(curl_error($handle));
+     }
+
+     curl_close($handle);
+
+     return $body;
+ }
+
+ /*
+     Query the CrowdView search API and convert its JSON answer into
+     the scraper result array.
+
+     $get  request parameters; only "s" (the search term) is read.
+
+     Returns an array with the keys status, spelling, npt, answer, web,
+     image, video, news and related. Only "web" is filled; there is no
+     pagination, so "npt" is always null.
+
+     Throws an Exception when the search term is empty, when the
+     request fails or when the response is not valid JSON.
+ */
+ public function web($get){
+
+     $search = $get["s"];
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     $proxy = $this->backend->get_ip();
+
+     try{
+         $json = $this->get(
+             $proxy,
+             "https://crowdview-next-js.onrender.com/api/search-v3",
+             [
+                 "query" => $search
+             ]
+         );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $json = json_decode($json, true);
+
+     if($json === NULL){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(
+         !isset($json["results"]) ||
+         !is_array($json["results"])
+     ){
+
+         // valid JSON without a result list: nothing to show
+         return $out;
+     }
+
+     foreach($json["results"] as $item){
+
+         if(
+             !isset($item["title"]) ||
+             !isset($item["link"])
+         ){
+
+             // nothing to show or link to
+             continue;
+         }
+
+         $date = null;
+         $description = null;
+
+         if(isset($item["snippet"])){
+
+             // the text in front of the first <b> is tried as a date
+             // NOTE(review): snippet layout "<date> <b>...</b> text" is
+             // assumed from the original parsing; confirm against the API
+             $parts = explode("<b>", $item["snippet"], 2);
+
+             $timestamp =
+                 count($parts) === 2 ?
+                 strtotime($parts[0]) : false;
+
+             if($timestamp !== false){
+
+                 $date = $timestamp;
+                 $description = $this->sanitize($parts[1]);
+             }else{
+
+                 // no parsable date in front: keep the whole snippet
+                 // instead of dropping the text before the first <b>
+                 $description = $this->sanitize($item["snippet"]);
+             }
+         }
+
+         $out["web"][] = [
+             "title" => $this->sanitize($item["title"]),
+             "description" => $description,
+             "url" => $item["link"],
+             "date" => $date,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+ /*
+     Turn an HTML fragment from the API into display text: entities
+     are decoded, the result is passed through fuckhtml's
+     getTextContent(), then dots and spaces are trimmed from both ends.
+ */
+ private function sanitize($html){
+
+     $decoded = html_entity_decode($html);
+
+     $text =
+         $this->fuckhtml
+         ->getTextContent(
+             $decoded
+         );
+
+     return trim($text, ". ");
+ }
+}
diff --git a/scraper/curlie.php b/scraper/curlie.php
new file mode 100644
index 0000000..61a8eb2
--- /dev/null
+++ b/scraper/curlie.php
@@ -0,0 +1,309 @@
+<?php
+
+class curlie{
+
+ /*
+ Load the two helper libraries this scraper relies on and keep one
+ instance of each on the object.
+ */
+ public function __construct(){
+
+ // backend helper, created with this scraper's name; the methods below
+ // use it for get_ip(), assign_proxy(), store() and get()
+ include "lib/backend.php";
+ $this->backend = new backend("curlie");
+
+ // HTML helper; web() below uses load(), getElementsByClassName(),
+ // getElementsByAttributeValue() and getTextContent()
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ /*
+     Describe the filters offered for Curlie searches.
+
+     Only the "web" page type has a filter (the result language); any
+     other page type gets an empty list.
+ */
+ public function getfilters($page){
+
+     $filters = [];
+
+     if($page == "web"){
+
+         // language code => label
+         $languages = [
+             "any" => "Any language",
+             "en" => "English",
+             "de" => "German",
+             "fr" => "French",
+             "ja" => "Japanese",
+             "it" => "Italian",
+             "es" => "Spanish",
+             "ru" => "Russian",
+             "nl" => "Dutch",
+             "pl" => "Polish",
+             "tr" => "Turkish",
+             "da" => "Danish",
+             "sv" => "Swedish",
+             "no" => "Norwegian",
+             "is" => "Icelandic",
+             "fo" => "Faroese",
+             "fi" => "Finnish",
+             "et" => "Estonian",
+             "lt" => "Lithuanian",
+             "lv" => "Latvian",
+             "cy" => "Welsh",
+             "ga" => "Irish",
+             "gd" => "Scottish Gaelic",
+             "br" => "Breton",
+             "fy" => "Frisian",
+             "frr" => "North Frisian",
+             "gem" => "Saterland Frisian",
+             "lb" => "Luxembourgish",
+             "rm" => "Romansh",
+             "pt" => "Portuguese",
+             "ca" => "Catalan",
+             "gl" => "Galician",
+             "eu" => "Basque",
+             "ast" => "Asturian",
+             "an" => "Aragonese",
+             "fur" => "Friulan",
+             "sc" => "Sardinian",
+             "scn" => "Sicilian",
+             "oc" => "Occitan",
+             "be" => "Belarusian",
+             "cs" => "Czech",
+             "hu" => "Hungarian",
+             "sk" => "Slovak",
+             "uk" => "Ukrainian",
+             "csb" => "Kashubian",
+             "tt" => "Tatar",
+             "ba" => "Bashkir",
+             "os" => "Ossetian",
+             "sl" => "Slovene",
+             "sr" => "Serbian",
+             "hr" => "Croatian",
+             "bs" => "Bosnian",
+             "bg" => "Bulgarian",
+             "sq" => "Albanian",
+             "ro" => "Romanian",
+             "mk" => "Macedonian",
+             "el" => "Greek",
+             "iw" => "Hebrew",
+             "fa" => "Persian",
+             "ar" => "Arabic",
+             "ku" => "Kurdish",
+             "az" => "Azerbaijani",
+             "hy" => "Armenian",
+             "af" => "Afrikaans",
+             "sw" => "Kiswahili",
+             "uz" => "Uzbek",
+             "kk" => "Kazakh",
+             "ky" => "Kyrgyz",
+             "tg" => "Tajik",
+             "tk" => "Turkmen",
+             "ug" => "Uyghurche",
+             "hi" => "Hindi",
+             "si" => "Sinhalese",
+             "gu" => "Gujarati",
+             "ur" => "Urdu",
+             "mr" => "Marathi",
+             "pa" => "Punjabi",
+             "bn" => "Bengali",
+             "ta" => "Tamil",
+             "te" => "Telugu",
+             "kn" => "Kannada",
+             "zh_CN" => "Chinese Simplified",
+             "zh_TW" => "Chinese Traditional",
+             "ko" => "Korean",
+             "cfr" => "Taiwanese",
+             "th" => "Thai",
+             "vi" => "Vietnamese",
+             "in" => "Indonesian",
+             "ms" => "Malay",
+             "tl" => "Tagalog",
+             "eo" => "Esperanto",
+             "ia" => "Interlingua",
+             "la" => "Latin"
+         ];
+
+         $filters["lang"] = [
+             "display" => "Language",
+             "option" => $languages
+         ];
+     }
+
+     return $filters;
+ }
+
+ /*
+     Perform a GET request through the given proxy and return the
+     response body.
+
+     $proxy  proxy descriptor handed to backend->assign_proxy()
+     $url    target URL without a query string
+     $get    query string parameters, appended to $url when not empty
+
+     Throws an Exception carrying the curl error message when curl
+     reports an error.
+ */
+ private function get($proxy, $url, $get = []){
+
+     $handle = curl_init();
+
+     if($get !== []){
+
+         $url = $url . "?" . http_build_query($get);
+     }
+
+     $request_headers = [
+         "User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     curl_setopt($handle, CURLOPT_URL, $url);
+
+     // default encoding
+     curl_setopt($handle, CURLOPT_ENCODING, "");
+     curl_setopt($handle, CURLOPT_HTTPHEADER, $request_headers);
+
+     // return the body instead of printing it, verify TLS, 30s limits
+     curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($handle, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($handle, $proxy);
+
+     $body = curl_exec($handle);
+
+     if(curl_errno($handle)){
+
+         throw new Exception(curl_error($handle));
+     }
+
+     curl_close($handle);
+
+     return $body;
+ }
+
+ /*
+     Run a Curlie search and scrape the result page.
+
+     $get  request parameters. "npt" is always read; when it is empty
+           "s" and "lang" are read as well.
+
+     Returns an array with the keys status, spelling, npt, answer, web,
+     image, video, news and related. Only "web" and "npt" are filled;
+     "npt" is a token from backend->store() wrapping the href of the
+     page's "next-page" link, or null when there is no such link.
+
+     Throws an Exception when the page cannot be fetched.
+ */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // continuation: the token resolves to the stored next-page
+         // href and the proxy that was stored with it
+         [$query, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $url = "https://curlie.org/" . $query;
+         $params = [];
+
+     }else{
+         $proxy = $this->backend->get_ip();
+
+         $params = [
+             "q" => $get["s"],
+             "start" => 0,
+             "stime" => 92452189 // ?
+         ];
+
+         if($get["lang"] !== "any"){
+
+             $params["lang"] = $get["lang"];
+         }
+
+         $url = "https://curlie.org/search";
+     }
+
+     try{
+         $html = $this->get(
+             $proxy,
+             $url,
+             $params
+         );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     $this->fuckhtml->load($html);
+
+     $nextpage =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "next-page",
+             "a"
+         );
+
+     if(
+         count($nextpage) !== 0 &&
+         isset($nextpage[0]["attributes"]["href"])
+     ){
+
+         $nextpage =
+             $this->backend->store(
+                 $nextpage[0]["attributes"]["href"],
+                 "web",
+                 $proxy
+             );
+     }else{
+
+         $nextpage = null;
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => $nextpage,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "site-item",
+             "div"
+         );
+
+     foreach($items as $item){
+
+         // scope the following lookups to this result
+         $this->fuckhtml->load($item);
+
+         $anchors =
+             $this->fuckhtml
+             ->getElementsByAttributeValue(
+                 "target",
+                 "_blank",
+                 "a"
+             );
+
+         if(
+             count($anchors) === 0 ||
+             !isset($anchors[0]["attributes"]["href"])
+         ){
+
+             // no outbound link in this item: nothing to list
+             continue;
+         }
+
+         $a = $anchors[0];
+
+         $description =
+             $this->fuckhtml
+             ->getElementsByClassName("site-descr");
+
+         if(count($description) !== 0){
+
+             $description =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $description[0]
+                 );
+         }else{
+
+             $description = null;
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $a
+                 ),
+             "description" => $description,
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $a["attributes"]["href"]
+                 ),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+}
diff --git a/scraper/ddg.php b/scraper/ddg.php
new file mode 100644
index 0000000..49e0d37
--- /dev/null
+++ b/scraper/ddg.php
@@ -0,0 +1,2246 @@
+<?php
+
+class ddg{
+
+ /*
+ Load the two helper libraries this scraper relies on and keep one
+ instance of each on the object.
+ */
+ public function __construct(){
+
+ // backend helper, created with this scraper's name; the methods below
+ // use it for get_ip(), assign_proxy(), store() and get()
+ include "lib/backend.php";
+ $this->backend = new backend("ddg");
+
+ // HTML helper; web() below uses load(), getElementById(),
+ // getTextContent() and extract_json()
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ /*
+     curl functions
+ */
+ // request flavours understood by get(): a page navigation or a
+ // script/XHR style sub-request
+ private const req_web = 0;
+ private const req_xhr = 1;
+
+ /*
+     Perform a GET request through the given proxy and return the
+     response body.
+
+     $proxy    proxy descriptor handed to backend->assign_proxy()
+     $url      target URL without a query string
+     $get      query string parameters, appended to $url when not empty
+     $reqtype  self::req_web (default) or self::req_xhr; selects the
+               header set sent with the request
+
+     Throws an Exception when $reqtype is not one of the two known
+     values, or carrying the curl error message when curl reports an
+     error.
+ */
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web){
+
+     // choose the headers first: an unknown request type used to leave
+     // $headers undefined and fail inside curl_setopt() instead
+     switch($reqtype){
+         case self::req_web:
+             $headers =
+                 ["User-Agent: " . config::USER_AGENT,
+                 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Accept-Encoding: gzip",
+                 "DNT: 1",
+                 "Sec-GPC: 1",
+                 "Connection: keep-alive",
+                 "Upgrade-Insecure-Requests: 1",
+                 "Sec-Fetch-Dest: document",
+                 "Sec-Fetch-Mode: navigate",
+                 "Sec-Fetch-Site: same-origin",
+                 "Sec-Fetch-User: ?1",
+                 "Priority: u=0, i",
+                 "TE: trailers"];
+             break;
+
+         case self::req_xhr:
+             $headers =
+                 ["User-Agent: " . config::USER_AGENT,
+                 "Accept: */*",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Accept-Encoding: gzip",
+                 "Referer: https://duckduckgo.com/",
+                 "DNT: 1",
+                 "Sec-GPC: 1",
+                 "Connection: keep-alive",
+                 "Sec-Fetch-Dest: script",
+                 "Sec-Fetch-Mode: no-cors",
+                 "Sec-Fetch-Site: same-site",
+                 "Priority: u=1"];
+             break;
+
+         default:
+             throw new Exception("Unknown request type");
+     }
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+         $get = http_build_query($get);
+         $url .= "?" . $get;
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     // return the body instead of printing it, verify TLS, 30s limits
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+         throw new Exception(curl_error($curlproc));
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /*
+     Describe the filters offered for DuckDuckGo searches.
+
+     $pagetype  "web", "images", "videos" or "news"
+
+     Returns the country filter merged with the filters specific to the
+     page type. The "web" country list additionally starts with an
+     "any" (All Regions) entry. An unknown page type yields an empty
+     list instead of falling out of the switch with no return value.
+ */
+ public function getfilters($pagetype){
+
+     $base = [
+         "country" => [
+             "display" => "Country",
+             "option" => [
+                 "us-en" => "US (English)",
+                 "ar-es" => "Argentina",
+                 "au-en" => "Australia",
+                 "at-de" => "Austria",
+                 "be-fr" => "Belgium (fr)",
+                 "be-nl" => "Belgium (nl)",
+                 "br-pt" => "Brazil",
+                 "bg-bg" => "Bulgaria",
+                 "ca-en" => "Canada (en)",
+                 "ca-fr" => "Canada (fr)",
+                 "ct-ca" => "Catalonia",
+                 "cl-es" => "Chile",
+                 "cn-zh" => "China",
+                 "co-es" => "Colombia",
+                 "hr-hr" => "Croatia",
+                 "cz-cs" => "Czech Republic",
+                 "dk-da" => "Denmark",
+                 "ee-et" => "Estonia",
+                 "fi-fi" => "Finland",
+                 "fr-fr" => "France",
+                 "de-de" => "Germany",
+                 "gr-el" => "Greece",
+                 "hk-tzh" => "Hong Kong",
+                 "hu-hu" => "Hungary",
+                 "in-en" => "India (en)",
+                 "id-en" => "Indonesia (en)",
+                 "ie-en" => "Ireland",
+                 "il-en" => "Israel (en)",
+                 "it-it" => "Italy",
+                 "jp-jp" => "Japan",
+                 "kr-kr" => "Korea",
+                 "lv-lv" => "Latvia",
+                 "lt-lt" => "Lithuania",
+                 "my-en" => "Malaysia (en)",
+                 "mx-es" => "Mexico",
+                 "nl-nl" => "Netherlands",
+                 "nz-en" => "New Zealand",
+                 "no-no" => "Norway",
+                 "pk-en" => "Pakistan (en)",
+                 "pe-es" => "Peru",
+                 "ph-en" => "Philippines (en)",
+                 "pl-pl" => "Poland",
+                 "pt-pt" => "Portugal",
+                 "ro-ro" => "Romania",
+                 "ru-ru" => "Russia",
+                 "xa-ar" => "Saudi Arabia",
+                 "sg-en" => "Singapore",
+                 "sk-sk" => "Slovakia",
+                 "sl-sl" => "Slovenia",
+                 "za-en" => "South Africa",
+                 "es-ca" => "Spain (ca)",
+                 "es-es" => "Spain (es)",
+                 "se-sv" => "Sweden",
+                 "ch-de" => "Switzerland (de)",
+                 "ch-fr" => "Switzerland (fr)",
+                 "tw-tzh" => "Taiwan",
+                 "th-en" => "Thailand (en)",
+                 "tr-tr" => "Turkey",
+                 "us-es" => "US (Spanish)",
+                 "ua-uk" => "Ukraine",
+                 "uk-en" => "United Kingdom",
+                 "vn-en" => "Vietnam (en)"
+             ]
+         ]
+     ];
+
+     switch($pagetype){
+
+         case "web":
+             // web search additionally supports "no region", listed first
+             $base["country"]["option"] =
+                 array_merge(["any" => "All Regions"], $base["country"]["option"]);
+
+             return array_merge($base,
+                 [
+                     "nsfw" => [
+                         "display" => "NSFW",
+                         "option" => [
+                             "yes" => "Yes",
+                             "maybe" => "Maybe",
+                             "no" => "No"
+                         ]
+                     ],
+                     "newer" => [
+                         "display" => "Newer than",
+                         "option" => "_DATE"
+                     ],
+                     "older" => [
+                         "display" => "Older than",
+                         "option" => "_DATE"
+                     ],
+                     "extendedsearch" => [
+                         // undefined display
+                         "option" => [
+                             "yes" => "Yes",
+                             "no" => "No",
+                         ]
+                     ]
+                 ]
+             );
+
+         case "images":
+             return array_merge($base,
+                 [
+                     "nsfw" => [
+                         "display" => "NSFW",
+                         "option" => [
+                             "yes" => "Yes",
+                             "no" => "No"
+                         ]
+                     ],
+                     "date" => [
+                         "display" => "Time posted",
+                         "option" => [
+                             "any" => "Any time",
+                             "Day" => "Past day",
+                             "Week" => "Past week",
+                             "Month" => "Past month"
+                         ]
+                     ],
+                     "size" => [
+                         "display" => "Size",
+                         "option" => [
+                             "any" => "Any size",
+                             "Small" => "Small",
+                             "Medium" => "Medium",
+                             "Large" => "Large",
+                             "Wallpaper" => "Wallpaper"
+                         ]
+                     ],
+                     "color" => [
+                         "display" => "Colors",
+                         "option" => [
+                             "any" => "All colors",
+                             "Monochrome" => "Black and white",
+                             "Red" => "Red",
+                             "Orange" => "Orange",
+                             "Yellow" => "Yellow",
+                             "Green" => "Green",
+                             "Blue" => "Blue",
+                             "Purple" => "Purple",
+                             "Pink" => "Pink",
+                             "Brown" => "Brown",
+                             "Black" => "Black",
+                             "Gray" => "Gray",
+                             "Teal" => "Teal",
+                             "White" => "White"
+                         ]
+                     ],
+                     "type" => [
+                         "display" => "Type",
+                         "option" => [
+                             "any" => "All types",
+                             "photo" => "Photograph",
+                             "clipart" => "Clipart",
+                             "gif" => "Animated GIF",
+                             "transparent" => "Transparent"
+                         ]
+                     ],
+                     "layout" => [
+                         "display" => "Layout",
+                         "option" => [
+                             "any" => "All layouts",
+                             "Square" => "Square",
+                             "Tall" => "Tall",
+                             "Wide" => "Wide"
+                         ]
+                     ],
+                     "license" => [
+                         "display" => "License",
+                         "option" => [
+                             "any" => "All licenses",
+                             "Any" => "All Creative Commons",
+                             "Public" => "Public domain",
+                             "Share" => "Free to Share and Use",
+                             "ShareCommercially" => "Free to Share and Use Commercially",
+                             "Modify" => "Free to Modify, Share, and Use",
+                             "ModifyCommercially" => "Free to Modify, Share, and Use Commercially"
+                         ]
+                     ]
+                 ]
+             );
+
+         case "videos":
+             return array_merge($base,
+                 [
+                     "nsfw" => [
+                         "display" => "NSFW",
+                         "option" => [
+                             "yes" => "Yes",
+                             "maybe" => "Maybe",
+                             "no" => "No"
+                         ]
+                     ],
+                     "date" => [
+                         "display" => "Time fetched",
+                         "option" => [
+                             "any" => "Any time",
+                             "d" => "Past day",
+                             "w" => "Past week",
+                             "m" => "Past month"
+                         ]
+                     ],
+                     "resolution" => [ //videoDefinition
+                         "display" => "Resolution",
+                         "option" => [
+                             "any" => "Any resolution",
+                             "high" => "High definition",
+                             "standard" => "Standard definition"
+                         ]
+                     ],
+                     "duration" => [ // videoDuration
+                         "display" => "Duration",
+                         "option" => [
+                             "any" => "Any duration",
+                             // labels fixed: short is below 5 minutes, long is above 20
+                             "short" => "Short (<5min)",
+                             "medium" => "Medium (5-20min)",
+                             "long" => "Long (>20min)"
+                         ]
+                     ],
+                     "license" => [
+                         "display" => "License",
+                         "option" => [
+                             "any" => "Any license",
+                             "creativeCommon" => "Creative Commons",
+                             "youtube" => "YouTube Standard"
+                         ]
+                     ]
+                 ]
+             );
+
+         case "news":
+             return array_merge($base,
+                 [
+                     "nsfw" => [
+                         "display" => "NSFW",
+                         "option" => [
+                             "yes" => "Yes",
+                             "maybe" => "Maybe",
+                             "no" => "No"
+                         ]
+                     ],
+                     "date" => [
+                         "display" => "Time posted",
+                         "option" => [
+                             "any" => "Any time",
+                             "d" => "Past day",
+                             "w" => "Past week",
+                             "m" => "Past month"
+                         ]
+                     ]
+                 ]
+             );
+     }
+
+     // unknown page type: no filters
+     return [];
+ }
+
+ public function web($get){
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ if($get["npt"]){
+
+ [$js_link, $proxy] = $this->backend->get($get["npt"], "web");
+ $js_link = "https://links.duckduckgo.com" . $js_link;
+
+ $html = "";
+ $get["extendedsearch"] = "no";
+
+ }else{
+ if(strlen($get["s"]) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
+
+ // generate filters
+ $get_filters = [
+ "q" => $get["s"]
+ ];
+
+ if($get["country"] == "any"){
+
+ $get_filters["kl"] = "wt-wt";
+ }else{
+
+ $get_filters["kl"] = $get["country"];
+ }
+
+ switch($get["nsfw"]){
+
+ case "yes": $get_filters["kp"] = "-2"; break;
+ case "maybe": $get_filters["kp"] = "-1"; break;
+ case "no": $get_filters["kp"] = "1"; break;
+ }
+
+ $df = true;
+
+ if($get["newer"] === false){
+
+ if($get["older"] !== false){
+
+ $start = 36000;
+ $end = $get["older"];
+ }else{
+
+ $df = false;
+ }
+ }else{
+
+ $start = $get["newer"];
+
+ if($get["older"] !== false){
+
+ $end = $get["older"];
+ }else{
+
+ $end = time();
+ }
+ }
+
+ if($df === true){
+ $get_filters["df"] = date("Y-m-d", $start) . ".." . date("Y-m-d", $end);
+ }
+
+ //
+ // Get HTML
+ //
+ try{
+ $html = $this->get(
+ $proxy,
+ "https://duckduckgo.com/",
+ $get_filters
+ );
+ }catch(Exception $e){
+
+ throw new Exception("Failed to fetch search page");
+ }
+
+ $this->fuckhtml->load($html);
+
+ $script =
+ $this->fuckhtml
+ ->getElementById(
+ "deep_preload_link",
+ "link"
+ );
+
+ if(
+ $script === null ||
+ !isset($script["attributes"]["href"])
+ ){
+
+ throw new Exception("Failed to grep d.js");
+ }
+
+ $js_link =
+ $this->fuckhtml
+ ->getTextContent(
+ $script["attributes"]["href"]
+ );
+ }
+
+ //
+ // Get d.js
+ //
+ try{
+ $js = $this->get(
+ $proxy,
+ $js_link,
+ [],
+ ddg::req_xhr
+ );
+
+ }catch(Exception $e){
+
+ throw new Exception("Failed to fetch d.js");
+ }
+
+ //echo htmlspecialchars($js);
+
+ $js_tmp =
+ preg_split(
+ '/DDG\.pageLayout\.load\(\s*\'d\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) <= 1){
+
+ throw new Exception("Failed to grep pageLayout(d)");
+ }
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json === null){
+
+ throw new Exception("Failed to decode JSON");
+ }
+
+ //
+ // Get search results + NPT token
+ //
+ foreach($json as $item){
+
+ if(isset($item["c"])){
+
+ if(
+ !isset($item["s"]) &&
+ isset($item["t"]) &&
+ $item["t"] == "DEEP_ERROR_NO_RESULTS"
+ ){
+
+ return $out;
+ }
+
+ $table = [];
+
+ // get youtube video information
+ if(isset($item["video"]["thumbnail_url_template"])){
+
+ $thumb =
+ [
+ "ratio" => "16:9",
+ "url" => $this->bingimg($item["video"]["thumbnail_url_template"])
+ ];
+ }else{
+
+ $thumb =
+ [
+ "ratio" => null,
+ "url" => null
+ ];
+ }
+
+ // get table items
+ if(isset($item["rf"])){
+
+ foreach($item["rf"] as $hint){
+
+ if(
+ !isset($hint["label"]["text"]) ||
+ !isset($hint["items"][0]["text"])
+ ){
+
+ continue;
+ }
+
+ $text = [];
+
+ foreach($hint["items"] as $text_part){
+
+ $text[] = $text_part["text"];
+ }
+
+ $text = implode(", ", $text);
+
+ if(is_numeric($text)){
+
+ $text = number_format((string)$text);
+ }
+
+ $table[$hint["label"]["text"]] = $text;
+ }
+ }
+
+ // get ratings
+ if(isset($item["ar"])){
+
+ foreach($item["ar"] as $rating){
+
+ if(
+ isset($rating["aggregateRating"]["bestRating"]) &&
+ isset($rating["aggregateRating"]["ratingValue"])
+ ){
+
+ $text = $rating["aggregateRating"]["ratingValue"] . "/" . $rating["aggregateRating"]["bestRating"];
+
+ if(isset($rating["aggregateRating"]["reviewCount"])){
+
+ $text .= " (" . number_format($rating["aggregateRating"]["reviewCount"]) . " votes)";
+ }
+
+ $table["Rating"] = $text;
+ }
+ }
+ }
+
+ // get sublinks
+ $sublinks = [];
+
+ if(isset($item["l"])){
+
+ foreach($item["l"] as $sublink){
+
+ $sublinks[] = [
+ "title" => $this->titledots($sublink["text"]),
+ "description" => $this->titledots($sublink["snippet"]),
+ "url" => $sublink["targetUrl"],
+ "date" => null
+ ];
+ }
+ }
+
+ $title =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $item["t"]
+ )
+ );
+
+ if(
+ $title == "EOF" &&
+ strpos(
+ $item["c"],
+ "google"
+ )
+ ){
+
+ continue;
+ }
+
+ // parse search result
+ $out["web"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $item["t"]
+ )
+ ),
+ "description" =>
+ isset($item["a"]) ?
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $item["a"]
+ )
+ ) : null,
+ "url" => $this->unshiturl($item["c"]),
+ "date" =>
+ isset($item["e"]) ?
+ strtotime($item["e"]) : null,
+ "type" => "web",
+ "thumb" => $thumb,
+ "sublink" => $sublinks,
+ "table" => $table
+ ];
+ continue;
+ }
+
+ if(isset($item["n"])){
+
+ // get NPT
+ $out["npt"] =
+ $this->backend->store(
+ $item["n"],
+ "web",
+ $proxy
+ );
+ continue;
+ }
+ }
+
+ //
+ // Get spelling
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.page\.showMessage\(\s*\'spelling\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ // parse spelling
+ // qc=2: including
+
+ switch((int)$json["qc"]){
+
+ case 2:
+ $type = "including";
+ break;
+
+ default:
+ $type = "not_many";
+ break;
+ }
+
+ $out["spelling"] = [
+ "type" => $type,
+ "using" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $json["suggestion"]
+ ),
+ "correction" => html_entity_decode($json["recourseText"])
+ ];
+ }
+ }
+
+ //
+ // Get images
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.duckbar\.load\(\s*\'images\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ foreach($json["results"] as $image){
+
+ $ratio = $this->bingratio((int)$image["width"], (int)$image["height"]);
+
+ $out["image"][] = [
+ "title" => $image["title"],
+ "source" => [
+ [
+ "url" => $image["image"],
+ "width" => (int)$image["width"],
+ "height" => (int)$image["height"]
+ ],
+ [
+ "url" => $this->bingimg($image["thumbnail"]),
+ "width" => $ratio[0],
+ "height" => $ratio[1]
+ ]
+ ],
+ "url" => $this->unshiturl($image["url"])
+ ];
+ }
+ }
+ }
+
+ //
+ // Get videos
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.duckbar\.load\(\s*\'videos\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ foreach($json["results"] as $video){
+
+ $thumb = [
+ "ratio" => null,
+ "url" => null
+ ];
+
+ foreach(["large", "medium", "small"] as $contender){
+
+ if(isset($video["images"][$contender])){
+
+ $thumb = [
+ "ratio" => "16:9",
+ "url" => $this->bingimg($video["images"][$contender])
+ ];
+ break;
+ }
+ }
+
+ $out["video"][] = [
+ "title" => $this->titledots($video["title"]),
+ "description" =>
+ $video["description"] != "" ?
+ $this->titledots($video["description"]) : null,
+ "date" =>
+ isset($video["published"]) ?
+ strtotime($video["published"]) : null,
+ "duration" =>
+ $video["duration"] != "" ?
+ $this->hms2int($video["duration"]) : null,
+ "views" =>
+ isset($video["statistics"]["viewCount"]) ?
+ (int)$video["statistics"]["viewCount"] : null,
+ "thumb" => $thumb,
+ "url" => $this->unshiturl($video["content"])
+ ];
+ }
+ }
+ }
+
+ //
+ // Get news
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.duckbar\.load\(\s*\'news\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ foreach($json["results"] as $news){
+
+ if(isset($news["image"])){
+
+ $thumb = [
+ "ratio" => "16:9",
+ "url" => $news["image"]
+ ];
+ }else{
+
+ $thumb = [
+ "ratio" => null,
+ "url" => null
+ ];
+ }
+
+ $out["news"][] = [
+ "title" => $news["title"],
+ "description" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $news["excerpt"]
+ ),
+ "date" => (int)$news["date"],
+ "thumb" => $thumb,
+ "url" => $news["url"]
+ ];
+ }
+ }
+ }
+
+ //
+ // Get related searches
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.duckbar\.loadModule\(\s*\'related_searches\'\s*,\s*/',
+ $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ foreach($json["results"] as $related){
+
+ $out["related"][] = $related["text"];
+ }
+ }
+ }
+
+ //
+ // Get instant answers
+ //
+ $js_tmp =
+ preg_split(
+ '/DDG\.duckbar\.add\(\s*/',
+ $html . $js,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+
+ if($json !== null){
+
+ $json = $json["data"];
+ $table = [];
+ $sublinks = [];
+ $description = [];
+
+ // get official website
+ if(
+ isset($json["OfficialWebsite"]) &&
+ $json["OfficialWebsite"] !== null
+ ){
+
+ $sublinks["Website"] = $json["OfficialWebsite"];
+ }
+
+ // get sublinks & table elements
+ if(isset($json["Infobox"]["content"])){
+ foreach($json["Infobox"]["content"] as $info){
+
+ if($info["data_type"] == "string"){
+
+ // add table element
+ $table[$info["label"]] = $info["value"];
+ continue;
+ }
+
+ if($info["data_type"] == "wd_description"){
+
+ $description[] = [
+ "type" => "quote",
+ "value" => $info["value"]
+ ];
+ continue;
+ }
+
+ // add sublink
+ switch($info["data_type"]){
+
+ case "official_site":
+ case "official_website":
+ $type = "Website";
+ break;
+
+ case "wikipedia": $type = "Wikipedia"; break;
+ case "itunes": $type = "iTunes"; break;
+ case "amazon": $type = "Amazon"; break;
+
+ case "imdb_title_id":
+ case "imdb_id":
+ case "imdb_name_id":
+ $type = "IMDb";
+ $delim = substr($info["value"], 0, 2);
+
+ if($delim == "nm"){
+
+ $prefix = "https://www.imdb.com/name/";
+ }elseif($delim == "tt"){
+
+ $prefix = "https://www.imdb.com/title/";
+ }elseif($delim == "co"){
+
+ $prefix = "https://www.imdb.com/search/title/?companies=";
+ }else{
+
+ $prefix = "https://www.imdb.com/title/";
+ }
+ break;
+
+ case "imdb_name_id": $prefix = "https://www.imdb.com/name/"; $type = "IMDb"; break;
+ case "twitter_profile": $prefix = "https://twitter.com/"; $type = "Twitter"; break;
+ case "instagram_profile": $prefix = "https://instagram.com/"; $type = "Instagram"; break;
+ case "facebook_profile": $prefix = "https://facebook.com/"; $type = "Facebook"; break;
+ case "spotify_artist_id": $prefix = "https://open.spotify.com/artist/"; $type = "Spotify"; break;
+ case "itunes_artist_id": $prefix = "https://music.apple.com/us/artist/"; $type = "iTunes"; break;
+ case "rotten_tomatoes": $prefix = "https://rottentomatoes.com/"; $type = "Rotten Tomatoes"; break;
+ case "youtube_channel": $prefix = "https://youtube.com/channel/"; $type = "YouTube"; break;
+ case "soundcloud_id": $prefix = "https://soundcloud.com/"; $type = "SoundCloud"; break;
+
+ default:
+ $prefix = null;
+ $type = false;
+ }
+
+ if($type !== false){
+
+ if($prefix === null){
+
+ $sublinks[$type] = $info["value"];
+ }else{
+
+ $sublinks[$type] = $prefix . $info["value"];
+ }
+ }
+ }
+ }
+
+ if(isset($json["Abstract"])){
+
+ $description = $this->parse_rich_text($json["Abstract"]);
+ }
+
+ if(
+ !isset($json["Image"]) ||
+ $json["Image"] == "" ||
+ $json["Image"] === null ||
+ $json["Image"] == "https://duckduckgo.com/i/"
+ ){
+
+ $image = null;
+ }else{
+
+ if(
+ preg_match(
+ '/^https?:\/\//',
+ $json["Image"]
+ )
+ ){
+
+ $image = $json["Image"];
+ }else{
+
+ $image = "https://duckduckgo.com" . $json["Image"];
+ }
+ }
+
+ $out["answer"][] = [
+ "title" => $json["Heading"],
+ "description" => $description,
+ "url" => $json["AbstractURL"],
+ "thumb" => $image,
+ "table" => $table,
+ "sublink" => $sublinks
+ ];
+ }
+ }
+
+ if($get["extendedsearch"] == "no"){
+
+ return $out;
+ }
+
+ //
+ // Parse additional data endpoints
+ //
+ //nrj('/js/spice/dictionary/definition/create', null, null, null, null, 'dictionary_definition');
+
+ preg_match_all(
+ '/nrj\(\s*\'([^\']+)\'/',
+ $js,
+ $nrj
+ );
+
+ if(isset($nrj[1])){
+
+ foreach($nrj[1] as $potential_endpoint){
+
+ //
+ // Probe for wordnik definition
+ //
+ preg_match(
+ '/\/js\/spice\/dictionary\/definition\/([^\/]+)/',
+ $potential_endpoint,
+ $word
+ );
+
+ if(isset($word[1])){
+
+ $word = $word[1];
+
+ // found wordnik definition & word
+ try{
+ $nik =
+ $this->get(
+ $proxy,
+ "https://duckduckgo.com/js/spice/dictionary/definition/" . $word,
+ [],
+ ddg::req_xhr
+ );
+
+ }catch(Exception $e){
+
+ // fail gracefully
+ return $out;
+ }
+
+ // remove javascript
+ $js_tmp =
+ preg_split(
+ '/ddg_spice_dictionary_definition\(\s*/',
+ $nik,
+ 2
+ );
+
+ if(count($js_tmp) > 1){
+
+ $nik =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $js_tmp[1]
+ ),
+ true
+ );
+ }
+
+ if($nik === null){
+
+ return $out;
+ }
+
+ $answer_cat = [];
+ $answer = [];
+
+ foreach($nik as $snippet){
+
+ if(!isset($snippet["partOfSpeech"])){ continue; }
+
+ $push = [];
+
+ // add text snippet
+ if(isset($snippet["text"])){
+
+ $push[] = [
+ "type" => "text",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $snippet["text"]
+ )
+ ];
+ }
+
+ // add example uses
+ if(isset($snippet["exampleUses"])){
+
+ foreach($snippet["exampleUses"] as $example){
+
+ $push[] = [
+ "type" => "quote",
+ "value" => "\"" .
+ $this->fuckhtml
+ ->getTextContent(
+ $example["text"]
+ ) . "\""
+ ];
+ }
+ }
+
+ // add citations
+ if(isset($snippet["citations"])){
+
+ foreach($snippet["citations"] as $citation){
+
+ if(!isset($citation["cite"])){ continue; }
+
+ $text =
+ $this->fuckhtml
+ ->getTextContent(
+ $citation["cite"]
+ );
+
+ if(isset($citation["source"])){
+
+ $text .=
+ " - " .
+ $this->fuckhtml
+ ->getTextContent(
+ $citation["source"]
+ );
+ }
+
+ $push[] = [
+ "type" => "quote",
+ "value" => $text
+ ];
+ }
+ }
+
+ // add related words
+ if(isset($snippet["relatedWords"])){
+
+ $relations = [];
+
+ foreach($snippet["relatedWords"] as $related){
+
+ $words = [];
+ foreach($related["words"] as $wrd){
+
+ $words[] =
+ $this->fuckhtml
+ ->getTextContent(
+ $wrd
+ );
+ }
+
+ if(
+ count($words) !== 0 &&
+ isset($related["relationshipType"])
+ ){
+
+ $relations[ucfirst($related["relationshipType"]) . "s"] =
+ implode(", ", $words);
+ }
+ }
+
+ foreach($relations as $relation_title => $relation_content){
+
+ $push[] = [
+ "type" => "quote",
+ "value" => $relation_title . ": " . $relation_content
+ ];
+ }
+ }
+
+
+ if(count($push) !== 0){
+
+ // push data to answer_cat
+ if(!isset($answer_cat[$snippet["partOfSpeech"]])){
+
+ $answer_cat[$snippet["partOfSpeech"]] = [];
+ }
+
+ $answer_cat[$snippet["partOfSpeech"]] =
+ array_merge(
+ $answer_cat[$snippet["partOfSpeech"]],
+ $push
+ );
+ }
+ }
+
+ foreach($answer_cat as $answer_title => $answer_content){
+
+ $i = 0;
+ $answer[] = [
+ "type" => "title",
+ "value" => $answer_title
+ ];
+
+ $old_type = $answer[count($answer) - 1]["type"];
+
+ foreach($answer_content as $ans){
+
+ if(
+ $ans["type"] == "text" &&
+ $old_type == "text"
+ ){
+
+ $i++;
+ $c = count($answer) - 1;
+
+ // append text to existing textfield
+ $answer[$c] = [
+ "type" => "text",
+ "value" => $answer[$c]["value"] . "\n" . $i . ". " . $ans["value"]
+ ];
+
+ }elseif($ans["type"] == "text"){
+
+ $i++;
+ $answer[] = [
+ "type" => "text",
+ "value" => $i . ". " . $ans["value"]
+ ];
+ }else{
+
+ // append normally
+ $answer[] = $ans;
+ }
+
+ $old_type = $ans["type"];
+ }
+ }
+
+ // yeah.. sometimes duckduckgo doesnt give us a definition back
+ if(count($answer) !== 0){
+
+ $out["answer"][] = [
+ "title" => ucfirst($word),
+ "description" => $answer,
+ "url" => "https://www.wordnik.com/words/" . $word,
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+ }
+
+ //
+ // Parse stackoverflow answer
+ //
+ if(
+ preg_match(
+ '/^\/a\.js.*src_id=stack_overflow/',
+ $potential_endpoint
+ )
+ ){
+
+ // found stackoverflow answer
+ try{
+ $json =
+ $this->get(
+ $proxy,
+ "https://duckduckgo.com" . $potential_endpoint,
+ [],
+ ddg::req_xhr
+ );
+
+ }catch(Exception $e){
+
+ // fail gracefully
+ return $out;
+ }
+
+ $json = explode("DDG.duckbar.add_array(", $json, 2);
+
+ if(count($json) === 2){
+
+ $json =
+ json_decode(
+ $this->fuckhtml
+ ->extract_json(
+ $json[1]
+ ),
+ true
+ );
+
+ if(
+ $json !== null &&
+ isset($json[0]["data"])
+ ){
+
+ $json = $json[0]["data"];
+
+ foreach($json as $answer){
+
+ if(isset($answer["Heading"])){
+
+ $title = $answer["Heading"];
+ }elseif(isset($answer["title"])){
+
+ $title = $answer["title"];
+ }else{
+
+ $title = null;
+ }
+
+ if(
+ $title !== null &&
+ isset($answer["Abstract"])
+ ){
+
+ $description = $this->parse_rich_text($answer["Abstract"]);
+
+ $out["answer"][] = [
+ "title" => $title,
+ "description" => $description,
+ "url" => $answer["AbstractURL"],
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return $out;
+ }
+
+ /**
+  * Run a DuckDuckGo image search and return one page of results.
+  *
+  * When $get["npt"] is set, the stored i.js link and proxy are restored
+  * from the backend. Otherwise the search page is fetched first so the
+  * vqd token required by i.js can be scraped from it.
+  *
+  * @param array $get Request parameters: "npt", "s", "country", "nsfw" and
+  *                   the "date", "size", "color", "type", "layout" and
+  *                   "license" filters.
+  * @return array ["status" => "ok", "npt" => mixed|null, "image" => array]
+  * @throws Exception When the search term is empty, a request fails, the
+  *                   vqd token cannot be found or the reply is not JSON.
+  */
+ public function image($get){
+
+     // vqd token scraped from the search page; stays null on the npt path
+     $page_vqd = null;
+
+     if($get["npt"]){
+
+         [$js_link, $proxy] = $this->backend->get($get["npt"], "images");
+
+     }else{
+
+         if(strlen($get["s"]) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         // request parameter => name used inside the "iaf"/"f" filter list
+         $filter_names = [
+             "date" => "time",
+             "size" => "size",
+             "color" => "color",
+             "type" => "type",
+             "layout" => "layout",
+             "license" => "license"
+         ];
+
+         $filters = [];
+
+         foreach($filter_names as $param => $ddg_name){
+
+             if($get[$param] != "any"){
+
+                 $filters[] = $ddg_name . ":" . $get[$param];
+             }
+         }
+
+         $filters = implode(",", $filters);
+
+         $get_filters = [
+             "q" => $get["s"],
+             "iax" => "images",
+             "ia" => "images"
+         ];
+
+         if($filters != ""){
+
+             $get_filters["iaf"] = $filters;
+         }
+
+         $nsfw = $get["nsfw"] == "yes" ? "-1" : "1";
+         $get_filters["kp"] = $nsfw;
+
+         try{
+
+             $html = $this->get(
+                 $proxy,
+                 "https://duckduckgo.com",
+                 $get_filters,
+                 ddg::req_web
+             );
+         }catch(Exception $err){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+         preg_match(
+             '/vqd="([0-9-]+)"/',
+             $html,
+             $vqd
+         );
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to grep VQD token");
+         }
+
+         $page_vqd = $vqd[1];
+
+         $js_link =
+             "i.js?" .
+             http_build_query([
+                 "l" => $get["country"],
+                 "o" => "json",
+                 "q" => $get["s"],
+                 "vqd" => $page_vqd,
+                 "f" => $filters,
+                 "p" => $nsfw
+             ]);
+     }
+
+     try{
+
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://duckduckgo.com/" . $js_link,
+                 [],
+                 ddg::req_xhr
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get i.js");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(!isset($json["results"])){
+
+         return $out;
+     }
+
+     // get npt
+     if(isset($json["next"])){
+
+         // the "next" link is stored with a vqd token appended: prefer the
+         // one scraped from the search page, else take it from the response
+         $token = $page_vqd;
+
+         if(
+             $token === null &&
+             isset($json["vqd"])
+         ){
+
+             if(is_array($json["vqd"])){
+
+                 $candidates = array_values($json["vqd"]);
+
+                 if(
+                     count($candidates) > 0 &&
+                     is_string($candidates[0])
+                 ){
+
+                     $token = $candidates[0];
+                 }
+             }elseif(is_string($json["vqd"])){
+
+                 $token = $json["vqd"];
+             }
+         }
+
+         // without a token no next page is offered
+         if($token !== null){
+
+             $out["npt"] =
+                 $this->backend->store(
+                     $json["next"] . "&vqd=" . $token,
+                     "images",
+                     $proxy
+                 );
+         }
+     }
+
+     // get images
+     foreach($json["results"] as $image){
+
+         $ratio =
+             $this->bingratio(
+                 (int)$image["width"],
+                 (int)$image["height"]
+             );
+
+         $out["image"][] = [
+             "title" => $this->titledots($image["title"]),
+             "source" => [
+                 [
+                     "url" => $image["image"],
+                     "width" => (int)$image["width"],
+                     "height" => (int)$image["height"]
+                 ],
+                 [
+                     "url" => $this->bingimg($image["thumbnail"]),
+                     "width" => $ratio[0],
+                     "height" => $ratio[1]
+                 ]
+             ],
+             "url" => $this->unshiturl($image["url"])
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run a DuckDuckGo video search and return one page of results.
+  *
+  * When $get["npt"] is set, the stored v.js link and proxy are restored
+  * from the backend. Otherwise the search page is fetched first so the
+  * vqd token required by v.js can be scraped from it.
+  *
+  * @param array $get Request parameters: "npt", "s", "country", "nsfw" and
+  *                   the "date", "resolution", "duration" and "license"
+  *                   filters.
+  * @return array Result set with the keys "status", "npt", "video",
+  *               "author", "livestream", "playlist" and "reel"; only
+  *               "npt" and "video" are filled in here.
+  * @throws Exception When the search term is empty, a request fails, the
+  *                   vqd token cannot be found or the reply is not JSON.
+  */
+ public function video($get){
+
+     if($get["npt"]){
+
+         [$js_link, $proxy] = $this->backend->get($get["npt"], "videos");
+
+     }else{
+
+         if(strlen($get["s"]) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         $get_filters = [
+             "q" => $get["s"],
+             "iax" => "videos",
+             "ia" => "videos"
+         ];
+
+         // null is skipped by http_build_query(), so an unrecognised
+         // value simply leaves "p" out of the v.js link
+         $nsfw = null;
+
+         switch($get["nsfw"]){
+
+             case "yes": $nsfw = "-2"; break;
+             case "maybe": $nsfw = "-1"; break;
+             case "no": $nsfw = "1"; break;
+         }
+
+         // request parameter => name used inside the "iaf"/"f" filter list;
+         // the filter value is taken straight from the request
+         $filter_names = [
+             "date" => "publishedAfter",
+             "resolution" => "videoDefinition",
+             "duration" => "videoDuration",
+             "license" => "videoLicense"
+         ];
+
+         $filters = [];
+
+         foreach($filter_names as $param => $ddg_name){
+
+             if($get[$param] != "any"){
+
+                 $filters[] = $ddg_name . ":" . $get[$param];
+             }
+         }
+
+         $filters = implode(",", $filters);
+
+         if($filters != ""){
+
+             $get_filters["iaf"] = $filters;
+         }
+
+         try{
+
+             $html =
+                 $this->get(
+                     $proxy,
+                     "https://duckduckgo.com/",
+                     $get_filters,
+                     ddg::req_web
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+         preg_match(
+             '/vqd="([0-9-]+)"/',
+             $html,
+             $vqd
+         );
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to grep VQD token");
+         }
+
+         $js_link =
+             "v.js?" .
+             http_build_query([
+                 "l" => $get["country"],
+                 "o" => "json",
+                 "sr" => "1",
+                 "q" => $get["s"],
+                 "vqd" => $vqd[1],
+                 "f" => $filters,
+                 "p" => $nsfw
+             ]);
+     }
+
+     try{
+
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://duckduckgo.com/" . $js_link,
+                 [],
+                 ddg::req_xhr
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     if(!isset($json["results"])){
+
+         return $out;
+     }
+
+     // get NPT
+     if(
+         isset($json["next"]) &&
+         $json["next"] !== null
+     ){
+
+         $out["npt"] =
+             $this->backend->store(
+                 $json["next"],
+                 "videos",
+                 $proxy
+             );
+     }
+
+     foreach($json["results"] as $video){
+
+         $thumb = [
+             "ratio" => null,
+             "url" => null
+         ];
+
+         // take the first thumbnail size that is present, largest first
+         foreach(["large", "medium", "small"] as $contender){
+
+             if(isset($video["images"][$contender])){
+
+                 $thumb = [
+                     "ratio" => "16:9",
+                     "url" => $this->bingimg($video["images"][$contender])
+                 ];
+                 break;
+             }
+         }
+
+         $out["video"][] = [
+             "title" => $this->titledots($video["title"]),
+             "description" => $this->titledots($video["description"]),
+             "author" => [
+                 "name" =>
+                     (
+                         isset($video["uploader"]) &&
+                         $video["uploader"] != ""
+                     ) ?
+                     $video["uploader"] : null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "date" =>
+                 (
+                     isset($video["published"]) &&
+                     $video["published"] != ""
+                 ) ?
+                 strtotime($video["published"]) : null,
+             "duration" =>
+                 (
+                     isset($video["duration"]) &&
+                     $video["duration"] != ""
+                 ) ?
+                 $this->hms2int($video["duration"]) : null,
+             "views" =>
+                 isset($video["statistics"]["viewCount"]) ?
+                 (int)$video["statistics"]["viewCount"] : null,
+             "thumb" => $thumb,
+             "url" => $this->unshiturl($video["content"])
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Query DuckDuckGo's news.js endpoint and normalise one page of articles.
+  *
+  * A request carrying $get["npt"] resumes from the link and proxy stored in
+  * the backend; any other request loads the search page first to obtain the
+  * vqd token that news.js expects.
+  *
+  * @param array $get Request parameters: "npt", "s", "country", "nsfw", "date".
+  * @return array ["status" => "ok", "npt" => mixed|null, "news" => array]
+  * @throws Exception When the search term is empty, a request fails, the
+  *                   vqd token cannot be found or the reply is not JSON.
+  */
+ public function news($get){
+
+     if(!$get["npt"]){
+
+         if(strlen($get["s"]) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         $get_filters = [
+             "q" => $get["s"],
+             "iar" => "news",
+             "ia" => "news"
+         ];
+
+         // "any" means no date restriction: news.js then receives an empty df
+         $date = "";
+
+         if($get["date"] != "any"){
+
+             $date = $get["date"];
+             $get_filters["df"] = $date;
+         }
+
+         // translate the nsfw setting into the "kp" parameter
+         switch($get["nsfw"]){
+
+             case "yes":
+                 $get_filters["kp"] = "-2";
+                 break;
+
+             case "maybe":
+                 $get_filters["kp"] = "-1";
+                 break;
+
+             case "no":
+                 $get_filters["kp"] = "1";
+                 break;
+         }
+
+         try{
+
+             $html = $this->get($proxy, "https://duckduckgo.com/", $get_filters, ddg::req_web);
+
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+         preg_match('/vqd="([0-9-]+)"/', $html, $vqd);
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to grep VQD token");
+         }
+
+         $query = [
+             "l" => $get["country"],
+             "o" => "json",
+             "noamp" => "1",
+             "m" => "30",
+             "q" => $get["s"],
+             "vqd" => $vqd[1],
+             "p" => $get_filters["kp"],
+             "df" => $date,
+             "u" => "bing"
+         ];
+
+         $js_link = "news.js?" . http_build_query($query);
+
+     }else{
+
+         [$js_link, $proxy] = $this->backend->get($get["npt"], "news");
+     }
+
+     try{
+
+         $response = $this->get($proxy, "https://duckduckgo.com/" . $js_link, [], ddg::req_xhr);
+
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $json = json_decode($response, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     if(!isset($json["results"])){
+
+         return $out;
+     }
+
+     // keep the link to the following page, when one is offered
+     if(isset($json["next"])){
+
+         $out["npt"] = $this->backend->store($json["next"], "news", $proxy);
+     }
+
+     foreach($json["results"] as $article){
+
+         $has_image = isset($article["image"]) && $article["image"] != "";
+         $has_source = isset($article["source"]) && $article["source"] != "";
+         $has_excerpt = isset($article["excerpt"]) && $article["excerpt"] != "";
+
+         $entry = [];
+         $entry["title"] = $article["title"];
+         $entry["author"] = $has_source ? $article["source"] : null;
+         $entry["description"] =
+             $has_excerpt ?
+             $this->fuckhtml->getTextContent($article["excerpt"]) : null;
+         $entry["date"] = isset($article["date"]) ? (int)$article["date"] : null;
+         $entry["thumb"] = [
+             "ratio" => $has_image ? "16:9" : null,
+             "url" => $has_image ? $article["image"] : null
+         ];
+         $entry["url"] = $this->unshiturl($article["url"]);
+
+         $out["news"][] = $entry;
+     }
+
+     return $out;
+ }
+
+ // Turn an HTML fragment into a flat list of description nodes, each shaped
+ // ["type" => ..., "value" => ...]. The types produced here are "text",
+ // "code" (pre), "inline_code" (code) and "title" (h1-h7).
+ private function parse_rich_text($html){
+
+ $description = [];
+
+ // pre-process the html, remove useless elements
+ // (only headings, pre and code survive this pass)
+ $html =
+ strip_tags(
+ $html,
+ [
+ "h1", "h2", "h3", "h4", "h5", "h6", "h7",
+ "pre", "code"
+ ]
+ );
+
+ // a pre tag (opening or closing) directly followed by a code tag is
+ // replaced by the bare pre tag, dropping attributes and the code tag
+ $html =
+ preg_replace(
+ '/<(\/?)pre *[^>]*>\s*<\/?code *[^>]*>/i',
+ '<$1pre>',
+ $html
+ );
+
+ $this->fuckhtml->load($html);
+
+ // presumably every remaining element; each entry is read below through
+ // its "tagName", "startPos" and "endPos" keys -- TODO confirm in fuckhtml
+ $tags =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "*"
+ );
+
+ if(count($tags) === 0){
+
+ // no markup left: the whole input becomes a single text node
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $html,
+ true,
+ false
+ )
+ )
+ ];
+ }else{
+
+ // $start is the offset in $html where the not-yet-emitted text begins
+ $start = 0;
+ // starts as true so the very first text node is left-trimmed too
+ $was_code_block = true;
+ foreach($tags as $tag){
+
+ // text between the previous tag (or the beginning) and this tag
+ $text =
+ $this->fuckhtml
+ ->getTextContent(
+ substr(
+ $html,
+ $start,
+ $tag["startPos"] - $start
+ ),
+ true,
+ false
+ );
+
+ if($was_code_block){
+
+ $text = ltrim($text);
+ $was_code_block = false;
+ }
+
+ // pushed even when $text is empty
+ $description[] = [
+ "type" => "text",
+ "value" => $text
+ ];
+
+ // choose the node type for this tag and tidy the trailing whitespace
+ // of the text node pushed just above.
+ // NOTE(review): $append is only assigned inside this switch; the
+ // strip_tags() whitelist above matches the cases listed here, assuming
+ // fuckhtml reports tagName in lowercase -- confirm
+ switch($tag["tagName"]){
+
+ case "pre":
+ $append = "code";
+ $was_code_block = true;
+ $c = count($description) - 1;
+ $description[$c]["value"] =
+ rtrim($description[$c]["value"]);
+ break;
+
+ case "code":
+ // inline code keeps exactly one space before it
+ $append = "inline_code";
+ $c = count($description) - 1;
+ $description[$c]["value"] =
+ rtrim($description[$c]["value"]) . " ";
+ break;
+
+ case "h1":
+ case "h2":
+ case "h3":
+ case "h4":
+ case "h5":
+ case "h6":
+ case "h7":
+ $append = "title";
+ $c = count($description) - 1;
+ $description[$c]["value"] =
+ rtrim($description[$c]["value"]);
+ break;
+ }
+
+ // the tag itself, with its text content trimmed
+ $description[] = [
+ "type" => $append,
+ "value" =>
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $tag,
+ true,
+ false
+ )
+ )
+ ];
+
+ // continue after this tag
+ $start = $tag["endPos"];
+ }
+
+ // emit whatever text follows the last tag
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ substr(
+ $html,
+ $start
+ ),
+ true,
+ false
+ )
+ )
+ ];
+ }
+
+ return $description;
+ }
+
+ /**
+  * Trim a title and drop a trailing ellipsis.
+  *
+  * Both "..." and the single character "…" take up three bytes when this
+  * file is saved as UTF-8, so one three-byte tail check covers both.
+  *
+  * @param string $title Raw title or snippet.
+  * @return string The title without trailing ellipsis or outer whitespace.
+  */
+ private function titledots($title){
+
+     $tail = substr($title, -3);
+
+     $stripped =
+         in_array($tail, ["...", "…"]) ?
+         substr($title, 0, -3) : $title;
+
+     return trim($stripped);
+ }
+
+ /**
+  * Convert a "H:M:S", "M:S" or "S" duration string into seconds.
+  *
+  * @param string $time Colon-separated duration; split into at most 3 fields.
+  * @return int Total number of seconds.
+  */
+ private function hms2int($time){
+
+     // walk the fields from the right: seconds, then minutes, then hours
+     $fields = array_reverse(explode(":", $time, 3));
+     $weights = [1, 60, 3600];
+
+     $seconds = 0;
+
+     foreach($fields as $position => $field){
+
+         $seconds += (int)$field * $weights[$position];
+     }
+
+     return $seconds;
+ }
+
+
+ /**
+  * Strip eBay tracking parameters from a result URL.
+  *
+  * URLs whose host is not a known eBay domain are returned untouched. For
+  * eBay URLs the query string is rebuilt without the tracking keys; when
+  * nothing is left the "?" is removed as well.
+  *
+  * @param string $url Absolute URL taken from a search result.
+  * @return string The cleaned URL, or $url itself when nothing applies.
+  */
+ private function unshiturl($url){
+
+     $domain = parse_url($url, PHP_URL_HOST);
+
+     if(!is_string($domain)){
+
+         // relative or unparsable URL: there is no host to inspect
+         return $url;
+     }
+
+     // check for domains w/out first short subdomain (ex: www.)
+     $subdomain =
+         preg_replace(
+             '/^[A-Za-z0-9]{1,3}\./',
+             "",
+             $domain
+         );
+
+     $ebay_hosts = [
+         "ebay.com.au", "ebay.at", "ebay.ca", "ebay.fr", "ebay.de",
+         "ebay.com.hk", "ebay.ie", "ebay.it", "ebay.com.my", "ebay.nl",
+         "ebay.ph", "ebay.pl", "ebay.com.sg", "ebay.es", "ebay.ch",
+         "ebay.co.uk", "cafr.ebay.ca", "ebay.com", "community.ebay.com",
+         "pages.ebay.com"
+     ];
+
+     if(!in_array($subdomain, $ebay_hosts, true)){
+
+         return $url;
+     }
+
+     // remove ebay tracking elements
+     // (a missing query string is treated as an empty one)
+     $old_params = (string)parse_url($url, PHP_URL_QUERY);
+     parse_str($old_params, $params);
+
+     $trackers = [
+         "mkevt", "mkcid", "mkrid", "campid", "customid",
+         "toolid", "_sop", "_dcat", "epid", "oid"
+     ];
+
+     foreach($trackers as $tracker){
+
+         unset($params[$tracker]);
+     }
+
+     $params = http_build_query($params);
+
+     // with no parameters left, swallow the "?" too
+     $replace = strlen($params) === 0 ? "\?" : "";
+
+     $clean =
+         preg_replace(
+             "/" . $replace . preg_quote($old_params, "/") . "$/",
+             $params,
+             $url
+         );
+
+     // preg_replace() yields null on a regex error: keep the original then
+     return $clean === null ? $url : $clean;
+ }
+
+ /**
+  * Rewrite a Bing thumbnail URL to the bare "https://<host>/th?id=<id>" form.
+  *
+  * The image id is read from the "id" query parameter or, failing that,
+  * from a "/th/id/<id>" path. URLs that carry no host or no id are
+  * returned unchanged.
+  *
+  * @param string $url Thumbnail URL as delivered by the search backend.
+  * @return string The normalised thumbnail URL, or $url when malformed.
+  */
+ private function bingimg($url){
+
+     $image = parse_url($url);
+
+     if(
+         !is_array($image) ||
+         !isset($image["host"])
+     ){
+
+         // malformed or relative: nothing to rebuild the URL from
+         return $url;
+     }
+
+     $id = null;
+
+     if(isset($image["query"])){
+
+         parse_str($image["query"], $str);
+
+         if(
+             isset($str["id"]) &&
+             is_string($str["id"])
+         ){
+
+             $id = $str["id"];
+         }
+     }
+
+     if($id === null){
+
+         // fall back to the id embedded in the path
+         $parts =
+             explode(
+                 "/th/id/",
+                 isset($image["path"]) ? $image["path"] : "",
+                 2
+             );
+
+         if(count($parts) !== 2){
+
+             // malformed
+             return $url;
+         }
+
+         $id = $parts[1];
+     }
+
+     return "https://" . $image["host"] . "/th?id=" . rawurlencode($id);
+ }
+
+ /**
+  * Scale image dimensions so that both sides fit within 474 pixels,
+  * keeping the aspect ratio.
+  *
+  * @param int $width Original width in pixels.
+  * @param int $height Original height in pixels.
+  * @return array [width, height] as floats produced by floor(); [0.0, 0.0]
+  *               when either input is not positive.
+  */
+ private function bingratio($width, $height){
+
+     // callers pass (int) casts of remote data, so 0 is possible and
+     // would otherwise cause a division by zero
+     if(
+         $width <= 0 ||
+         $height <= 0
+     ){
+
+         return [0.0, 0.0];
+     }
+
+     // the smaller factor is the one that makes both sides fit
+     $ratio = min(474 / $width, 474 / $height);
+
+     return [
+         floor($width * $ratio),
+         floor($height * $ratio)
+     ];
+ }
+}
diff --git a/scraper/facebook.php b/scraper/facebook.php
new file mode 100644
index 0000000..395a863
--- /dev/null
+++ b/scraper/facebook.php
@@ -0,0 +1,820 @@
+<?php
+
+class facebook{
+
+ const get = 0;
+ const post = 1;
+
+ // Load the helper libraries and create the two collaborators used by
+ // this scraper. Both are stored as dynamic properties (the class
+ // declares none).
+ public function __construct(){
+
+ // next-page token store; used through ->get() and ->store() below
+ include "lib/nextpage.php";
+ $this->nextpage = new nextpage("fb");
+
+ // proxy pool; used through ->assign_proxy() on every cURL handle
+ include "lib/proxy_pool.php";
+ $this->proxy = new proxy_pool("facebook");
+ }
+
+ /**
+  * Describe the search filters offered by this scraper.
+  *
+  * @param string $page Result page type; not consulted, the same filter
+  *                     set is returned for every value.
+  * @return array Filter definitions keyed by filter name, each with a
+  *               "display" label and an "option" list or marker.
+  */
+ public function getfilters($page){
+
+     $filters = [];
+
+     // result ordering
+     $filters["sort"] = [
+         "display" => "Sort by",
+         "option" => [
+             "relevance" => "Relevance",
+             "most_recent" => "Most recent"
+         ]
+     ];
+
+     // date range bounds, both using the "_DATE" option marker
+     $filters["newer"] = [
+         "display" => "Newer than",
+         "option" => "_DATE"
+     ];
+     $filters["older"] = [
+         "display" => "Older than",
+         "option" => "_DATE"
+     ];
+
+     // restrict results to livestreams
+     $filters["live"] = [
+         "display" => "Livestream",
+         "option" => [
+             "no" => "No",
+             "yes" => "Yes"
+         ]
+     ];
+
+     return $filters;
+ }
+
+ /**
+  * Perform an HTTP request against Facebook through the proxy pool.
+  *
+  * With self::get the parameters are appended to $url as a query string;
+  * with any other $reqtype they are sent as an urlencoded POST body over
+  * HTTP/2. The header set always follows $reqtype, so a call without
+  * parameters still sends a complete header list.
+  *
+  * @param string $url     Target URL.
+  * @param array  $get     Request parameters (query string or form body).
+  * @param int    $reqtype self::get or self::post.
+  * @return string|bool Response body as returned by curl_exec().
+  * @throws Exception With the cURL error message when the transfer fails.
+  */
+ private function get($url, $get = [], $reqtype = self::get){
+
+     $curlproc = curl_init();
+
+     $payload = $get !== [] ? http_build_query($get) : "";
+
+     if($reqtype === self::get){
+
+         $headers = [
+             "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "DNT: 1",
+             "Connection: keep-alive",
+             "Upgrade-Insecure-Requests: 1",
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: none",
+             "Sec-Fetch-User: ?1"
+         ];
+
+         if($get !== []){
+
+             $url .= "?" . $payload;
+         }
+     }else{
+
+         curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+         // NOTE(review): the Cookie value below is hard-coded
+         $headers = [
+             "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
+             "Accept: */*",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip, deflate, br",
+             "Content-Type: application/x-www-form-urlencoded",
+             "X-FB-Friendly-Name: SearchCometResultsPaginatedResultsQuery",
+             //"X-FB-LSD: AVptQC4a16c",
+             //"X-ASBD-ID: 129477",
+             "Content-Length: " . strlen($payload),
+             "Origin: https://www.facebook.com",
+             "DNT: 1",
+             "Connection: keep-alive",
+             "Referer: https://www.facebook.com/watch/",
+             "Cookie: datr=__GMZCgwVF5BbyvAtfJojQwg; oo=v1%7C3%3A1691641171; wd=955x995",
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-origin",
+             "TE: trailers"
+         ];
+
+         curl_setopt($curlproc, CURLOPT_POST, true);
+         curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->proxy->assign_proxy($curlproc);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message before releasing the handle
+         $error = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($error);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /**
+  * Search Facebook Watch and collect the results into $this->out.
+  *
+  * When $get["npt"] is set, the stored next-page request is replayed via
+  * video_nextpage(). Otherwise the filters are encoded, the search page
+  * is parsed, and (when more results exist) the GraphQL pagination
+  * request is assembled from values found in the page and executed.
+  *
+  * @param array $get Request parameters; reads "s", "npt", "sort",
+  *                   "live", "older" and "newer".
+  * @return array Result structure with "status", "npt" and the "video",
+  *               "author", "livestream", "playlist" and "reel" lists.
+  * @throws Exception When the page or its embedded JSON cannot be read.
+  */
+ public function video($get){
+
+     $search = $get["s"];
+     $npt = $get["npt"];
+
+     $this->out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     if($npt){
+
+         $nextpage =
+             json_decode(
+                 $this->nextpage->get(
+                     $npt,
+                     "videos"
+                 ),
+                 true
+             );
+
+         if(!is_array($nextpage)){
+
+             throw new Exception("Failed to decode next page token");
+         }
+
+         // parse next page
+         $this->video_nextpage($nextpage);
+
+         return $this->out;
+     }
+
+     // generate filter data
+     // {
+     //    "rp_creation_time:0":"{\"name\":\"creation_time\",\"args\":\"{\\\"start_year\\\":\\\"2023\\\",\\\"start_month\\\":\\\"2023-08\\\",\\\"end_year\\\":\\\"2023\\\",\\\"end_month\\\":\\\"2023-08\\\",\\\"start_day\\\":\\\"2023-08-10\\\",\\\"end_day\\\":\\\"2023-08-10\\\"}\"}",
+     //    "videos_sort_by:0":"{\"name\":\"videos_sort_by\",\"args\":\"Most Recent\"}",
+     //    "videos_live:0":"{\"name\":\"videos_live\",\"args\":\"\"}"
+     // }
+     $filter = [];
+     $sort = $get["sort"];
+     $live = $get["live"];
+     $older = $get["older"];
+     $newer = $get["newer"];
+
+     if(
+         $older !== false ||
+         $newer !== false
+     ){
+
+         // an open-ended range defaults to "now" / the epoch
+         if($older === false){
+
+             $older = time();
+         }
+
+         if($newer === false){
+
+             $newer = 0;
+         }
+
+         $filter["rp_creation_time:0"] =
+             json_encode(
+                 [
+                     "name" => "creation_time",
+                     "args" =>
+                         json_encode(
+                             [
+                                 "start_year" => date("Y", $newer),
+                                 "start_month" => date("Y-m", $newer),
+                                 "end_year" => date("Y", $older),
+                                 "end_month" => date("Y-m", $older),
+                                 "start_day" => date("Y-m-d", $newer),
+                                 "end_day" => date("Y-m-d", $older)
+                             ]
+                         )
+                 ]
+             );
+     }
+
+     if($sort != "relevance"){
+
+         $filter["videos_sort_by:0"] =
+             json_encode(
+                 [
+                     "name" => "videos_sort_by",
+                     "args" => "Most Recent"
+                 ]
+             );
+     }
+
+     if($live != "no"){
+
+         $filter["videos_live:0"] = json_encode(
+             [
+                 "name" => "videos_live",
+                 "args" => ""
+             ]
+         );
+     }
+
+     $req = [
+         "q" => $search
+     ];
+
+     if(count($filter) !== 0){
+
+         $req["filters"] =
+             base64_encode(
+                 json_encode(
+                     $filter
+                 )
+             );
+     }
+
+     // NOTE(review): the live request is still disabled and the page is
+     // read from a local fixture; $req is therefore unused for now.
+     /*
+     $html =
+         $this->get(
+             "https://www.facebook.com/watch/search/",
+             $req
+         );*/
+
+     $html = file_get_contents("scraper/facebook.html");
+
+     if($html === false){
+
+         throw new Exception("Could not read scraper/facebook.html");
+     }
+
+     preg_match_all(
+         '/({"__bbox":.*,"sequence_number":0}})\]\]/',
+         $html,
+         $json
+     );
+
+     // the second match is the one carrying the search results
+     if(!isset($json[1][1])){
+
+         throw new Exception("Could not grep JSON body");
+     }
+
+     $json = json_decode($json[1][1], true);
+
+     if(
+         !isset(
+             $json
+             ["__bbox"]
+             ["result"]
+             ["data"]
+             ["serpResponse"]
+             ["results"]
+             ["edges"]
+         )
+     ){
+
+         throw new Exception("Could not decode JSON body");
+     }
+
+     $results =
+         $json
+         ["__bbox"]
+         ["result"]
+         ["data"]
+         ["serpResponse"]
+         ["results"];
+
+     foreach($results["edges"] as $result){
+
+         $this->parse_edge($result);
+     }
+
+     // get nextpage data
+     if(($results["page_info"]["has_next_page"] ?? false) == 1){
+
+         if(
+             preg_match(
+                 '/handleWithCustomApplyEach\(ScheduledApplyEach,({.*})\);}\);}\);<\/script>/',
+                 $html,
+                 $nextpagedata
+             ) !== 1
+         ){
+
+             throw new Exception("Could not grep the next page data");
+         }
+
+         $nextpagedata = json_decode($nextpagedata[1], true);
+
+         if(!is_array($nextpagedata)){
+
+             throw new Exception("Could not decode the next page data");
+         }
+
+         // [POST] https://www.facebook.com/api/graphql/
+         // FORM data, not JSON!
+
+         $nextpage = [
+             "av" => "0",
+             "__user" => null,
+             "__a" => null,
+             "__req" => "2",
+             "__hs" => null,
+             "dpr" => "1",
+             "__ccg" => null,
+             "__rev" => null,
+             // another client side token
+             "__s" => $this->randomstring(6) . ":" . $this->randomstring(6) . ":" . $this->randomstring(6),
+             "__hsi" => null,
+             // tracking fingerprint (probably generated using webgl)
+             "__dyn" => "7xeUmwlE7ibwKBWo2vwAxu13w8CewSwMwNw9G2S0im3y4o0B-q1ew65xO2O1Vw8G1Qw5Mx61vw9m1YwBgao6C0Mo5W3S7Udo5q4U2zxe2Gew9O222SUbEaU2eU5O0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w",
+             "__csr" => $this->randomstring(null),
+             "__comet_req" => null,
+             "lsd" => null,
+             "jazoest" => null,
+             "__spin_r" => null,
+             "__spin_b" => null,
+             "__spin_t" => null,
+             "fb_api_caller_class" => "RelayModern",
+             "fb_api_req_friendly_name" => "SearchCometResultsPaginatedResultsQuery",
+             "variables" => [ // this is json
+                 "UFI2CommentsProvider_commentsKey" => "SearchCometResultsInitialResultsQuery",
+                 "allow_streaming" => false,
+                 "args" => [
+                     "callsite" => "comet:watch_search",
+                     "config" => [
+                         "exact_match" => false,
+                         "high_confidence_config" => null,
+                         "intercept_config" => null,
+                         "sts_disambiguation" => null,
+                         "watch_config" => null
+                     ],
+                     "context" => [
+                         "bsid" => null,
+                         "tsid" => null
+                     ],
+                     "experience" => [
+                         "encoded_server_defined_params" => null,
+                         "fbid" => null,
+                         "type" => "WATCH_TAB_GLOBAL"
+                     ],
+                     "filters" => [],
+                     "text" => $search
+                 ],
+                 "count" => 5,
+                 "cursor" => $results["page_info"]["end_cursor"],
+                 "displayCommentsContextEnableComment" => false,
+                 "displayCommentsContextIsAdPreview" => false,
+                 "displayCommentsContextIsAggregatedShare" => false,
+                 "displayCommentsContextIsStorySet" => false,
+                 "displayCommentsFeedbackContext" => null,
+                 "feedLocation" => "SEARCH",
+                 "feedbackSource" => 23,
+                 "fetch_filters" => true,
+                 "focusCommentID" => null,
+                 "locale" => null,
+                 "privacySelectorRenderLocation" => "COMET_STREAM",
+                 "renderLocation" => "search_results_page",
+                 "scale" => 1,
+                 "stream_initial_count" => 0,
+                 "useDefaultActor" => false,
+                 "__relay_internal__pv__IsWorkUserrelayprovider" => false,
+                 "__relay_internal__pv__IsMergQAPollsrelayprovider" => false,
+                 "__relay_internal__pv__StoriesArmadilloReplyEnabledrelayprovider" => false,
+                 "__relay_internal__pv__StoriesRingrelayprovider" => false
+             ],
+             "server_timestamps" => "true",
+             "doc_id" => "6761275837251607" // is actually dynamic
+         ];
+
+         // append filters to nextpage
+         foreach($filter as $value){
+
+             $nextpage["variables"]["args"]["filters"][] =
+                 $value;
+         }
+
+         // get bsid: it sits three array levels below each "require" entry
+         foreach(($nextpagedata["require"] ?? []) as $key){
+
+             foreach($key as $innerkey){
+
+                 if(!is_array($innerkey)){
+
+                     continue;
+                 }
+
+                 foreach($innerkey as $inner_innerkey){
+
+                     if(!is_array($inner_innerkey)){
+
+                         continue;
+                     }
+
+                     foreach($inner_innerkey as $inner_inner_innerkey){
+
+                         if(
+                             isset(
+                                 $inner_inner_innerkey
+                                 ["variables"]
+                                 ["args"]
+                                 ["context"]
+                                 ["bsid"]
+                             )
+                         ){
+
+                             $nextpage
+                             ["variables"]
+                             ["args"]
+                             ["context"]
+                             ["bsid"] =
+                                 $inner_inner_innerkey
+                                 ["variables"]
+                                 ["args"]
+                                 ["context"]
+                                 ["bsid"];
+                         }
+                     }
+                 }
+             }
+         }
+
+         // copy session values out of the "define" entries
+         foreach(($nextpagedata["define"] ?? []) as $key){
+
+             if(isset($key[2]["haste_session"])){
+
+                 $nextpage["__hs"] = $key[2]["haste_session"];
+             }
+
+             if(isset($key[2]["connectionClass"])){
+
+                 $nextpage["__ccg"] = $key[2]["connectionClass"];
+             }
+
+             if(isset($key[2]["hsi"])){
+
+                 $nextpage["__hsi"] = (string)$key[2]["hsi"];
+             }
+
+             if(!empty($key[2]["token"])){
+
+                 $nextpage["lsd"] = $key[2]["token"];
+             }
+
+             if(isset($key[2]["__spin_r"])){
+
+                 // __rev mirrors __spin_r
+                 $nextpage["__spin_r"] = (string)$key[2]["__spin_r"];
+                 $nextpage["__rev"] = $nextpage["__spin_r"];
+             }
+
+             if(isset($key[2]["__spin_b"])){
+
+                 $nextpage["__spin_b"] = $key[2]["__spin_b"];
+             }
+
+             if(isset($key[2]["__spin_t"])){
+
+                 $nextpage["__spin_t"] = (string)$key[2]["__spin_t"];
+             }
+         }
+
+         preg_match(
+             '/{"u":"\\\\\/ajax\\\\\/qm\\\\\/\?__a=([0-9]+)&__user=([0-9]+)&__comet_req=([0-9]+)&jazoest=([0-9]+)"/',
+             $html,
+             $ajaxparams
+         );
+
+         if(count($ajaxparams) !== 5){
+
+             throw new Exception("Could not grep the AJAX parameters");
+         }
+
+         $nextpage["__a"] = $ajaxparams[1];
+         $nextpage["__user"] = $ajaxparams[2];
+         $nextpage["__comet_req"] = $ajaxparams[3];
+         $nextpage["jazoest"] = $ajaxparams[4];
+
+         // "variables" travels as a JSON string inside the form data
+         $nextpage["variables"] = json_encode($nextpage["variables"]);
+
+         $this->video_nextpage($nextpage);
+     }
+
+     return $this->out;
+ }
+
+ /**
+  * Execute a GraphQL pagination request and append its results to
+  * $this->out. When another page exists, the request is updated with the
+  * new cursor and stored so that $this->out["npt"] points at it.
+  *
+  * @param array $nextpage  Form fields for the GraphQL endpoint;
+  *                         "variables" must be a JSON-encoded string.
+  * @param bool  $getcursor Unused; kept for interface compatibility.
+  * @return void
+  * @throws Exception When the response is not JSON or carries no results.
+  */
+ private function video_nextpage($nextpage, $getcursor = false){
+
+     $json =
+         $this->get(
+             "https://www.facebook.com/api/graphql/",
+             $nextpage,
+             self::post
+         );
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode next page JSON");
+     }
+
+     // an error payload decodes fine but has no result list
+     if(
+         !isset(
+             $json
+             ["data"]
+             ["serpResponse"]
+             ["results"]
+             ["edges"]
+         )
+     ){
+
+         throw new Exception("Next page JSON contains no results");
+     }
+
+     $results =
+         $json
+         ["data"]
+         ["serpResponse"]
+         ["results"];
+
+     foreach($results["edges"] as $result){
+
+         $this->parse_edge($result);
+     }
+
+     if(($results["page_info"]["has_next_page"] ?? false) == 1){
+
+         // swap the cursor inside the JSON-encoded variables
+         $nextpage["variables"] = json_decode($nextpage["variables"], true);
+
+         $nextpage["variables"]["cursor"] =
+             $results
+             ["page_info"]
+             ["end_cursor"];
+
+         $nextpage["variables"] = json_encode($nextpage["variables"]);
+
+         //change this for second call. after, it's static.
+         // TODO: csr also updates to longer string
+         $nextpage["__dyn"] = "7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q322aew9G2S0zU20xi3y4o0B-q1ew65xOfxO1Vw8G11xmfz81s8hwGwQw9m1YwBgao6C2O0B85W3S7Udo5qfK0EUjwGzE2swwwJK2W2K0zK5o4q0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w";
+
+         // TODO: change this on third and 6th call
+         //$nextpage["__s"] = $this->randomstring(6) . ":" . explode(":", $nextpage["__s"], 2)[1];
+
+         $this->out["npt"] = $this->nextpage->store(json_encode($nextpage), "videos");
+     }
+ }
+
+ /**
+  * Convert one search result edge into a result entry and append it to
+  * $this->out["livestream"] (live broadcasts) or $this->out["video"]
+  * (everything else).
+  *
+  * Edges without a "video_metadata_model" are skipped.
+  *
+  * @param array $edge One element of the "edges" list.
+  * @return void
+  */
+ private function parse_edge($edge){
+
+     $append = "video";
+     $edge =
+         $edge
+         ["relay_rendering_strategy"]
+         ["view_model"] ?? null;
+
+     if(!isset($edge["video_metadata_model"])){
+
+         // not a video result
+         return;
+     }
+
+     $meta = $edge["video_metadata_model"];
+     $status = (string)($meta["video_broadcast_status"] ?? "");
+
+     if(strtolower($status) == "live"){
+
+         // handle livestream
+         $duration = "_LIVE";
+         $append = "livestream";
+         $timetext = null;
+         $views = (int)$meta["relative_time_string"];
+
+         $url_prefix = "https://www.facebook.com/watch/live/?v=";
+
+     }elseif(stripos($status, "vod") !== false){
+
+         // handle VOD format
+         $timetext = null;
+         $views = (int)$meta["relative_time_string"];
+
+         $duration =
+             $this->hms2int(
+                 $edge
+                 ["video_thumbnail_model"]
+                 ["video_duration_text"]
+             );
+
+         $url_prefix = "https://www.facebook.com/watch/live/?v=";
+
+     }else{
+
+         // handle normal format: "<relative time> · <view count>"
+         $timetext =
+             explode(
+                 " · ",
+                 $meta["relative_time_string"],
+                 2
+             );
+
+         if(count($timetext) === 2){
+
+             $views = $this->truncatedcount2int($timetext[1]);
+         }else{
+
+             $views = null;
+         }
+
+         $timetext = strtotime($timetext[0]);
+
+         if($timetext === false){
+
+             // unparseable date: report it as unknown like the other branches
+             $timetext = null;
+         }
+
+         $duration =
+             $this->hms2int(
+                 $edge
+                 ["video_thumbnail_model"]
+                 ["video_duration_text"]
+             );
+
+         $url_prefix = "https://www.facebook.com/watch/?v=";
+     }
+
+     if(isset($meta["video_owner_profile"]["uri_token"])){
+
+         $profileurl =
+             "https://www.facebook.com/watch/" .
+             $meta["video_owner_profile"]["uri_token"];
+     }else{
+
+         $profileurl = $meta["video_owner_profile"]["url"];
+     }
+
+     if(empty($meta["save_description"])){
+
+         $description = null;
+     }else{
+
+         // flatten newlines BEFORE limiting: limitstrlen() keeps only the
+         // first line of its input, so the old order cut the description
+         // at its first line break
+         $description =
+             $this->limitstrlen(
+                 str_replace(
+                     "\n",
+                     " ",
+                     $meta["save_description"]
+                 )
+             );
+     }
+
+     $this->out[$append][] = [
+         "title" =>
+             $this->limitstrlen(
+                 str_replace(
+                     "\n",
+                     " ",
+                     $meta["title"]
+                 ),
+                 100
+             ),
+         "description" => $description,
+         "author" => [
+             "name" => $meta["video_owner_profile"]["name"],
+             "url" => $profileurl,
+             "avatar" => null
+         ],
+         "date" => $timetext,
+         "duration" => $duration,
+         "views" => $views,
+         "thumb" =>
+             [
+                 "url" =>
+                     $edge
+                     ["video_thumbnail_model"]
+                     ["thumbnail_image"]
+                     ["uri"],
+                 "ratio" => "16:9"
+             ],
+         "url" =>
+             $url_prefix .
+             $edge
+             ["video_click_model"]
+             ["click_metadata_model"]
+             ["video_id"]
+     ];
+ }
+
+ /**
+  * Build a random string with rand().
+  *
+  * @param int|null $len Number of characters. With null, a length between
+  *                      141 and 145 is drawn and the larger alphabet
+  *                      (letters of both cases, digits 1-9 and "-") is
+  *                      used; otherwise lowercase letters and digits 1-9.
+  * @return string|null The generated string; null when no character was
+  *                     appended.
+  */
+ private function randomstring($len){
+
+     // [alphabet, highest index handed to rand()]
+     [$alphabet, $last] =
+         $len === null ?
+         ["abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-", 61] :
+         ["abcdefghijklmnopqrstuvwxyz123456789", 34];
+
+     if($len === null){
+
+         $len = rand(141, 145);
+     }
+
+     $result = null;
+     $written = 0;
+
+     while($written < $len){
+
+         $result .= $alphabet[rand(0, $last)];
+         $written++;
+     }
+
+     return $result;
+ }
+
+ /**
+  * Return the first line of $text after word-wrapping it at $len columns.
+  *
+  * @param string $text Text to shorten.
+  * @param int    $len  Wrap width handed to wordwrap().
+  * @return string Everything before the first line break of the wrapped text.
+  */
+ private function limitstrlen($text, $len = 300){
+
+     $wrapped = wordwrap($text, $len, "\n");
+     $break = strpos($wrapped, "\n");
+
+     if($break === false){
+
+         // fits on a single line
+         return $wrapped;
+     }
+
+     return substr($wrapped, 0, $break);
+ }
+
+ /**
+  * Convert a "h:m:s", "m:s" or "s" duration string into seconds.
+  *
+  * @param string $time Colon separated duration.
+  * @return int Duration in seconds.
+  */
+ private function hms2int($time){
+
+     // walk the fields from the right: seconds, minutes, hours
+     $fields = array_reverse(explode(":", $time, 3));
+     $weights = [1, 60, 3600];
+
+     $seconds = 0;
+     foreach($fields as $position => $field){
+
+         $seconds += (int)$field * $weights[$position];
+     }
+
+     return $seconds;
+ }
+
+ /**
+  * Convert an abbreviated count such as "1.2K views" into an integer.
+  *
+  * Only the first space-delimited token is read. A trailing "k", "m" or
+  * "b" (any case) multiplies the number by a thousand, a million or a
+  * billion. Thousands separators (",") are ignored.
+  *
+  * @param string $number Count text, optionally followed by a label.
+  * @return int Parsed count; 0 when the text holds no number.
+  */
+ private function truncatedcount2int($number){
+
+     $token = explode(" ", trim((string)$number), 2)[0];
+     $token = str_replace(",", "", $token);
+
+     if($token === ""){
+
+         return 0;
+     }
+
+     switch(strtolower($token[strlen($token) - 1])){
+
+         case "k":
+             $multiplier = 1000;
+             break;
+
+         case "m":
+             $multiplier = 1000000;
+             break;
+
+         case "b":
+             $multiplier = 1000000000;
+             break;
+
+         default:
+             $multiplier = 1;
+             break;
+     }
+
+     // the float cast reads the leading number and ignores the unit;
+     // round() absorbs binary representation error (1.1 * 1000)
+     return (int)round((float)$token * $multiplier);
+ }
+}
diff --git a/scraper/fivehpx.php b/scraper/fivehpx.php
new file mode 100644
index 0000000..8a600df
--- /dev/null
+++ b/scraper/fivehpx.php
@@ -0,0 +1,262 @@
+<?php
+
+class fivehpx{
+
+ // Load the helper libraries and create the two collaborators used by
+ // this scraper. Both are stored as dynamic properties (the class
+ // declares none).
+ public function __construct(){
+
+ // backend: used below for get_ip(), get(), store() and assign_proxy()
+ include "lib/backend.php";
+ $this->backend = new backend("fivehpx");
+
+ // fuckhtml: used below through getTextContent()
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the search filters offered by this scraper.
+  *
+  * @param string $page Result page type; not consulted.
+  * @return array A single "sort" filter definition.
+  */
+ public function getfilters($page){
+
+     // values are upper-cased by image() before being sent
+     $sort_options = [
+         "relevance" => "Relevance",
+         "pulse" => "Pulse",
+         "newest" => "Newest"
+     ];
+
+     $filters = [];
+     $filters["sort"] = [
+         "display" => "Sort",
+         "option" => $sort_options
+     ];
+
+     return $filters;
+ }
+
+ /**
+  * Perform an HTTP/2 request through the given proxy.
+  *
+  * A null $post_data yields a browser-style document request; anything
+  * else is sent as a JSON POST body with XHR-style headers.
+  *
+  * @param mixed       $proxy     Proxy handed to backend->assign_proxy().
+  * @param string      $url       Target URL.
+  * @param array       $get       Query parameters appended to $url.
+  * @param string|null $post_data Request body, or null for a plain GET.
+  * @return string|bool Response body as returned by curl_exec().
+  * @throws Exception With the cURL error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get = [], $post_data = null){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     $is_post = $post_data !== null;
+
+     if($is_post){
+
+         $headers = [
+             "User-Agent: " . config::USER_AGENT,
+             "Accept: */*",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "Referer: https://500px.com/",
+             "content-type: application/json",
+             //"x-csrf-token: undefined",
+             "x-500px-source: Search",
+             "Content-Length: " . strlen($post_data),
+             "Origin: https://500px.com",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             // "Cookie: _pin_unauth, _fbp, _sharedID, _sharedID_cst",
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-site",
+             "Priority: u=4",
+             "TE: trailers"
+         ];
+     }else{
+
+         $headers = [
+             "User-Agent: " . config::USER_AGENT,
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Upgrade-Insecure-Requests: 1",
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: same-origin",
+             "Sec-Fetch-User: ?1",
+             "Priority: u=0, i",
+             "TE: trailers"
+         ];
+     }
+
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     if($is_post){
+
+         // set post data
+         curl_setopt($curlproc, CURLOPT_POST, true);
+         curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
+     }
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $response = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         throw new Exception(curl_error($curlproc));
+     }
+
+     curl_close($curlproc);
+     return $response;
+ }
+
+ /**
+  * Search 500px for images through its GraphQL endpoint.
+  *
+  * On the first page the query variables are built from $get; on later
+  * pages they are restored from the token in $get["npt"], which also
+  * carries the cursor.
+  *
+  * @param array $get Request parameters; reads "npt", "s" and "sort".
+  * @return array Result structure with "status", "npt" and "image".
+  * @throws Exception On an empty search term, a failed request, an
+  *                   undecodable response or an API-reported error.
+  */
+ public function image($get){
+
+     $token = $get["npt"];
+
+     if(!$token){
+
+         // first page: build the query variables from the request
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $pagination = [
+             "sort" => strtoupper($get["sort"]),
+             "search" => $search,
+             "filters" => [],
+             "nlp" => false,
+         ];
+     }else{
+
+         // later pages: restore variables and proxy from the token
+         [$pagination, $proxy] =
+             $this->backend->get(
+                 $token, "images"
+             );
+
+         $pagination = json_decode($pagination, true);
+         $search = $pagination["search"];
+     }
+
+     // the cursor only appears in the query when the variables carry one
+     $has_cursor = isset($pagination["cursor"]);
+     $cursor_declaration = $has_cursor ? '$cursor: String, ' : "";
+     $cursor_argument = $has_cursor ? 'after: $cursor, ' : "";
+
+     $query =
+         'query PhotoSearchPaginationContainerQuery(' .
+         $cursor_declaration .
+         '$sort: PhotoSort, $search: String!, $filters: [PhotoSearchFilter!], $nlp: Boolean) { ...PhotoSearchPaginationContainer_query_1vzAZD} fragment PhotoSearchPaginationContainer_query_1vzAZD on Query { photoSearch(sort: $sort, first: 100, ' .
+         $cursor_argument .
+         'search: $search, filters: $filters, nlp: $nlp) { edges { node { id legacyId canonicalPath name description width height images(sizes: [33, 36]) { size url id } } } totalCount pageInfo { endCursor hasNextPage } }}';
+
+     $payload =
+         json_encode([
+             "operationName" => "PhotoSearchPaginationContainerQuery",
+             "variables" => $pagination,
+             "query" => $query
+         ]);
+
+     try{
+
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://api.500px.com/graphql",
+                 [],
+                 $payload
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch graphQL object");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode graphQL object");
+     }
+
+     if(isset($json["errors"][0]["message"])){
+
+         throw new Exception("500px returned an API error: " . $json["errors"][0]["message"]);
+     }
+
+     if(!isset($json["data"]["photoSearch"]["edges"])){
+
+         throw new Exception("No edges returned by API");
+     }
+
+     $photo_search = $json["data"]["photoSearch"];
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     foreach($photo_search["edges"] as $edge){
+
+         $node = $edge["node"];
+
+         // "name: description", with dangling separators trimmed off
+         $name = $this->fuckhtml->getTextContent($node["name"]);
+         $description = $this->fuckhtml->getTextContent($node["description"]);
+         $title = trim($name . ": " . $description, " :");
+
+         $small = $this->image_ratio(600, $node["width"], $node["height"]);
+         $large = $this->image_ratio(2048, $node["width"], $node["height"]);
+
+         $out["image"][] = [
+             "title" => $title,
+             "source" => [
+                 [
+                     "url" => $node["images"][1]["url"],
+                     "width" => $large[0],
+                     "height" => $large[1]
+                 ],
+                 [
+                     "url" => $node["images"][0]["url"],
+                     "width" => $small[0],
+                     "height" => $small[1]
+                 ]
+             ],
+             "url" => "https://500px.com" . $node["canonicalPath"]
+         ];
+     }
+
+     // get NPT token
+     if($photo_search["pageInfo"]["hasNextPage"] === true){
+
+         $next_variables = [
+             "cursor" => $photo_search["pageInfo"]["endCursor"],
+             "search" => $search,
+             "sort" => $pagination["sort"],
+             "filters" => [],
+             "nlp" => false
+         ];
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($next_variables),
+                 "images",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scale a width/height pair so that its longest edge becomes
+  * $longest_edge, preserving the aspect ratio.
+  *
+  * @param int|float $longest_edge Target length of the longest edge.
+  * @param int|float $width        Original width.
+  * @param int|float $height       Original height.
+  * @return array [width, height] after scaling, rounded down with floor().
+  *               [0.0, 0.0] when either input dimension is not positive.
+  */
+ private function image_ratio($longest_edge, $width, $height){
+
+     // a zero edge would divide by zero below
+     if(
+         $width <= 0 ||
+         $height <= 0
+     ){
+
+         return [0.0, 0.0];
+     }
+
+     // the smaller factor is the one that makes both edges fit
+     $ratio = min($longest_edge / $width, $longest_edge / $height);
+
+     return [
+         floor($width * $ratio),
+         floor($height * $ratio)
+     ];
+ }
+}
diff --git a/scraper/flickr.php b/scraper/flickr.php
new file mode 100644
index 0000000..71656ee
--- /dev/null
+++ b/scraper/flickr.php
@@ -0,0 +1,415 @@
+<?php
+
+class flickr{
+
+ const req_web = 0;
+ const req_xhr = 1;
+
+ // Load the helper libraries and create the two collaborators used by
+ // this scraper. Both are stored as dynamic properties (the class
+ // declares none).
+ public function __construct(){
+
+ // backend: used below for get_ip(), get(), store() and assign_proxy()
+ include "lib/backend.php";
+ $this->backend = new backend("flickr");
+
+ // fuckhtml: used below for load(), getElementsByClassName(),
+ // extract_json() and getTextContent()
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the search filters offered by this scraper.
+  *
+  * @param string $page Result page type; not consulted.
+  * @return array Filter definitions keyed by "nsfw", "sort", "color",
+  *               "style" and "license".
+  */
+ public function getfilters($page){
+
+     $nsfw = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes",
+             "maybe" => "Maybe",
+             "no" => "No",
+         ]
+     ];
+
+     $sort = [
+         "display" => "Sort by",
+         "option" => [
+             "relevance" => "Relevance",
+             "date-posted-desc" => "Newest uploads",
+             "date-posted-asc" => "Oldest uploads",
+             "date-taken-desc" => "Newest taken",
+             "date-taken-asc" => "Oldest taken",
+             "interestingness-desc" => "Interesting"
+         ]
+     ];
+
+     $color = [
+         "display" => "Color",
+         "option" => [
+             "any" => "Any color",
+             // color_codes=
+             "0" => "Red",
+             "1" => "Brown",
+             "2" => "Orange",
+             "b" => "Pink",
+             "4" => "Yellow",
+             "3" => "Golden",
+             "5" => "Lime",
+             "6" => "Green",
+             "7" => "Sky blue",
+             "8" => "Blue",
+             "9" => "Purple",
+             "a" => "Hot pink",
+             "c" => "White",
+             "d" => "Gray",
+             "e" => "Black",
+             // styles= override
+             "blackandwhite" => "Black & white",
+         ]
+     ];
+
+     // styles=
+     $style = [
+         "display" => "Style",
+         "option" => [
+             "any" => "Any style",
+             "depthoffield" => "Depth of field",
+             "minimalism" => "Minimalism",
+             "pattern" => "Patterns"
+         ]
+     ];
+
+     // option keys are comma separated license id lists
+     $license = [
+         "display" => "License",
+         "option" => [
+             "any" => "Any license",
+             "1,2,3,4,5,6,9,11,12,13,14,15,16" => "All creative commons",
+             "4,5,6,9,10,11,12,13" => "Commercial use allowed",
+             "1,2,4,5,9,10,11,12,14,15" => "Modifications allowed",
+             "4,5,9,10,11,12" => "Commercial use & mods allowed",
+             "7,9,10" => "No known copyright restrictions",
+             "8" => "U.S Government works"
+         ]
+     ];
+
+     return [
+         "nsfw" => $nsfw,
+         "sort" => $sort,
+         "color" => $color,
+         "style" => $style,
+         "license" => $license
+     ];
+ }
+
+ /**
+  * Perform an HTTP/2 GET request through the given proxy.
+  *
+  * @param mixed  $proxy   Proxy handed to backend->assign_proxy().
+  * @param string $url     Target URL.
+  * @param array  $get     Query parameters appended to $url.
+  * @param int    $reqtype flickr::req_web for browser-style document
+  *                        headers, anything else for XHR-style headers.
+  *                        Defaults to req_web so that the optional $get
+  *                        no longer precedes a required parameter
+  *                        (deprecated since PHP 8.0).
+  * @return string|bool Response body as returned by curl_exec().
+  * @throws Exception With the cURL error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+         $get = http_build_query($get);
+         $url .= "?" . $get;
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     if($reqtype === flickr::req_web){
+
+         curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Upgrade-Insecure-Requests: 1",
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: same-origin",
+             "Sec-Fetch-User: ?1",
+             "Priority: u=0, i",
+             "TE: trailers"]
+         );
+     }else{
+
+         curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: */*",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "Origin: https://www.flickr.com",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Referer: https://www.flickr.com/",
+             // Cookie:
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-site",
+             "TE: trailers"]
+         );
+     }
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         throw new Exception(curl_error($curlproc));
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /**
+  * Search Flickr for images by scraping the JSON model embedded in the
+  * HTML search page.
+  *
+  * On the first page the filters are computed from $get; on later pages
+  * they are restored from the token in $get["npt"]. The search text is
+  * stored in the token as well, so pagination does not depend on
+  * $get["s"] being sent again.
+  *
+  * @param array $get Request parameters; reads "npt", "s", "sort",
+  *                   "style", "color", "license" and "nsfw".
+  * @return array Result structure with "status", "npt" and "image".
+  * @throws Exception On an empty search term, a bad token, or when the
+  *                   embedded JSON cannot be located or decoded.
+  */
+ public function image($get){
+
+     if($get["npt"]){
+
+         [$filters, $proxy] =
+             $this->backend->get(
+                 $get["npt"], "images"
+             );
+
+         $filters = json_decode($filters, true);
+
+         if(
+             !is_array($filters) ||
+             !isset($filters["page"])
+         ){
+
+             throw new Exception("Failed to decode next page token");
+         }
+
+         // Workaround for the future, if flickr deprecates &page argument on html page
+         /*
+         try{
+
+             $json =
+                 $this->get(
+                     $proxy,
+                     "https://api.flickr.com/services/rest",
+                     [
+                         "sort" => $data["sort"],
+                         "parse_tags" => 1,
+                         // url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o
+                         "extras" => "can_comment,can_print,count_comments,count_faves,description,isfavorite,license,media,needs_interstitial,owner_name,path_alias,realname,rotation,url_sq,url_q,url_t,url_s,url_n,url_w,url_m,url_z,url_c,url_l",
+                         "per_page" => 100,
+                         "page" => $data["page"],
+                         "lang" => "en-US",
+                         "text" => $data["search"],
+                         "viewerNSID" => "",
+                         "method" => "flickr.photos.search",
+                         "csrf" => "",
+                         "api_key" => $data["api_key"],
+                         "format" => "json",
+                         "hermes" => 1,
+                         "hermesClient" => 1,
+                         "reqId" => $data["reqId"],
+                         "nojsoncallback" => 1
+                     ]
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch JSON");
+         }*/
+
+     }else{
+
+         if(strlen($get["s"]) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         // compute filters; "text" rides along so the token is self-contained
+         $filters = [
+             "page" => 1,
+             "sort" => $get["sort"],
+             "text" => $get["s"]
+         ];
+
+         if($get["style"] != "any"){
+
+             $filters["styles"] = $get["style"];
+         }
+
+         if($get["color"] != "any"){
+
+             if($get["color"] != "blackandwhite"){
+
+                 $filters["color_codes"] = $get["color"];
+             }else{
+
+                 // black & white is a style, and overrides the style filter
+                 $filters["styles"] = "blackandwhite";
+             }
+         }
+
+         if($get["license"] != "any"){
+
+             $filters["license"] = $get["license"];
+         }
+
+         switch($get["nsfw"]){
+
+             case "yes": $filters["safe_search"] = 0; break;
+             case "maybe": $filters["safe_search"] = 2; break;
+             case "no": $filters["safe_search"] = 1; break;
+         }
+     }
+
+     $get_params = [
+         // tokens stored before "text" was added fall back to the request
+         "text" => $filters["text"] ?? $get["s"],
+         "per_page" => 50,
+         // scrape highest resolution
+         "extras" => "url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o",
+         "view_all" => 1
+     ];
+
+     $get_params = array_merge($get_params, $filters);
+
+     $html =
+         $this->get(
+             $proxy,
+             "https://www.flickr.com/search/",
+             $get_params,
+             flickr::req_web
+         );
+
+     // @TODO
+     // get api_key and reqId, if flickr deprecates &page
+
+     $this->fuckhtml->load($html);
+
+     //
+     // get response JSON
+     //
+     $scripts =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "modelExport",
+             "script"
+         );
+
+     $found = false;
+     foreach($scripts as $script){
+
+         $json =
+             preg_split(
+                 '/modelExport: ?/',
+                 $script["innerHTML"],
+                 2
+             );
+
+         // preg_split() yields two parts only when the marker was present
+         if(
+             is_array($json) &&
+             count($json) === 2
+         ){
+
+             $found = true;
+             $json = $json[1];
+             break;
+         }
+     }
+
+     if($found === false){
+
+         throw new Exception("Failed to grep JSON");
+     }
+
+     $json =
+         json_decode(
+             $this->fuckhtml
+             ->extract_json(
+                 $json
+             ),
+             true
+         );
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(!isset($json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"]["_data"])){
+
+         throw new Exception("Failed to access data object");
+     }
+
+     $photos = $json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"];
+
+     foreach($photos["_data"] as $image){
+
+         if(!isset($image["data"])){
+
+             // flickr likes to gives us empty array objects
+             continue;
+         }
+
+         $image = $image["data"];
+
+         $sizes = $image["sizes"]["data"] ?? null;
+
+         if(
+             !is_array($sizes) ||
+             count($sizes) === 0
+         ){
+
+             // nothing to link to
+             continue;
+         }
+
+         $title = [];
+
+         if(isset($image["title"])){
+
+             $title[] =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $image["title"]
+                 );
+         }
+
+         if(isset($image["description"])){
+
+             $title[] =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     str_replace(
+                         "\n",
+                         " ",
+                         $image["description"]
+                     )
+                 );
+         }
+
+         $title = implode(": ", $title);
+
+         $sources = array_values($sizes);
+
+         // thumbnail: first available preferred size, else the first size listed
+         $thumb = $sources[0]["data"];
+         foreach(["n", "m", "w", "s"] as $testing_size){
+
+             if(isset($sizes[$testing_size])){
+
+                 $thumb = $sizes[$testing_size]["data"];
+                 break;
+             }
+         }
+
+         // full image: the last size listed
+         $og = $sources[count($sources) - 1]["data"];
+
+         $out["image"][] = [
+             "title" => $title,
+             "source" => [
+                 [
+                     "url" => "https:" . $og["displayUrl"],
+                     "width" => (int)$og["width"],
+                     "height" => (int)$og["height"]
+                 ],
+                 [
+                     "url" => "https:" . $thumb["displayUrl"],
+                     "width" => (int)$thumb["width"],
+                     "height" => (int)$thumb["height"]
+                 ]
+             ],
+             "url" => "https://www.flickr.com/photos/" . $image["ownerNsid"] . "/" . $image["id"] . "/"
+         ];
+     }
+
+     $total_items = (int)($photos["totalItems"] ?? 0);
+
+     // 50 matches the per_page value requested above
+     if($filters["page"] * 50 < $total_items){
+
+         $filters["page"]++;
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($filters),
+                 "images",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+}
diff --git a/scraper/ftm.php b/scraper/ftm.php
new file mode 100644
index 0000000..470c13e
--- /dev/null
+++ b/scraper/ftm.php
@@ -0,0 +1,161 @@
+<?php
+
+class ftm{
+
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("ftm");
+ }
+
+ public function getfilters($page){
+
+ return [];
+ }
+
+ private function get($proxy, $url, $search, $offset){
+
+ $curlproc = curl_init();
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ $payload =
+ json_encode(
+ [
+ "search" => $search,
+ "offset" => $offset
+ ]
+ );
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "Content-Length: " . strlen($payload),
+ "Content-Type: application/json",
+ "DNT: 1",
+ "Connection: keep-alive",
+ "Origin: https://findthatmeme.com",
+ "Referer: https://findthatmeme.com/?search=" . urlencode($search),
+ "Upgrade-Insecure-Requests: 1",
+ "Sec-Fetch-Dest: document",
+ "Sec-Fetch-Mode: navigate",
+ "Sec-Fetch-Site: none",
+ "Sec-Fetch-User: ?1",
+ "X-Auth-Key: undefined",
+ "X-CSRF-Validation-Header: true"]
+ );
+
+ curl_setopt($curlproc, CURLOPT_POST, true);
+ curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ throw new Exception(curl_error($curlproc));
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ /**
+  * Image search against the findthatmeme.com JSON API.
+  *
+  * @param array $get Request parameters; reads "npt" (next-page token) and
+  *                   "s" (search term).
+  * @return array Result set with "status", "npt" and "image" keys.
+  * @throws Exception When the search term is empty, the request fails, or
+  *                   the response is not a JSON array.
+  */
+ public function image($get){
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if($get["npt"]){
+
+         // resume a previous search: the token holds the offset and the term
+         [$data, $proxy] = $this->backend->get($get["npt"], "images");
+         $data = json_decode($data, true);
+
+         $count = $data["count"];
+         $search = $data["search"];
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $count = 0;
+         $proxy = $this->backend->get_ip();
+     }
+
+     try{
+         $json =
+             json_decode(
+                 $this->get(
+                     $proxy,
+                     "https://findthatmeme.com/api/v1/search",
+                     $search,
+                     $count
+                 ),
+                 true
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     // json_decode() yields null on failure, but a scalar or string is
+     // equally unusable here: we can only iterate a list of items
+     if(!is_array($json)){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $received = 0;
+
+     foreach($json as $item){
+
+         if(!is_array($item)){
+
+             // not a result object (eg. an error payload), ignore it
+             continue;
+         }
+
+         // every item the server handed out advances the offset
+         $count++;
+         $received++;
+
+         if(
+             isset($item["type"]) &&
+             $item["type"] == "VIDEO"
+         ){
+
+             if(!isset($item["thumbnail"])){
+
+                 continue;
+             }
+
+             $thumb = "thumb/" . $item["thumbnail"];
+         }else{
+
+             if(!isset($item["image_path"])){
+
+                 continue;
+             }
+
+             $thumb = $item["image_path"];
+         }
+
+         if(
+             !isset($item["created_at"]) ||
+             !isset($item["source_page_url"])
+         ){
+
+             continue;
+         }
+
+         $out["image"][] = [
+             "title" => date("jS \of F Y @ g:ia", strtotime($item["created_at"])),
+             "source" => [
+                 [
+                     "url" =>
+                         "https://s3.thehackerblog.com/findthatmeme/" .
+                         $thumb,
+                     "width" => null,
+                     "height" => null
+                 ]
+             ],
+             "url" => $item["source_page_url"]
+         ];
+     }
+
+     // only offer a next page when this one returned something, otherwise
+     // the caller would be able to paginate through empty pages forever
+     if($received !== 0){
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode([
+                     "count" => $count,
+                     "search" => $search
+                 ]),
+                 "images",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+}
diff --git a/scraper/ghostery.php b/scraper/ghostery.php
new file mode 100644
index 0000000..394756e
--- /dev/null
+++ b/scraper/ghostery.php
@@ -0,0 +1,320 @@
+<?php
+
+class ghostery{
+
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("ghostery");
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ public function getfilters($page){
+
+ if($page != "web"){
+
+ return [];
+ }
+
+ return [
+ "country" => [
+ "display" => "Country",
+ "option" => [
+ "any" => "All regions",
+ "AR" => "Argentina",
+ "AU" => "Australia",
+ "AT" => "Austria",
+ "BE" => "Belgium",
+ "BR" => "Brazil",
+ "CA" => "Canada",
+ "CL" => "Chile",
+ "DK" => "Denmark",
+ "FI" => "Finland",
+ "FR" => "France",
+ "DE" => "Germany",
+ "HK" => "Hong Kong",
+ "IN" => "India",
+ "ID" => "Indonesia",
+ "IT" => "Italy",
+ "JP" => "Japan",
+ "KR" => "Korea",
+ "MY" => "Malaysia",
+ "MX" => "Mexico",
+ "NL" => "Netherlands",
+ "NZ" => "New Zealand",
+ "NO" => "Norway",
+ "CN" => "People's Republic of China",
+ "PL" => "Poland",
+ "PT" => "Portugal",
+ "PH" => "Republic of the Philippines",
+ "RU" => "Russia",
+ "SA" => "Saudi Arabia",
+ "ZA" => "South Africa",
+ "ES" => "Spain",
+ "SE" => "Sweden",
+ "CH" => "Switzerland",
+ "TW" => "Taiwan",
+ "TR" => "Turkey",
+ "GB" => "United Kingdom",
+ "US" => "United States"
+ ]
+ ]
+ ];
+ }
+
+ /**
+  * Perform a GET request against ghosterysearch.com through the given proxy.
+  *
+  * @param mixed  $proxy   Proxy descriptor handed to backend->assign_proxy().
+  * @param string $url     Target URL.
+  * @param array  $get     Query parameters appended to $url; pass [] for none.
+  *                        (No default value: a default placed before the
+  *                        required $country parameter can never be used and
+  *                        is deprecated since PHP 8.0.)
+  * @param string $country Value for the "ctry" cookie; "any" is sent as "--".
+  * @return string|bool Response body as returned by curl_exec().
+  * @throws Exception With the curl error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get, $country){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+         $get = http_build_query($get);
+         $url .= "?" . $get;
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+         ["User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Referer: https://ghosterysearch.com",
+         "DNT: 1",
+         "Sec-GPC: 1",
+         "Connection: keep-alive",
+         "Cookie: ctry=" . ($country == "any" ? "--" : $country) . "; noads=true",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: same-origin",
+         "Sec-Fetch-User: ?1",
+         "Priority: u=0, i"]
+     );
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // grab the message first so the handle can be released before throwing
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /**
+  * Web search on ghosterysearch.com.
+  *
+  * @param array $get Request parameters; reads "npt" (next-page token),
+  *                   "s" (search term) and "country".
+  * @return array Result set; only "web" and "npt" are ever populated here.
+  * @throws Exception When the page cannot be fetched or the result section
+  *                   is missing from the HTML.
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         [$query, $proxy] = $this->backend->get($get["npt"], "web");
+
+         parse_str($query, $query);
+
+         // the country filter travels inside the token as "c"
+         $country = $query["c"] ?? "any";
+         unset($query["c"]);
+
+         $url = "https://ghosterysearch.com/search?" . http_build_query($query);
+         $params = [];
+     }else{
+
+         $proxy = $this->backend->get_ip();
+
+         $country = $get["country"];
+         $url = "https://ghosterysearch.com/search";
+         $params = [
+             "q" => $get["s"]
+         ];
+     }
+
+     try{
+
+         $html =
+             $this->get(
+                 $proxy,
+                 $url,
+                 $params,
+                 $country
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $this->fuckhtml->load($html);
+
+     $results_wrapper =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "results",
+             "section"
+         );
+
+     if(count($results_wrapper) === 0){
+
+         throw new Exception("Failed to grep result section");
+     }
+
+     $this->fuckhtml->load($results_wrapper[0]);
+
+     // get search results
+     $results =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "result",
+             "li"
+         );
+
+     if(count($results) === 0){
+
+         return $out;
+     }
+
+     foreach($results as $result){
+
+         $this->fuckhtml->load($result);
+
+         $a =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "url",
+                 "a"
+             );
+
+         if(
+             count($a) === 0 ||
+             !isset($a[0]["attributes"]["href"])
+         ){
+
+             // no usable link, nothing to show for this entry
+             continue;
+         }
+
+         $a = $a[0];
+
+         $headings =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "h2"
+             );
+
+         if(count($headings) === 0){
+
+             // a result without a title is not worth returning
+             continue;
+         }
+
+         $paragraphs =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "p"
+             );
+
+         $description = null;
+
+         if(count($paragraphs) !== 0){
+
+             $description =
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $paragraphs[0]
+                     )
+                 );
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $headings[0]
+                     )
+                 ),
+             "description" => $description,
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $a
+                     ["attributes"]
+                     ["href"]
+                 ),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     $this->fuckhtml->load($html);
+
+     // get pagination token
+     $pagination_wrapper =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "pagination",
+             "div"
+         );
+
+     if(count($pagination_wrapper) !== 0){
+
+         // found next page!
+         $this->fuckhtml->load($pagination_wrapper[0]);
+
+         $a =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "a"
+             );
+
+         $last = count($a) - 1;
+
+         if(
+             $last >= 0 &&
+             isset($a[$last]["attributes"]["href"])
+         ){
+
+             $q =
+                 parse_url(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $a[$last]
+                         ["attributes"]
+                         ["href"]
+                     ),
+                     PHP_URL_QUERY
+                 );
+
+             // parse_url() gives null/false when the link has no query string
+             if(
+                 is_string($q) &&
+                 $q !== ""
+             ){
+
+                 // store the country actually used for THIS request, so the
+                 // filter survives across pages ($get["country"] is only
+                 // meaningful on the first page)
+                 $out["npt"] =
+                     $this->backend
+                     ->store(
+                         $q . "&c=" . $country,
+                         "web",
+                         $proxy
+                     );
+             }
+         }
+     }
+
+     return $out;
+ }
+
+ /**
+  * Strip leading/trailing whitespace, dots and ellipsis characters.
+  *
+  * trim() works on bytes, so putting the multi-byte "…" in its mask also
+  * strips the individual bytes 0xE2, 0x80 and 0xA6 and mangles neighbouring
+  * UTF-8 characters (a leading "“" or a trailing "À", for instance). The
+  * stripping is therefore done with a Unicode-aware regex instead.
+  *
+  * @param string $title Text to clean up.
+  * @return string Cleaned text.
+  */
+ private function titledots($title){
+
+     $title = (string)$title;
+
+     $stripped =
+         preg_replace(
+             '/\A[ .\t\n\r\0\x0B…]+|[ .\t\n\r\0\x0B…]+\z/u',
+             "",
+             $title
+         );
+
+     if($stripped === null){
+
+         // input is not valid UTF-8: fall back to a byte-safe ASCII trim
+         return trim($title, " .\t\n\r\0\x0B");
+     }
+
+     return $stripped;
+ }
+}
diff --git a/scraper/google.php b/scraper/google.php
new file mode 100644
index 0000000..0c73ea0
--- /dev/null
+++ b/scraper/google.php
@@ -0,0 +1,2989 @@
+<?php
+
+// @TODO check for consent.google.com page, if need be
+
+class google{
+
+ public function __construct(){
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+
+ include "lib/backend.php";
+ $this->backend = new backend("google");
+ }
+
+ public function getfilters($page){
+
+ $base = [
+ "country" => [ // gl=<country> (image: cr=countryAF)
+ "display" => "Country",
+ "option" => [
+ "any" => "Instance's country",
+ "af" => "Afghanistan",
+ "al" => "Albania",
+ "dz" => "Algeria",
+ "as" => "American Samoa",
+ "ad" => "Andorra",
+ "ao" => "Angola",
+ "ai" => "Anguilla",
+ "aq" => "Antarctica",
+ "ag" => "Antigua and Barbuda",
+ "ar" => "Argentina",
+ "am" => "Armenia",
+ "aw" => "Aruba",
+ "au" => "Australia",
+ "at" => "Austria",
+ "az" => "Azerbaijan",
+ "bs" => "Bahamas",
+ "bh" => "Bahrain",
+ "bd" => "Bangladesh",
+ "bb" => "Barbados",
+ "by" => "Belarus",
+ "be" => "Belgium",
+ "bz" => "Belize",
+ "bj" => "Benin",
+ "bm" => "Bermuda",
+ "bt" => "Bhutan",
+ "bo" => "Bolivia",
+ "ba" => "Bosnia and Herzegovina",
+ "bw" => "Botswana",
+ "bv" => "Bouvet Island",
+ "br" => "Brazil",
+ "io" => "British Indian Ocean Territory",
+ "bn" => "Brunei Darussalam",
+ "bg" => "Bulgaria",
+ "bf" => "Burkina Faso",
+ "bi" => "Burundi",
+ "kh" => "Cambodia",
+ "cm" => "Cameroon",
+ "ca" => "Canada",
+ "cv" => "Cape Verde",
+ "ky" => "Cayman Islands",
+ "cf" => "Central African Republic",
+ "td" => "Chad",
+ "cl" => "Chile",
+ "cn" => "China",
+ "cx" => "Christmas Island",
+ "cc" => "Cocos (Keeling) Islands",
+ "co" => "Colombia",
+ "km" => "Comoros",
+ "cg" => "Congo",
+ "cd" => "Congo, the Democratic Republic",
+ "ck" => "Cook Islands",
+ "cr" => "Costa Rica",
+ "ci" => "Cote D'ivoire",
+ "hr" => "Croatia",
+ "cu" => "Cuba",
+ "cy" => "Cyprus",
+ "cz" => "Czech Republic",
+ "dk" => "Denmark",
+ "dj" => "Djibouti",
+ "dm" => "Dominica",
+ "do" => "Dominican Republic",
+ "ec" => "Ecuador",
+ "eg" => "Egypt",
+ "sv" => "El Salvador",
+ "gq" => "Equatorial Guinea",
+ "er" => "Eritrea",
+ "ee" => "Estonia",
+ "et" => "Ethiopia",
+ "fk" => "Falkland Islands (Malvinas)",
+ "fo" => "Faroe Islands",
+ "fj" => "Fiji",
+ "fi" => "Finland",
+ "fr" => "France",
+ "gf" => "French Guiana",
+ "pf" => "French Polynesia",
+ "tf" => "French Southern Territories",
+ "ga" => "Gabon",
+ "gm" => "Gambia",
+ "ge" => "Georgia",
+ "de" => "Germany",
+ "gh" => "Ghana",
+ "gi" => "Gibraltar",
+ "gr" => "Greece",
+ "gl" => "Greenland",
+ "gd" => "Grenada",
+ "gp" => "Guadeloupe",
+ "gu" => "Guam",
+ "gt" => "Guatemala",
+ "gn" => "Guinea",
+ "gw" => "Guinea-Bissau",
+ "gy" => "Guyana",
+ "ht" => "Haiti",
+ "hm" => "Heard Island and Mcdonald Islands",
+ "va" => "Holy See (Vatican City State)",
+ "hn" => "Honduras",
+ "hk" => "Hong Kong",
+ "hu" => "Hungary",
+ "is" => "Iceland",
+ "in" => "India",
+ "id" => "Indonesia",
+ "ir" => "Iran, Islamic Republic",
+ "iq" => "Iraq",
+ "ie" => "Ireland",
+ "il" => "Israel",
+ "it" => "Italy",
+ "jm" => "Jamaica",
+ "jp" => "Japan",
+ "jo" => "Jordan",
+ "kz" => "Kazakhstan",
+ "ke" => "Kenya",
+ "ki" => "Kiribati",
+ "kp" => "Korea, Democratic People's Republic",
+ "kr" => "Korea, Republic",
+ "kw" => "Kuwait",
+ "kg" => "Kyrgyzstan",
+ "la" => "Lao People's Democratic Republic",
+ "lv" => "Latvia",
+ "lb" => "Lebanon",
+ "ls" => "Lesotho",
+ "lr" => "Liberia",
+ "ly" => "Libyan Arab Jamahiriya",
+ "li" => "Liechtenstein",
+ "lt" => "Lithuania",
+ "lu" => "Luxembourg",
+ "mo" => "Macao",
+ "mk" => "Macedonia, the Former Yugosalv Republic",
+ "mg" => "Madagascar",
+ "mw" => "Malawi",
+ "my" => "Malaysia",
+ "mv" => "Maldives",
+ "ml" => "Mali",
+ "mt" => "Malta",
+ "mh" => "Marshall Islands",
+ "mq" => "Martinique",
+ "mr" => "Mauritania",
+ "mu" => "Mauritius",
+ "yt" => "Mayotte",
+ "mx" => "Mexico",
+ "fm" => "Micronesia, Federated States",
+ "md" => "Moldova, Republic",
+ "mc" => "Monaco",
+ "mn" => "Mongolia",
+ "ms" => "Montserrat",
+ "ma" => "Morocco",
+ "mz" => "Mozambique",
+ "mm" => "Myanmar",
+ "na" => "Namibia",
+ "nr" => "Nauru",
+ "np" => "Nepal",
+ "nl" => "Netherlands",
+ "an" => "Netherlands Antilles",
+ "nc" => "New Caledonia",
+ "nz" => "New Zealand",
+ "ni" => "Nicaragua",
+ "ne" => "Niger",
+ "ng" => "Nigeria",
+ "nu" => "Niue",
+ "nf" => "Norfolk Island",
+ "mp" => "Northern Mariana Islands",
+ "no" => "Norway",
+ "om" => "Oman",
+ "pk" => "Pakistan",
+ "pw" => "Palau",
+ "ps" => "Palestinian Territory, Occupied",
+ "pa" => "Panama",
+ "pg" => "Papua New Guinea",
+ "py" => "Paraguay",
+ "pe" => "Peru",
+ "ph" => "Philippines",
+ "pn" => "Pitcairn",
+ "pl" => "Poland",
+ "pt" => "Portugal",
+ "pr" => "Puerto Rico",
+ "qa" => "Qatar",
+ "re" => "Reunion",
+ "ro" => "Romania",
+ "ru" => "Russian Federation",
+ "rw" => "Rwanda",
+ "sh" => "Saint Helena",
+ "kn" => "Saint Kitts and Nevis",
+ "lc" => "Saint Lucia",
+ "pm" => "Saint Pierre and Miquelon",
+ "vc" => "Saint Vincent and the Grenadines",
+ "ws" => "Samoa",
+ "sm" => "San Marino",
+ "st" => "Sao Tome and Principe",
+ "sa" => "Saudi Arabia",
+ "sn" => "Senegal",
+ "cs" => "Serbia and Montenegro",
+ "sc" => "Seychelles",
+ "sl" => "Sierra Leone",
+ "sg" => "Singapore",
+ "sk" => "Slovakia",
+ "si" => "Slovenia",
+ "sb" => "Solomon Islands",
+ "so" => "Somalia",
+ "za" => "South Africa",
+ "gs" => "South Georgia and the South Sandwich Islands",
+ "es" => "Spain",
+ "lk" => "Sri Lanka",
+ "sd" => "Sudan",
+ "sr" => "Suriname",
+ "sj" => "Svalbard and Jan Mayen",
+ "sz" => "Swaziland",
+ "se" => "Sweden",
+ "ch" => "Switzerland",
+ "sy" => "Syrian Arab Republic",
+ "tw" => "Taiwan, Province of China",
+ "tj" => "Tajikistan",
+ "tz" => "Tanzania, United Republic",
+ "th" => "Thailand",
+ "tl" => "Timor-Leste",
+ "tg" => "Togo",
+ "tk" => "Tokelau",
+ "to" => "Tonga",
+ "tt" => "Trinidad and Tobago",
+ "tn" => "Tunisia",
+ "tr" => "Turkey",
+ "tm" => "Turkmenistan",
+ "tc" => "Turks and Caicos Islands",
+ "tv" => "Tuvalu",
+ "ug" => "Uganda",
+ "ua" => "Ukraine",
+ "ae" => "United Arab Emirates",
+ "uk" => "United Kingdom",
+ "us" => "United States",
+ "um" => "United States Minor Outlying Islands",
+ "uy" => "Uruguay",
+ "uz" => "Uzbekistan",
+ "vu" => "Vanuatu",
+ "ve" => "Venezuela",
+ "vn" => "Viet Nam",
+ "vg" => "Virgin Islands, British",
+ "vi" => "Virgin Islands, U.S.",
+ "wf" => "Wallis and Futuna",
+ "eh" => "Western Sahara",
+ "ye" => "Yemen",
+ "zm" => "Zambia",
+ "zw" => "Zimbabwe"
+ ]
+ ],
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes", // safe=active
+ "no" => "No" // safe=off
+ ]
+ ]
+ ];
+
+ switch($page){
+
+ case "web":
+ return array_merge(
+ $base,
+ [
+ "lang" => [ // lr=<lang> (prefix lang with "lang_")
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "ar" => "Arabic",
+ "bg" => "Bulgarian",
+ "ca" => "Catalan",
+ "cs" => "Czech",
+ "da" => "Danish",
+ "de" => "German",
+ "el" => "Greek",
+ "en" => "English",
+ "es" => "Spanish",
+ "et" => "Estonian",
+ "fi" => "Finnish",
+ "fr" => "French",
+ "hr" => "Croatian",
+ "hu" => "Hungarian",
+ "id" => "Indonesian",
+ "is" => "Icelandic",
+ "it" => "Italian",
+ "iw" => "Hebrew",
+ "ja" => "Japanese",
+ "ko" => "Korean",
+ "lt" => "Lithuanian",
+ "lv" => "Latvian",
+ "nl" => "Dutch",
+ "no" => "Norwegian",
+ "pl" => "Polish",
+ "pt" => "Portuguese",
+ "ro" => "Romanian",
+ "ru" => "Russian",
+ "sk" => "Slovak",
+ "sl" => "Slovenian",
+ "sr" => "Serbian",
+ "sv" => "Swedish",
+ "tr" => "Turkish",
+ "zh-CN" => "Chinese (Simplified)",
+ "zh-TW" => "Chinese (Traditional)"
+ ]
+ ],
+ "newer" => [ // tbs
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ],
+ "spellcheck" => [
+ "display" => "Spellcheck",
+ "option" => [
+ "yes" => "Yes",
+ "no" => "No"
+ ]
+ ]
+ ]
+ );
+ break;
+
+ case "images":
+ return array_merge(
+ $base,
+ [
+ "time" => [ // tbs=qdr:<time>
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "d" => "Past 24 hours",
+ "w" => "Past week",
+ "m" => "Past month",
+ "y" => "Past year"
+ ]
+ ],
+ "size" => [ // imgsz
+ "display" => "Size",
+ "option" => [
+ "any" => "Any size",
+ "l" => "Large",
+ "m" => "Medium",
+ "i" => "Icon",
+ "qsvga" => "Larger than 400x300",
+ "vga" => "Larger than 640x480",
+ "svga" => "Larger than 800x600",
+ "xga" => "Larger than 1024x768",
+ "2mp" => "Larger than 2MP",
+ "4mp" => "Larger than 4MP",
+ "6mp" => "Larger than 6MP",
+ "8mp" => "Larger than 8MP",
+ "10mp" => "Larger than 10MP",
+ "12mp" => "Larger than 12MP",
+ "15mp" => "Larger than 15MP",
+ "20mp" => "Larger than 20MP",
+ "40mp" => "Larger than 40MP",
+ "70mp" => "Larger than 70MP"
+ ]
+ ],
+ "ratio" => [ // imgar
+ "display" => "Aspect ratio",
+ "option" => [
+ "any" => "Any ratio",
+ "t|xt" => "Tall",
+ "s" => "Square",
+ "w" => "Wide",
+ "xw" => "Panoramic"
+ ]
+ ],
+ "color" => [ // imgc
+ "display" => "Color",
+ "option" => [
+ "any" => "Any color",
+ "color" => "Full color",
+ "bnw" => "Black & white",
+ "trans" => "Transparent",
+ // from here, imgcolor
+ "red" => "Red",
+ "orange" => "Orange",
+ "yellow" => "Yellow",
+ "green" => "Green",
+ "teal" => "Teal",
+ "blue" => "Blue",
+ "purple" => "Purple",
+ "pink" => "Pink",
+ "white" => "White",
+ "gray" => "Gray",
+ "black" => "Black",
+ "brown" => "Brown"
+ ]
+ ],
+ "type" => [ // tbs=itp:<type>
+ "display" => "Type",
+ "option" => [
+ "any" => "Any type",
+ "clipart" => "Clip Art",
+ "lineart" => "Line Drawing",
+ "animated" => "Animated"
+ ]
+ ],
+ "format" => [ // as_filetype
+ "display" => "Format",
+ "option" => [
+ "any" => "Any format",
+ "jpg" => "JPG",
+ "gif" => "GIF",
+ "png" => "PNG",
+ "bmp" => "BMP",
+ "svg" => "SVG",
+ "webp" => "WEBP",
+ "ico" => "ICO",
+ "craw" => "RAW"
+ ]
+ ],
+ "rights" => [ // tbs=sur:<rights>
+ "display" => "Usage rights",
+ "option" => [
+ "any" => "Any license",
+ "cl" => "Creative Commons licenses",
+ "ol" => "Commercial & other licenses"
+ ]
+ ]
+ ]
+ );
+ break;
+
+ case "videos":
+ return array_merge(
+ $base,
+ [
+ "newer" => [ // tbs
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ],
+ "duration" => [
+ "display" => "Duration",
+ "option" => [
+ "any" => "Any duration",
+ "s" => "Short (0-4min)", // tbs=dur:s
+ "m" => "Medium (4-20min)", // tbs=dur:m
+ "l" => "Long (20+ min)" // tbs=dur:l
+ ]
+ ],
+ "quality" => [
+ "display" => "Quality",
+ "option" => [
+ "any" => "Any quality",
+ "h" => "High quality" // tbs=hq:h
+ ]
+ ],
+ "captions" => [
+ "display" => "Captions",
+ "option" => [
+ "any" => "No preference",
+ "yes" => "Closed captioned" // tbs=cc:1
+ ]
+ ]
+ ]
+ );
+ break;
+
+ case "news":
+ return array_merge(
+ $base,
+ [
+ "newer" => [ // tbs
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ],
+ "sort" => [
+ "display" => "Sort",
+ "option" => [
+ "relevance" => "Relevance",
+ "date" => "Date" // sbd:1
+ ]
+ ]
+ ]
+ );
+ break;
+ }
+ }
+
+ private function get($proxy, $url, $get = [], $use_lynx = false){
+
+ $curlproc = curl_init();
+
+ if($use_lynx === false){
+
+ $headers = [
+ "User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1",
+ //"Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY",
+ "Connection: keep-alive",
+ "Upgrade-Insecure-Requests: 1",
+ "Sec-Fetch-Dest: document",
+ "Sec-Fetch-Mode: navigate",
+ "Sec-Fetch-Site: none",
+ "Sec-Fetch-User: ?1",
+ "Priority: u=1",
+ "TE: trailers"
+ ];
+
+ // use http2
+ curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+ }else{
+
+ $headers = [
+ "Accept: text/html, text/plain, text/sgml, */*;q=0.01",
+ "Accept-Encoding: gzip, compress, bzip2",
+ "Accept-Language: en",
+ "User-Agent: Lynx/2.9.0dev.12 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/3.7.8"
+ ];
+ }
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ // follow redirects
+ curl_setopt($curlproc, CURLOPT_FOLLOWLOCATION, true);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ throw new Exception(curl_error($curlproc));
+ }
+
+ curl_close($curlproc);
+
+ if($use_lynx){
+
+ return mb_convert_encoding($data, "UTF-8", "ISO-8859-1");
+ }
+
+ return $data;
+ }
+
+
+ /**
+  * Collect images that the page injects through inline javascript and index
+  * them by element id in $this->dimg.
+  *
+  * Two sources are scanned: the "google.ldi={...}" JSON objects (id => url)
+  * and the "var s='data:image/...';var ii=[...]" snippets (one data URI
+  * shared by a list of ids).
+  *
+  * @param string $html Raw page HTML.
+  * @return void
+  */
+ private function scrape_dimg($html){
+
+     // get images loaded through javascript
+     $this->dimg = [];
+
+     preg_match_all(
+         '/function\(\){google\.ldi=({.*?});/',
+         $html,
+         $dimg
+     );
+
+     if(isset($dimg[1])){
+
+         foreach($dimg[1] as $i){
+
+             $tmp = json_decode($i, true);
+
+             if(!is_array($tmp)){
+
+                 // the lazy regex can capture something that is not valid
+                 // JSON; skip it instead of iterating over null
+                 continue;
+             }
+
+             foreach($tmp as $key => $value){
+
+                 $this->dimg[$key] =
+                     $this->unshit_thumb(
+                         $value
+                     );
+             }
+         }
+     }
+
+     // get additional javascript base64 images
+     preg_match_all(
+         '/var s=\'(data:image\/[^\']+)\';var ii=\[((?:\'[^\']+\',?)+)\];/',
+         $html,
+         $dimg
+     );
+
+     if(isset($dimg[1])){
+
+         for($i=0; $i<count($dimg[1]); $i++){
+
+             // $dimg[2] holds the quoted, comma separated id list
+             $delims = explode(",", $dimg[2][$i]);
+             $string =
+                 $this->fuckhtml
+                 ->parseJsString(
+                     $dimg[1][$i]
+                 );
+
+             foreach($delims as $delim){
+
+                 $this->dimg[trim($delim, "'")] = $string;
+             }
+         }
+     }
+ }
+
+
+ private function scrape_imagearr($html){
+ // get image links arrays
+ preg_match_all(
+ '/\[[0-9]+,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
+ $html,
+ $image_arr
+ );
+
+ $this->image_arr = [];
+ if(isset($image_arr[1])){
+
+ for($i=0; $i<count($image_arr[1]); $i++){
+
+ $original =
+ $this->fuckhtml
+ ->parseJsString(
+ $image_arr[5][$i]
+ );
+
+ if(
+ preg_match(
+ '/^x-raw-image/',
+ $original
+ )
+ ){
+
+ // only add thumbnail, google doesnt have OG resolution
+ $this->image_arr[$image_arr[1][$i]] = [
+ [
+ "url" =>
+ $this->unshit_thumb(
+ $this->fuckhtml
+ ->parseJsString(
+ $image_arr[2][$i]
+ )
+ ),
+ "width" => (int)$image_arr[7][$i], // pass the OG image width & height
+ "height" => (int)$image_arr[6][$i]
+ ]
+ ];
+
+ continue;
+ }
+
+ $this->image_arr[$image_arr[1][$i]] =
+ [
+ [
+ "url" => $original,
+ "width" => (int)$image_arr[7][$i],
+ "height" => (int)$image_arr[6][$i]
+ ],
+ [
+ "url" =>
+ $this->unshit_thumb(
+ $this->fuckhtml
+ ->parseJsString(
+ $image_arr[2][$i]
+ )
+ ),
+ "width" => (int)$image_arr[4][$i],
+ "height" => (int)$image_arr[3][$i]
+ ]
+ ];
+ }
+ }
+ }
+
+
+ /**
+  * Look up a javascript-injected image by its element id.
+  *
+  * @param string $dimg Element id.
+  * @return mixed Stored value from $this->dimg, or null when the id is unknown.
+  */
+ private function getdimg($dimg){
+
+     return $this->dimg[$dimg] ?? null;
+ }
+
+
+ /**
+  * Reduce a gstatic thumbnail URL to its bare "q" parameter.
+  *
+  * URLs whose host matches tbn*.gstatic.com are rewritten to
+  * https://<host>/images?q=<q>; anything else (including URLs that cannot be
+  * parsed or that carry no "q" parameter) is returned untouched.
+  *
+  * @param mixed $url Thumbnail URL.
+  * @return mixed Rewritten URL, or $url unchanged.
+  */
+ private function unshit_thumb($url){
+     // https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
+     // https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA
+
+     if(!is_string($url)){
+
+         return $url;
+     }
+
+     $parts = parse_url($url);
+
+     if(
+         !is_array($parts) ||
+         !isset($parts["host"]) ||
+         !isset($parts["query"]) || // no query string means no "q" to keep
+         !preg_match(
+             '/tbn.*\.gstatic\.com/',
+             $parts["host"]
+         )
+     ){
+
+         return $url;
+     }
+
+     parse_str($parts["query"], $params);
+
+     if(
+         isset($params["q"]) &&
+         is_string($params["q"]) // "q[]=" would decode to an array
+     ){
+
+         return "https://" . $parts["host"] . "/images?q=" . $params["q"];
+     }
+
+     return $url;
+ }
+
+
+ private function parsestyles(){
+
+ $styles = [];
+
+ $style_div =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "style"
+ );
+
+ $raw_styles = "";
+
+ foreach($style_div as $style){
+
+ $raw_styles .= $style["innerHTML"];
+ }
+
+ // filter out media/keyframe queries
+ $raw_styles =
+ preg_replace(
+ '/@\s*(?!font-face)[^{]+\s*{[\S\s]+?}\s*}/',
+ "",
+ $raw_styles
+ );
+
+ // get styles
+ preg_match_all(
+ '/(.+?){([\S\s]*?)}/',
+ $raw_styles,
+ $matches
+ );
+
+ for($i=0; $i<count($matches[1]); $i++){
+
+ // get style values
+ preg_match_all(
+ '/([^:;]+):([^;]*?(?:\([^)]+\)[^;]*?)?)(?:;|$)/',
+ $matches[2][$i],
+ $values_regex
+ );
+
+ $values = [];
+ for($k=0; $k<count($values_regex[1]); $k++){
+
+ $values[trim($values_regex[1][$k])] =
+ strtolower(trim($values_regex[2][$k]));
+ }
+
+ $names = explode(",", $matches[1][$i]);
+
+ // h1,h2,h3 will each get their own array index
+ foreach($names as $name){
+
+ $name = trim($name, "}\t\n\r\0\x0B");
+
+ foreach($values as $key => $value){
+
+ $styles[$name][$key] = $value;
+ }
+ }
+ }
+
+ foreach($styles as $key => $values){
+
+ $styles[$key]["_c"] = count($values);
+ }
+
+ $this->styles = $styles;
+
+ // get CSS colors
+ $this->css_colors = [];
+
+ if(isset($this->styles[":root"])){
+
+ foreach($this->styles[":root"] as $key => $value){
+
+ $this->css_colors[$value] = strtolower($key);
+ }
+ }
+ }
+
+
+
+ // Find the class/id name of the CSS rule whose declarations are exactly
+ // the ones given in $styles (property => value).
+ //
+ // Returns the last space-separated part of the matching selector with "."
+ // and "#" replaced by spaces and left-trimmed, or false when no rule in
+ // $this->styles matches.
+ private function getstyle($styles){
+
+ // remember how many declarations were requested; parsestyles() stores
+ // the declaration count of every rule under the same "_c" key
+ $styles["_c"] = count($styles);
+
+ foreach($this->styles as $style_key => $style_values){
+
+ // a match needs every requested property AND the "_c" entry to be
+ // equal (hence the + 1), so rules with extra declarations are rejected
+ if(count(array_intersect_assoc($style_values, $styles)) === $styles["_c"] + 1){
+
+ // only keep the right-most part of a descendant selector
+ $style_key =
+ explode(" ", $style_key);
+
+ $style_key = $style_key[count($style_key) - 1];
+
+ return
+ ltrim(
+ str_replace(
+ [".", "#"],
+ " ",
+ $style_key
+ )
+ );
+ }
+ }
+
+ return false;
+ }
+
+
+
+ /**
+  * Map a CSS color value back to the name it is declared under in the
+  * $this->css_colors table.
+  *
+  * @param string $color Color value to look up.
+  * @return mixed Matching entry of $this->css_colors, or null if there is none.
+  */
+ private function getcolorvar($color){
+
+     return $this->css_colors[$color] ?? null;
+ }
+
+
+
+ public function web($get){
+
+ if($get["npt"]){
+
+ [$get, $proxy] = $this->backend->get($get["npt"], "web");
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com" . $get,
+ [],
+ true
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $lang = $get["lang"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+ $spellcheck = $get["spellcheck"];
+ $proxy = $this->backend->get_ip();
+
+ $offset = 0;
+
+ $params = [
+ "q" => $search,
+ "hl" => "en",
+ "num" => 20
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // language
+ if($lang != "any"){
+
+ $params["lr"] = "lang_" . $lang;
+ }
+
+ // generate tbs
+ $tbs = [];
+
+ // get date
+ $older = $older === false ? null : date("m/d/Y", $older);
+ $newer = $newer === false ? null : date("m/d/Y", $newer);
+
+ if(
+ $older !== null ||
+ $newer !== null
+ ){
+
+ $tbs["cdr"] = "1";
+ $tbs["cd_min"] = $newer;
+ $tbs["cd_max"] = $older;
+ }
+
+ // spellcheck filter
+ if($spellcheck == "no"){
+
+ $params["nfpr"] = "1";
+ }
+
+ if(count($tbs) !== 0){
+
+ $params["tbs"] = "";
+
+ foreach($tbs as $key => $value){
+
+ $params["tbs"] .= $key . ":" . $value . ",";
+ }
+
+ $params["tbs"] = rtrim($params["tbs"], ",");
+ }
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com/search",
+ $params,
+ true
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ //$html = file_get_contents("scraper/google.html");
+ }
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ $this->fuckhtml->load($html);
+ $this->detect_sorry();
+
+ $this->parsestyles();
+
+ $boxes =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "border" => "thin solid #dadce0",
+ "padding" => "12px 16px 12px 16px",
+ "margin-bottom" => "10px",
+ "font-family" => "sans-serif"
+ ]),
+ "div"
+ );
+
+ $skip_next = false;
+
+ // get next page token
+ $npt =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "border" => "thin solid #dadce0",
+ "color" => "#70757a",
+ "font-size" => "14px",
+ "text-align" => "center",
+ "table-layout" => "fixed",
+ "width" => "100%"
+ ]),
+ "table"
+ );
+
+ if(count($npt) !== 0){
+
+ $this->fuckhtml->load($npt[0]);
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ foreach($as as $a){
+
+ $text =
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ );
+
+ if(
+ $text == "Next&nbsp;>" ||
+ $text == ">"
+ ){
+
+ $out["npt"] =
+ $this->backend->store(
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ ),
+ "web",
+ $proxy
+ );
+ }
+ }
+
+ $this->fuckhtml->load($html);
+ }
+
+ $first_box = true;
+ foreach($boxes as $box){
+
+ $this->fuckhtml->load($box);
+
+ if($first_box){
+
+ //
+ // Probe for word correction
+ //
+ $first_box = false;
+
+ $txt =
+ $this->fuckhtml
+ ->getTextContent($box);
+
+ if(
+ preg_match(
+ '/^Showing results for /',
+ $txt
+ )
+ ){
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($as) === 2){
+
+ $out["spelling"] = [
+ "type" => "including",
+ "using" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $as[0]
+ ),
+ "correction" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $as[1]
+ )
+ ];
+ }
+ continue;
+ }
+ }
+
+ // probe for custom container
+ $container_title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "font-weight" => "bold"
+ ])
+ );
+
+ if(count($container_title) !== 0){
+
+ $container_title =
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $container_title[0]
+ )
+ );
+
+ if($container_title == "images"){
+
+ //
+ // Parse image carousel
+ //
+ $images =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "display" => "inline-block",
+ "padding" => "2px",
+ "padding-bottom" => "4px"
+ ]),
+ "a"
+ );
+
+ foreach($images as $image){
+
+ $this->fuckhtml->load($image);
+
+ $image_data =
+ $this->unshiturl(
+ $image["attributes"]["href"],
+ true
+ );
+
+ $img =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "img"
+ )[0];
+
+ $out["image"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $img["attributes"]["alt"]
+ )
+ ),
+ "source" => [
+ [
+ "url" => $image_data["url"],
+ "width" => $image_data["image_width"],
+ "height" => $image_data["image_height"]
+ ],
+ [
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $img["attributes"]["src"]
+ ),
+ "width" => $image_data["thumb_width"],
+ "height" => $image_data["thumb_height"]
+ ]
+ ],
+ "url" => $image_data["ref"]
+ ];
+ }
+
+ continue;
+ }
+
+ if(
+ $container_title == "related searches" ||
+ $container_title == "people also search for"
+ ){
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#202124",
+ "font-size" => "13px",
+ "line-height" => "20px"
+ ]),
+ "span"
+ );
+
+ foreach($as as $a){
+
+ $out["related"][] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ );
+ }
+ continue;
+ }
+ }
+
+ // probe for website link
+ $link =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#1967d2",
+ "font-size" => "18px",
+ "line-height" => "24px"
+ ]),
+ "a"
+ );
+
+ if(count($link) !== 0){
+
+ //
+ // Parse search result
+ //
+
+ $this->fuckhtml->load($link[0]);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#1967d2",
+ "font-size" => "18px",
+ "line-height" => "24px"
+ ]),
+ "span"
+ );
+
+ if(count($title) === 0){
+
+ continue;
+ }
+
+ $this->fuckhtml->load($box);
+
+ $sublinks = [];
+ $table = [];
+
+ $categories =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#202124",
+ "font-size" => "13px",
+ "line-height" => "20px"
+ ]),
+ "span"
+ );
+
+ $i = 0;
+ foreach($categories as $category){
+
+ $this->fuckhtml->load($category);
+
+ // probe for sublinks
+ $subs =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#1967d2"
+ ]),
+ "a"
+ );
+
+ if(count($subs) !== 0){
+
+ foreach($subs as $sub){
+
+ $url =
+ $this->unshiturl(
+ $this->fuckhtml
+ ->getTextContent(
+ $sub["attributes"]["href"]
+ )
+ );
+
+ if(
+ preg_match(
+ '/^https?:\/\//',
+ $url
+ )
+ ){
+
+ $sublinks[] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $sub
+ )
+ ),
+ "description" => null,
+ "url" =>
+ $this->unshiturl(
+ $this->fuckhtml
+ ->getTextContent(
+ $sub["attributes"]["href"]
+ )
+ ),
+ "date" => null
+ ];
+ }
+ }
+
+ unset($categories[$i]);
+ }
+
+ $i++;
+ }
+
+ // get description & date
+ $date = null;
+
+ $categories = array_values($categories);
+
+ //print_r($categories);
+
+ $c = count($categories) - 1;
+
+ $description =
+ $this->fuckhtml
+ ->getTextContent(
+ $categories[$c]
+ );
+
+ // remove last category since we're done with it
+ unset($categories[$c]);
+
+ // probe for date
+ $description_tmp = explode("·", $description, 2);
+ $date_tmp = strtotime(trim($description_tmp[0]));
+
+ if(
+ count($description_tmp) === 2 &&
+ strlen($description_tmp[0]) <= 20 &&
+ $date_tmp !== false
+ ){
+
+ $description =
+ ltrim(
+ $this->titledots(
+ $description_tmp[1]
+ )
+ );
+ $date = $date_tmp;
+ }else{
+
+ $description =
+ $this->titledots(
+ $description
+ );
+ }
+
+ // remaining categories should all be greytext
+ if(count($categories) !== 0){
+
+ $texts =
+ explode(
+ "·",
+ preg_replace(
+ '/\s+/',
+ " ",
+ $this->fuckhtml
+ ->getTextContent(
+ $categories[0]
+ )
+ )
+ );
+
+ foreach($texts as $text){
+
+ $text = trim($text);
+
+ if(
+ preg_match(
+ '/^Rating ([0-9.]+)(?: \(([0-9,]+)\))?/',
+ $text,
+ $rating
+ )
+ ){
+
+ $table["Rating"] = $rating[1];
+ if(isset($rating[2])){
+
+ $table["Rating"] .= " (" . $rating[2] . " votes)";
+ }
+
+ continue;
+ }
+
+ if(stripos($text, "stock") !== false){
+
+ $table["Stock"] = $text;
+ continue;
+ }
+ }
+ }
+
+ $out["web"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ ),
+ "description" => $description,
+ "url" =>
+ $this->unshiturl(
+ $link[0]["attributes"]["href"]
+ ),
+ "date" => $date,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => $sublinks,
+ "table" => $table
+ ];
+
+ continue;
+ }
+
+ // parse wikipedia heads
+ $wiki_title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#202124",
+ "font-size" => "18px",
+ "line-height" => "24px"
+ ]),
+ "span"
+ );
+
+ if(count($wiki_title) !== 0){
+
+ $wiki_title =
+ $this->fuckhtml
+ ->getTextContent(
+ $wiki_title[0]
+ );
+
+ if($wiki_title == "See results about"){
+
+ // ignore
+ continue;
+ }
+
+ if($wiki_title == "Top stories"){
+
+ //
+ // Parse news
+ //
+ $tds =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "td"
+ );
+
+ foreach($tds as $td){
+
+ $this->fuckhtml->load($td);
+
+ $a =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($a) === 0){
+
+ continue;
+ }
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#1967d2"
+ ]),
+ "span"
+ );
+
+ if(count($title) === 0){
+
+ continue;
+ }
+
+ $date = null;
+
+ $meta_div =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#70757a",
+ "font-size" => "13px",
+ "line-height" => "20px"
+ ]),
+ "span"
+ );
+
+ $meta_div =
+ explode(
+ "·",
+ $this->fuckhtml
+ ->getTextContent(
+ $meta_div[count($meta_div) - 1]
+ ),
+ 2
+ );
+
+ if(count($meta_div) === 2){
+
+ $date = strtotime($meta_div[count($meta_div) - 1]);
+
+ if($date === false){
+
+ $date = null;
+ }
+ }
+
+ $out["news"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ ),
+ "description" => null,
+ "date" => $date,
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "url" =>
+ $this->unshiturl(
+ $a[0]["attributes"]["href"]
+ )
+ ];
+ }
+ continue;
+ }
+
+ //
+ // Parse wikipedia heads
+ //
+
+ $table_div =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "table"
+ );
+
+ if(count($table_div) === 0){
+
+ continue;
+ }
+
+ $this->fuckhtml->load($table_div[0]);
+
+ // remove table from box
+ $box["innerHTML"] =
+ str_replace(
+ $table_div[0]["outerHTML"],
+ "",
+ $box["innerHTML"]
+ );
+
+ // find wiki image
+ $thumb = null;
+
+ $img =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "img"
+ );
+
+ if(count($img) !== 0){
+
+ $thumb =
+ $this->fuckhtml
+ ->getTextContent(
+ $img[0]["attributes"]["src"]
+ );
+ }
+
+ $tds =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "td"
+ );
+
+ $description = [];
+
+ foreach($tds as $td){
+
+ // probe for subtitle
+ $this->fuckhtml->load($td);
+
+ $subtext =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#70757a",
+ "font-size" => "13px",
+ "line-height" => "20px"
+ ])
+ );
+
+ if(count($subtext) !== 0){
+
+ $description[] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $subtext[0]
+ )
+ ];
+ break;
+ }
+ }
+
+ $this->fuckhtml->load($box);
+
+ // probe for word definition
+ $lists =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "ol"
+ );
+
+ if(count($lists) !== 0){
+
+ $description = [];
+
+ foreach($lists as $list){
+
+ $box["innerHTML"] =
+ explode(
+ $list["outerHTML"],
+ $box["innerHTML"],
+ 2
+ );
+
+ if(
+ count($box["innerHTML"]) === 1 ||
+ trim($box["innerHTML"][0]) == ""
+ ){
+
+ break;
+ }
+
+ $description[] = [
+ "type" => "title",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $box["innerHTML"][0]
+ )
+ ];
+
+ $this->fuckhtml->load($list);
+
+ $lis =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "li"
+ );
+
+ $increment = 1;
+
+ foreach($lis as $li){
+
+ $this->fuckhtml->load($li);
+
+ $list_items =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#202124",
+ "font-size" => "13px",
+ "line-height" => "20px"
+ ])
+ );
+
+ $first_item = true;
+ foreach($list_items as $it){
+
+ if($first_item){
+
+ $first_item = false;
+ $c = count($description);
+
+ if(
+ $c !== 0 &&
+ $description[$c - 1]["type"] == "text"
+ ){
+
+ $description[$c - 1]["value"] .=
+ "\n\n" .
+ $increment . ". " . $this->fuckhtml
+ ->getTextContent(
+ $it
+ );
+ }else{
+
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ $increment . ". " . $this->fuckhtml
+ ->getTextContent(
+ $it
+ )
+ ];
+ }
+ }else{
+
+ $description[] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $it
+ )
+ ];
+ }
+
+ $increment++;
+ }
+ }
+
+ $box["innerHTML"] = $box["innerHTML"][1];
+ }
+
+ $out["answer"][] = [
+ "title" => $wiki_title,
+ "description" => $description,
+ "url" => null,
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+
+ continue;
+ }
+
+ // get separator between description and facts
+ $separator =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "height" => "4px"
+ ]),
+ "div"
+ );
+
+ $box_html = [];
+ $table = [];
+
+ if(count($separator) !== 0){
+
+ $box_html =
+ explode(
+ $separator[0]["outerHTML"],
+ $box["innerHTML"],
+ 2
+ );
+
+ if(count($box_html) === 2){
+
+ $box["innerHTML"] = $box_html[0];
+ }
+
+ $this->fuckhtml->load($box_html[1]);
+
+ // get all facts
+ $facts =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "div"
+ );
+
+ foreach($facts as $fact){
+
+ if($fact["level"] !== 1){ continue; }
+
+ $fact =
+ explode(
+ ":",
+ $this->fuckhtml
+ ->getTextContent(
+ $fact
+ )
+ );
+
+ $table[trim(preg_replace('/\s+/', " ", $fact[0]))] =
+ trim(preg_replace('/\s+/', " ", $fact[1]));
+ }
+
+ $this->fuckhtml->load($box);
+ }
+
+ // remove wikipedia link
+ $wiki_link =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "color" => "#1967d2"
+ ]),
+ "a"
+ );
+
+ $url = null;
+ if(count($wiki_link) !== 0){
+
+ foreach($wiki_link as $link){
+
+ if(
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ )
+ ) == "wikipedia"
+ ){
+
+ $box["innerHTML"] =
+ str_replace(
+ $link["outerHTML"],
+ "",
+ $box["innerHTML"]
+ );
+
+ $url =
+ $this->unshiturl(
+ $link["attributes"]["href"]
+ );
+
+ $this->fuckhtml->load($box);
+ break;
+ }
+ }
+ }
+
+ // remains of box should be description
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $box
+ )
+ )
+ ];
+
+ $out["answer"][] = [
+ "title" => $wiki_title,
+ "description" => $description,
+ "url" => $url,
+ "thumb" => $thumb,
+ "table" => $table,
+ "sublink" => []
+ ];
+ }
+ }
+
+ return $out;
+ }
+
+
+
+ /**
+  * Scrape one page of Google video results (udm=7).
+  *
+  * @param array $get Request filters. Reads "npt" (next-page token) or, for
+  *                   a fresh search, "s", "country", "nsfw", "older",
+  *                   "newer", "duration", "quality" and "captions".
+  * @return array     "status", "npt" and the "video", "author",
+  *                   "livestream", "playlist" and "reel" lists. Only
+  *                   "video" is populated by this method.
+  * @throws Exception When the page cannot be fetched, Google serves a
+  *                   captcha, or the result container is missing.
+  */
+ public function video($get){
+
+ if($get["npt"]){
+
+ // resume a previous search: the token holds the JSON-encoded parameters
+ [$params, $proxy] = $this->backend->get($get["npt"], "video");
+ $params = json_decode($params, true);
+
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+ $duration = $get["duration"];
+ $quality = $get["quality"];
+ $captions = $get["captions"];
+ $proxy = $this->backend->get_ip();
+
+ $params = [
+ "q" => $search,
+ "udm" => "7",
+ "hl" => "en",
+ "num" => 20
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // every tbs entry is kept as key => value so that all of them are
+ // serialized the same way ("key:value") further down
+ $tbs = [];
+
+ // get date
+ $older = $older === false ? null : date("m/d/Y", $older);
+ $newer = $newer === false ? null : date("m/d/Y", $newer);
+
+ if(
+ $older !== null ||
+ $newer !== null
+ ){
+
+ $tbs["cdr"] = "1";
+ $tbs["cd_min"] = $newer;
+ $tbs["cd_max"] = $older;
+ }
+
+ // duration
+ if($duration != "any"){
+
+ $tbs["dur"] = $duration;
+ }
+
+ // quality
+ if($quality != "any"){
+
+ $tbs["hq"] = $quality;
+ }
+
+ // captions
+ if($captions != "any"){
+
+ $tbs["cc"] = $captions;
+ }
+
+ // append tbs
+ // (a plain implode() on $tbs would drop the keys and turn the date
+ // range into "1,<date>,<date>", so the pairs are built explicitly)
+ if(count($tbs) !== 0){
+
+ $pairs = [];
+
+ foreach($tbs as $key => $value){
+
+ $pairs[] = $key . ":" . $value;
+ }
+
+ $params["tbs"] = implode(",", $pairs);
+ }
+ }
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ // advance the offset so the stored token points at the next page
+ if(!isset($params["start"])){
+
+ $params["start"] = 0;
+ }
+ $params["start"] += 20;
+
+ $this->fuckhtml->load($html);
+
+ //
+ // Parse web video page
+ //
+ $this->detect_sorry();
+
+ // parse all <style> tags
+ $this->parsestyles();
+
+ // get javascript images
+ $this->scrape_dimg($html);
+
+ $this->scrape_imagearr($html);
+
+ $out = [
+ "status" => "ok",
+ "npt" =>
+ $this->backend->store(
+ json_encode($params),
+ "video", // same category name the token is read back with above
+ $proxy
+ ),
+ "video" => [],
+ "author" => [],
+ "livestream" => [],
+ "playlist" => [],
+ "reel" => []
+ ];
+
+ $search_div =
+ $this->fuckhtml
+ ->getElementById(
+ "center_col"
+ );
+
+ if($search_div === false){
+
+ throw new Exception("Failed to grep search div");
+ }
+
+ $this->fuckhtml->load($search_div);
+
+ $results =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "margin" => "0px 0px 30px"
+ ]),
+ "div"
+ );
+
+ foreach($results as $result){
+
+ $this->fuckhtml->load($result);
+
+ $url =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($url) === 0){
+
+ // no url, weird, continue
+ continue;
+ }
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "h3"
+ );
+
+ if(count($title) === 0){
+
+ // no title, weird, continue
+ continue;
+ }
+
+ // get description
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "-webkit-box-orient" => "vertical",
+ "display" => "-webkit-box",
+ "-webkit-line-clamp" => "2",
+ "overflow" => "hidden",
+ "word-break" => "break-word"
+ ]),
+ "div"
+ );
+
+ if(count($description) === 0){
+
+ $description = null;
+ }else{
+
+ $description =
+ html_entity_decode(
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ )
+ )
+ );
+ }
+
+ // get author + date posted
+ $metadiv =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "margin-top" => "12px"
+ ]),
+ "div"
+ );
+
+ $author = null;
+ $date = null;
+
+ if(count($metadiv) !== 0){
+
+ // the meta line is split on "·"; with 3 parts the author and
+ // date are the last two, with 2 parts they are the only two
+ $metadiv =
+ explode(
+ "·",
+ $this->fuckhtml
+ ->getTextContent(
+ $metadiv[0]
+ )
+ );
+
+ if(count($metadiv) === 3){
+
+ $author = trim($metadiv[1]);
+ $date = strtotime(trim($metadiv[2]));
+ }elseif(count($metadiv) === 2){
+
+ $author = trim($metadiv[0]);
+ $date = strtotime(trim($metadiv[1]));
+ }
+
+ // strtotime() yields false on unparsable text; report "no date"
+ if($date === false){
+
+ $date = null;
+ }
+ }
+
+ $thumb = [
+ "url" => null,
+ "ratio" => null
+ ];
+
+ $image =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "img"
+ );
+
+ $duration = null;
+
+ if(
+ count($image) !== 0 &&
+ isset($image[0]["attributes"]["id"])
+ ){
+
+ $thumb = [
+ "url" => $this->getdimg($image[0]["attributes"]["id"]),
+ "ratio" => "16:9"
+ ];
+
+ // get duration
+ $duration =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle([
+ "background-color" => "rgba(0,0,0,0.6)",
+ "color" => "#fff",
+ "fill" => "#fff"
+ ])
+ );
+
+ if(count($duration) !== 0){
+
+ $duration =
+ $this->hms2int(
+ $this->fuckhtml
+ ->getTextContent(
+ $duration[0]
+ ));
+ }else{
+
+ $duration = null;
+ }
+ }
+
+ $out["video"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ ),
+ "description" => $description,
+ "author" => [
+ "name" => $author,
+ "url" => null,
+ "avatar" => null
+ ],
+ "date" => $date,
+ "duration" => $duration,
+ "views" => null,
+ "thumb" => $thumb,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $url[0]["attributes"]["href"]
+ )
+ ];
+ }
+
+ return $out;
+ }
+
+
+
+ /**
+  * Scrape one page of Google news results (tbm=nws).
+  *
+  * @param array $get Request filters. Reads "npt" (next-page token) or, for
+  *                   a fresh search, "s", "country", "nsfw", "older",
+  *                   "newer" and "sort".
+  * @return array     "status", "npt" (null when no next-page link exists)
+  *                   and the "news" list.
+  * @throws Exception When the page cannot be fetched, Google serves a
+  *                   captcha, or the result container is missing.
+  */
+ public function news($get){
+
+ if($get["npt"]){
+
+ // the token holds the relative URL of the next page
+ [$req, $proxy] = $this->backend->get($get["npt"], "news");
+ /*parse_str(
+ parse_url($req, PHP_URL_QUERY),
+ $search
+ );*/
+
+ try{
+
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com" . $req,
+ []
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+ $sort = $get["sort"];
+ $proxy = $this->backend->get_ip();
+
+ $params = [
+ "q" => $search,
+ "tbm" => "nws",
+ "hl" => "en",
+ "num" => "20"
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ $tbs = [];
+
+ // get date
+ $older = $older === false ? null : date("m/d/Y", $older);
+ $newer = $newer === false ? null : date("m/d/Y", $newer);
+
+ if(
+ $older !== null ||
+ $newer !== null
+ ){
+
+ $tbs["cdr"] = "1";
+ $tbs["cd_min"] = $newer;
+ $tbs["cd_max"] = $older;
+ }
+
+ // relevance
+ if($sort == "date"){
+
+ $tbs["sbd"] = "1";
+ }
+
+ // append tbs as comma separated "key:value" pairs
+ if(count($tbs) !== 0){
+
+ $params["tbs"] = "";
+
+ foreach($tbs as $key => $value){
+
+ $params["tbs"] .= $key . ":" . $value . ",";
+ }
+
+ $params["tbs"] = rtrim($params["tbs"], ",");
+ }
+
+ //$html = file_get_contents("scraper/google-news.html");
+
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com/search",
+ $params
+ );
+ }
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "news" => []
+ ];
+
+ $this->fuckhtml->load($html);
+
+ $this->detect_sorry();
+
+ // get images
+ $this->scrape_dimg($html);
+
+ // parse styles
+ $this->parsestyles();
+
+ $center_col =
+ $this->fuckhtml
+ ->getElementById(
+ "center_col",
+ "div"
+ );
+
+ // the other getElementById() call sites in this class compare the
+ // "not found" result against false; null is still accepted here so
+ // the previous check keeps working
+ if(
+ $center_col === false ||
+ $center_col === null
+ ){
+
+ throw new Exception("Could not grep result div");
+ }
+
+ $this->fuckhtml->load($center_col);
+
+ // get next page
+ $npt =
+ $this->fuckhtml
+ ->getElementById(
+ "pnnext",
+ "a"
+ );
+
+ if($npt !== false){
+
+ $out["npt"] =
+ $this->backend->store(
+ $this->fuckhtml
+ ->getTextContent(
+ $npt["attributes"]
+ ["href"]
+ ),
+ "news",
+ $proxy
+ );
+ }
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "jsname",
+ "a"
+ );
+
+ foreach($as as $a){
+
+ $this->fuckhtml->load($a);
+
+ // get title
+ $title =
+ $this->fuckhtml
+ ->getElementsByAttributeValue(
+ "role",
+ "heading",
+ "div"
+ );
+
+ if(count($title) === 0){
+
+ continue;
+ }
+
+ $title =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ );
+
+ // get thumbnail
+ $image =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "id",
+ "img"
+ );
+
+ // check for padded title node, if found, we're inside a carousel
+ $probe =
+ count(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle(
+ [
+ "padding" => "16px 16px 40px 16px"
+ ]
+ ),
+ "div"
+ )
+ ) !== 0;
+
+ if(
+ count($image) !== 0 &&
+ !isset($image[0]["attributes"]["width"])
+ ){
+
+ $thumb = [
+ "url" =>
+ $this->getdimg(
+ $image[0]["attributes"]["id"]
+ ),
+ "ratio" => $probe === true ? "16:9" : "1:1"
+ ];
+ }else{
+
+ $thumb = [
+ "url" => null,
+ "ratio" => null
+ ];
+ }
+
+ $description = null;
+
+ if($probe === false){
+
+ // the description is the first styled div with a margin-top
+ $desc_divs =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "style",
+ "div"
+ );
+
+ foreach($desc_divs as $desc){
+
+ if(
+ strpos(
+ $desc["attributes"]["style"],
+ "margin-top:"
+ ) !== false
+ ){
+
+ $description =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $desc
+ )
+ );
+ break;
+ }
+ }
+ }
+
+ // get author
+ $author =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle(
+ [
+ "overflow" => "hidden",
+ "text-align" => "left",
+ "text-overflow" => "ellipsis",
+ "white-space" => "nowrap",
+ "margin-bottom" => "8px"
+ ]
+ ),
+ "div"
+ );
+
+ if(count($author) !== 0){
+
+ $author =
+ $this->fuckhtml
+ ->getTextContent(
+ $author[0]
+ );
+ }else{
+
+ $author = null;
+ }
+
+ // get date: last <span> of the first styled div whose style
+ // contains "bottom:"
+ $date = null;
+
+ $date_div =
+ $this->fuckhtml
+ ->getElementsByAttributeName(
+ "style",
+ "div"
+ );
+
+ foreach($date_div as $d){
+
+ if(
+ strpos(
+ $d["attributes"]["style"],
+ "bottom:"
+ ) === false
+ ){
+
+ continue;
+ }
+
+ $this->fuckhtml->load($d);
+
+ $span =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ // guard against a div without spans and unparsable dates
+ if(count($span) !== 0){
+
+ $date_tmp =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $span[count($span) - 1]
+ )
+ );
+
+ if($date_tmp !== false){
+
+ $date = $date_tmp;
+ }
+ }
+ break;
+ }
+
+ $out["news"][] = [
+ "title" => $title,
+ "author" => $author,
+ "description" => $description,
+ "date" => $date,
+ "thumb" => $thumb,
+ "url" =>
+ $this->unshiturl(
+ $a["attributes"]
+ ["href"]
+ )
+ ];
+ }
+
+ return $out;
+ }
+
+
+
+
+ /**
+  * Scrape one page of Google image results (udm=2).
+  *
+  * @param array $get Request filters. Reads "npt" (next-page token) or, for
+  *                   a fresh search, "s", "country", "nsfw", "time", "size",
+  *                   "ratio", "color", "type", "format" and "rights".
+  * @return array     "status", "npt" (null when no further page is assumed)
+  *                   and the "image" list.
+  * @throws Exception When the search term is empty, the page cannot be
+  *                   fetched, or Google serves a captcha.
+  */
+ public function image($get){
+
+ // generate parameters
+ if($get["npt"]){
+
+ [$params, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
+ );
+
+ $params = json_decode($params, true);
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $time = $get["time"];
+ $size = $get["size"];
+ $ratio = $get["ratio"];
+ $color = $get["color"];
+ $type = $get["type"];
+ $format = $get["format"];
+ $rights = $get["rights"];
+
+ $params = [
+ "q" => $search,
+ "udm" => "2" // get images
+ ];
+
+ // country (image search uses cr instead of gl)
+ if($country != "any"){
+
+ $params["cr"] = "country" . strtoupper($country);
+ }
+
+ // nsfw
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // generate tbs
+ $tbs = [];
+
+ // time
+ if($time != "any"){
+
+ $tbs["qdr"] = $time;
+ }
+
+ // size
+ if($size != "any"){
+
+ $params["imgsz"] = $size;
+ }
+
+ // ratio
+ if($ratio != "any"){
+
+ $params["imgar"] = $ratio;
+ }
+
+ // color
+ if($color != "any"){
+
+ if(
+ $color == "color" ||
+ $color == "trans"
+ ){
+
+ $params["imgc"] = $color;
+ }elseif($color == "bnw"){
+
+ $params["imgc"] = "gray";
+ }else{
+
+ // anything else is sent as a specific color name
+ $tbs["ic"] = "specific";
+ $tbs["isc"] = $color;
+ }
+ }
+
+ // type
+ if($type != "any"){
+
+ $tbs["itp"] = $type;
+ }
+
+ // format
+ if($format != "any"){
+
+ $params["as_filetype"] = $format;
+ }
+
+ // rights (tbs)
+ if($rights != "any"){
+
+ $tbs["sur"] = $rights;
+ }
+
+ // append tbs as comma separated "key:value" pairs
+ if(count($tbs) !== 0){
+
+ $params["tbs"] = "";
+
+ foreach($tbs as $key => $value){
+
+ $params["tbs"] .= $key . ":" . $value . ",";
+ }
+
+ $params["tbs"] = rtrim($params["tbs"], ",");
+ }
+ }
+ /*
+ $handle = fopen("scraper/page.html", "r");
+ $html = fread($handle, filesize("scraper/page.html"));
+ fclose($handle);*/
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.google.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get search page");
+ }
+
+ $this->fuckhtml->load($html);
+
+ $this->detect_sorry();
+
+ // get javascript images
+ $this->scrape_imagearr($html);
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "image" => []
+ ];
+
+ $images =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "ivg-i",
+ "div"
+ );
+
+ foreach($images as $div){
+
+ // make sure we dont attempt to show an image we dont have data for
+ // (checked first so tiles without data are never parsed)
+ if(
+ !isset($div["attributes"]["data-docid"]) ||
+ !isset($this->image_arr[$div["attributes"]["data-docid"]])
+ ){
+
+ continue;
+ }
+
+ $source =
+ $this->image_arr[
+ $div["attributes"]["data-docid"]
+ ];
+
+ $this->fuckhtml->load($div);
+
+ $imgs =
+ $this->fuckhtml
+ ->getElementsByTagName("img");
+
+ if(count($imgs) === 0){
+
+ // tile without an <img>, nothing to read the title from
+ continue;
+ }
+
+ $image = $imgs[0];
+
+ $out["image"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $image["attributes"]["alt"]
+ )
+ ),
+ "source" => $source,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $div["attributes"]["data-lpage"]
+ )
+ ];
+ }
+
+ // as usual, no way to check if there is a next page reliably
+ // NOTE(review): the page is assumed to continue when more than 50
+ // images were parsed, and "start" advances by 10 -- confirm these
+ // values against what the endpoint expects
+ if(count($out["image"]) > 50){
+
+ if(!isset($params["start"])){
+
+ $params["start"] = 10;
+ }else{
+
+ $params["start"] += 10;
+ }
+
+ $out["npt"] =
+ $this->backend
+ ->store(
+ json_encode($params),
+ "images", // same category name the token is read back with above
+ $proxy
+ );
+ }
+
+ return $out;
+ }
+
+ /**
+  * Turn a link taken from a Google result page into a clean target URL.
+  *
+  * Google redirect links (no host, or a "google." host) are unwrapped via
+  * their "imgurl" or "q" parameter. A few known sites are then normalized:
+  * mobile wikipedia/imdb/youtube hosts are rewritten to desktop hosts,
+  * referrer parameters are stripped from play.google.* and twitter.com
+  * links, and maps.google.* "maps?" links are reduced to their "daddr".
+  *
+  * @param string $url         Raw (HTML-encoded) href value.
+  * @param bool   $return_size When true, return an array with "url", "ref",
+  *                            "thumb_width", "thumb_height", "image_width"
+  *                            and "image_height" read from the ORIGINAL
+  *                            link's query string instead of a plain string.
+  * @return string|array
+  */
+ private function unshiturl($url, $return_size = false){
+
+ // decode
+ $url =
+ $this->fuckhtml
+ ->getTextContent(
+ $url
+ );
+
+ $url_parts = parse_url($url);
+
+ // $query keeps the parameters of the original link; it is read again
+ // at the bottom for $return_size and must not be overwritten
+ if(isset($url_parts["query"])){
+
+ parse_str($url_parts["query"], $query);
+ }else{
+
+ $query = [];
+ }
+
+ if(
+ !isset(
+ $url_parts["host"]
+ ) ||
+ stripos($url_parts["host"], "google.") !== false
+ ){
+
+ // no host, we have a tracking url
+ if(isset($query["imgurl"])){
+
+ $url = $query["imgurl"];
+ }
+ elseif(isset($query["q"])){
+
+ $url = $query["q"];
+ }
+ }
+
+ // removes the given parameter names from the query string of $target;
+ // the URL is returned untouched when none of them is present
+ $strip_params = function($target, $names){
+
+ $old_query = parse_url($target, PHP_URL_QUERY);
+
+ if(
+ !is_string($old_query) ||
+ $old_query === ""
+ ){
+
+ return $target;
+ }
+
+ parse_str($old_query, $parsed);
+ $changed = false;
+
+ foreach($names as $name){
+
+ if(isset($parsed[$name])){
+
+ unset($parsed[$name]);
+ $changed = true;
+ }
+ }
+
+ $pos = strpos($target, "?" . $old_query);
+
+ if(
+ $changed === false ||
+ $pos === false
+ ){
+
+ return $target;
+ }
+
+ $new_query = http_build_query($parsed);
+
+ // drop the "?" as well when no parameter is left
+ return
+ substr_replace(
+ $target,
+ $new_query === "" ? "" : "?" . $new_query,
+ $pos,
+ strlen($old_query) + 1
+ );
+ };
+
+ // rewrite URLs to remove extra tracking parameters
+ $domain = parse_url($url, PHP_URL_HOST);
+
+ if(!is_string($domain)){
+
+ // relative or malformed URL: none of the host rules can apply
+ $domain = "";
+ }
+
+ if(
+ preg_match(
+ '/wikipedia\.org$/',
+ $domain
+ )
+ ){
+
+ // rewrite wikipedia mobile URLs to desktop
+ $url =
+ $this->replacedomain(
+ $url,
+ preg_replace(
+ '/([a-z0-9]+)(\.m\.)/',
+ '$1.',
+ $domain
+ )
+ );
+ }
+
+ elseif(
+ preg_match(
+ '/imdb\.com$|youtube\.[^.]+$/',
+ $domain
+ )
+ ){
+
+ // rewrite imdb and youtube mobile URLs too
+ $url =
+ $this->replacedomain(
+ $url,
+ preg_replace(
+ '/^m\./',
+ "",
+ $domain
+ )
+ );
+
+ }
+
+ elseif(
+ preg_match(
+ '/play\.google\.[^.]+$/',
+ $domain
+ )
+ ){
+
+ // remove referrers from play.google.com
+ $url = $strip_params($url, ["referrer", "hl", "gl"]);
+ }
+
+ elseif(
+ preg_match(
+ '/twitter\.com$/',
+ $domain
+ )
+ ){
+ // remove more referrers from twitter.com
+ $url = $strip_params($url, ["ref_src"]);
+ }
+
+ elseif(
+ preg_match(
+ '/maps\.google\.[^.]+/',
+ $domain
+ )
+ ){
+
+ if(stripos($url, "maps?") !== false){
+
+ $u_query = parse_url($url, PHP_URL_QUERY);
+
+ if(is_string($u_query)){
+
+ parse_str($u_query, $u_query);
+
+ if(isset($u_query["daddr"])){
+
+ // keep only the destination address
+ $url =
+ "https://maps.google.com/maps?daddr=" .
+ urlencode($u_query["daddr"]);
+ }
+ }
+ }
+ }
+
+ if($return_size){
+
+ return [
+ "url" => $url,
+ "ref" => isset($query["imgrefurl"]) ? $query["imgrefurl"] : null,
+ "thumb_width" => isset($query["tbnw"]) ? (int)$query["tbnw"] : null,
+ "thumb_height" => isset($query["tbnh"]) ? (int)$query["tbnh"] : null,
+ "image_width" => isset($query["w"]) ? (int)$query["w"] : null,
+ "image_height" => isset($query["h"]) ? (int)$query["h"] : null
+ ];
+ }
+
+ return $url;
+ }
+
+ /**
+  * Replace the host of an absolute http(s) URL.
+  *
+  * Only the first "http(s)://host" occurrence is rewritten, so URLs
+  * embedded in the path or query string are left alone. URLs without an
+  * http(s) scheme are returned unchanged.
+  *
+  * @param string $url    URL to rewrite.
+  * @param string $domain Host to put in place of the current one.
+  * @return string
+  */
+ private function replacedomain($url, $domain){
+
+ // a callback is used instead of a '$1' . $domain replacement string:
+ // a domain starting with a digit would otherwise be read as part of
+ // the backreference number ("$1" . "1.x" => "$11.x")
+ return
+ preg_replace_callback(
+ '/(https?:\/\/)([^\/?#]+)/',
+ function($match) use ($domain){
+
+ return $match[1] . $domain;
+ },
+ $url,
+ 1
+ );
+ }
+
+ /**
+  * Strip surrounding whitespace, dots and ellipsis characters from a title.
+  *
+  * @param string $title Text to clean up.
+  * @return string       The text without leading/trailing " ", ".", tab,
+  *                      newline, carriage return, NUL, vertical tab or "…".
+  */
+ private function titledots($title){
+
+ $title = (string)$title;
+
+ // trim() works on single bytes, so listing the multibyte "…" there
+ // strips its individual bytes and can cut other UTF-8 characters in
+ // half (e.g. a leading "“"); match whole code points instead
+ $clean =
+ preg_replace(
+ '/^[ .\t\n\r\x00\x0B\x{2026}]+|[ .\t\n\r\x00\x0B\x{2026}]+\z/u',
+ "",
+ $title
+ );
+
+ if($clean === null){
+
+ // input is not valid UTF-8: fall back to a byte-safe trim
+ return trim($title, " .\t\n\r\0\x0B");
+ }
+
+ return $clean;
+ }
+
+ /**
+  * Convert a "h:m:s", "m:s" or "s" duration string into seconds.
+  *
+  * @param string $time Colon separated duration; split into at most three
+  *                     fields, each cast to int.
+  * @return int         Duration in seconds.
+  */
+ private function hms2int($time){
+
+ $seconds = 0;
+
+ // every field to the left is worth 60 times the one after it
+ foreach(explode(":", $time, 3) as $field){
+
+ $seconds = ($seconds * 60) + (int)$field;
+ }
+
+ return $seconds;
+ }
+
+ /**
+  * Abort when the currently loaded document contains a
+  * <form id="captcha-form"> element.
+  *
+  * @throws Exception When the captcha form is present.
+  */
+ private function detect_sorry(){
+
+ $form = $this->fuckhtml->getElementById("captcha-form", "form");
+
+ if($form === false){
+
+ // no captcha form on this page
+ return;
+ }
+
+ throw new Exception("Google returned a captcha");
+ }
+}
diff --git a/scraper/google_cse.php b/scraper/google_cse.php
new file mode 100644
index 0000000..02ab462
--- /dev/null
+++ b/scraper/google_cse.php
@@ -0,0 +1,1054 @@
+<?php
+
+class google_cse{
+
+ public const req_html = 0;
+ public const req_js = 1;
+
+ /**
+  * Load the helper libraries and create the backend and HTML parser
+  * used by this scraper.
+  */
+ public function __construct(){
+
+ 	// pull in both libraries first
+ 	include "lib/backend.php";
+ 	include "lib/fuckhtml.php";
+
+ 	// then instantiate them
+ 	$this->backend = new backend("google_cse");
+ 	$this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters supported for a search page.
+  *
+  * @param string $page "web" or "images"
+  * @return array|null  Filter definitions keyed by filter name; null for
+  *                     any other page
+  */
+ public function getfilters($page){
+
+ 	// filters shared by every page
+ 	$base = [
+ 		"country" => [ // sent as cr=country<CODE> (eg. cr=countryAF)
+ 			"display" => "Country",
+ 			"option" => [
+ 				"any" => "Any country",
+ 				"af" => "Afghanistan",
+ 				"al" => "Albania",
+ 				"dz" => "Algeria",
+ 				"as" => "American Samoa",
+ 				"ad" => "Andorra",
+ 				"ao" => "Angola",
+ 				"ai" => "Anguilla",
+ 				"aq" => "Antarctica",
+ 				"ag" => "Antigua and Barbuda",
+ 				"ar" => "Argentina",
+ 				"am" => "Armenia",
+ 				"aw" => "Aruba",
+ 				"au" => "Australia",
+ 				"at" => "Austria",
+ 				"az" => "Azerbaijan",
+ 				"bs" => "Bahamas",
+ 				"bh" => "Bahrain",
+ 				"bd" => "Bangladesh",
+ 				"bb" => "Barbados",
+ 				"by" => "Belarus",
+ 				"be" => "Belgium",
+ 				"bz" => "Belize",
+ 				"bj" => "Benin",
+ 				"bm" => "Bermuda",
+ 				"bt" => "Bhutan",
+ 				"bo" => "Bolivia",
+ 				"ba" => "Bosnia and Herzegovina",
+ 				"bw" => "Botswana",
+ 				"bv" => "Bouvet Island",
+ 				"br" => "Brazil",
+ 				"io" => "British Indian Ocean Territory",
+ 				"bn" => "Brunei Darussalam",
+ 				"bg" => "Bulgaria",
+ 				"bf" => "Burkina Faso",
+ 				"bi" => "Burundi",
+ 				"kh" => "Cambodia",
+ 				"cm" => "Cameroon",
+ 				"ca" => "Canada",
+ 				"cv" => "Cape Verde",
+ 				"ky" => "Cayman Islands",
+ 				"cf" => "Central African Republic",
+ 				"td" => "Chad",
+ 				"cl" => "Chile",
+ 				"cn" => "China",
+ 				"cx" => "Christmas Island",
+ 				"cc" => "Cocos (Keeling) Islands",
+ 				"co" => "Colombia",
+ 				"km" => "Comoros",
+ 				"cg" => "Congo",
+ 				"cd" => "Congo, the Democratic Republic",
+ 				"ck" => "Cook Islands",
+ 				"cr" => "Costa Rica",
+ 				"ci" => "Cote D'ivoire",
+ 				"hr" => "Croatia",
+ 				"cu" => "Cuba",
+ 				"cy" => "Cyprus",
+ 				"cz" => "Czech Republic",
+ 				"dk" => "Denmark",
+ 				"dj" => "Djibouti",
+ 				"dm" => "Dominica",
+ 				"do" => "Dominican Republic",
+ 				"ec" => "Ecuador",
+ 				"eg" => "Egypt",
+ 				"sv" => "El Salvador",
+ 				"gq" => "Equatorial Guinea",
+ 				"er" => "Eritrea",
+ 				"ee" => "Estonia",
+ 				"et" => "Ethiopia",
+ 				"fk" => "Falkland Islands (Malvinas)",
+ 				"fo" => "Faroe Islands",
+ 				"fj" => "Fiji",
+ 				"fi" => "Finland",
+ 				"fr" => "France",
+ 				"gf" => "French Guiana",
+ 				"pf" => "French Polynesia",
+ 				"tf" => "French Southern Territories",
+ 				"ga" => "Gabon",
+ 				"gm" => "Gambia",
+ 				"ge" => "Georgia",
+ 				"de" => "Germany",
+ 				"gh" => "Ghana",
+ 				"gi" => "Gibraltar",
+ 				"gr" => "Greece",
+ 				"gl" => "Greenland",
+ 				"gd" => "Grenada",
+ 				"gp" => "Guadeloupe",
+ 				"gu" => "Guam",
+ 				"gt" => "Guatemala",
+ 				"gn" => "Guinea",
+ 				"gw" => "Guinea-Bissau",
+ 				"gy" => "Guyana",
+ 				"ht" => "Haiti",
+ 				"hm" => "Heard Island and Mcdonald Islands",
+ 				"va" => "Holy See (Vatican City State)",
+ 				"hn" => "Honduras",
+ 				"hk" => "Hong Kong",
+ 				"hu" => "Hungary",
+ 				"is" => "Iceland",
+ 				"in" => "India",
+ 				"id" => "Indonesia",
+ 				"ir" => "Iran, Islamic Republic",
+ 				"iq" => "Iraq",
+ 				"ie" => "Ireland",
+ 				"il" => "Israel",
+ 				"it" => "Italy",
+ 				"jm" => "Jamaica",
+ 				"jp" => "Japan",
+ 				"jo" => "Jordan",
+ 				"kz" => "Kazakhstan",
+ 				"ke" => "Kenya",
+ 				"ki" => "Kiribati",
+ 				"kp" => "Korea, Democratic People's Republic",
+ 				"kr" => "Korea, Republic",
+ 				"kw" => "Kuwait",
+ 				"kg" => "Kyrgyzstan",
+ 				"la" => "Lao People's Democratic Republic",
+ 				"lv" => "Latvia",
+ 				"lb" => "Lebanon",
+ 				"ls" => "Lesotho",
+ 				"lr" => "Liberia",
+ 				"ly" => "Libyan Arab Jamahiriya",
+ 				"li" => "Liechtenstein",
+ 				"lt" => "Lithuania",
+ 				"lu" => "Luxembourg",
+ 				"mo" => "Macao",
+ 				"mk" => "Macedonia, the Former Yugoslav Republic",
+ 				"mg" => "Madagascar",
+ 				"mw" => "Malawi",
+ 				"my" => "Malaysia",
+ 				"mv" => "Maldives",
+ 				"ml" => "Mali",
+ 				"mt" => "Malta",
+ 				"mh" => "Marshall Islands",
+ 				"mq" => "Martinique",
+ 				"mr" => "Mauritania",
+ 				"mu" => "Mauritius",
+ 				"yt" => "Mayotte",
+ 				"mx" => "Mexico",
+ 				"fm" => "Micronesia, Federated States",
+ 				"md" => "Moldova, Republic",
+ 				"mc" => "Monaco",
+ 				"mn" => "Mongolia",
+ 				"ms" => "Montserrat",
+ 				"ma" => "Morocco",
+ 				"mz" => "Mozambique",
+ 				"mm" => "Myanmar",
+ 				"na" => "Namibia",
+ 				"nr" => "Nauru",
+ 				"np" => "Nepal",
+ 				"nl" => "Netherlands",
+ 				"an" => "Netherlands Antilles",
+ 				"nc" => "New Caledonia",
+ 				"nz" => "New Zealand",
+ 				"ni" => "Nicaragua",
+ 				"ne" => "Niger",
+ 				"ng" => "Nigeria",
+ 				"nu" => "Niue",
+ 				"nf" => "Norfolk Island",
+ 				"mp" => "Northern Mariana Islands",
+ 				"no" => "Norway",
+ 				"om" => "Oman",
+ 				"pk" => "Pakistan",
+ 				"pw" => "Palau",
+ 				"ps" => "Palestinian Territory, Occupied",
+ 				"pa" => "Panama",
+ 				"pg" => "Papua New Guinea",
+ 				"py" => "Paraguay",
+ 				"pe" => "Peru",
+ 				"ph" => "Philippines",
+ 				"pn" => "Pitcairn",
+ 				"pl" => "Poland",
+ 				"pt" => "Portugal",
+ 				"pr" => "Puerto Rico",
+ 				"qa" => "Qatar",
+ 				"re" => "Reunion",
+ 				"ro" => "Romania",
+ 				"ru" => "Russian Federation",
+ 				"rw" => "Rwanda",
+ 				"sh" => "Saint Helena",
+ 				"kn" => "Saint Kitts and Nevis",
+ 				"lc" => "Saint Lucia",
+ 				"pm" => "Saint Pierre and Miquelon",
+ 				"vc" => "Saint Vincent and the Grenadines",
+ 				"ws" => "Samoa",
+ 				"sm" => "San Marino",
+ 				"st" => "Sao Tome and Principe",
+ 				"sa" => "Saudi Arabia",
+ 				"sn" => "Senegal",
+ 				"cs" => "Serbia and Montenegro",
+ 				"sc" => "Seychelles",
+ 				"sl" => "Sierra Leone",
+ 				"sg" => "Singapore",
+ 				"sk" => "Slovakia",
+ 				"si" => "Slovenia",
+ 				"sb" => "Solomon Islands",
+ 				"so" => "Somalia",
+ 				"za" => "South Africa",
+ 				"gs" => "South Georgia and the South Sandwich Islands",
+ 				"es" => "Spain",
+ 				"lk" => "Sri Lanka",
+ 				"sd" => "Sudan",
+ 				"sr" => "Suriname",
+ 				"sj" => "Svalbard and Jan Mayen",
+ 				"sz" => "Swaziland",
+ 				"se" => "Sweden",
+ 				"ch" => "Switzerland",
+ 				"sy" => "Syrian Arab Republic",
+ 				"tw" => "Taiwan, Province of China",
+ 				"tj" => "Tajikistan",
+ 				"tz" => "Tanzania, United Republic",
+ 				"th" => "Thailand",
+ 				"tl" => "Timor-Leste",
+ 				"tg" => "Togo",
+ 				"tk" => "Tokelau",
+ 				"to" => "Tonga",
+ 				"tt" => "Trinidad and Tobago",
+ 				"tn" => "Tunisia",
+ 				"tr" => "Turkey",
+ 				"tm" => "Turkmenistan",
+ 				"tc" => "Turks and Caicos Islands",
+ 				"tv" => "Tuvalu",
+ 				"ug" => "Uganda",
+ 				"ua" => "Ukraine",
+ 				"ae" => "United Arab Emirates",
+ 				"uk" => "United Kingdom",
+ 				"us" => "United States",
+ 				"um" => "United States Minor Outlying Islands",
+ 				"uy" => "Uruguay",
+ 				"uz" => "Uzbekistan",
+ 				"vu" => "Vanuatu",
+ 				"ve" => "Venezuela",
+ 				"vn" => "Viet Nam",
+ 				"vg" => "Virgin Islands, British",
+ 				"vi" => "Virgin Islands, U.S.",
+ 				"wf" => "Wallis and Futuna",
+ 				"eh" => "Western Sahara",
+ 				"ye" => "Yemen",
+ 				"zm" => "Zambia",
+ 				"zw" => "Zimbabwe"
+ 			]
+ 		],
+ 		"nsfw" => [
+ 			"display" => "NSFW",
+ 			"option" => [
+ 				"yes" => "Yes", // safe=off
+ 				"no" => "No" // safe=active
+ 			]
+ 		],
+ 		"spellcheck" => [
+ 			// display undefined
+ 			"option" => [
+ 				"yes" => "Yes",
+ 				"no" => "No" // web() adds nfpr=1
+ 			]
+ 		]
+ 	];
+
+ 	switch($page){
+
+ 		case "web":
+ 			return array_merge(
+ 				$base,
+ 				[
+ 					"lang" => [ // lr=<lang> (prefix lang with "lang_")
+ 						"display" => "Language",
+ 						"option" => [
+ 							"any" => "Any language",
+ 							"ar" => "Arabic",
+ 							"bg" => "Bulgarian",
+ 							"ca" => "Catalan",
+ 							"cs" => "Czech",
+ 							"da" => "Danish",
+ 							"de" => "German",
+ 							"el" => "Greek",
+ 							"en" => "English",
+ 							"es" => "Spanish",
+ 							"et" => "Estonian",
+ 							"fi" => "Finnish",
+ 							"fr" => "French",
+ 							"hr" => "Croatian",
+ 							"hu" => "Hungarian",
+ 							"id" => "Indonesian",
+ 							"is" => "Icelandic",
+ 							"it" => "Italian",
+ 							"iw" => "Hebrew",
+ 							"ja" => "Japanese",
+ 							"ko" => "Korean",
+ 							"lt" => "Lithuanian",
+ 							"lv" => "Latvian",
+ 							"nl" => "Dutch",
+ 							"no" => "Norwegian",
+ 							"pl" => "Polish",
+ 							"pt" => "Portuguese",
+ 							"ro" => "Romanian",
+ 							"ru" => "Russian",
+ 							"sk" => "Slovak",
+ 							"sl" => "Slovenian",
+ 							"sr" => "Serbian",
+ 							"sv" => "Swedish",
+ 							"tr" => "Turkish",
+ 							"zh-CN" => "Chinese (Simplified)",
+ 							"zh-TW" => "Chinese (Traditional)"
+ 						]
+ 					],
+ 					"sort" => [ // sort=date, or empty for relevance
+ 						"display" => "Sort by",
+ 						"option" => [
+ 							"relevance" => "Relevance",
+ 							"date" => "Date"
+ 						]
+ 					],
+ 					"redundant" => [ // filter=1 / filter=0
+ 						"display" => "Remove redundant",
+ 						"option" => [
+ 							"yes" => "Yes",
+ 							"no" => "No",
+ 						]
+ 					]
+ 				]
+ 			);
+
+ 		case "images":
+ 			return array_merge(
+ 				$base,
+ 				[
+ 					"size" => [ // imgsz
+ 						"display" => "Size",
+ 						"option" => [
+ 							"any" => "Any size",
+ 							"l" => "Large",
+ 							"m" => "Medium",
+ 							"i" => "Icon",
+ 							"qsvga" => "Larger than 400x300",
+ 							"vga" => "Larger than 640x480",
+ 							"svga" => "Larger than 800x600",
+ 							"xga" => "Larger than 1024x768",
+ 							"2mp" => "Larger than 2MP",
+ 							"4mp" => "Larger than 4MP",
+ 							"6mp" => "Larger than 6MP",
+ 							"8mp" => "Larger than 8MP",
+ 							"10mp" => "Larger than 10MP",
+ 							"12mp" => "Larger than 12MP",
+ 							"15mp" => "Larger than 15MP",
+ 							"20mp" => "Larger than 20MP",
+ 							"40mp" => "Larger than 40MP",
+ 							"70mp" => "Larger than 70MP"
+ 						]
+ 					],
+ 					"color" => [ // imgc
+ 						"display" => "Color",
+ 						"option" => [
+ 							"any" => "Any color",
+ 							"color" => "Full color",
+ 							"bnw" => "Black & white", // sent as imgc=gray
+ 							"trans" => "Transparent",
+ 							// from here, imgcolor
+ 							"red" => "Red",
+ 							"orange" => "Orange",
+ 							"yellow" => "Yellow",
+ 							"green" => "Green",
+ 							"teal" => "Teal",
+ 							"blue" => "Blue",
+ 							"purple" => "Purple",
+ 							"pink" => "Pink",
+ 							"white" => "White",
+ 							"gray" => "Gray",
+ 							"black" => "Black",
+ 							"brown" => "Brown"
+ 						]
+ 					],
+ 					"format" => [ // as_filetype
+ 						"display" => "Format",
+ 						"option" => [
+ 							"any" => "Any format",
+ 							"jpg" => "JPG",
+ 							"gif" => "GIF",
+ 							"png" => "PNG",
+ 							"bmp" => "BMP",
+ 							"svg" => "SVG",
+ 							"webp" => "WEBP",
+ 							"ico" => "ICO",
+ 							"craw" => "RAW"
+ 						]
+ 					]
+ 				]
+ 			);
+ 	}
+
+ 	// any other page: no return value (null), as before
+ }
+
+ /**
+  * Perform a GET request and return the response body.
+  *
+  * @param mixed  $proxy   Proxy descriptor handed to backend::assign_proxy()
+  * @param string $url     Target URL without query string
+  * @param array  $get     Query parameters, appended when not empty
+  * @param int    $reqtype self::req_js for script-like headers, anything
+  *                        else for document navigation headers
+  * @return string|bool    Whatever curl_exec() returned
+  * @throws Exception      with the curl error message when the transfer fails
+  */
+ private function get($proxy, $url, $get = [], $reqtype = self::req_js){
+
+ 	if($get !== []){
+
+ 		$url = $url . "?" . http_build_query($get);
+ 	}
+
+ 	// pick the header set matching the kind of request being imitated
+ 	if($reqtype === self::req_js){
+
+ 		$request_headers = [
+ 			"User-Agent: " . config::USER_AGENT,
+ 			"Accept: */*",
+ 			"Accept-Language: en-US,en;q=0.5",
+ 			"Accept-Encoding: gzip",
+ 			"DNT: 1",
+ 			"Sec-GPC: 1",
+ 			"Alt-Used: cse.google.com",
+ 			"Connection: keep-alive",
+ 			"Referer: https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT,
+ 			"Sec-Fetch-Dest: script",
+ 			"Sec-Fetch-Mode: no-cors",
+ 			"Sec-Fetch-Site: same-origin",
+ 			"TE: trailers"
+ 		];
+ 	}else{
+
+ 		$request_headers = [
+ 			"User-Agent: " . config::USER_AGENT,
+ 			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
+ 			"Accept-Language: en-US,en;q=0.5",
+ 			"Accept-Encoding: gzip",
+ 			"DNT: 1",
+ 			"Sec-GPC: 1",
+ 			"Connection: keep-alive",
+ 			"Upgrade-Insecure-Requests: 1",
+ 			"Sec-Fetch-Dest: document",
+ 			"Sec-Fetch-Mode: navigate",
+ 			"Sec-Fetch-Site: none",
+ 			"Sec-Fetch-User: ?1",
+ 			"Priority: u=0, i"
+ 		];
+ 	}
+
+ 	$handle = curl_init();
+
+ 	// same options as before, applied one by one in the original order
+ 	$options = [
+ 		[CURLOPT_URL, $url],
+ 		[CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0], // http2 bypass
+ 		[CURLOPT_ENCODING, ""], // default encoding
+ 		[CURLOPT_HTTPHEADER, $request_headers],
+ 		[CURLOPT_RETURNTRANSFER, true],
+ 		[CURLOPT_SSL_VERIFYHOST, 2],
+ 		[CURLOPT_SSL_VERIFYPEER, true],
+ 		[CURLOPT_CONNECTTIMEOUT, 30],
+ 		[CURLOPT_TIMEOUT, 30]
+ 	];
+
+ 	foreach($options as $option){
+
+ 		curl_setopt($handle, $option[0], $option[1]);
+ 	}
+
+ 	$this->backend->assign_proxy($handle, $proxy);
+
+ 	$body = curl_exec($handle);
+
+ 	if(curl_errno($handle)){
+
+ 		throw new Exception(curl_error($handle));
+ 	}
+
+ 	curl_close($handle);
+
+ 	return $body;
+ }
+
+ /**
+  * Fetch one page of web results from the Google CSE element API.
+  *
+  * The first page is requested from https://cse.google.com/cse/element/v1
+  * with a freshly generated token. Its request parameters (minus gs_l,
+  * plus an incremented "start" offset) are stored in the backend so that
+  * the next page can replay them through the same proxy.
+  *
+  * @param array $get Search parameters: "s", "npt", "nsfw", "lang",
+  *                   "country", "redundant", "sort", "spellcheck"
+  * @return array     Result set with "spelling", "npt" and "web" filled in
+  * @throws Exception when the response cannot be parsed or Google reports
+  *                   an error
+  */
+ public function web($get){
+
+ 	if($get["npt"]){
+
+ 		// continuation: replay the parameters stored by the previous page
+ 		[$req_params, $proxy] =
+ 			$this->backend->get(
+ 				$get["npt"],
+ 				"web"
+ 			);
+
+ 		$req_params =
+ 			json_decode(
+ 				$req_params,
+ 				true
+ 			);
+
+ 		$json =
+ 			$this->get(
+ 				$proxy,
+ 				"https://cse.google.com/cse/element/v1",
+ 				$req_params,
+ 				self::req_js
+ 			);
+
+ 	}else{
+
+ 		$proxy = $this->backend->get_ip();
+ 		$params = $this->generate_token($proxy);
+
+ 		//$json = file_get_contents("scraper/google_cse.txt");
+ 		$req_params = [
+ 			"rsz" => "filtered_cse",
+ 			"num" => 20,
+ 			"hl" => "en",
+ 			"source" => "gcsc",
+ 			"cselibv" => $params["lib"],
+ 			"cx" => config::GOOGLE_CX_ENDPOINT,
+ 			"q" => $get["s"],
+ 			"safe" => $get["nsfw"] == "yes" ? "off" : "active",
+ 			"cse_tok" => $params["token"],
+ 			"lr" => $get["lang"] == "any" ? "" : "lang_" . $get["lang"],
+ 			"cr" => $get["country"] == "any" ? "" : "country" . strtoupper($get["country"]),
+ 			"gl" => "",
+ 			"filter" => $get["redundant"] == "yes" ? "1" : "0",
+ 			"sort" => $get["sort"] == "relevance" ? "" : "date",
+ 			"as_oq" => "",
+ 			"as_sitesearch" => "",
+ 			"exp" => "cc,apo",
+ 			"oq" => $get["s"],
+ 			"gs_l" => "partner-web.3...33294.34225.3.34597.26.11.0.0.0.0.201.1132.6j4j1.11.0.csems,nrl=10...0....1.34.partner-web..34.19.1897.FKEeG5yh2iw",
+ 			"cseclient" => "hosted-page-client",
+ 			"callback" => "google.search.cse.api" . random_int(4000, 99999),
+ 			"rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=0&gsc.q=" . $get["s"] . "&gsc.sort="
+ 		];
+
+ 		if($get["spellcheck"] == "no"){
+
+ 			$req_params["nfpr"] = "1";
+ 		}
+
+ 		$json =
+ 			$this->get(
+ 				$proxy,
+ 				"https://cse.google.com/cse/element/v1",
+ 				$req_params,
+ 				self::req_js
+ 			);
+
+ 		// gs_l is only sent with the first page
+ 		unset($req_params["gs_l"]);
+ 		$req_params["start"] = 0;
+ 	}
+
+ 	// offset of the next page
+ 	$req_params["start"] += 20;
+
+ 	// the payload is JSONP: callback({...});
+ 	if(
+ 		!preg_match(
+ 			'/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i',
+ 			$json,
+ 			$payload
+ 		)
+ 	){
+
+ 		throw new Exception("Failed to grep JSON");
+ 	}
+
+ 	$json = json_decode($payload[1], true);
+
+ 	if(!is_array($json)){
+
+ 		// previously a decode failure was silently treated as "no results"
+ 		throw new Exception("Failed to decode JSON");
+ 	}
+
+ 	if(isset($json["error"])){
+
+ 		if(isset($json["error"]["errors"][0]["message"])){
+
+ 			throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]);
+ 		}
+
+ 		if(isset($json["error"]["message"])){
+
+ 			throw new Exception("Google returned an error: " . $json["error"]["message"]);
+ 		}
+
+ 		throw new Exception("Google returned an error object");
+ 	}
+
+ 	$out = [
+ 		"status" => "ok",
+ 		"spelling" => [
+ 			"type" => "no_correction",
+ 			"using" => null,
+ 			"correction" => null
+ 		],
+ 		"npt" => null,
+ 		"answer" => [],
+ 		"web" => [],
+ 		"image" => [],
+ 		"video" => [],
+ 		"news" => [],
+ 		"related" => []
+ 	];
+
+ 	// detect word correction
+ 	if(isset($json["spelling"]["type"])){
+
+ 		switch($json["spelling"]["type"]){
+
+ 			case "DYM": // did you mean? @TODO fix wording
+ 				$type = "including";
+ 				break;
+
+ 			case "SPELL_CORRECTED_RESULTS": // not many results for
+ 				$type = "not_many";
+ 				break;
+
+ 			default:
+ 				$type = "not_many";
+ 		}
+
+ 		// stays null when none of the known fields is present
+ 		// (used to be an undefined variable)
+ 		$using = null;
+
+ 		if(isset($json["spelling"]["originalQuery"])){
+
+ 			$using = $json["spelling"]["originalQuery"];
+ 		}
+ 		elseif(isset($json["spelling"]["anchor"])){
+
+ 			$using = html_entity_decode(strip_tags($json["spelling"]["anchor"]));
+ 		}
+ 		elseif(isset($json["spelling"]["originalAnchor"])){
+
+ 			$using = html_entity_decode(strip_tags($json["spelling"]["originalAnchor"]));
+ 		}
+
+ 		$out["spelling"] = [
+ 			"type" => $type,
+ 			"using" => $using,
+ 			"correction" => $json["spelling"]["correctedQuery"] ?? null
+ 		];
+ 	}
+
+ 	if(!isset($json["results"])){
+
+ 		return $out;
+ 	}
+
+ 	foreach($json["results"] as $result){
+
+ 		if(!isset($result["unescapedUrl"])){
+
+ 			// a result without a link cannot be displayed
+ 			continue;
+ 		}
+
+ 		// get date from description ("<date> ... <text>")
+ 		$description =
+ 			explode(
+ 				"...",
+ 				trim($result["contentNoFormatting"] ?? "", " ."),
+ 				2
+ 			);
+
+ 		if(count($description) === 2){
+
+ 			if($date = strtotime($description[0])){
+
+ 				$description = ltrim($description[1]);
+ 			}else{
+
+ 				$date = null;
+ 				$description = implode("...", $description);
+ 			}
+ 		}else{
+
+ 			$description = implode("...", $description);
+ 			$date = null;
+ 		}
+
+ 		$description = trim($description, " .");
+
+ 		// get thumbnails
+ 		if(isset($result["richSnippet"]["cseThumbnail"]["src"])){
+
+ 			$thumb = [
+ 				"url" => $this->unshit_thumb($result["richSnippet"]["cseThumbnail"]["src"]),
+ 				"ratio" => "1:1"
+ 			];
+ 		}
+ 		elseif(isset($result["richSnippet"]["cseImage"]["src"])){
+
+ 			$thumb = [
+ 				"url" => $result["richSnippet"]["cseImage"]["src"],
+ 				"ratio" => "1:1"
+ 			];
+ 		}else{
+
+ 			$thumb = [
+ 				"url" => null,
+ 				"ratio" => null
+ 			];
+ 		}
+
+ 		if($thumb["url"] !== null){
+
+ 			$found_size = false;
+
+ 			// find correct ratio
+ 			if(
+ 				isset($result["richSnippet"]["cseThumbnail"]["width"]) &&
+ 				isset($result["richSnippet"]["cseThumbnail"]["height"])
+ 			){
+ 				$found_size = true;
+ 				$width = (int)$result["richSnippet"]["cseThumbnail"]["width"];
+ 				$height = (int)$result["richSnippet"]["cseThumbnail"]["height"];
+ 			}
+ 			elseif(
+ 				isset($result["richSnippet"]["metatags"]["ogImageWidth"]) &&
+ 				isset($result["richSnippet"]["metatags"]["ogImageHeight"])
+ 			){
+ 				$found_size = true;
+ 				$width = (int)$result["richSnippet"]["metatags"]["ogImageWidth"];
+ 				$height = (int)$result["richSnippet"]["metatags"]["ogImageHeight"];
+ 			}
+
+ 			// calculate rounded ratio; a zero or bogus height keeps the
+ 			// default 1:1 instead of dividing by zero
+ 			if(
+ 				$found_size &&
+ 				$height > 0
+ 			){
+
+ 				$aspect_ratio = $width / $height;
+
+ 				if($aspect_ratio >= 1.5){
+
+ 					$thumb["ratio"] = "16:9";
+ 				}
+ 				elseif($aspect_ratio >= 0.8){
+
+ 					$thumb["ratio"] = "1:1";
+ 				}else{
+
+ 					$thumb["ratio"] = "9:16";
+ 				}
+ 			}
+ 		}
+
+ 		$out["web"][] = [
+ 			"title" => rtrim($result["titleNoFormatting"] ?? "", " ."),
+ 			"description" => $description,
+ 			"url" => $result["unescapedUrl"],
+ 			"date" => $date,
+ 			"type" => "web",
+ 			"thumb" => $thumb,
+ 			"sublink" => [],
+ 			"table" => []
+ 		];
+ 	}
+
+ 	// detect next page
+ 	if(
+ 		isset($json["cursor"]["isExactTotalResults"]) || // detects last page
+ 		!isset($json["cursor"]["pages"]) // detects no results on page
+ 	){
+
+ 		return $out;
+ 	}
+
+ 	// get next page
+ 	$out["npt"] =
+ 		$this->backend->store(
+ 			json_encode(
+ 				$req_params
+ 			),
+ 			"web",
+ 			$proxy
+ 		);
+
+ 	return $out;
+ }
+
+ /**
+  * Fetch one page of image results from the Google CSE element API.
+  *
+  * @param array $get Search parameters: "s", "npt", "nsfw", "country",
+  *                   "size", "format", "color"
+  * @return array     ["status" => "ok", "npt" => string|null, "image" => array]
+  * @throws Exception when the response cannot be parsed or Google reports
+  *                   an error
+  */
+ public function image($get){
+
+ 	if($get["npt"]){
+
+ 		// continuation: replay the parameters stored by the previous page
+ 		[$req_params, $proxy] =
+ 			$this->backend->get(
+ 				$get["npt"],
+ 				"images"
+ 			);
+
+ 		$req_params =
+ 			json_decode(
+ 				$req_params,
+ 				true
+ 			);
+
+ 		$json =
+ 			$this->get(
+ 				$proxy,
+ 				"https://cse.google.com/cse/element/v1",
+ 				$req_params,
+ 				self::req_js
+ 			);
+
+ 	}else{
+
+ 		$proxy = $this->backend->get_ip();
+ 		$params = $this->generate_token($proxy);
+
+ 		//$json = file_get_contents("scraper/google_cse.txt");
+ 		$req_params = [
+ 			"rsz" => "filtered_cse",
+ 			"num" => 20,
+ 			"hl" => "en",
+ 			"source" => "gcsc",
+ 			"cselibv" => $params["lib"],
+ 			"searchtype" => "image",
+ 			"cx" => config::GOOGLE_CX_ENDPOINT,
+ 			"q" => $get["s"],
+ 			"safe" => $get["nsfw"] == "yes" ? "off" : "active", // nsfw
+ 			"cse_tok" => $params["token"],
+ 			"exp" => "cc,apo",
+ 			"cseclient" => "hosted-page-client",
+ 			"callback" => "google.search.cse.api" . random_int(4000, 99999),
+ 			"rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=1&gsc.q=" . $get["s"] . "&gsc.sort="
+ 		];
+
+ 		// add additional hidden filters
+
+ 		// country
+ 		if($get["country"] != "any"){
+
+ 			$req_params["cr"] = "country" . strtoupper($get["country"]);
+ 		}
+
+ 		// size
+ 		if($get["size"] != "any"){
+
+ 			$req_params["imgsz"] = $get["size"];
+ 		}
+
+ 		// format
+ 		if($get["format"] != "any"){
+
+ 			$req_params["as_filetype"] = $get["format"];
+ 		}
+
+ 		// color
+ 		if($get["color"] != "any"){
+
+ 			if(
+ 				$get["color"] == "color" ||
+ 				$get["color"] == "trans"
+ 			){
+
+ 				$req_params["imgc"] = $get["color"];
+ 			}elseif($get["color"] == "bnw"){
+
+ 				$req_params["imgc"] = "gray";
+ 			}else{
+
+ 				$req_params["imgcolor"] = $get["color"];
+ 			}
+ 		}
+
+ 		$json =
+ 			$this->get(
+ 				$proxy,
+ 				"https://cse.google.com/cse/element/v1",
+ 				$req_params,
+ 				self::req_js
+ 			);
+
+ 		$req_params["start"] = 0;
+ 	}
+
+ 	// offset of the next page
+ 	$req_params["start"] += 20;
+
+ 	// the payload is JSONP: callback({...});
+ 	if(
+ 		!preg_match(
+ 			'/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i',
+ 			$json,
+ 			$payload
+ 		)
+ 	){
+
+ 		throw new Exception("Failed to grep JSON");
+ 	}
+
+ 	$json = json_decode($payload[1], true);
+
+ 	if(!is_array($json)){
+
+ 		throw new Exception("Failed to decode JSON");
+ 	}
+
+ 	if(isset($json["error"])){
+
+ 		if(isset($json["error"]["errors"][0]["message"])){
+
+ 			throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]);
+ 		}
+
+ 		if(isset($json["error"]["message"])){
+
+ 			throw new Exception("Google returned an error: " . $json["error"]["message"]);
+ 		}
+
+ 		throw new Exception("Google returned an error object");
+ 	}
+
+ 	$out = [
+ 		"status" => "ok",
+ 		"npt" => null,
+ 		"image" => []
+ 	];
+
+ 	if(!isset($json["results"])){
+
+ 		// nothing on this page
+ 		return $out;
+ 	}
+
+ 	// Results are collected BEFORE the last-page check, as in web().
+ 	// The check used to come first, which discarded the images of the
+ 	// final page.
+ 	foreach($json["results"] as $result){
+
+ 		$out["image"][] = [
+ 			"title" => rtrim($result["titleNoFormatting"], " ."),
+ 			"source" => [
+ 				[
+ 					"url" => $result["unescapedUrl"],
+ 					"width" => (int)$result["width"],
+ 					"height" => (int)$result["height"]
+ 				],
+ 				[
+ 					"url" => $result["tbLargeUrl"],
+ 					"width" => (int)$result["tbLargeWidth"],
+ 					"height" => (int)$result["tbLargeHeight"]
+ 				]
+ 			],
+ 			"url" => $result["originalContextUrl"]
+ 		];
+ 	}
+
+ 	// detect next page
+ 	if(
+ 		isset($json["cursor"]["isExactTotalResults"]) || // detects last page
+ 		!isset($json["cursor"]["pages"]) // detects no results on page
+ 	){
+
+ 		return $out;
+ 	}
+
+ 	// get next page
+ 	$out["npt"] =
+ 		$this->backend->store(
+ 			json_encode(
+ 				$req_params
+ 			),
+ 			"images",
+ 			$proxy
+ 		);
+
+ 	return $out;
+ }
+
+ /**
+  * Obtain the CSE token and library version needed to query the
+  * element API.
+  *
+  * Loads the hosted search page, extracts the relative URL of its
+  * bootstrap script, then reads the parameter object passed to that
+  * script.
+  *
+  * @param mixed $proxy Proxy descriptor used for both requests
+  * @return array       ["token" => string, "lib" => string]
+  * @throws Exception   on captcha, or when any extraction step fails
+  */
+ private function generate_token($proxy){
+
+ 	$html =
+ 		$this->get(
+ 			$proxy,
+ 			"https://cse.google.com/cse",
+ 			[
+ 				"cx" => config::GOOGLE_CX_ENDPOINT
+ 			],
+ 			self::req_html
+ 		);
+
+ 	// detect captcha
+ 	$this->fuckhtml->load($html);
+
+ 	$title =
+ 		$this->fuckhtml
+ 		->getElementsByTagName(
+ 			"title"
+ 		);
+
+ 	if(
+ 		count($title) !== 0 &&
+ 		$title[0]["innerHTML"] == "302 Moved"
+ 	){
+
+ 		throw new Exception("Google returned a captcha");
+ 	}
+
+ 	// get token
+ 	preg_match(
+ 		'/relativeUrl=\'([^\']+)\';/i',
+ 		$html,
+ 		$js_uri
+ 	);
+
+ 	if(!isset($js_uri[1])){
+
+ 		throw new Exception("Failed to grep search token");
+ 	}
+
+ 	$js_uri =
+ 		$this->fuckhtml
+ 		->parseJsString(
+ 			$js_uri[1]
+ 		);
+
+ 	// get parameters
+ 	$js =
+ 		$this->get(
+ 			$proxy,
+ 			"https://cse.google.com" . $js_uri,
+ 			[],
+ 			self::req_js
+ 		);
+
+ 	preg_match(
+ 		'/}\)\(({[\S\s]+})\);/',
+ 		$js,
+ 		$json
+ 	);
+
+ 	if(!isset($json[1])){
+
+ 		throw new Exception("Failed to grep JSON parameters");
+ 	}
+
+ 	$json = json_decode($json[1], true);
+
+ 	// a failed decode or a changed payload used to yield null values
+ 	// that were then sent to Google; fail early instead
+ 	if(
+ 		!isset($json["cse_token"]) ||
+ 		!isset($json["cselibVersion"])
+ 	){
+
+ 		throw new Exception("Failed to decode JSON parameters");
+ 	}
+
+ 	return [
+ 		"token" => $json["cse_token"],
+ 		"lib" => $json["cselibVersion"]
+ 	];
+ }
+
+ /**
+  * Reduce a gstatic thumbnail URL to its "q" parameter only.
+  *
+  * Example input:
+  * https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
+  * URLs on other hosts (eg. i.ytimg.com) are returned untouched.
+  *
+  * @param string $url Thumbnail URL
+  * @return string     Simplified URL, or $url when it is not a gstatic
+  *                    thumbnail carrying a "q" parameter
+  */
+ private function unshit_thumb($url){
+
+ 	$parts = parse_url($url);
+
+ 	if(
+ 		is_array($parts) && // parse_url() returns false on malformed URLs
+ 		isset($parts["host"]) &&
+ 		isset($parts["query"]) && // was read without checking
+ 		preg_match(
+ 			'/tbn.*\.gstatic\.com/',
+ 			$parts["host"]
+ 		)
+ 	){
+
+ 		parse_str($parts["query"], $params);
+
+ 		if(
+ 			isset($params["q"]) &&
+ 			is_string($params["q"]) // q[]=... would decode to an array
+ 		){
+
+ 			return "https://" . $parts["host"] . "/images?q=" . $params["q"];
+ 		}
+ 	}
+
+ 	return $url;
+ }
+}
diff --git a/scraper/greppr.php b/scraper/greppr.php
new file mode 100644
index 0000000..fc8511c
--- /dev/null
+++ b/scraper/greppr.php
@@ -0,0 +1,435 @@
+<?php
+
+class greppr{
+
+ /**
+  * Load the helper libraries and create the backend and HTML parser
+  * used by this scraper.
+  */
+ public function __construct(){
+
+ 	// pull in both libraries first
+ 	include "lib/backend.php";
+ 	include "lib/fuckhtml.php";
+
+ 	// then instantiate them
+ 	$this->backend = new backend("greppr");
+ 	$this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters supported for a search page.
+  *
+  * @param string $page Page name (unused)
+  * @return array       Always empty: no filters are exposed
+  */
+ public function getfilters($page){
+
+ 	$filters = [];
+
+ 	return $filters;
+ }
+
+ /**
+  * Perform a GET or POST request and return headers and body.
+  *
+  * @param mixed        $proxy  Proxy descriptor handed to backend::assign_proxy()
+  * @param string       $url    Target URL
+  * @param array        $get    Query parameters (GET) or form fields (POST)
+  * @param string|false $cookie PHPSESSID value, or false to send no cookie
+  *                             (GET only)
+  * @param bool         $post   true to send $get as a POST body; now
+  *                             defaults to false, since a required parameter
+  *                             after optional ones is deprecated in PHP 8
+  * @return array               ["headers" => array keyed by lower-cased
+  *                             header name, "data" => response body]
+  * @throws Exception           with the curl error message when the
+  *                             transfer fails
+  */
+ private function get($proxy, $url, $get = [], $cookie = false, $post = false){
+
+ 	$curlproc = curl_init();
+
+ 	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+ 	if($post === false){
+
+ 		if($get !== []){
+
+ 			$url .= "?" . http_build_query($get);
+ 		}
+
+ 		if($cookie === false){
+
+ 			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ 				["User-Agent: " . config::USER_AGENT,
+ 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ 				"Accept-Language: en-US,en;q=0.5",
+ 				"Accept-Encoding: gzip",
+ 				"DNT: 1",
+ 				"Connection: keep-alive",
+ 				"Upgrade-Insecure-Requests: 1",
+ 				"Sec-Fetch-Dest: document",
+ 				"Sec-Fetch-Mode: navigate",
+ 				"Sec-Fetch-Site: none",
+ 				"Sec-Fetch-User: ?1"]
+ 			);
+ 		}else{
+
+ 			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ 				["User-Agent: " . config::USER_AGENT,
+ 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ 				"Accept-Language: en-US,en;q=0.5",
+ 				"Accept-Encoding: gzip, deflate, br, zstd",
+ 				"DNT: 1",
+ 				"Sec-GPC: 1",
+ 				"Connection: keep-alive",
+ 				"Referer: https://greppr.org/search",
+ 				"Cookie: PHPSESSID=$cookie",
+ 				"Upgrade-Insecure-Requests: 1",
+ 				"Sec-Fetch-Dest: document",
+ 				"Sec-Fetch-Mode: navigate",
+ 				"Sec-Fetch-Site: same-origin",
+ 				"Sec-Fetch-User: ?1",
+ 				"Priority: u=0, i"]
+ 			);
+ 		}
+ 	}else{
+
+ 		$get = http_build_query($get);
+
+ 		curl_setopt($curlproc, CURLOPT_POST, true);
+ 		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+
+ 		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ 			["User-Agent: " . config::USER_AGENT,
+ 			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ 			"Accept-Language: en-US,en;q=0.5",
+ 			"Accept-Encoding: gzip, deflate, br, zstd",
+ 			"Content-Type: application/x-www-form-urlencoded",
+ 			"Content-Length: " . strlen($get),
+ 			"Origin: https://greppr.org",
+ 			"DNT: 1",
+ 			"Sec-GPC: 1",
+ 			"Connection: keep-alive",
+ 			"Referer: https://greppr.org/",
+ 			"Cookie: PHPSESSID=$cookie",
+ 			"Upgrade-Insecure-Requests: 1",
+ 			"Sec-Fetch-Dest: document",
+ 			"Sec-Fetch-Mode: navigate",
+ 			"Sec-Fetch-Site: same-origin",
+ 			"Sec-Fetch-User: ?1",
+ 			"Priority: u=0, i"]
+ 		);
+ 	}
+
+ 	// The URL is set only now: it used to be handed to curl before the
+ 	// query string was appended, so GET parameters were never sent.
+ 	curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ 	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ 	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ 	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ 	$this->backend->assign_proxy($curlproc, $proxy);
+
+ 	$headers = [];
+
+ 	// collect response headers; a repeated header keeps its last value
+ 	curl_setopt(
+ 		$curlproc,
+ 		CURLOPT_HEADERFUNCTION,
+ 		function($curlproc, $header) use (&$headers){
+
+ 			$len = strlen($header);
+ 			$header = explode(':', $header, 2);
+
+ 			if(count($header) < 2){
+
+ 				// ignore invalid headers
+ 				return $len;
+ 			}
+
+ 			$headers[strtolower(trim($header[0]))] = trim($header[1]);
+
+ 			return $len;
+ 		}
+ 	);
+
+ 	$data = curl_exec($curlproc);
+
+ 	if(curl_errno($curlproc)){
+
+ 		// read the message before releasing the handle
+ 		$error = curl_error($curlproc);
+ 		curl_close($curlproc);
+
+ 		throw new Exception($error);
+ 	}
+
+ 	curl_close($curlproc);
+
+ 	return [
+ 		"headers" => $headers,
+ 		"data" => $data
+ 	];
+ }
+
+ /**
+  * Fetch one page of web results from greppr.org.
+  *
+  * The first page requires a round trip to the front page to collect the
+  * hidden form tokens and the PHPSESSID cookie, followed by a POST to
+  * /search. Later pages are plain GET requests to the pagination link
+  * stored by the previous page, using the same cookie and proxy.
+  *
+  * @param array $get           Search parameters: "s" and "npt"
+  * @param bool  $first_attempt Unused; kept for interface compatibility
+  * @return array               Result set with "npt" and "web" filled in
+  * @throws Exception           when a request fails or tokens are missing
+  */
+ public function web($get, $first_attempt = true){
+
+ 	if($get["npt"]){
+
+ 		[$q, $proxy] = $this->backend->get($get["npt"], "web");
+
+ 		$tokens = json_decode($q, true);
+
+ 		if(
+ 			!isset($tokens["get"]) ||
+ 			!isset($tokens["cookie"])
+ 		){
+
+ 			throw new Exception("Failed to fetch search page");
+ 		}
+
+ 		//
+ 		// Get paginated page
+ 		//
+ 		try{
+
+ 			$html = $this->get(
+ 				$proxy,
+ 				"https://greppr.org" . $tokens["get"],
+ 				[],
+ 				$tokens["cookie"],
+ 				false
+ 			);
+ 		}catch(Exception $error){
+
+ 			// keep the original cause attached
+ 			throw new Exception("Failed to fetch search page", 0, $error);
+ 		}
+
+ 	}else{
+
+ 		$search = $get["s"];
+ 		if(strlen($search) === 0){
+
+ 			throw new Exception("Search term is empty!");
+ 		}
+
+ 		$proxy = $this->backend->get_ip();
+
+ 		//
+ 		// get token
+ 		//
+ 		try{
+
+ 			$html =
+ 				$this->get(
+ 					$proxy,
+ 					"https://greppr.org",
+ 					[],
+ 					false,
+ 					false
+ 				);
+ 		}catch(Exception $error){
+
+ 			throw new Exception("Failed to fetch search tokens", 0, $error);
+ 		}
+
+ 		//
+ 		// Parse token
+ 		//
+ 		$this->fuckhtml->load($html["data"]);
+
+ 		$tokens = [];
+
+ 		$inputs =
+ 			$this->fuckhtml
+ 			->getElementsByTagName(
+ 				"input"
+ 			);
+
+ 		foreach($inputs as $input){
+
+ 			if(!isset($input["attributes"]["name"])){
+
+ 				continue;
+ 			}
+
+ 			switch($input["attributes"]["name"]){
+
+ 				case "var1":
+ 				case "var2":
+ 				case "n":
+ 					if(!isset($input["attributes"]["value"])){
+
+ 						// token field without a value: caught below
+ 						break;
+ 					}
+
+ 					$tokens[$input["attributes"]["name"]] =
+ 						$this->fuckhtml
+ 						->getTextContent(
+ 							$input["attributes"]["value"]
+ 						);
+ 					break;
+
+ 				default:
+ 					// any other named input is taken as the search field
+ 					$tokens["req"] =
+ 						$this->fuckhtml
+ 						->getTextContent(
+ 							$input["attributes"]["name"]
+ 						);
+ 					break;
+ 			}
+ 		}
+
+ 		// get cookie
+ 		if(
+ 			!isset($html["headers"]["set-cookie"]) ||
+ 			!preg_match(
+ 				'/PHPSESSID=([^;]+)/',
+ 				$html["headers"]["set-cookie"],
+ 				$cookie
+ 			)
+ 		){
+
+ 			// server sent no cookie, or an unexpected one
+ 			throw new Exception("Got malformed cookie");
+ 		}
+
+ 		$tokens["cookie"] = $cookie[1];
+
+ 		// $tokens is always an array, so the old "=== false" test could
+ 		// never fire; check the fields that are actually needed
+ 		foreach(["var1", "var2", "n", "req"] as $required){
+
+ 			if(!isset($tokens[$required])){
+
+ 				throw new Exception("Failed to grep search tokens");
+ 			}
+ 		}
+
+ 		//
+ 		// Get initial search page
+ 		//
+ 		try{
+
+ 			$html = $this->get(
+ 				$proxy,
+ 				"https://greppr.org/search",
+ 				[
+ 					"var1" => $tokens["var1"],
+ 					"var2" => $tokens["var2"],
+ 					$tokens["req"] => $search,
+ 					"n" => $tokens["n"]
+ 				],
+ 				$tokens["cookie"],
+ 				true
+ 			);
+ 		}catch(Exception $error){
+
+ 			throw new Exception("Failed to fetch search page", 0, $error);
+ 		}
+ 	}
+
+ 	//$html = file_get_contents("scraper/greppr.html");
+ 	//$this->fuckhtml->load($html);
+ 	$this->fuckhtml->load($html["data"]);
+
+ 	$out = [
+ 		"status" => "ok",
+ 		"spelling" => [
+ 			"type" => "no_correction",
+ 			"using" => null,
+ 			"correction" => null
+ 		],
+ 		"npt" => null,
+ 		"answer" => [],
+ 		"web" => [],
+ 		"image" => [],
+ 		"video" => [],
+ 		"news" => [],
+ 		"related" => []
+ 	];
+
+ 	// get results for later (the parser is re-loaded below)
+ 	$results =
+ 		$this->fuckhtml
+ 		->getElementsByClassName(
+ 			"result",
+ 			"div"
+ 		);
+
+ 	// check for next page
+ 	$next_elem =
+ 		$this->fuckhtml
+ 		->getElementsByClassName(
+ 			"pagination",
+ 			"ul"
+ 		);
+
+ 	if(count($next_elem) !== 0){
+
+ 		$this->fuckhtml->load($next_elem[0]);
+
+ 		$as =
+ 			$this->fuckhtml
+ 			->getElementsByClassName(
+ 				"page-link",
+ 				"a"
+ 			);
+
+ 		// the link with href="#" is taken as the current page: the link
+ 		// right after it is the next page
+ 		$break = false;
+ 		foreach($as as $a){
+
+ 			if(!isset($a["attributes"]["href"])){
+
+ 				continue;
+ 			}
+
+ 			if($break === true){
+
+ 				$out["npt"] =
+ 					$this->backend->store(
+ 						json_encode([
+ 							"get" =>
+ 								$this->fuckhtml
+ 								->getTextContent(
+ 									$a["attributes"]["href"]
+ 								),
+ 							"cookie" => $tokens["cookie"]
+ 						]),
+ 						"web",
+ 						$proxy
+ 					);
+ 				break;
+ 			}
+
+ 			if($a["attributes"]["href"] == "#"){
+
+ 				$break = true;
+ 			}
+ 		}
+ 	}
+
+ 	// scrape results
+ 	foreach($results as $result){
+
+ 		$this->fuckhtml->load($result);
+
+ 		$links =
+ 			$this->fuckhtml
+ 			->getElementsByTagName(
+ 				"a"
+ 			);
+
+ 		if(
+ 			count($links) === 0 ||
+ 			!isset($links[0]["attributes"]["href"])
+ 		){
+
+ 			// no link: not a usable result
+ 			continue;
+ 		}
+
+ 		$a = $links[0];
+
+ 		$description =
+ 			$this->fuckhtml
+ 			->getElementsByClassName(
+ 				"highlightedDesc",
+ 				"p"
+ 			);
+
+ 		if(count($description) === 0){
+
+ 			$description = null;
+ 		}else{
+
+ 			$description =
+ 				$this->limitstrlen(
+ 					$this->fuckhtml
+ 					->getTextContent(
+ 						$description[0]
+ 					)
+ 				);
+ 		}
+
+ 		// the date is read from the last <p>, after the first ":"
+ 		$date = null;
+
+ 		$paragraphs =
+ 			$this->fuckhtml
+ 			->getElementsByTagName(
+ 				"p"
+ 			);
+
+ 		if(count($paragraphs) !== 0){
+
+ 			$date_parts =
+ 				explode(
+ 					":",
+ 					$this->fuckhtml
+ 					->getTextContent(
+ 						$paragraphs[count($paragraphs) - 1]["innerHTML"]
+ 					)
+ 				);
+
+ 			if(isset($date_parts[1])){
+
+ 				$date = strtotime($date_parts[1]);
+
+ 				if($date === false){
+
+ 					// unparsable date: report "no date", not false
+ 					$date = null;
+ 				}
+ 			}
+ 		}
+
+ 		$out["web"][] = [
+ 			"title" =>
+ 				$this->fuckhtml
+ 				->getTextContent(
+ 					$a["innerHTML"]
+ 				),
+ 			"description" => $description,
+ 			"url" =>
+ 				$this->fuckhtml
+ 				->getTextContent(
+ 					$a["attributes"]["href"]
+ 				),
+ 			"date" => $date,
+ 			"type" => "web",
+ 			"thumb" => [
+ 				"url" => null,
+ 				"ratio" => null
+ 			],
+ 			"sublink" => [],
+ 			"table" => []
+ 		];
+ 	}
+
+ 	return $out;
+ }
+
+ /**
+  * Shorten a text to its first wrapped line.
+  *
+  * The text is word-wrapped at 300 columns and everything from the first
+  * line break onwards (inserted by the wrap or already present) is dropped.
+  *
+  * @param string $text Text to shorten
+  * @return string      First line of the wrapped text
+  */
+ private function limitstrlen($text){
+
+ 	$wrapped = wordwrap($text, 300, "\n");
+ 	$cut = strpos($wrapped, "\n");
+
+ 	if($cut === false){
+
+ 		// a single line: nothing to drop
+ 		return $wrapped;
+ 	}
+
+ 	return substr($wrapped, 0, $cut);
+ }
+}
diff --git a/scraper/imgur.php b/scraper/imgur.php
new file mode 100644
index 0000000..e41f4c2
--- /dev/null
+++ b/scraper/imgur.php
@@ -0,0 +1,282 @@
+<?php
+
+class imgur{
+
+ // Loads the HTML parser and the backend helper this scraper relies on.
+ public function __construct(){
+
+ // include_once instead of include: these files are expected to declare
+ // the fuckhtml/backend classes, so pulling them in a second time within
+ // the same request would abort with a "cannot redeclare class" error
+ include_once "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+
+ include_once "lib/backend.php";
+ $this->backend = new backend("imgur");
+ }
+
+ public function getfilters($page){
+
+ // $page is not consulted: the same four filters are always returned
+
+ // first path segment of the search URL: /score/
+ $sort = [
+ "display" => "Sort by",
+ "option" => [
+ "score" => "Highest scoring",
+ "relevance" => "Most relevant",
+ "time" => "Newest first"
+ ]
+ ];
+
+ // second path segment of the search URL: /score/day/
+ $time = [
+ "display" => "Time posted",
+ "option" => [
+ "all" => "All time",
+ "day" => "Today",
+ "week" => "This week",
+ "month" => "This month",
+ "year" => "This year"
+ ]
+ ];
+
+ // q_type query parameter
+ $format = [
+ "display" => "Format",
+ "option" => [
+ "any" => "Any format",
+ "jpg" => "JPG",
+ "png" => "PNG",
+ "gif" => "GIF",
+ "anigif" => "Animated GIF",
+ "album" => "Albums"
+ ]
+ ];
+
+ // q_size_px query parameter
+ $size = [
+ "display" => "Size",
+ "option" => [
+ "any" => "Any size",
+ "small" => "Small (500px or less)",
+ "med" => "Medium (500px to 2000px)",
+ "big" => "Big (2000px to 5000px)",
+ "lrg" => "Large (5000px to 10000px)",
+ "huge" => "Huge (10000px and above)"
+ ]
+ ];
+
+ return [
+ "sort" => $sort,
+ "time" => $time,
+ "format" => $format,
+ "size" => $size
+ ];
+ }
+
+ // Performs a GET request with browser-like XHR headers and returns the
+ // raw response body.
+ //
+ // $proxy proxy descriptor, handed to backend->assign_proxy()
+ // $url URL to fetch, without a query string
+ // $get query parameters; when non-empty they are appended to $url
+ // after a leading "scrolled" flag
+ //
+ // Throws Exception with the curl error message when the transfer fails.
+ private function get($proxy, $url, $get = []){
+
+ $curlproc = curl_init();
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?scrolled&" . $get;
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1",
+ "Referer: https://imgur.com/search/",
+ "Connection: keep-alive",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-origin",
+ "TE: trailers",
+ "X-Requested-With: XMLHttpRequest"]
+ );
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ // read the message first, then release the handle so a failed
+ // transfer does not leave it open
+ $error = curl_error($curlproc);
+ curl_close($curlproc);
+
+ throw new Exception($error);
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ // Runs an imgur image search and returns the result set.
+ //
+ // $get request parameters. Reads "npt" (next page token) and, when no
+ // token is given, "s" (search term), "sort", "time", "format"
+ // and "size".
+ //
+ // Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
+ // Throws Exception on an empty search term, on a failed fetch, or when
+ // imgur answers with JSON instead of the expected HTML fragment.
+ public function image($get){
+
+ if($get["npt"]){
+
+ // next page: every parameter of the original search was stored
+ // alongside the proxy that served it
+ [$filter, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
+ );
+
+ $filter = json_decode($filter, true);
+
+ // peel off the bookkeeping keys; what remains in $filter is the
+ // query string to send
+ $search = $filter["s"];
+ unset($filter["s"]);
+
+ $sort = $filter["sort"];
+ unset($filter["sort"]);
+
+ $time = $filter["time"];
+ unset($filter["time"]);
+
+ $format = $filter["format"];
+ unset($filter["format"]);
+
+ $size = $filter["size"];
+ unset($filter["size"]);
+
+ $page = $filter["page"];
+ unset($filter["page"]);
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
+ $sort = $get["sort"];
+ $time = $get["time"];
+ $format = $get["format"];
+ $size = $get["size"];
+ $page = 0;
+
+ $filter = [
+ "q" => $search
+ ];
+
+ if($format != "any"){
+
+ $filter["q_type"] = $format;
+ }
+
+ if($size != "any"){
+
+ $filter["q_size_px"] = $size;
+ $filter["q_size_is_mpx"] = "off";
+ }
+ }
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "image" => []
+ ];
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://imgur.com/search/$sort/$time/page/$page",
+ $filter
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch HTML");
+ }
+
+ // a successful search answers with HTML; a body that decodes as JSON
+ // is treated as an error report
+ $json = json_decode($html, true);
+
+ if($json){
+
+ // {"data":{"error":"Imgur is temporarily over capacity. Please try again later."},"success":false,"status":403}
+
+ if(isset($json["data"]["error"])){
+
+ // stripos() returns the match offset, which is 0 (falsy) when
+ // the message starts with the needle: compare against false
+ if(stripos($json["data"]["error"], "capacity") !== false){
+
+ throw new Exception("Imgur IP blocked this 4get instance or request proxy. Try again");
+ }
+ }
+
+ throw new Exception("Imgur returned an unknown error (IP ban?)");
+ }
+
+ $this->fuckhtml->load($html);
+
+ $posts =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "post",
+ "div"
+ );
+
+ foreach($posts as $post){
+
+ $this->fuckhtml->load($post);
+
+ $image =
+ $this->fuckhtml
+ ->getElementsByTagName("img");
+
+ if(count($image) === 0){
+
+ // post without a thumbnail, nothing to show
+ continue;
+ }
+
+ $image = $image[0];
+
+ // the src is protocol-relative; its last 5 characters are dropped
+ // (presumably a size letter plus ".jpg" -- confirm against live
+ // markup) so the suffixes below can be appended
+ $image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);
+
+ $out["image"][] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $image["attributes"]["alt"]
+ ),
+ "source" => [
+ [
+ "url" => $image_url . ".jpg",
+ "width" => null,
+ "height" => null
+ ],
+ [
+ "url" => $image_url . "m.jpg",
+ "width" => null,
+ "height" => null
+ ]
+ ],
+ "url" =>
+ "https://imgur.com" .
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "image-list-link",
+ "a"
+ )
+ [0]
+ ["attributes"]
+ ["href"]
+ )
+ ];
+ }
+
+ if(isset($out["image"][0])){
+
+ // store nextpage: only offered when this page returned results
+ $filter["s"] = $search;
+ $filter["sort"] = $sort;
+ $filter["time"] = $time;
+ $filter["format"] = $format;
+ $filter["size"] = $size;
+ $filter["page"] = $page + 1;
+
+ $out["npt"] =
+ $this->backend->store(
+ json_encode($filter),
+ "images",
+ $proxy
+ );
+ }
+
+ return $out;
+ }
+}
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
new file mode 100644
index 0000000..8fcd9fc
--- /dev/null
+++ b/scraper/marginalia.php
@@ -0,0 +1,580 @@
+<?php
+
+class marginalia{
+ // Loads the anubis helper, the HTML parser and the backend helper.
+ public function __construct(){
+
+ // include_once throughout: these files are expected to declare the
+ // classes instantiated below, so a second plain include in the same
+ // request would abort with a "cannot redeclare class" error
+ include_once "lib/anubis.php";
+ $this->anubis = new anubis();
+
+ include_once "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+
+ include_once "lib/backend.php";
+ $this->backend = new backend("marginalia");
+ }
+
+ public function getfilters($page){
+
+ // option list shared by the three yes/no switches
+ $toggle = [
+ "no" => "No",
+ "yes" => "Yes"
+ ];
+
+ $filters = [];
+
+ // these three switches are only offered when no API key is configured
+ if(config::MARGINALIA_API_KEY === null){
+
+ $filters["adtech"] = [
+ "display" => "Reduce adtech",
+ "option" => $toggle
+ ];
+
+ $filters["recent"] = [
+ "display" => "Recent results",
+ "option" => $toggle
+ ];
+
+ $filters["intitle"] = [
+ "display" => "Search in title",
+ "option" => $toggle
+ ];
+ }
+
+ // the remaining filters are always offered
+ $filters["format"] = [
+ "display" => "Format",
+ "option" => [
+ "any" => "Any format",
+ "html5" => "html5",
+ "xhtml" => "xhtml",
+ "html123" => "html123"
+ ]
+ ];
+
+ $filters["file"] = [
+ "display" => "Filetype",
+ "option" => [
+ "any" => "Any filetype",
+ "nomedia" => "Deny media",
+ "media" => "Contains media",
+ "audio" => "Contains audio",
+ "video" => "Contains video",
+ "archive" => "Contains archive",
+ "document" => "Contains document"
+ ]
+ ];
+
+ $filters["javascript"] = [
+ "display" => "Javascript",
+ "option" => [
+ "any" => "Allow JS",
+ "deny" => "Deny JS",
+ "require" => "Require JS"
+ ]
+ ];
+
+ $filters["trackers"] = [
+ "display" => "Trackers",
+ "option" => [
+ "any" => "Allow trackers",
+ "deny" => "Deny trackers",
+ "require" => "Require trackers"
+ ]
+ ];
+
+ $filters["cookies"] = [
+ "display" => "Cookies",
+ "option" => [
+ "any" => "Allow cookies",
+ "deny" => "Deny cookies",
+ "require" => "Require cookies"
+ ]
+ ];
+
+ $filters["affiliate"] = [
+ "display" => "Affiliate links in body",
+ "option" => [
+ "any" => "Allow affiliate links",
+ "deny" => "Deny affiliate links",
+ "require" => "Require affiliate links"
+ ]
+ ];
+
+ return $filters;
+ }
+
+ // Performs a GET request with browser-like headers.
+ //
+ // $proxy proxy descriptor, handed to backend->assign_proxy()
+ // $url URL to fetch
+ // $get query parameters, appended to $url when non-empty
+ // $get_cookies selects the mode:
+ // 0 collect the Set-Cookie headers of the response and
+ // return them as "name=value;name2=value2"
+ // 1 (default) send no cookie, return the response body
+ // string sent verbatim as the Cookie header, return the body
+ //
+ // Throws Exception with the curl error message when the transfer fails.
+ private function get($proxy, $url, $get = [], $get_cookies = 1){
+
+ $curlproc = curl_init();
+
+ // strict checks on purpose: with switch's loose matching a cookie
+ // string compares equal to 0 on PHP < 8 and would be mistaken for
+ // the "collect cookies" mode
+ $collect_cookies = $get_cookies === 0;
+ $cookies = "";
+ $cookies_tmp = [];
+
+ if($collect_cookies){
+
+ curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+ // curl expects the number of bytes handled back
+ $length = strlen($header);
+
+ $header = explode(":", $header, 2);
+
+ if(
+ count($header) === 2 &&
+ trim(strtolower($header[0])) == "set-cookie"
+ ){
+
+ $cookie_tmp = explode("=", trim($header[1]), 2);
+
+ // ignore malformed cookies that carry no "="
+ if(count($cookie_tmp) === 2){
+
+ // keep the value only, drop attributes after ";"
+ $cookies_tmp[trim($cookie_tmp[0])] =
+ explode(";", $cookie_tmp[1], 2)[0];
+ }
+ }
+
+ return $length;
+ });
+
+ }elseif(
+ is_string($get_cookies) &&
+ $get_cookies !== ""
+ ){
+
+ $cookies = "Cookie: " . $get_cookies;
+ }
+
+ $headers = [
+ "User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1"
+ ];
+
+ // only add the cookie line when there is one, rather than handing
+ // curl an empty header string
+ if($cookies !== ""){
+
+ $headers[] = $cookies;
+ }
+
+ $headers[] = "Connection: keep-alive";
+ $headers[] = "Upgrade-Insecure-Requests: 1";
+ $headers[] = "Sec-Fetch-Dest: document";
+ $headers[] = "Sec-Fetch-Mode: navigate";
+ $headers[] = "Sec-Fetch-Site: none";
+ $headers[] = "Sec-Fetch-User: ?1";
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ // read the message first, then release the handle
+ $error = curl_error($curlproc);
+ curl_close($curlproc);
+
+ throw new Exception($error);
+ }
+
+ // the handle is no longer needed in either mode
+ curl_close($curlproc);
+
+ if($collect_cookies){
+
+ $cookie = [];
+
+ foreach($cookies_tmp as $key => $value){
+
+ $cookie[] = $key . "=" . $value;
+ }
+
+ return implode(";", $cookie);
+ }
+
+ return $data;
+ }
+
+ // Runs a marginalia web search, through the API when an API key is
+ // configured and by scraping old-search.marginalia.nu otherwise.
+ //
+ // $get request parameters. Reads "s" (search term), "npt" (next page
+ // token), "format", "file", "javascript", "trackers", "cookies",
+ // "affiliate" and, on the scraping path, "adtech", "recent" and
+ // "intitle".
+ //
+ // Returns the result structure initialised in $out below; only "web" and
+ // "npt" are ever filled in by this method.
+ // Throws Exception on an empty search term, on a failed request, on an
+ // undecodable API response or when the API key is rate limited.
+ public function web($get){
+
+ $search = [$get["s"]];
+ if(strlen($get["s"]) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $format = $get["format"];
+ $file = $get["file"];
+
+ // the allow/deny/require filters become search operators appended to
+ // the query; "deny" negates the operator with a leading "-"
+ foreach(
+ [
+ "javascript" => $get["javascript"],
+ "trackers" => $get["trackers"],
+ "cookies" => $get["cookies"],
+ "affiliate" => $get["affiliate"]
+ ]
+ as $key => $value
+ ){
+
+ if($value == "any"){ continue; }
+
+ switch($key){
+
+ case "javascript": $str = "js:true"; break;
+ case "trackers": $str = "special:tracking"; break;
+ case "cookies": $str = "special:cookies"; break;
+ case "affiliate": $str = "special:affiliate"; break;
+ }
+
+ if($value == "deny"){
+ $str = "-" . $str;
+ }
+
+ $search[] = $str;
+ }
+
+ if($format != "any"){
+
+ $search[] = "format:$format";
+ }
+
+ switch($file){
+
+ case "any": break;
+ case "nomedia": $search[] = "-special:media"; break;
+ case "media": $search[] = "special:media"; break;
+
+ default:
+ $search[] = "file:$file";
+ }
+
+ $search = implode(" ", $search);
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ // API scraper
+ if(config::MARGINALIA_API_KEY !== null){
+
+ try{
+ $json =
+ $this->get(
+ $this->backend->get_ip(), // no nextpage
+ "https://api.marginalia-search.com/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
+ [
+ "count" => 20
+ ]
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get JSON");
+ }
+
+ if($json == "Slow down"){
+
+ throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
+ }
+
+ $json = json_decode($json, true);
+
+ if(!is_array($json)){
+
+ // the body was not a JSON document (error page, truncated
+ // response, ...): report it instead of iterating over null
+ throw new Exception("Failed to decode JSON");
+ }
+
+ // a response without a "results" key yields no results
+ foreach(($json["results"] ?? []) as $result){
+
+ $out["web"][] = [
+ "title" => $result["title"],
+ "description" => str_replace("\n", " ", $result["description"]),
+ "url" => $result["url"],
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+ }
+
+ return $out;
+ }
+
+ // HTML parser
+ $proxy = $this->backend->get_ip();
+
+ //
+ // Bypass anubis check
+ //
+ /*
+ if(($anubis_key = apcu_fetch("marginalia_cookie")) === false){
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/search",
+ [
+ "query" => $search
+ ]
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get anubis challenge");
+ }
+
+ try{
+
+ $anubis_data = $this->anubis->scrape($html);
+ }catch(Exception $error){
+
+ throw new Exception($error);
+ }
+
+ // send anubis response & get cookies
+ // https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge?response=0000018966b086834f738bacba6031028adb5aa875974ead197a8b75778baf3a&nonce=39947&redir=https%3A%2F%2Fold-search.marginalia.nu%2F&elapsedTime=1164
+
+ try{
+
+ $anubis_key =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge",
+ [
+ "response" => $anubis_data["response"],
+ "nonce" => $anubis_data["nonce"],
+ "redir" => "https://old-search.marginalia.nu/",
+ "elapsedTime" => random_int(1000, 2000)
+ ],
+ 0
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to submit anubis challenge");
+ }
+
+ apcu_store("marginalia_cookie", $anubis_key);
+ }*/
+
+ if($get["npt"]){
+
+ // next page: the token holds the query string of the next page
+ // link and the proxy that served the previous page
+ [$params, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "web"
+ );
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/search?" . $params,
+ [],
+ //$anubis_key
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ }else{
+ $params = [
+ "query" => $search
+ ];
+
+ // map the yes/no switches to the query parameters of the search
+ // form
+ foreach(["adtech", "recent", "intitle"] as $v){
+
+ if($get[$v] == "yes"){
+
+ switch($v){
+
+ case "adtech": $params["adtech"] = "reduce"; break;
+ case "recent": $params["recent"] = "recent"; break;
+ // this label used to be a second "adtech", which made
+ // the "Search in title" switch unreachable
+ case "intitle": $params["searchTitle"] = "title"; break;
+ }
+ }
+ }
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/search",
+ $params,
+ //$anubis_key
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+ }
+
+ $this->fuckhtml->load($html);
+
+ $sections =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "card search-result",
+ "section"
+ );
+
+ foreach($sections as $section){
+
+ // scope the following lookups to this result card
+ $this->fuckhtml->load($section);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "title",
+ "a"
+ )[0];
+
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "description",
+ "p"
+ );
+
+ if(count($description) !== 0){
+
+ $description =
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ );
+ }else{
+
+ $description = null;
+ }
+
+ // links listed under "additional-results" become sublinks
+ $sublinks = [];
+ $sublink_html =
+ $this->fuckhtml
+ ->getElementsByClassName("additional-results");
+
+ if(count($sublink_html) !== 0){
+
+ $this->fuckhtml->load($sublink_html[0]);
+
+ $links =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($links as $link){
+
+ $sublinks[] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ ),
+ "date" => null,
+ "description" => null,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $link["attributes"]["href"]
+ )
+ ];
+ }
+ }
+
+ $out["web"][] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $title
+ ),
+ "description" => $description,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $title["attributes"]["href"]
+ ),
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => $sublinks,
+ "table" => []
+ ];
+ }
+
+ // get next page
+ $this->fuckhtml->load($html);
+
+ $pagination =
+ $this->fuckhtml
+ ->getElementsByAttributeValue(
+ "aria-label",
+ "pagination",
+ "nav"
+ );
+
+ if(count($pagination) === 0){
+
+ // no pagination
+ return $out;
+ }
+
+ $this->fuckhtml->load($pagination[0]);
+
+ $pages =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "page-link",
+ "a"
+ );
+
+ $found_current_page = false;
+
+ foreach($pages as $page){
+
+ if(
+ stripos(
+ $page["attributes"]["class"],
+ "active"
+ ) !== false
+ ){
+
+ $found_current_page = true;
+ continue;
+ }
+
+ if($found_current_page){
+
+ // we found current page index, and we iterated over
+ // the next page <a>
+
+ $out["npt"] =
+ $this->backend->store(
+ parse_url(
+ $page["attributes"]["href"],
+ PHP_URL_QUERY
+ ),
+ "web",
+ $proxy
+ );
+ break;
+ }
+ }
+
+ return $out;
+ }
+}
+
diff --git a/scraper/mojeek.php b/scraper/mojeek.php
new file mode 100644
index 0000000..2939be5
--- /dev/null
+++ b/scraper/mojeek.php
@@ -0,0 +1,1194 @@
+<?php
+
+class mojeek{
+ // Loads the HTML parser and the backend helper this scraper relies on.
+ public function __construct(){
+
+ // include_once instead of include: these files are expected to declare
+ // the fuckhtml/backend classes, so pulling them in a second time within
+ // the same request would abort with a "cannot redeclare class" error
+ include_once "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+
+ include_once "lib/backend.php";
+ $this->backend = new backend("mojeek");
+ }
+
+ public function getfilters($page){
+ // Returns the filter definitions for the requested search page; each filter maps to a display label and its selectable options.
+ switch($page){
+ // "web" offers five filters, "news" offers none; any other page leaves the switch without a return value
+ case "web":
+ return [
+ "focus" => [ // web() sends this as foc_t, except "blogs" which sets fmt=sst and sst=1
+ "display" => "Focus",
+ "option" => [
+ "any" => "No focus",
+ "blogs" => "Blogs",
+ "Dictionary" => "Dictionary",
+ "Recipes" => "Recipes",
+ "Time" => "Time",
+ "Weather" => "Weather"
+ ]
+ ],
+ "lang" => [ // web() sends this as lb unless it is "any"
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "af" => "Afrikaans",
+ "sq" => "Albanian",
+ "an" => "Aragonese",
+ "ay" => "Aymara",
+ "bi" => "Bislama",
+ "br" => "Breton",
+ "ca" => "Catalan",
+ "kw" => "Cornish",
+ "co" => "Corsican",
+ "hr" => "Croatian",
+ "da" => "Danish",
+ "nl" => "Dutch",
+ "dz" => "Dzongkha",
+ "en" => "English",
+ "fj" => "Fijian",
+ "fi" => "Finnish",
+ "fr" => "French",
+ "gd" => "Gaelic",
+ "gl" => "Galician",
+ "de" => "German",
+ "ht" => "Haitian",
+ "io" => "Ido",
+ "id" => "Indonesian",
+ "ia" => "Interlingua",
+ "ie" => "Interlingue",
+ "ga" => "Irish",
+ "it" => "Italian",
+ "rw" => "Kinyarwanda",
+ "la" => "Latin",
+ "li" => "Limburgish",
+ "lb" => "Luxembourgish",
+ "no" => "Norwegian",
+ "nb" => "Norwegian Bokmål",
+ "nn" => "Norwegian Nynorsk",
+ "oc" => "Occitan (post 1500)",
+ "pl" => "Polish",
+ "pt" => "Portuguese",
+ "rm" => "Romansh",
+ "rn" => "Rundi",
+ "sg" => "Sango",
+ "so" => "Somali",
+ "es" => "Spanish",
+ "sw" => "Swahili",
+ "ss" => "Swati",
+ "sv" => "Swedish",
+ "ty" => "Tahitian",
+ "to" => "Tonga (Tonga Islands)",
+ "ts" => "Tsonga",
+ "vo" => "Volapük",
+ "wa" => "Walloon",
+ "cy" => "Welsh",
+ "xh" => "Xhosa",
+ "zu" => "Zulu"
+ ]
+ ],
+ "country" => [ // web() sends this as arc; "any" is sent as "none"
+ "display" => "Country",
+ "option" => [
+ "any" => "No location bias",
+ "af" => "Afghanistan",
+ "ax" => "Åland Islands",
+ "al" => "Albania",
+ "dz" => "Algeria",
+ "as" => "American Samoa",
+ "ad" => "Andorra",
+ "ao" => "Angola",
+ "ai" => "Anguilla",
+ "aq" => "Antarctica",
+ "ag" => "Antigua and Barbuda",
+ "ar" => "Argentina",
+ "am" => "Armenia",
+ "aw" => "Aruba",
+ "au" => "Australia",
+ "at" => "Austria",
+ "az" => "Azerbaijan",
+ "bs" => "Bahamas",
+ "bh" => "Bahrain",
+ "bd" => "Bangladesh",
+ "bb" => "Barbados",
+ "by" => "Belarus",
+ "be" => "Belgium",
+ "bz" => "Belize",
+ "bj" => "Benin",
+ "bm" => "Bermuda",
+ "bt" => "Bhutan",
+ "bo" => "Bolivia (Plurinational State of)",
+ "bq" => "Bonaire, Sint Eustatius and Saba",
+ "ba" => "Bosnia and Herzegovina",
+ "bw" => "Botswana",
+ "bv" => "Bouvet Island",
+ "br" => "Brazil",
+ "io" => "British Indian Ocean Territory",
+ "bn" => "Brunei Darussalam",
+ "bg" => "Bulgaria",
+ "bf" => "Burkina Faso",
+ "bi" => "Burundi",
+ "cv" => "Cabo Verde",
+ "kh" => "Cambodia",
+ "cm" => "Cameroon",
+ "ca" => "Canada",
+ "ky" => "Cayman Islands",
+ "cf" => "Central African Republic",
+ "td" => "Chad",
+ "cl" => "Chile",
+ "cn" => "China",
+ "cx" => "Christmas Island",
+ "cc" => "Cocos (Keeling) Islands",
+ "co" => "Colombia",
+ "km" => "Comoros",
+ "cg" => "Congo",
+ "cd" => "Congo (Democratic Republic of the)",
+ "ck" => "Cook Islands",
+ "cr" => "Costa Rica",
+ "ci" => "Côte d'Ivoire",
+ "hr" => "Croatia",
+ "cu" => "Cuba",
+ "cw" => "Curaçao",
+ "cy" => "Cyprus",
+ "cz" => "Czechia",
+ "dk" => "Denmark",
+ "dj" => "Djibouti",
+ "dm" => "Dominica",
+ "do" => "Dominican Republic",
+ "ec" => "Ecuador",
+ "eg" => "Egypt",
+ "sv" => "El Salvador",
+ "gq" => "Equatorial Guinea",
+ "er" => "Eritrea",
+ "ee" => "Estonia",
+ "et" => "Ethiopia",
+ "fk" => "Falkland Islands (Malvinas)",
+ "fo" => "Faroe Islands",
+ "fj" => "Fiji",
+ "fi" => "Finland",
+ "fr" => "France",
+ "gf" => "French Guiana",
+ "pf" => "French Polynesia",
+ "tf" => "French Southern Territories",
+ "ga" => "Gabon",
+ "gm" => "Gambia",
+ "ge" => "Georgia",
+ "de" => "Germany",
+ "gh" => "Ghana",
+ "gi" => "Gibraltar",
+ "gr" => "Greece",
+ "gl" => "Greenland",
+ "gd" => "Grenada",
+ "gp" => "Guadeloupe",
+ "gu" => "Guam",
+ "gt" => "Guatemala",
+ "gg" => "Guernsey",
+ "gn" => "Guinea",
+ "gw" => "Guinea-Bissau",
+ "gy" => "Guyana",
+ "ht" => "Haiti",
+ "hm" => "Heard Island and McDonald Islands",
+ "va" => "Holy See",
+ "hn" => "Honduras",
+ "hk" => "Hong Kong",
+ "hu" => "Hungary",
+ "is" => "Iceland",
+ "in" => "India",
+ "id" => "Indonesia",
+ "ir" => "Iran (Islamic Republic of)",
+ "iq" => "Iraq",
+ "ie" => "Ireland",
+ "im" => "Isle of Man",
+ "il" => "Israel",
+ "it" => "Italy",
+ "jm" => "Jamaica",
+ "jp" => "Japan",
+ "je" => "Jersey",
+ "jo" => "Jordan",
+ "kz" => "Kazakhstan",
+ "ke" => "Kenya",
+ "ki" => "Kiribati",
+ "kp" => "Korea (Democratic People's Republic of)",
+ "kr" => "Korea (Republic of)",
+ "kw" => "Kuwait",
+ "kg" => "Kyrgyzstan",
+ "la" => "Lao People's Democratic Republic",
+ "lv" => "Latvia",
+ "lb" => "Lebanon",
+ "ls" => "Lesotho",
+ "lr" => "Liberia",
+ "ly" => "Libya",
+ "li" => "Liechtenstein",
+ "lt" => "Lithuania",
+ "lu" => "Luxembourg",
+ "mo" => "Macao",
+ "mk" => "Macedonia (the former Yugoslav Republic of)",
+ "mg" => "Madagascar",
+ "mw" => "Malawi",
+ "my" => "Malaysia",
+ "mv" => "Maldives",
+ "ml" => "Mali",
+ "mt" => "Malta",
+ "mh" => "Marshall Islands",
+ "mq" => "Martinique",
+ "mr" => "Mauritania",
+ "mu" => "Mauritius",
+ "yt" => "Mayotte",
+ "mx" => "Mexico",
+ "fm" => "Micronesia (Federated States of)",
+ "md" => "Moldova (Republic of)",
+ "mc" => "Monaco",
+ "mn" => "Mongolia",
+ "me" => "Montenegro",
+ "ms" => "Montserrat",
+ "ma" => "Morocco",
+ "mz" => "Mozambique",
+ "mm" => "Myanmar",
+ "na" => "Namibia",
+ "nr" => "Nauru",
+ "np" => "Nepal",
+ "nl" => "Netherlands",
+ "nc" => "New Caledonia",
+ "nz" => "New Zealand",
+ "ni" => "Nicaragua",
+ "ne" => "Niger",
+ "ng" => "Nigeria",
+ "nu" => "Niue",
+ "nf" => "Norfolk Island",
+ "mp" => "Northern Mariana Islands",
+ "no" => "Norway",
+ "om" => "Oman",
+ "pk" => "Pakistan",
+ "pw" => "Palau",
+ "ps" => "Palestine, State of",
+ "pa" => "Panama",
+ "pg" => "Papua New Guinea",
+ "py" => "Paraguay",
+ "pe" => "Peru",
+ "ph" => "Philippines",
+ "pn" => "Pitcairn",
+ "pl" => "Poland",
+ "pt" => "Portugal",
+ "pr" => "Puerto Rico",
+ "qa" => "Qatar",
+ "re" => "Réunion",
+ "ro" => "Romania",
+ "ru" => "Russian Federation",
+ "rw" => "Rwanda",
+ "bl" => "Saint Barthélemy",
+ "sh" => "Saint Helena, Ascension and Tristan da Cunha",
+ "kn" => "Saint Kitts and Nevis",
+ "lc" => "Saint Lucia",
+ "mf" => "Saint Martin (French part)",
+ "pm" => "Saint Pierre and Miquelon",
+ "vc" => "Saint Vincent and the Grenadines",
+ "ws" => "Samoa",
+ "sm" => "San Marino",
+ "st" => "Sao Tome and Principe",
+ "sa" => "Saudi Arabia",
+ "sn" => "Senegal",
+ "rs" => "Serbia",
+ "sc" => "Seychelles",
+ "sl" => "Sierra Leone",
+ "sg" => "Singapore",
+ "sx" => "Sint Maarten (Dutch part)",
+ "sk" => "Slovakia",
+ "si" => "Slovenia",
+ "sb" => "Solomon Islands",
+ "so" => "Somalia",
+ "za" => "South Africa",
+ "gs" => "South Georgia and South Sandwich Islands",
+ "ss" => "South Sudan",
+ "es" => "Spain",
+ "lk" => "Sri Lanka",
+ "sd" => "Sudan",
+ "sr" => "Suriname",
+ "sj" => "Svalbard and Jan Mayen",
+ "sz" => "Swaziland",
+ "se" => "Sweden",
+ "ch" => "Switzerland",
+ "sy" => "Syrian Arab Republic",
+ "tw" => "Taiwan",
+ "tj" => "Tajikistan",
+ "tz" => "Tanzania, United Republic of",
+ "th" => "Thailand",
+ "tl" => "Timor-Leste",
+ "tg" => "Togo",
+ "tk" => "Tokelau",
+ "to" => "Tonga",
+ "tt" => "Trinidad and Tobago",
+ "tn" => "Tunisia",
+ "tr" => "Turkey",
+ "tm" => "Turkmenistan",
+ "tc" => "Turks and Caicos Islands",
+ "tv" => "Tuvalu",
+ "ug" => "Uganda",
+ "ua" => "Ukraine",
+ "ae" => "United Arab Emirates",
+ "gb" => "United Kingdom",
+ "us" => "United States of America",
+ "um" => "United States Minor Outlying Islands",
+ "uy" => "Uruguay",
+ "uz" => "Uzbekistan",
+ "vu" => "Vanuatu",
+ "ve" => "Venezuela (Bolivarian Republic of)",
+ "vn" => "Viet Nam",
+ "vg" => "Virgin Islands (British)",
+ "vi" => "Virgin Islands (U.S.)",
+ "wf" => "Wallis and Futuna",
+ "eh" => "Western Sahara",
+ "ye" => "Yemen",
+ "zm" => "Zambia",
+ "zw" => "Zimbabwe"
+ ]
+ ],
+ "region" => [ // web() sends this as reg unless it is "any"
+ "display" => "Region",
+ "option" => [
+ "any" => "Any region",
+ "eu" => "European Union",
+ "de" => "Germany",
+ "fr" => "France",
+ "uk" => "United Kingdom"
+ ]
+ ],
+ "domain" => [ // web() sends this as si unless it is "1"
+ "display" => "Results per domain",
+ "option" => [
+ "1" => "1 result",
+ "2" => "2 results",
+ "3" => "3 results",
+ "4" => "4 results",
+ "5" => "5 results",
+ "10" => "10 results",
+ "0" => "Unlimited",
+ ]
+ ]
+ ];
+ break; // never reached: the return above already left the method
+
+ case "news":
+ return []; // the news page offers no filters
+ }
+ }
+
+ // Performs a GET request with browser-like headers and returns the raw
+ // response body.
+ //
+ // $proxy proxy descriptor, handed to backend->assign_proxy()
+ // $url URL to fetch
+ // $get query parameters, appended to $url when non-empty
+ //
+ // Throws Exception with the curl error message when the transfer fails.
+ private function get($proxy, $url, $get = []){
+
+ $headers = [
+ "User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1",
+ "Connection: keep-alive",
+ "Upgrade-Insecure-Requests: 1",
+ "Sec-Fetch-Dest: document",
+ "Sec-Fetch-Mode: navigate",
+ "Sec-Fetch-Site: none",
+ "Sec-Fetch-User: ?1"
+ ];
+
+ $curlproc = curl_init();
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ // read the message first, then release the handle so a failed
+ // transfer does not leave it open
+ $error = curl_error($curlproc);
+ curl_close($curlproc);
+
+ throw new Exception($error);
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ public function web($get){
+
+ if($get["npt"]){
+
+ [$token, $proxy] = $this->backend->get($get["npt"], "web");
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.mojeek.com" . $token,
+ []
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
+ $lang = $get["lang"];
+ $country = $get["country"];
+ $region = $get["region"];
+ $domain = $get["domain"];
+ $focus = $get["focus"];
+
+ $params = [
+ "q" => $search,
+ "t" => 20, // number of results/page
+ "tn" => 7, // number of news results/page
+ "date" => 1, // show date
+ "tlen" => 128, // max length of title
+ //"dlen" => 511, // max length of description
+ "arc" => ($country == "any" ? "none" : $country) // location. don't use autodetect!
+ ];
+
+ switch($focus){
+
+ case "any": break;
+
+ case "blogs":
+ $params["fmt"] = "sst";
+ $params["sst"] = "1";
+ break;
+
+ default:
+ $params["foc_t"] = $focus;
+ break;
+ }
+
+ if($lang != "any"){
+
+ $params["lb"] = $lang;
+ }
+
+ if($region != "any"){
+
+ $params["reg"] = $region;
+ }
+
+ if($domain != "1"){
+
+ $params["si"] = $domain;
+ }
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://www.mojeek.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML");
+ }
+ }
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ $this->fuckhtml->load($html);
+
+ $this->detect_block();
+
+ $results =
+ $this->fuckhtml
+ ->getElementsByClassName("results-standard", "ul");
+
+ if(count($results) === 0){
+
+ return $out;
+ }
+
+ /*
+ Get all search result divs
+ */
+ foreach($results as $container){
+
+ $this->fuckhtml->load($container);
+ $results =
+ $this->fuckhtml
+ ->getElementsByTagName("li");
+
+ foreach($results as $result){
+
+ $data = [
+ "title" => null,
+ "description" => null,
+ "url" => null,
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+
+ $this->fuckhtml->load($result);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName("title", "a")[0];
+
+ $data["title"] =
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $title["innerHTML"]
+ )
+ );
+
+ $data["url"] =
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $title["attributes"]["href"]
+ )
+ );
+
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "s", "p"
+ );
+
+ if(count($description) !== 0){
+
+ $data["description"] =
+ $this->titledots(
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ )
+ )
+ );
+ }
+
+ $date =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "mdate",
+ "span"
+ );
+
+ if(count($date) !== 0){
+
+ $data["date"] =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $date[0]
+ )
+ );
+ }
+
+ $out["web"][] = $data;
+ }
+ }
+
+ /*
+ Get instant answers
+ */
+ $this->fuckhtml->load($html);
+
+ $infoboxes =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "infobox infobox-top",
+ "div"
+ );
+
+ foreach($infoboxes as $infobox){
+
+ $answer = [
+ "title" => null,
+ "description" => [],
+ "url" => null,
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+
+ // load first part with title + short definition
+ $infobox_html =
+ explode(
+ "<hr>",
+ $infobox["innerHTML"]
+ );
+
+ $this->fuckhtml->load($infobox_html[0]);
+
+ // title
+ $answer["title"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByTagName("h1")[0]
+ );
+
+ // short definition
+ $definition =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "p"
+ );
+
+ if(count($definition) !== 0){
+
+ $answer["description"][] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $definition[0]
+ )
+ ];
+ }
+
+ // get thumbnail, if it exists
+ $this->fuckhtml->load($infobox_html[1]);
+
+ $thumb =
+ $this->fuckhtml
+ ->getElementsByClassName("float-right", "img");
+
+ if(count($thumb) !== 0){
+
+ preg_match(
+ '/\/image\?img=([^&]+)/i',
+ $thumb[0]["attributes"]["src"],
+ $matches
+ );
+
+ if(count($matches) === 2){
+
+ // for some reason, if we dont get the image from mojeek
+ // it sometimes fail to fetch the right image URL
+ $answer["thumb"] =
+ "https://mojeek.com" .
+ $this->fuckhtml
+ ->getTextContent(
+ $thumb[0]["attributes"]["src"]
+ );
+ }
+ }
+
+ // get description
+ $ps =
+ $this->fuckhtml
+ ->getElementsByTagName("p");
+
+ $first_tag = true;
+ foreach($ps as $p){
+
+ $this->fuckhtml->load($p);
+
+ if(
+ preg_match(
+ '/^\s*<strong>/i',
+ $p["innerHTML"]
+ )
+ ){
+
+ /*
+ Parse table
+ */
+
+ $strong =
+ $this->fuckhtml
+ ->getElementsByTagName("strong")[0];
+
+ $p["innerHTML"] =
+ str_replace($strong["innerHTML"], "", $p["innerHTML"]);
+
+ $strong =
+ preg_replace(
+ '/:$/',
+ "",
+ ucfirst(
+ $this->fuckhtml
+ ->getTextContent(
+ $strong
+ )
+ )
+ );
+
+ $answer["table"][trim($strong)] =
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $p
+ )
+ );
+
+ continue;
+ }
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByClassName("svg-icon");
+
+ if(count($as) !== 0){
+
+ /*
+ Parse websites
+ */
+ foreach($as as $a){
+
+ $answer["sublink"][
+ ucfirst(explode(" ", $a["attributes"]["class"], 2)[1])
+ ] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ );
+ }
+
+ continue;
+ }
+
+ /*
+ Parse text content
+ */
+ $tags =
+ $this->fuckhtml
+ ->getElementsByTagName("*");
+
+ $i = 0;
+ foreach($tags as $tag){
+
+ $c = count($answer["description"]);
+
+ // remove tag from innerHTML
+ $p["innerHTML"] =
+ explode($tag["outerHTML"], $p["innerHTML"], 2);
+
+ if(count($p["innerHTML"]) === 2){
+
+ if(
+ $i === 0 &&
+ $c !== 0 &&
+ $answer["description"][$c - 1]["type"] == "link"
+ ){
+
+ $append = "\n\n";
+ }else{
+
+ $append = "";
+ }
+
+ if($p["innerHTML"][0] != ""){
+ $answer["description"][] = [
+ "type" => "text",
+ "value" => $append . trim($p["innerHTML"][0])
+ ];
+ }
+
+ $p["innerHTML"] = $p["innerHTML"][1];
+ }else{
+
+ $p["innerHTML"] = $p["innerHTML"][0];
+ }
+
+ switch($tag["tagName"]){
+
+ case "a":
+
+ $value =
+ $this->fuckhtml
+ ->getTextContent(
+ $tag
+ );
+
+ if(strtolower($value) == "wikipedia"){
+
+ if($c !== 0){
+ $answer["description"][$c - 1]["value"] =
+ rtrim($answer["description"][$c - 1]["value"]);
+ }
+ break;
+ }
+
+ $answer["description"][] = [
+ "type" => "link",
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $tag["attributes"]["href"]
+ ),
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $tag
+ )
+ ];
+ break;
+ }
+
+ $i++;
+ }
+ }
+
+ // get URL
+ $this->fuckhtml->load($infobox_html[2]);
+
+ $answer["url"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ )[0]
+ ["attributes"]
+ ["href"]
+ );
+
+ // append answer
+ $out["answer"][] = $answer;
+ }
+
+ /*
+ Get news
+ */
+ $this->fuckhtml->load($html);
+
+ $news =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "results news-results",
+ "div"
+ );
+
+ if(count($news) !== 0){
+
+ $this->fuckhtml->load($news[0]);
+
+ $lis =
+ $this->fuckhtml
+ ->getElementsByTagName("li");
+
+ foreach($lis as $li){
+
+ $this->fuckhtml->load($li);
+
+ $a =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "ob",
+ "a"
+ );
+
+ if(count($a) === 0){
+
+ continue;
+ }
+
+ $a = $a[0];
+
+ $date =
+ explode(
+ " - ",
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ )[0]
+ )
+ );
+
+ $date =
+ strtotime(
+ $date[count($date) - 1]
+ );
+
+ $out["news"][] = [
+ "title" =>
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ )
+ ),
+ "description" => null,
+ "date" => $date,
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ )
+ ];
+ }
+ }
+
+ /*
+ Get next page
+ */
+ $this->fuckhtml->load($html);
+
+ $pagination =
+ $this->fuckhtml
+ ->getElementsByClassName("pagination");
+
+ if(count($pagination) !== false){
+
+ $this->fuckhtml->load($pagination[0]);
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($as as $a){
+
+ if($a["innerHTML"] == "Next"){
+
+ $out["npt"] = $this->backend->store(
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ ),
+ "web",
+ $proxy
+ );
+ }
+ }
+ }
+
+ return $out;
+ }
+
+ /**
+  * Scrape Mojeek's news vertical for a search term.
+  *
+  * @param array $get Request parameters; only the "s" (search term) key is read.
+  * @return array ["status" => "ok", "npt" => null, "news" => list of articles]
+  * @throws Exception When the term is empty, the fetch fails, or
+  *                   detect_block() recognises a block page.
+  */
+ public function news($get){
+
+     $query = $get["s"];
+
+     if(strlen($query) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     try{
+         $html =
+             $this->get(
+                 $this->backend->get_ip(),
+                 "https://www.mojeek.com/search",
+                 [
+                     "q" => $query,
+                     "fmt" => "news"
+                 ]
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get HTML");
+     }
+
+     $this->fuckhtml->load($html);
+     $this->detect_block();
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     foreach($this->fuckhtml->getElementsByTagName("article") as $article){
+
+         // scope every lookup below to the current <article>
+         $this->fuckhtml->load($article);
+
+         // the first anchor carries both the headline (title attribute) and the URL
+         $link = $this->fuckhtml->getElementsByTagName("a")[0];
+
+         $title =
+             $this->fuckhtml
+             ->getTextContent($link["attributes"]["title"]);
+
+         $url =
+             $this->fuckhtml
+             ->getTextContent($link["attributes"]["href"]);
+
+         $paragraphs = $this->fuckhtml->getElementsByTagName("p");
+
+         // snippet lives in the paragraph with class "s"
+         $snippet =
+             $this->fuckhtml
+             ->getElementsByClassName("s", $paragraphs)[0];
+
+         $description =
+             $this->titledots(
+                 $this->fuckhtml->getTextContent($snippet)
+             );
+
+         if($description == ""){
+
+             $description = null;
+         }
+
+         // big nodes expose the date in a paragraph with class "date"
+         $timestamp = null;
+         $date_nodes =
+             $this->fuckhtml
+             ->getElementsByClassName("date", $paragraphs);
+
+         if(count($date_nodes) !== 0){
+
+             $timestamp =
+                 strtotime(
+                     $this->fuckhtml->getTextContent($date_nodes[0])
+                 );
+         }
+
+         // the paragraph with class "i" holds the author (and, on small nodes, the date)
+         $byline =
+             $this->fuckhtml
+             ->getElementsByClassName("i", $paragraphs)[0];
+
+         $this->fuckhtml->load($byline);
+
+         $author_links = $this->fuckhtml->getElementsByTagName("a");
+
+         if(count($author_links) === 0){
+
+             // small node: "<author> &bull; <time>", strip the <time> element
+             // and the trailing bullet to keep only the author
+             $time = $this->fuckhtml->getElementsByTagName("time")[0];
+
+             $timestamp =
+                 strtotime(
+                     $this->fuckhtml->getTextContent($time)
+                 );
+
+             $byline["innerHTML"] =
+                 str_replace($time["outerHTML"], "", $byline["innerHTML"]);
+
+             $author = preg_replace('/ &bull; $/', "", $byline["innerHTML"]);
+         }else{
+
+             // big node: author is the text of the first link
+             $author =
+                 htmlspecialchars_decode(
+                     $this->fuckhtml
+                     ->getTextContent($author_links[0]["innerHTML"])
+                 );
+         }
+
+         $out["news"][] = [
+             "title" => $title,
+             "author" => $author,
+             "description" => $description,
+             "date" => $timestamp,
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "url" => $url
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Inspect the currently loaded document's <title> and abort when it is
+  * Mojeek's "403 - Forbidden" page.
+  *
+  * @throws Exception When the title text equals "403 - Forbidden".
+  */
+ private function detect_block(){
+
+     $titles = $this->fuckhtml->getElementsByTagName("title");
+
+     if(count($titles) === 0){
+
+         // no <title> at all: nothing to check
+         return;
+     }
+
+     $text =
+         $this->fuckhtml
+         ->getTextContent($titles[0]["innerHTML"]);
+
+     if($text == "403 - Forbidden"){
+
+         throw new Exception("Mojeek blocked this instance or request proxy.");
+     }
+ }
+
+ /**
+  * Strip dots and ASCII whitespace (space, tab, LF, CR, NUL, vertical tab)
+  * from both ends of a string.
+  *
+  * @param string $title
+  * @return string
+  */
+ private function titledots($title){
+
+     // default trim() whitespace set, plus "."
+     $strip = ". \t\n\r\0\x0B";
+
+     return trim($title, $strip);
+ }
+}
+
diff --git a/scraper/mwmbl.php b/scraper/mwmbl.php
new file mode 100644
index 0000000..631b90c
--- /dev/null
+++ b/scraper/mwmbl.php
@@ -0,0 +1,236 @@
+<?php
+
+class mwmbl{
+
+    /**
+     * Load the backend helper (used here for get_ip() and assign_proxy())
+     * and the HTML parser.
+     */
+    public function __construct(){
+
+        include "lib/backend.php";
+        $this->backend = new backend("mwmbl");
+
+        include "lib/fuckhtml.php";
+        $this->fuckhtml = new fuckhtml();
+    }
+
+    /**
+     * This scraper exposes no filters on any page.
+     *
+     * @param string $page Unused.
+     * @return array Always empty.
+     */
+    public function getfilters($page){
+
+        return [];
+    }
+
+    /**
+     * Perform a GET request and return the raw response body.
+     *
+     * @param mixed  $proxy Passed to backend->assign_proxy() together with the cURL handle.
+     * @param string $url   Target URL without query string.
+     * @param array  $get   Query parameters; appended with http_build_query() when non-empty.
+     * @return string|bool  Whatever curl_exec() returned.
+     * @throws Exception With the cURL error message when the transfer fails.
+     */
+    private function get($proxy, $url, $get = []){
+
+        $curlproc = curl_init();
+
+        if($get !== []){
+            $get = http_build_query($get);
+            $url .= "?" . $get;
+        }
+
+        curl_setopt($curlproc, CURLOPT_URL, $url);
+
+        // use http2
+        curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+        curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+        curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+            ["User-Agent: " . config::USER_AGENT,
+            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language: en-US,en;q=0.5",
+            "Accept-Encoding: gzip",
+            "Referer: https://beta.mwmbl.org/",
+            "DNT: 1",
+            "Sec-GPC: 1",
+            "Connection: keep-alive",
+            "Upgrade-Insecure-Requests: 1",
+            "Sec-Fetch-Dest: document",
+            "Sec-Fetch-Mode: navigate",
+            "Sec-Fetch-Site: same-origin",
+            "Priority: u=0, i",
+            "Sec-Fetch-User: ?1"]
+        );
+
+        curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+        curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+        curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+        curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+        curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+        $this->backend->assign_proxy($curlproc, $proxy);
+
+        $data = curl_exec($curlproc);
+
+        if(curl_errno($curlproc)){
+
+            // read the message first, then release the handle before throwing
+            $message = curl_error($curlproc);
+            curl_close($curlproc);
+
+            throw new Exception($message);
+        }
+
+        curl_close($curlproc);
+        return $data;
+    }
+
+    /**
+     * Scrape web results from beta.mwmbl.org.
+     *
+     * @param array $get Request parameters; only the "s" (search term) key is read.
+     * @return array Result set; only "web" is ever populated and "npt" stays null.
+     * @throws Exception When the term is empty or the page cannot be fetched.
+     */
+    public function web($get){
+
+        $search = $get["s"];
+        if(strlen($search) === 0){
+
+            throw new Exception("Search term is empty!");
+        }
+
+        try{
+            $html = $this->get(
+                $this->backend->get_ip(), // no next page!
+                "https://beta.mwmbl.org/",
+                [
+                    "q" => $search
+                ]
+            );
+        }catch(Exception $error){
+
+            throw new Exception("Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.");
+        }
+
+        $out = [
+            "status" => "ok",
+            "spelling" => [
+                "type" => "no_correction",
+                "using" => null,
+                "correction" => null
+            ],
+            "npt" => null,
+            "answer" => [],
+            "web" => [],
+            "image" => [],
+            "video" => [],
+            "news" => [],
+            "related" => []
+        ];
+
+        $this->fuckhtml->load($html);
+
+        $results =
+            $this->fuckhtml
+            ->getElementsByClassName(
+                "result",
+                "li"
+            );
+
+        foreach($results as $result){
+
+            $this->fuckhtml->load($result);
+
+            $p =
+                $this->fuckhtml
+                ->getElementsByTagName("p");
+
+            // the first anchor of the result carries its URL
+            $links =
+                $this->fuckhtml
+                ->getElementsByTagName("a");
+
+            if(count($links) === 0){
+
+                // a result without any link cannot be presented: skip it
+                continue;
+            }
+
+            $sublinks = [];
+
+            $mores =
+                $this->fuckhtml
+                ->getElementsByClassName(
+                    "result-link-more",
+                    "div"
+                );
+
+            foreach($mores as $more){
+
+                // scope lookups to this "more" entry
+                $this->fuckhtml->load($more);
+
+                $as =
+                    $this->fuckhtml
+                    ->getElementsByClassName(
+                        "more",
+                        "a"
+                    );
+
+                if(count($as) === 0){
+
+                    // ?? invalid
+                    continue;
+                }
+
+                $sublinks[] = [
+                    "title" =>
+                        $this->titledots(
+                            $this->fuckhtml
+                            ->getTextContent(
+                                $this->fuckhtml
+                                ->getElementsByClassName(
+                                    "more-title",
+                                    "span"
+                                )[0]
+                            )
+                        ),
+                    "description" =>
+                        $this->titledots(
+                            $this->fuckhtml
+                            ->getTextContent(
+                                $this->fuckhtml
+                                ->getElementsByClassName(
+                                    "more-extract",
+                                    "span"
+                                )[0]
+                            )
+                        ),
+                    "url" =>
+                        $this->fuckhtml
+                        ->getTextContent(
+                            $as[0]
+                            ["attributes"]
+                            ["href"]
+                        )
+                ];
+            }
+
+            // reset the parser scope back to the whole result
+            $this->fuckhtml->load($result);
+
+            $out["web"][] = [
+                "title" =>
+                    $this->titledots(
+                        $this->fuckhtml
+                        ->getTextContent(
+                            $this->fuckhtml
+                            ->getElementsByClassName(
+                                "title",
+                                $p
+                            )[0]
+                        )
+                    ),
+                "description" =>
+                    $this->titledots(
+                        $this->fuckhtml
+                        ->getTextContent(
+                            $this->fuckhtml
+                            ->getElementsByClassName(
+                                "extract",
+                                $p
+                            )[0]
+                        )
+                    ),
+                "url" =>
+                    $this->fuckhtml
+                    ->getTextContent(
+                        $links[0]
+                        ["attributes"]
+                        ["href"]
+                    ),
+                "date" => null,
+                "type" => "web",
+                "thumb" => [
+                    "url" => null,
+                    "ratio" => null
+                ],
+                "sublink" => $sublinks,
+                "table" => []
+            ];
+        }
+
+        return $out;
+    }
+
+    /**
+     * Remove trailing "…" (U+2026) characters from a string.
+     *
+     * rtrim() must not be used for this: its character list is a set of
+     * single bytes, so passing the 3-byte ellipsis also strips the final
+     * byte of unrelated UTF-8 characters (e.g. Cyrillic "р" or "æ") and
+     * leaves a broken string. Whole ellipsis sequences are compared instead.
+     *
+     * @param string $title
+     * @return string
+     */
+    private function titledots($title){
+
+        $title = (string)$title;
+        $ellipsis = "…";
+        $size = strlen($ellipsis);
+
+        while(substr($title, -$size) === $ellipsis){
+
+            // cast: substr() may return false on older PHP when nothing is left
+            $title = (string)substr($title, 0, -$size);
+        }
+
+        return $title;
+    }
+}
diff --git a/scraper/pinterest.php b/scraper/pinterest.php
new file mode 100644
index 0000000..4188bce
--- /dev/null
+++ b/scraper/pinterest.php
@@ -0,0 +1,439 @@
+<?php
+
+class pinterest{
+
+    /**
+     * Load the backend helper (used here for get_ip(), assign_proxy(),
+     * store() and get()).
+     */
+    public function __construct(){
+
+        include "lib/backend.php";
+        $this->backend = new backend("pinterest");
+    }
+
+    /**
+     * This scraper exposes no filters on any page.
+     *
+     * @param string $page Unused.
+     * @return array Always empty.
+     */
+    public function getfilters($page){
+
+        return [];
+    }
+
+    /**
+     * Send a request to Pinterest and return the raw response body.
+     *
+     * When $header_data_post is null a GET is sent and the Set-Cookie
+     * response headers are collected; $cookies is then overwritten with
+     * ["csrf" => <csrftoken cookie>, "cookie" => "<name>=<value>; ..."].
+     * Otherwise a POST (pagination) is sent, re-using $cookies["csrf"] and
+     * $cookies["cookie"], and $header_data_post (the search term) is put in
+     * the X-Pinterest-Source-Url header.
+     *
+     * The $get parameter used to declare a default value although it is
+     * followed by the required by-reference $cookies parameter; PHP 8
+     * deprecates that form and the default could never be used, so it was
+     * dropped. Every caller passes all arguments.
+     *
+     * @param mixed       $proxy            Passed to backend->assign_proxy().
+     * @param string      $url              Target URL without query string.
+     * @param array       $get              Query (GET) or form (POST) parameters.
+     * @param array       $cookies          In/out cookie jar, see above.
+     * @param string|null $header_data_post Search term for POST requests, null for GET.
+     * @return string|bool Whatever curl_exec() returned.
+     * @throws Exception On cURL failure, or when a GET response sets no csrftoken cookie.
+     */
+    private function get($proxy, $url, $get, &$cookies, $header_data_post = null){
+
+        $curlproc = curl_init();
+
+        if($header_data_post === null){
+
+            // handling GET
+
+            // extract cookies
+            $cookies_tmp = [];
+            curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+                // cURL requires the callback to return the number of bytes consumed
+                $length = strlen($header);
+
+                $header = explode(":", $header, 2);
+
+                if(
+                    count($header) === 2 &&
+                    trim(strtolower($header[0])) == "set-cookie"
+                ){
+
+                    $cookie_tmp = explode("=", trim($header[1]), 2);
+
+                    // ignore malformed cookies that carry no "=" separator
+                    if(count($cookie_tmp) === 2){
+
+                        // keep only the value, drop attributes after the first ";"
+                        $cookies_tmp[trim($cookie_tmp[0])] =
+                            explode(";", $cookie_tmp[1], 2)[0];
+                    }
+                }
+
+                return $length;
+            });
+
+            curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+                ["User-Agent: " . config::USER_AGENT,
+                "Accept: application/json, text/javascript, */*, q=0.01",
+                "Accept-Language: en-US,en;q=0.5",
+                "Accept-Encoding: gzip",
+                "Referer: https://ca.pinterest.com/",
+                "X-Requested-With: XMLHttpRequest",
+                "X-APP-VERSION: 78f8764",
+                "X-Pinterest-AppState: active",
+                "X-Pinterest-Source-Url: /",
+                "X-Pinterest-PWS-Handler: www/index.js",
+                "screen-dpr: 1",
+                "is-preload-enabled: 1",
+                "DNT: 1",
+                "Sec-GPC: 1",
+                "Sec-Fetch-Dest: empty",
+                "Sec-Fetch-Mode: cors",
+                "Sec-Fetch-Site: same-origin",
+                "Connection: keep-alive",
+                "Alt-Used: ca.pinterest.com",
+                "Priority: u=0",
+                "TE: trailers"]
+            );
+
+            if($get !== []){
+                $get = http_build_query($get);
+                $url .= "?" . $get;
+            }
+        }else{
+
+            // handling POST (pagination)
+            $get = http_build_query($get);
+
+            curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+                ["User-Agent: " . config::USER_AGENT,
+                "Accept: application/json, text/javascript, */*, q=0.01",
+                "Accept-Language: en-US,en;q=0.5",
+                "Accept-Encoding: gzip",
+                "Content-Type: application/x-www-form-urlencoded",
+                "Content-Length: " . strlen($get),
+                "Referer: https://ca.pinterest.com/",
+                "X-Requested-With: XMLHttpRequest",
+                "X-APP-VERSION: 78f8764",
+                "X-CSRFToken: " . $cookies["csrf"],
+                "X-Pinterest-AppState: active",
+                "X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post),
+                "X-Pinterest-PWS-Handler: www/search/[scope].js",
+                "screen-dpr: 1",
+                "is-preload-enabled: 1",
+                "Origin: https://ca.pinterest.com",
+                "DNT: 1",
+                "Sec-GPC: 1",
+                "Sec-Fetch-Dest: empty",
+                "Sec-Fetch-Mode: cors",
+                "Sec-Fetch-Site: same-origin",
+                "Connection: keep-alive",
+                "Alt-Used: ca.pinterest.com",
+                "Cookie: " . $cookies["cookie"],
+                "TE: trailers"]
+            );
+
+            curl_setopt($curlproc, CURLOPT_POST, true);
+            curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+        }
+
+        curl_setopt($curlproc, CURLOPT_URL, $url);
+
+        curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+        // http2 bypass
+        curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+        curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+        curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+        curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+        curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+        curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+        $this->backend->assign_proxy($curlproc, $proxy);
+
+        $data = curl_exec($curlproc);
+
+        if(curl_errno($curlproc)){
+
+            // read the message first, then release the handle before throwing
+            $message = curl_error($curlproc);
+            curl_close($curlproc);
+
+            throw new Exception($message);
+        }
+
+        curl_close($curlproc);
+
+        if($header_data_post === null){
+
+            if(!isset($cookies_tmp["csrftoken"])){
+
+                throw new Exception("Failed to grep CSRF token");
+            }
+
+            // serialise the collected cookies into a single Cookie header value
+            $pairs = [];
+
+            foreach($cookies_tmp as $cookie_name => $cookie_value){
+
+                $pairs[] = $cookie_name . "=" . $cookie_value;
+            }
+
+            $cookies = [
+                "csrf" => $cookies_tmp["csrftoken"],
+                "cookie" => rtrim(implode("; ", $pairs), " ;")
+            ];
+        }
+
+        return $data;
+    }
+
+    /**
+     * Search Pinterest pins and return them as image results.
+     *
+     * First page: reads $get["s"], sends a GET and harvests the session
+     * cookies. Next pages: reads the stored state (query, bookmark, cookies)
+     * behind $get["npt"] and sends a POST with the bookmark.
+     *
+     * @param array $get Request parameters ("s" or "npt").
+     * @return array ["status" => "ok", "npt" => token|null, "image" => list]
+     * @throws Exception On empty term, transport/JSON failure, a non-success
+     *                   API status, or when Pinterest returns a sensitivity notice.
+     */
+    public function image($get){
+
+        if($get["npt"]){
+
+            [$data, $proxy] =
+                $this->backend->get(
+                    $get["npt"], "images"
+                );
+
+            $data = json_decode($data, true);
+
+            $search = $data["q"];
+            $cookies = $data["cookies"];
+
+            try{
+                $json =
+                    $this->get(
+                        $proxy,
+                        "https://ca.pinterest.com/resource/BaseSearchResource/get/",
+                        [
+                            "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
+                            "data" => json_encode(
+                                [
+                                    "options" => [
+                                        "applied_unified_filters" => null,
+                                        "appliedProductFilters" => "---",
+                                        "article" => null,
+                                        "auto_correction_disabled" => false,
+                                        "corpus" => null,
+                                        "customized_rerank_type" => null,
+                                        "domains" => null,
+                                        "dynamicPageSizeExpGroup" => null,
+                                        "filters" => null,
+                                        "journey_depth" => null,
+                                        "page_size" => null,
+                                        "price_max" => null,
+                                        "price_min" => null,
+                                        "query_pin_sigs" => null,
+                                        "query" => $data["q"],
+                                        "redux_normalize_feed" => true,
+                                        "request_params" => null,
+                                        "rs" => "typed",
+                                        "scope" => "pins",
+                                        "selected_one_bar_modules" => null,
+                                        "source_id" => null,
+                                        "source_module_id" => null,
+                                        "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
+                                        "top_pin_id" => null,
+                                        "top_pin_ids" => null,
+                                        "bookmarks" => [
+                                            $data["bookmark"]
+                                        ]
+                                    ],
+                                    "context" => []
+                                ],
+                                JSON_UNESCAPED_SLASHES
+                            )
+                        ],
+                        $cookies,
+                        $search
+                    );
+
+            }catch(Exception $error){
+
+                throw new Exception("Failed to fetch JSON");
+            }
+
+        }else{
+
+            $search = $get["s"];
+            if(strlen($search) === 0){
+
+                throw new Exception("Search term is empty!");
+            }
+
+            // The request mirrors the browser's call to
+            // /resource/BaseSearchResource/get/?source_url=...&data=<json>&_=<timestamp>
+            // NOTE(review): source_url sets "rs" to the search term here while
+            // the pagination request uses rs=typed -- confirm which is intended.
+            $source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search);
+
+            $filter = [
+                "source_url" => $source_url,
+                "rs" => "typed",
+                "data" =>
+                    json_encode(
+                        [
+                            "options" => [
+                                "applied_unified_filters" => null,
+                                "appliedProductFilters" => "---",
+                                "article" => null,
+                                "corpus" => null,
+                                "customized_rerank_type" => null,
+                                "domains" => null,
+                                "dynamicPageSizeExpGroup" => null,
+                                "filters" => null,
+                                "journey_depth" => null,
+                                "page_size" => null,
+                                "price_max" => null,
+                                "price_min" => null,
+                                "query_pin_sigs" => null,
+                                "query" => $search,
+                                "redux_normalize_feed" => true,
+                                "request_params" => null,
+                                "rs" => "ac",
+                                "scope" => "pins", // pins, boards, videos,
+                                "selected_one_bar_modules" => null,
+                                "source_id" => null,
+                                "source_module_id" => null,
+                                "source_url" => $source_url,
+                                "top_pin_id" => null,
+                                "top_pin_ids" => null
+                            ],
+                            "context" => []
+                        ]
+                    ),
+                // cache-buster built from the digits of microtime(true), last digit dropped
+                "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
+            ];
+
+            $proxy = $this->backend->get_ip();
+            $cookies = [];
+
+            try{
+                $json =
+                    $this->get(
+                        $proxy,
+                        "https://ca.pinterest.com/resource/BaseSearchResource/get/",
+                        $filter,
+                        $cookies,
+                        null
+                    );
+
+            }catch(Exception $error){
+
+                throw new Exception("Failed to fetch JSON");
+            }
+        }
+
+        $json = json_decode($json, true);
+
+        if($json === null){
+
+            throw new Exception("Failed to decode JSON");
+        }
+
+        $out = [
+            "status" => "ok",
+            "npt" => null,
+            "image" => []
+        ];
+
+        if(
+            !isset(
+                $json["resource_response"]
+                ["status"]
+            )
+        ){
+
+            throw new Exception("Unknown API failure");
+        }
+
+        if($json["resource_response"]["status"] != "success"){
+
+            $status = "Got non-OK response: " . $json["resource_response"]["status"];
+
+            if(
+                isset(
+                    $json["resource_response"]["message"]
+                )
+            ){
+
+                $status .= " - " . $json["resource_response"]["message"];
+            }
+
+            throw new Exception($status);
+        }
+
+        if(
+            isset(
+                $json["resource_response"]["sensitivity"]
+                ["notices"][0]["description"]["text"]
+            )
+        ){
+
+            throw new Exception(
+                "Pinterest returned a notice: " .
+                $json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"]
+            );
+        }
+
+        // get NPT: the bookmark, query and session cookies are stored together
+        // so the next page can be requested with the same session
+        if(isset($json["resource_response"]["bookmark"])){
+
+            $out["npt"] =
+                $this->backend->store(
+                    json_encode([
+                        "q" => $search,
+                        "bookmark" => $json["resource_response"]["bookmark"],
+                        "cookies" => $cookies
+                    ]),
+                    "images",
+                    $proxy
+                );
+        }
+
+        // a successful response without a result list yields zero images
+        $results = $json["resource_response"]["data"]["results"] ?? [];
+
+        if(!is_array($results)){
+
+            $results = [];
+        }
+
+        foreach($results as $item){
+
+            // only pins and boards are turned into image results
+            if(
+                !is_array($item) ||
+                !in_array($item["type"] ?? null, ["pin", "board"], true)
+            ){
+
+                continue;
+            }
+
+            // skip entries that carry no usable image list
+            if(
+                !isset($item["images"]) ||
+                !is_array($item["images"]) ||
+                $item["images"] === []
+            ){
+
+                continue;
+            }
+
+            /*
+                Handle image object
+            */
+            $images = array_values($item["images"]);
+            $image = $images[count($images) - 1]; // original
+            $thumb = $images[1] ?? $image; // 236x, or the only size available
+
+            // title is "grid_title: description", falling back to the board name
+            $title = [];
+
+            if(
+                isset($item["grid_title"]) &&
+                trim($item["grid_title"]) != ""
+            ){
+
+                $title[] = $item["grid_title"];
+            }
+
+            if(
+                isset($item["description"]) &&
+                trim($item["description"]) != ""
+            ){
+
+                $title[] = $item["description"];
+            }
+
+            $title = implode(": ", $title);
+
+            if(
+                $title == "" &&
+                isset($item["board"]["name"]) &&
+                trim($item["board"]["name"]) != ""
+            ){
+
+                $title = $item["board"]["name"];
+            }
+
+            if($title == ""){
+
+                $title = null;
+            }
+
+            $out["image"][] = [
+                "title" => $title,
+                "source" => [
+                    [
+                        "url" => $image["url"],
+                        "width" => (int)$image["width"],
+                        "height" => (int)$image["height"]
+                    ],
+                    [
+                        "url" => $thumb["url"],
+                        "width" => (int)$thumb["width"],
+                        "height" => (int)$thumb["height"]
+                    ]
+                ],
+                // pins without an external link point at their Pinterest page
+                "url" =>
+                    $item["link"] ??
+                    "https://ca.pinterest.com/pin/" . $item["id"]
+            ];
+        }
+
+        return $out;
+    }
+}
diff --git a/scraper/qwant.php b/scraper/qwant.php
new file mode 100644
index 0000000..ecbd4ec
--- /dev/null
+++ b/scraper/qwant.php
@@ -0,0 +1,993 @@
+<?php
+
+class qwant{
+
+ // Load lib/backend.php and create the backend helper for the "qwant"
+ // scraper; the other methods of this class reach it through $this->backend.
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("qwant");
+ }
+
+ /**
+  * Describe the filters available for a result page.
+  *
+  * "nsfw" and "country" are offered on every page; "web", "images",
+  * "videos" and "news" each add their own filters on top. Any other page
+  * name yields only the two common filters.
+  *
+  * @param string $page Page name.
+  * @return array Filter name => ["display" => label, "option" => value => label].
+  */
+ public function getfilters($page){
+
+     // "Time posted" filter shared by the web and image pages
+     $time = [
+         "display" => "Time posted",
+         "option" => [
+             "any" => "Any time",
+             "day" => "Past 24 hours",
+             "week" => "Past week",
+             "month" => "Past month"
+         ]
+     ];
+
+     // filters common to every page
+     $common = [
+         "nsfw" => [
+             "display" => "NSFW",
+             "option" => [
+                 "yes" => "Yes",
+                 "maybe" => "Maybe",
+                 "no" => "No"
+             ]
+         ],
+         "country" => [
+             "display" => "Country",
+             "option" => [
+                 "en_US" => "United States",
+                 "fr_FR" => "France",
+                 "en_GB" => "Great Britain",
+                 "de_DE" => "Germany",
+                 "it_IT" => "Italy",
+                 "es_AR" => "Argentina",
+                 "en_AU" => "Australia",
+                 "es_ES" => "Spain (es)",
+                 "ca_ES" => "Spain (ca)",
+                 "cs_CZ" => "Czech Republic",
+                 "ro_RO" => "Romania",
+                 "el_GR" => "Greece",
+                 "zh_CN" => "China",
+                 "zh_HK" => "Hong Kong",
+                 "en_NZ" => "New Zealand",
+                 "th_TH" => "Thailand",
+                 "ko_KR" => "South Korea",
+                 "sv_SE" => "Sweden",
+                 "nb_NO" => "Norway",
+                 "da_DK" => "Denmark",
+                 "hu_HU" => "Hungary",
+                 "et_EE" => "Estonia",
+                 "es_MX" => "Mexico",
+                 "es_CL" => "Chile",
+                 "en_CA" => "Canada (en)",
+                 "fr_CA" => "Canada (fr)",
+                 "en_MY" => "Malaysia",
+                 "bg_BG" => "Bulgaria",
+                 "fi_FI" => "Finland",
+                 "pl_PL" => "Poland",
+                 "nl_NL" => "Netherlands",
+                 "pt_PT" => "Portugal",
+                 "de_CH" => "Switzerland (de)",
+                 "fr_CH" => "Switzerland (fr)",
+                 "it_CH" => "Switzerland (it)",
+                 "de_AT" => "Austria",
+                 "fr_BE" => "Belgium (fr)",
+                 "nl_BE" => "Belgium (nl)",
+                 "en_IE" => "Ireland",
+                 "he_IL" => "Israel"
+             ]
+         ]
+     ];
+
+     // page-specific additions, in the order the pages are tested
+     $per_page = [
+         "web" => [
+             "time" => $time,
+             "extendedsearch" => [
+                 // deliberately has no "display" entry, so the interface does not list it
+                 "option" => [
+                     "yes" => "Yes",
+                     "no" => "No"
+                 ]
+             ]
+         ],
+         "images" => [
+             "time" => $time,
+             "size" => [
+                 "display" => "Size",
+                 "option" => [
+                     "any" => "Any size",
+                     "large" => "Large",
+                     "medium" => "Medium",
+                     "small" => "Small"
+                 ]
+             ],
+             "color" => [
+                 "display" => "Color",
+                 "option" => [
+                     "any" => "Any color",
+                     "coloronly" => "Color only",
+                     "monochrome" => "Monochrome",
+                     "black" => "Black",
+                     "brown" => "Brown",
+                     "gray" => "Gray",
+                     "white" => "White",
+                     "yellow" => "Yellow",
+                     "orange" => "Orange",
+                     "red" => "Red",
+                     "pink" => "Pink",
+                     "purple" => "Purple",
+                     "blue" => "Blue",
+                     "teal" => "Teal",
+                     "green" => "Green"
+                 ]
+             ],
+             "imagetype" => [
+                 "display" => "Type",
+                 "option" => [
+                     "any" => "Any type",
+                     "animatedgif" => "Animated GIF",
+                     "photo" => "Photograph",
+                     "transparent" => "Transparent"
+                 ]
+             ],
+             "license" => [
+                 "display" => "License",
+                 "option" => [
+                     "any" => "Any license",
+                     "share" => "Non-commercial reproduction and sharing",
+                     "sharecommercially" => "Reproduction and sharing",
+                     "modify" => "Non-commercial reproduction, sharing and modification",
+                     "modifycommercially" => "Reproduction, sharing and modification",
+                     "public" => "Public domain"
+                 ]
+             ]
+         ],
+         "videos" => [
+             "order" => [
+                 "display" => "Order by",
+                 "option" => [
+                     "relevance" => "Relevance",
+                     "views" => "Views",
+                     "date" => "Most recent"
+                 ]
+             ],
+             "source" => [
+                 "display" => "Source",
+                 "option" => [
+                     "any" => "Any source",
+                     "youtube" => "YouTube",
+                     "dailymotion" => "Dailymotion"
+                 ]
+             ]
+         ],
+         "news" => [
+             // the news page has an extra "hour" choice
+             "time" => [
+                 "display" => "Time posted",
+                 "option" => [
+                     "any" => "Any time",
+                     "hour" => "Less than 1 hour ago",
+                     "day" => "Past 24 hours",
+                     "week" => "Past week",
+                     "month" => "Past month"
+                 ]
+             ],
+             "order" => [
+                 "display" => "Order by",
+                 "option" => [
+                     "relevance" => "Relevance",
+                     "date" => "Most recent"
+                 ]
+             ]
+         ]
+     ];
+
+     foreach($per_page as $name => $filters){
+
+         // loose comparison, first match wins
+         if($page == $name){
+
+             return array_merge($common, $filters);
+         }
+     }
+
+     return $common;
+ }
+
+ /**
+  * Perform a GET request against Qwant and return the raw response body.
+  *
+  * @param mixed  $proxy Passed to backend->assign_proxy() together with the cURL handle.
+  * @param string $url   Target URL without query string.
+  * @param array  $get   Query parameters; appended with http_build_query() when non-empty.
+  * @return string|bool  Whatever curl_exec() returned.
+  * @throws Exception With the cURL error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get = []){
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $handle = curl_init();
+
+     // options are applied in array order
+     curl_setopt_array(
+         $handle,
+         [
+             CURLOPT_URL => $url,
+             CURLOPT_ENCODING => "", // default encoding
+             CURLOPT_HTTPHEADER => [
+                 "User-Agent: " . config::USER_AGENT,
+                 "Accept: application/json, text/plain, */*",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Accept-Encoding: gzip",
+                 "DNT: 1",
+                 "Connection: keep-alive",
+                 "Origin: https://www.qwant.com",
+                 "Referer: https://www.qwant.com/",
+                 "Sec-Fetch-Dest: empty",
+                 "Sec-Fetch-Mode: cors",
+                 "Sec-Fetch-Site: same-site",
+                 "TE: trailers"
+             ],
+             // Bypass HTTP/2 check
+             CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
+             CURLOPT_RETURNTRANSFER => true,
+             CURLOPT_SSL_VERIFYHOST => 2,
+             CURLOPT_SSL_VERIFYPEER => true,
+             CURLOPT_CONNECTTIMEOUT => 30,
+             CURLOPT_TIMEOUT => 30
+         ]
+     );
+
+     $this->backend->assign_proxy($handle, $proxy);
+
+     $body = curl_exec($handle);
+
+     if(curl_errno($handle)){
+
+         throw new Exception(curl_error($handle));
+     }
+
+     curl_close($handle);
+
+     return $body;
+ }
+
+ /**
+  * Query Qwant's web search API and normalise the response.
+  *
+  * A first-page request is built from $get ("s", "time", "country", "nsfw");
+  * a follow-up request re-uses the parameters stored behind $get["npt"].
+  * When $get["extendedsearch"] is "yes" and the response has a sidebar
+  * endpoint, a second request fetches the instant answer; failures of that
+  * second request are ignored.
+  *
+  * @param array $get Request parameters.
+  * @return array Result set with "spelling", "npt", "answer", "web",
+  *               "image", "video", "news" and "related" entries.
+  * @throws Exception On empty/overlong term, transport or JSON failure, an
+  *                   error reported by the server, or gibberish results.
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // continuation: parameters and proxy come from the stored token
+         [$params, $proxy] = $this->backend->get($get["npt"], "web");
+         $params = json_decode($params, true);
+     }else{
+
+         // first page: build the parameters from the request
+         $search = $get["s"];
+         $length = strlen($search);
+
+         if($length === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         if($length > 2048){
+
+             throw new Exception("Search term is too long!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         $params = [
+             "q" => $search,
+             "freshness" => $get["time"],
+             "count" => 10,
+             "locale" => $get["country"],
+             "offset" => 0,
+             "device" => "desktop",
+             "tgp" => 3,
+             "safesearch" => 0,
+             "displayed" => "true"
+         ];
+
+         // map the nsfw choice onto Qwant's safesearch level
+         $nsfw = $get["nsfw"];
+
+         if($nsfw == "yes"){
+
+             $params["safesearch"] = 0;
+         }elseif($nsfw == "maybe"){
+
+             $params["safesearch"] = 1;
+         }elseif($nsfw == "no"){
+
+             $params["safesearch"] = 2;
+         }
+     }
+
+     try{
+         $json = $this->get($proxy, "https://fdn.qwant.com/v3/search/web", $params);
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === NULL){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(isset($json["data"]["message"][0])){
+
+         throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]);
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // error code 5 on a non-success response means "no results"
+     if(
+         $json["status"] != "success" &&
+         $json["data"]["error_code"] === 5
+     ){
+
+         return $out;
+     }
+
+     $this->detect_errors($json);
+
+     if(!isset($json["data"]["result"]["items"]["mainline"])){
+
+         throw new Exception("Server did not return a result object");
+     }
+
+     // instant answer (optional second request)
+     if(
+         $get["extendedsearch"] == "yes" &&
+         isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"])
+     ){
+
+         try{
+             $answer =
+                 json_decode(
+                     $this->get(
+                         $proxy,
+                         "https://api.qwant.com/v3" .
+                         $json["data"]["result"]["items"]["sidebar"][0]["endpoint"],
+                         []
+                     ),
+                     true
+                 );
+
+             if(
+                 $answer === null ||
+                 $answer["status"] != "success" ||
+                 $answer["data"]["result"] === null
+             ){
+
+                 // unusable answer: bail out to the catch below
+                 throw new Exception();
+             }
+
+             $out["answer"][] = [
+                 "title" => $answer["data"]["result"]["title"],
+                 "description" => [
+                     [
+                         "type" => "text",
+                         "value" => $this->trimdots($answer["data"]["result"]["description"])
+                     ]
+                 ],
+                 "url" => $answer["data"]["result"]["url"],
+                 "thumb" =>
+                     $answer["data"]["result"]["thumbnail"]["landscape"] == null ?
+                     null :
+                     $this->unshitimage($answer["data"]["result"]["thumbnail"]["landscape"]),
+                 "table" => [],
+                 "sublink" => []
+             ];
+
+         }catch(Exception $error){
+
+             // the instant answer is best-effort: ignore failures
+         }
+     }
+
+     // spelling correction
+     if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){
+
+         $out["spelling"] = [
+             "type" => "including",
+             "using" => $json["data"]["query"]["queryContext"]["alteredQuery"],
+             "correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"]
+         ];
+     }
+
+     // next page token: same parameters, offset advanced by one page
+     if($json["data"]["result"]["lastPage"] === false){
+
+         $params["offset"] += 10;
+
+         $out["npt"] = $this->backend->store(json_encode($params), "web", $proxy);
+     }
+
+     // walk the mainline sections; unknown section types (e.g. ads) are ignored
+     foreach($json["data"]["result"]["items"]["mainline"] as $section){
+
+         $type = $section["type"];
+
+         if($type == "web"){
+
+             $is_first = true;
+
+             foreach($section["items"] as $entry){
+
+                 $thumb =
+                     isset($entry["thumbnailUrl"]) ?
+                     [
+                         "url" => $this->unshitimage($entry["thumbnailUrl"]),
+                         "ratio" => "16:9"
+                     ] :
+                     [
+                         "url" => null,
+                         "ratio" => null
+                     ];
+
+                 $sublinks = [];
+
+                 foreach($entry["links"] ?? [] as $sub){
+
+                     $sublinks[] = [
+                         "title" => $this->trimdots($sub["title"]),
+                         "date" => null,
+                         "description" =>
+                             isset($sub["desc"]) ?
+                             $this->trimdots($sub["desc"]) :
+                             null,
+                         "url" => $sub["url"]
+                     ];
+                 }
+
+                 // only the first entry of a section is checked for the
+                 // "urlPingSuffix" key; its absence is treated as gibberish
+                 if(
+                     $is_first &&
+                     !isset($entry["urlPingSuffix"])
+                 ){
+
+                     throw new Exception("Qwant returned gibberish results");
+                 }
+
+                 $out["web"][] = [
+                     "title" => $this->trimdots($entry["title"]),
+                     "description" => $this->trimdots($entry["desc"]),
+                     "url" => $entry["url"],
+                     "date" => null,
+                     "type" => "web",
+                     "thumb" => $thumb,
+                     "sublink" => $sublinks,
+                     "table" => []
+                 ];
+
+                 $is_first = false;
+             }
+
+         }elseif($type == "images"){
+
+             foreach($section["items"] as $picture){
+
+                 $out["image"][] = [
+                     "title" => $picture["title"],
+                     "source" => [
+                         [
+                             "url" => $picture["media"],
+                             "width" => (int)$picture["width"],
+                             "height" => (int)$picture["height"]
+                         ],
+                         [
+                             "url" => $this->unshitimage($picture["thumbnail"]),
+                             "width" => $picture["thumb_width"],
+                             "height" => $picture["thumb_height"]
+                         ]
+                     ],
+                     "url" => $picture["url"]
+                 ];
+             }
+
+         }elseif($type == "videos"){
+
+             foreach($section["items"] as $clip){
+
+                 if($clip["thumbnail"] === null){
+
+                     $clip_thumb = [
+                         "url" => null,
+                         "ratio" => null
+                     ];
+                 }else{
+
+                     $clip_thumb = [
+                         "url" => $this->unshitimage($clip["thumbnail"]),
+                         "ratio" => "16:9"
+                     ];
+                 }
+
+                 $out["video"][] = [
+                     "title" => $clip["title"],
+                     "description" => null,
+                     "date" => (int)$clip["date"],
+                     // the API value is divided by 1000 (presumably ms to s -- TODO confirm)
+                     "duration" =>
+                         $clip["duration"] === null ?
+                         null :
+                         $clip["duration"] / 1000,
+                     "views" => null,
+                     "thumb" => $clip_thumb,
+                     "url" => $clip["url"]
+                 ];
+             }
+
+         }elseif($type == "related_searches"){
+
+             foreach($section["items"] as $suggestion){
+
+                 $out["related"][] = $suggestion["text"];
+             }
+         }
+     }
+
+     return $out;
+ }
+
+
+	/*
+		Image search.
+		
+		A fresh search builds the Qwant query from $get: search term,
+		country, the time/size/color/imagetype/license filters and the
+		nsfw level. A continuation request restores the JSON-encoded query
+		and the proxy from the "npt" token instead.
+		
+		Returns the "image" result list, plus a new "npt" token when the
+		response reports that more pages exist.
+		
+		Throws Exception on an empty search term, a failed request,
+		undecodable JSON, an API error, or a response that carries a
+		"mainline" key inside its item list.
+	*/
+	public function image($get){
+		
+		if($get["npt"]){
+			
+			[$stored, $proxy] =
+				$this->backend->get(
+					$get["npt"],
+					"images"
+				);
+			
+			$params = json_decode($stored, true);
+		}else{
+			
+			$search = $get["s"];
+			
+			if(strlen($search) === 0){
+				
+				throw new Exception("Search term is empty!");
+			}
+			
+			$proxy = $this->backend->get_ip();
+			
+			$params = [
+				"t" => "images",
+				"q" => $search,
+				"count" => 125,
+				"locale" => $get["country"],
+				"offset" => 0, // increment by 125
+				"device" => "desktop",
+				"tgp" => 3
+			];
+			
+			// user filter => API parameter; "any" means "do not send"
+			$filters = [
+				"time" => "freshness",
+				"size" => "size",
+				"color" => "color",
+				"imagetype" => "imagetype",
+				"license" => "license"
+			];
+			
+			foreach($filters as $field => $api_name){
+				
+				if($get[$field] != "any"){
+					
+					$params[$api_name] = $get[$field];
+				}
+			}
+			
+			$nsfw = $get["nsfw"];
+			
+			if($nsfw == "yes"){
+				
+				$params["safesearch"] = 0;
+			}elseif($nsfw == "maybe"){
+				
+				$params["safesearch"] = 1;
+			}elseif($nsfw == "no"){
+				
+				$params["safesearch"] = 2;
+			}
+		}
+		
+		try{
+			$response =
+				$this->get(
+					$proxy,
+					"https://api.qwant.com/v3/search/images",
+					$params
+				);
+		}catch(Exception $err){
+			
+			throw new Exception("Failed to get JSON");
+		}
+		
+		$json = json_decode($response, true);
+		
+		if($json === null){
+			
+			throw new Exception("Failed to decode JSON");
+		}
+		
+		$this->detect_errors($json);
+		
+		// a "mainline" key here is reported as a gibberish response
+		if(isset($json["data"]["result"]["items"]["mainline"])){
+			
+			throw new Exception("Qwant returned gibberish results");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"image" => []
+		];
+		
+		if($json["data"]["result"]["lastPage"] === false){
+			
+			// more pages: store the same query, one page further
+			$params["offset"] += 125;
+			
+			$out["npt"] =
+				$this->backend->store(
+					json_encode($params),
+					"images",
+					$proxy
+				);
+		}
+		
+		foreach($json["data"]["result"]["items"] as $entry){
+			
+			$full = [
+				"url" => $entry["media"],
+				"width" => $entry["width"],
+				"height" => $entry["height"]
+			];
+			
+			$preview = [
+				"url" => $this->unshitimage($entry["thumbnail"]),
+				"width" => $entry["thumb_width"],
+				"height" => $entry["thumb_height"]
+			];
+			
+			$out["image"][] = [
+				"title" => $this->trimdots($entry["title"]),
+				"source" => [$full, $preview],
+				"url" => $entry["url"]
+			];
+		}
+		
+		return $out;
+	}
+
+	/*
+		Video search (first page only, no pagination token is produced).
+		
+		Returns the "video" list; the "author", "livestream", "playlist"
+		and "reel" lists are always empty for this scraper.
+		
+		Throws Exception on an empty search term, a failed request,
+		undecodable JSON, an API error, or a response that carries a
+		"mainline" key inside its item list.
+	*/
+	public function video($get){
+		
+		$search = $get["s"];
+		
+		if(strlen($search) === 0){
+			
+			throw new Exception("Search term is empty!");
+		}
+		
+		$params = [
+			"t" => "videos",
+			"q" => $search,
+			"count" => 50,
+			"locale" => $get["country"],
+			"offset" => 0, // dont implement pagination
+			"device" => "desktop",
+			"tgp" => 3
+		];
+		
+		$nsfw = $get["nsfw"];
+		
+		if($nsfw == "yes"){
+			
+			$params["safesearch"] = 0;
+		}elseif($nsfw == "maybe"){
+			
+			$params["safesearch"] = 1;
+		}elseif($nsfw == "no"){
+			
+			$params["safesearch"] = 2;
+		}
+		
+		try{
+			$response =
+				$this->get(
+					$this->backend->get_ip(),
+					"https://api.qwant.com/v3/search/videos",
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Could not fetch JSON");
+		}
+		
+		$json = json_decode($response, true);
+		
+		if($json === null){
+			
+			throw new Exception("Could not parse JSON");
+		}
+		
+		$this->detect_errors($json);
+		
+		if(isset($json["data"]["result"]["items"]["mainline"])){
+			
+			throw new Exception("Qwant returned gibberish results");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"video" => [],
+			"author" => [],
+			"livestream" => [],
+			"playlist" => [],
+			"reel" => []
+		];
+		
+		foreach($json["data"]["result"]["items"] as $entry){
+			
+			if(!empty($entry["thumbnail"])){
+				
+				$thumb = [
+					"url" => $this->unshitimage($entry["thumbnail"]),
+					"ratio" => "16:9"
+				];
+			}else{
+				
+				$thumb = [
+					"url" => null,
+					"ratio" => null
+				];
+			}
+			
+			// a duration of zero is reported as "unknown"
+			$length = (int)$entry["duration"];
+			
+			if($length === 0){
+				
+				$length = null;
+			}
+			
+			$author = [
+				"name" => $entry["channel"],
+				"url" => null,
+				"avatar" => null
+			];
+			
+			$out["video"][] = [
+				"title" => $entry["title"],
+				"description" => $this->limitstrlen($entry["desc"]),
+				"author" => $author,
+				"date" => (int)$entry["date"],
+				"duration" => $length,
+				"views" => null,
+				"thumb" => $thumb,
+				// drop the ?syndication=... suffix from the target URL
+				"url" => preg_replace("/\?syndication=.+/", "", $entry["url"])
+			];
+		}
+		
+		return $out;
+	}
+
+	/*
+		News search (first page only, no pagination token is produced).
+		
+		Returns the "news" list. Throws Exception on an empty search term,
+		a failed request, undecodable JSON, an API error, or a response
+		that carries a "mainline" key inside its item list.
+	*/
+	public function news($get){
+		
+		$search = $get["s"];
+		
+		if(strlen($search) === 0){
+			
+			throw new Exception("Search term is empty!");
+		}
+		
+		$params = [
+			"t" => "news",
+			"q" => $search,
+			"count" => 50,
+			"locale" => $get["country"],
+			"offset" => 0, // dont implement pagination
+			"device" => "desktop",
+			"tgp" => 3
+		];
+		
+		$nsfw = $get["nsfw"];
+		
+		if($nsfw == "yes"){
+			
+			$params["safesearch"] = 0;
+		}elseif($nsfw == "maybe"){
+			
+			$params["safesearch"] = 1;
+		}elseif($nsfw == "no"){
+			
+			$params["safesearch"] = 2;
+		}
+		
+		try{
+			$response =
+				$this->get(
+					$this->backend->get_ip(),
+					"https://api.qwant.com/v3/search/news",
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Could not fetch JSON");
+		}
+		
+		$json = json_decode($response, true);
+		
+		if($json === null){
+			
+			throw new Exception("Could not parse JSON");
+		}
+		
+		$this->detect_errors($json);
+		
+		if(isset($json["data"]["result"]["items"]["mainline"])){
+			
+			throw new Exception("Qwant returned gibberish results");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"news" => []
+		];
+		
+		foreach($json["data"]["result"]["items"] as $article){
+			
+			// the first media entry's big picture is used as thumbnail
+			if(!empty($article["media"][0]["pict_big"]["url"])){
+				
+				$thumb = [
+					"url" => $this->unshitimage($article["media"][0]["pict_big"]["url"]),
+					"ratio" => "16:9"
+				];
+			}else{
+				
+				$thumb = [
+					"url" => null,
+					"ratio" => null
+				];
+			}
+			
+			$out["news"][] = [
+				"title" => $article["title"],
+				"author" => $article["press_name"],
+				"description" => $this->trimdots($article["desc"]),
+				"date" => (int)$article["date"],
+				"thumb" => $thumb,
+				"url" => $article["url"]
+			];
+		}
+		
+		return $out;
+	}
+
+	/*
+		Throw when the decoded response reports status "error".
+		
+		The message names a captcha when error_data carries a captchaUrl,
+		includes the error_code when one is present, and is generic
+		otherwise. Responses with any other status pass silently.
+	*/
+	private function detect_errors($json){
+		
+		if(
+			!isset($json["status"]) ||
+			$json["status"] != "error"
+		){
+			
+			return;
+		}
+		
+		if(isset($json["data"]["error_data"]["captchaUrl"])){
+			
+			throw new Exception("Qwant returned a captcha");
+		}
+		
+		if(isset($json["data"]["error_data"]["error_code"])){
+			
+			$code = $json["data"]["error_data"]["error_code"];
+			
+			throw new Exception("Qwant returned an API error: " . $code);
+		}
+		
+		throw new Exception("Qwant returned an API error");
+	}
+
+	/*
+		Shorten $text to roughly its first 300 bytes, breaking between
+		words.
+		
+		Line breaks are flattened to spaces first, so a multi-line
+		description is no longer cut at its first newline (or emptied when
+		it starts with one). null is treated as an empty string.
+	*/
+	private function limitstrlen($text){
+		
+		$flat =
+			str_replace(
+				["\r\n", "\n\r", "\n", "\r"],
+				" ",
+				(string)$text
+			);
+		
+		// wordwrap inserts "\n" at each wrap point; keep the first line
+		return explode("\n", wordwrap($flat, 300, "\n"), 2)[0];
+	}
+
+	/*
+		Strip leading/trailing dots and spaces (eg. a trailing "...").
+		
+		The cast keeps a missing (null) title or description from reaching
+		trim(), which is deprecated for null since PHP 8.1; the result for
+		null is still an empty string.
+	*/
+	private function trimdots($text){
+		
+		return trim((string)$text, ". ");
+	}
+
+	/*
+		Turn a Qwant thumbnail-proxy URL into a direct Bing thumbnail URL.
+		
+		Qwant serves thumbnails as
+			https://sN.qwant.com/thumbr/.../th.jpg?u=<encoded bing url>&...
+		where the wrapped URL identifies the image either through an "id"
+		query parameter or through a /th/id/<id> path. The result is
+			https://<bing host>/th?id=<id>
+		
+		Anything that does not match this shape (other hosts, a missing or
+		malformed "u", no recoverable id, non-string input) is returned
+		unchanged so the caller can still hand it to the image proxy.
+	*/
+	private function unshitimage($url){
+		
+		if(!is_string($url)){
+			
+			// eg. a missing thumbnail; nothing to rewrite
+			return $url;
+		}
+		
+		$proxy_url = parse_url($url);
+		
+		if(
+			!isset($proxy_url["host"]) ||
+			!isset($proxy_url["query"]) ||
+			!preg_match(
+				'/s[0-9]+\.qwant\.com$/',
+				$proxy_url["host"]
+			)
+		){
+			
+			// not a Qwant proxy URL, cant do anything
+			return $url;
+		}
+		
+		parse_str($proxy_url["query"], $proxy_query);
+		
+		if(
+			!isset($proxy_query["u"]) ||
+			!is_string($proxy_query["u"])
+		){
+			
+			// no wrapped URL (or u[]=... garbage), give up
+			return $url;
+		}
+		
+		// parse bing URL
+		$bing = parse_url($proxy_query["u"]);
+		
+		if(!isset($bing["host"])){
+			
+			// malformed
+			return $url;
+		}
+		
+		$id = null;
+		
+		if(isset($bing["query"])){
+			
+			parse_str($bing["query"], $bing_query);
+			
+			if(isset($bing_query["id"])){
+				
+				$id = $bing_query["id"];
+			}
+		}
+		
+		if($id === null){
+			
+			// no ?id= parameter, look for a /th/id/<id> path instead
+			$parts =
+				explode(
+					"/th/id/",
+					isset($bing["path"]) ? $bing["path"] : "",
+					2
+				);
+			
+			if(count($parts) !== 2){
+				
+				// malformed
+				return $url;
+			}
+			
+			$id = $parts[1];
+		}
+		
+		if(is_array($id)){
+			
+			// id[]=... ; let proxy.php deal with it
+			return $url;
+		}
+		
+		return "https://" . $bing["host"] . "/th?id=" . rawurlencode($id);
+	}
+}
diff --git a/scraper/sc.php b/scraper/sc.php
new file mode 100644
index 0000000..7083c42
--- /dev/null
+++ b/scraper/sc.php
@@ -0,0 +1,512 @@
+<?php
+
+class sc{
+
+	// Collaborators, declared explicitly because creating properties
+	// dynamically is deprecated as of PHP 8.2. They stay public, which
+	// is the visibility dynamic properties had.
+	public $backend;  // proxy selection + next-page token storage
+	public $fuckhtml; // HTML parser used to find script tags
+	
+	/*
+		Load the shared libraries and create this scraper's helpers.
+		
+		include_once is used so that a library already loaded by another
+		scraper in the same request is not declared a second time.
+	*/
+	public function __construct(){
+		
+		include_once "lib/backend.php";
+		$this->backend = new backend("sc");
+		
+		include_once "lib/fuckhtml.php";
+		$this->fuckhtml = new fuckhtml();
+	}
+
+	/*
+		Describe the filters this scraper offers. The same single "type"
+		filter is returned whatever $page is.
+	*/
+	public function getfilters($page){
+		
+		$types = [
+			"any" => "Any type",
+			"track" => "Tracks",
+			"author" => "People",
+			"album" => "Albums",
+			"playlist" => "Playlists",
+			"goplus" => "Go+ Tracks"
+		];
+		
+		$filters = [];
+		$filters["type"] = [
+			"display" => "Type",
+			"option" => $types
+		];
+		
+		return $filters;
+	}
+
+	/*
+		Perform a GET request through the given proxy and return the body.
+		
+		$get is appended to $url as a query string when it is not empty.
+		$web_req selects the header set: false sends the headers of an
+		API call made from soundcloud.com, anything else sends the
+		headers of a top-level page navigation.
+		
+		Throws Exception carrying the cURL error message on failure.
+	*/
+	private function get($proxy, $url, $get = [], $web_req = false){
+		
+		if($get !== []){
+			
+			$url .= "?" . http_build_query($get);
+		}
+		
+		if($web_req !== false){
+			
+			// page navigation
+			$headers = [
+				"User-Agent: " . config::USER_AGENT,
+				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+				"Accept-Language: en-US,en;q=0.5",
+				"Accept-Encoding: gzip",
+				"DNT: 1",
+				"Connection: keep-alive",
+				"Upgrade-Insecure-Requests: 1",
+				"Sec-Fetch-Dest: document",
+				"Sec-Fetch-Mode: navigate",
+				"Sec-Fetch-Site: cross-site",
+				"Priority: u=1",
+				"TE: trailers"
+			];
+		}else{
+			
+			// API call issued from soundcloud.com
+			$headers = [
+				"User-Agent: " . config::USER_AGENT,
+				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+				"Accept-Language: en-US,en;q=0.5",
+				"Accept-Encoding: gzip",
+				"Referer: https://soundcloud.com/",
+				"Origin: https://soundcloud.com",
+				"DNT: 1",
+				"Connection: keep-alive",
+				"Sec-Fetch-Dest: empty",
+				"Sec-Fetch-Mode: cors",
+				"Sec-Fetch-Site: same-site",
+				"Priority: u=1"
+			];
+		}
+		
+		$handle = curl_init();
+		
+		curl_setopt_array(
+			$handle,
+			[
+				CURLOPT_URL => $url,
+				CURLOPT_ENCODING => "", // default encoding
+				CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0, // use http2
+				CURLOPT_HTTPHEADER => $headers,
+				CURLOPT_RETURNTRANSFER => true,
+				CURLOPT_SSL_VERIFYHOST => 2,
+				CURLOPT_SSL_VERIFYPEER => true,
+				CURLOPT_CONNECTTIMEOUT => 30,
+				CURLOPT_TIMEOUT => 30
+			]
+		);
+		
+		$this->backend->assign_proxy($handle, $proxy);
+		
+		$body = curl_exec($handle);
+		
+		if(curl_errno($handle)){
+			
+			throw new Exception(curl_error($handle));
+		}
+		
+		curl_close($handle);
+		return $body;
+	}
+
+	/*
+		Music search on SoundCloud's api-v2.
+		
+		$get carries either "npt" (continuation token) or a fresh search
+		("s" = search term, "type" = one of the getfilters() types).
+		$last_attempt is set on the internal retry that follows an
+		undecodable response; callers leave it at false.
+		
+		Returns songs, playlists and authors (the album, podcast and user
+		lists are never filled here) plus an "npt" token when the API
+		reports a next page.
+		
+		Throws Exception on an empty search term, an unknown type, a
+		failed request, or when the response is still undecodable after
+		the token has been refreshed once.
+	*/
+	public function music($get, $last_attempt = false){
+		
+		if($get["npt"]){
+			
+			[$params, $proxy] = $this->backend->get($get["npt"], "music");
+			$params = json_decode($params, true);
+			
+			$url = $params["url"];
+			unset($params["url"]);
+			
+			// The stored query already carries the client_id the previous
+			// page used. Only swap it on the retry (it is presumed stale
+			// then) or when it is missing.
+			if(
+				$last_attempt === true ||
+				!isset($params["client_id"])
+			){
+				
+				$params["client_id"] = $this->get_token($proxy);
+			}
+			
+			// needed below to build stream URLs
+			$token = $params["client_id"];
+			
+		}else{
+			
+			$search = $get["s"];
+			if(strlen($search) === 0){
+				
+				throw new Exception("Search term is empty!");
+			}
+			
+			$type = $get["type"];
+			$proxy = $this->backend->get_ip();
+			$token = $this->get_token($proxy);
+			
+			// type => [api-v2 path, parameters specific to that search]
+			//
+			// reference requests made by the website:
+			//   normal:    /search?...&facet=model
+			//   tracks:    /search/tracks?...&facet=genre
+			//   go+:       /search/tracks?...&filter.content_tier=SUB_HIGH_TIER&facet=genre
+			//   users:     /search/users?...&facet=place
+			//   albums:    /search/albums?...&facet=genre
+			//   playlists: /search/playlists_without_albums?...&facet=genre
+			//
+			// NOTE(review): "track" sends facet_genre= rather than
+			// facet=genre like the reference request; kept as found,
+			// confirm which one the API expects.
+			$endpoints = [
+				"any" => ["search", ["facet" => "model"]],
+				"track" => ["search/tracks", ["facet_genre" => ""]],
+				"author" => ["search/users", ["facet" => "place"]],
+				"album" => ["search/albums", ["facet" => "genre"]],
+				"playlist" => ["search/playlists_without_albums", ["facet" => "genre"]],
+				"goplus" => [
+					"search/tracks",
+					[
+						"filter.content_tier" => "SUB_HIGH_TIER",
+						"facet" => "genre"
+					]
+				]
+			];
+			
+			if(
+				!is_string($type) ||
+				!isset($endpoints[$type])
+			){
+				
+				// previously fell through with $url/$params undefined
+				throw new Exception("Unknown search type");
+			}
+			
+			[$path, $specific] = $endpoints[$type];
+			
+			$url = "https://api-v2.soundcloud.com/" . $path;
+			
+			// array union keeps the parameter order of the website's own
+			// requests: query, type-specific, then the common tail
+			$params =
+				[
+					"q" => $search,
+					"variant_ids" => ""
+				] +
+				$specific +
+				[
+					"client_id" => $token,
+					"limit" => 20,
+					"offset" => 0,
+					"linked_partitioning" => 1,
+					"app_version" => 1713542117,
+					"app_locale" => "en"
+				];
+		}
+		
+		try{
+			
+			$json = $this->get($proxy, $url, $params);
+			
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to fetch JSON");
+		}
+		
+		$json = json_decode($json, true);
+		
+		if($json === null){
+			
+			if($last_attempt === true){
+				
+				throw new Exception("Fetched an invalid token (please report!!)");
+			}
+			
+			// token might've expired. get_token() answers from the APCu
+			// cache, so the cached value has to go first or the retry
+			// would reuse the very same token.
+			apcu_delete("sc_token");
+			return $this->music($get, true);
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"song" => [],
+			"playlist" => [],
+			"album" => [],
+			"podcast" => [],
+			"author" => [],
+			"user" => []
+		];
+		
+		/*
+			Get next page
+		*/
+		if(isset($json["next_href"])){
+			
+			if(isset($json["query_urn"])){
+				
+				$params["query_urn"] = $json["query_urn"];
+			}
+			
+			$params["offset"] = $params["offset"] + 20;
+			$params["url"] = $url; // removed again when the token is read
+			
+			$out["npt"] =
+				$this->backend->store(
+					json_encode($params),
+					"music",
+					$proxy
+				);
+		}
+		
+		/*
+			Scrape items
+		*/
+		$collection =
+			(isset($json["collection"]) && is_array($json["collection"])) ?
+			$json["collection"] : [];
+		
+		foreach($collection as $item){
+			
+			switch($item["kind"]){
+				
+				case "user":
+					// parse author (the description may be null)
+					$out["author"][] = [
+						"title" => $item["username"],
+						"followers" => $item["followers_count"],
+						"description" =>
+							trim(
+								$item["track_count"] . " songs. " .
+								$this->limitstrlen((string)$item["description"])
+							),
+						"thumb" => [
+							"url" => $item["avatar_url"],
+							"ratio" => "1:1"
+						],
+						"url" => $item["permalink_url"]
+					];
+					break;
+				
+				case "playlist":
+					// parse playlist: the description lists the titles of
+					// the tracks that come with one, the count covers all
+					$description = [];
+					$count = 0;
+					
+					foreach($item["tracks"] as $song){
+						
+						$count++;
+						
+						if(!isset($song["title"])){
+							
+							continue;
+						}
+						
+						$description[] = $song["title"];
+					}
+					
+					if(count($description) !== 0){
+						
+						$description = trim($count . " songs. " . implode(", ", $description));
+					}else{
+						
+						$description = "";
+					}
+					
+					// playlist artwork, else the first track's artwork
+					if(!empty($item["artwork_url"])){
+						
+						$thumb = [
+							"ratio" => "1:1",
+							"url" => $item["artwork_url"]
+						];
+						
+					}elseif(!empty($item["tracks"][0]["artwork_url"])){
+						
+						$thumb = [
+							"ratio" => "1:1",
+							"url" => $item["tracks"][0]["artwork_url"]
+						];
+					}else{
+						
+						$thumb = [
+							"ratio" => null,
+							"url" => null
+						];
+					}
+					
+					$out["playlist"][] = [
+						"title" => $item["title"],
+						"description" => $this->limitstrlen($description),
+						"author" => [
+							"name" => $item["user"]["username"],
+							"url" => $item["user"]["permalink_url"],
+							"avatar" => $item["user"]["avatar_url"]
+						],
+						"thumb" => $thumb,
+						"date" => strtotime($item["created_at"]),
+						"duration" => $item["duration"] / 1000,
+						"url" => $item["permalink_url"]
+					];
+					break;
+				
+				case "track":
+					// tracks whose monetization model mentions a TIER get
+					// no stream URL
+					$model =
+						isset($item["monetization_model"]) ?
+						(string)$item["monetization_model"] : "";
+					
+					if(stripos($model, "TIER") === false){
+						
+						$stream = [
+							"endpoint" => "sc",
+							"url" =>
+								$item["media"]["transcodings"][0]["url"] .
+								"?client_id=" . $token .
+								"&track_authorization=" .
+								$item["track_authorization"]
+						];
+					}else{
+						
+						$stream = [
+							"endpoint" => null,
+							"url" => null
+						];
+					}
+					
+					// parse track
+					$out["song"][] = [
+						"title" => $item["title"],
+						"description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]),
+						"url" => $item["permalink_url"],
+						"views" => $item["playback_count"],
+						"author" => [
+							"name" => $item["user"]["username"],
+							"url" => $item["user"]["permalink_url"],
+							"avatar" => $item["user"]["avatar_url"]
+						],
+						"thumb" => [
+							"ratio" => "1:1",
+							"url" => $item["artwork_url"]
+						],
+						"date" => strtotime($item["created_at"]),
+						"duration" => (int)$item["full_duration"] / 1000,
+						"stream" => $stream
+					];
+					break;
+			}
+		}
+		
+		return $out;
+	}
+
+	/*
+		Return a SoundCloud client_id.
+		
+		The value cached in APCu under "sc_token" is returned when there
+		is one. Otherwise the front page is fetched and every script
+		served from sndcdn.com is downloaded in turn until one contains a
+		client_id=... assignment; that value is cached and returned.
+		
+		Throws Exception when the front page or one of the scripts cannot
+		be fetched, or when no script contains a client_id.
+	*/
+	public function get_token($proxy){
+		
+		$cached = apcu_fetch("sc_token");
+		
+		if($cached !== false){
+			
+			return $cached;
+		}
+		
+		// search through all javascript components on the main page
+		try{
+			$html =
+				$this->get(
+					$proxy,
+					"https://soundcloud.com",
+					[],
+					true
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to fetch front page");
+		}
+		
+		$this->fuckhtml->load($html);
+		$scripts = $this->fuckhtml->getElementsByTagName("script");
+		
+		foreach($scripts as $script){
+			
+			$src =
+				isset($script["attributes"]["src"]) ?
+				$script["attributes"]["src"] : null;
+			
+			// inline scripts and scripts hosted elsewhere are skipped
+			if(
+				$src === null ||
+				strpos($src, "sndcdn.com") === false
+			){
+				
+				continue;
+			}
+			
+			try{
+				$js = $this->get($proxy, $src, []);
+			}catch(Exception $error){
+				
+				throw new Exception("Failed to fetch search token");
+			}
+			
+			preg_match(
+				'/client_id=([^"]+)/',
+				$js,
+				$match
+			);
+			
+			if(isset($match[1])){
+				
+				apcu_store("sc_token", $match[1]);
+				return $match[1];
+			}
+		}
+		
+		throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
+	}
+
+	/*
+		Shorten $text to roughly its first 300 bytes, breaking between
+		words. Line breaks are flattened to spaces first.
+		
+		null (eg. a user without a description) is treated as an empty
+		string instead of being handed to str_replace().
+	*/
+	private function limitstrlen($text){
+		
+		$flat =
+			str_replace(
+				["\n\r", "\r\n", "\n", "\r"],
+				" ",
+				(string)$text
+			);
+		
+		// wordwrap inserts "\n" at each wrap point; keep the first line
+		return explode("\n", wordwrap($flat, 300, "\n"), 2)[0];
+	}
+}
diff --git a/scraper/sepiasearch.php b/scraper/sepiasearch.php
new file mode 100644
index 0000000..c59e12f
--- /dev/null
+++ b/scraper/sepiasearch.php
@@ -0,0 +1,541 @@
+<?php
+
+class sepiasearch{
+
+	// Declared explicitly because creating properties dynamically is
+	// deprecated as of PHP 8.2. It stays public, which is the visibility
+	// a dynamic property had.
+	public $backend; // proxy selection + next-page token storage
+	
+	/*
+		Load the shared backend library and create this scraper's backend.
+		
+		include_once is used so that a library already loaded by another
+		scraper in the same request is not declared a second time.
+	*/
+	public function __construct(){
+		
+		include_once "lib/backend.php";
+		$this->backend = new backend("sepiasearch");
+	}
+
+	/*
+		Describe the filters this scraper offers. The same set is
+		returned whatever $page is. Each filter is either a map of
+		value => label or the special "_DATE" option.
+	*/
+	public function getfilters($page){
+		
+		$nsfw = [
+			"yes" => "Yes", // &sensitiveContent=both
+			"no" => "No" // &sensitiveContent=false
+		];
+		
+		// &language=
+		$languages = [
+			"any" => "Any language",
+			"en" => "English",
+			"fr" => "Français",
+			"ar" => "العربية",
+			"ca" => "Català",
+			"cs" => "Čeština",
+			"de" => "Deutsch",
+			"el" => "ελληνικά",
+			"eo" => "Esperanto",
+			"es" => "Español",
+			"eu" => "Euskara",
+			"fa" => "فارسی",
+			"fi" => "Suomi",
+			"gd" => "Gàidhlig",
+			"gl" => "Galego",
+			"hr" => "Hrvatski",
+			"hu" => "Magyar",
+			"is" => "Íslenska",
+			"it" => "Italiano",
+			"ja" => "日本語",
+			"kab" => "Taqbaylit",
+			"nl" => "Nederlands",
+			"no" => "Norsk",
+			"oc" => "Occitan",
+			"pl" => "Polski",
+			"pt" => "Português (Brasil)",
+			"pt-PT" => "Português (Portugal)",
+			"ru" => "Pусский",
+			"sk" => "Slovenčina",
+			"sq" => "Shqip",
+			"sv" => "Svenska",
+			"th" => "ไทย",
+			"tok" => "Toki Pona",
+			"tr" => "Türkçe",
+			"uk" => "украї́нська мо́ва",
+			"vi" => "Tiếng Việt",
+			"zh-Hans" => "简体中文(中国)",
+			"zh-Hant" => "繁體中文(台灣)"
+		];
+		
+		// selects the search endpoint, handled by the scraper itself
+		$types = [
+			"videos" => "Videos",
+			"playlists" => "Playlists",
+			"channels" => "Channels"
+		];
+		
+		$sorts = [
+			"best" => "Best match", // no filter
+			"-publishedAt" => "Newest", // sort=-publishedAt
+			"publishedAt" => "Oldest" // sort=publishedAt
+		];
+		
+		$durations = [
+			"any" => "Any duration",
+			"short" => "Short (0-4mins)", // &durationRange=short
+			"medium" => "Medium (4-10 mins)",
+			"long" => "Long (10+ mins)"
+		];
+		
+		// &categoryOneOf[]=
+		$categories = [
+			"any" => "Any category",
+			"1" => "Music",
+			"2" => "Films",
+			"3" => "Vehicles",
+			"4" => "Art",
+			"5" => "Sports",
+			"6" => "Travels",
+			"7" => "Gaming",
+			"8" => "People",
+			"9" => "Comedy",
+			"10" => "Entertainment",
+			"11" => "News & Politics",
+			"12" => "How To",
+			"13" => "Education",
+			"14" => "Activism",
+			"15" => "Science & Technology",
+			"16" => "Animals",
+			"17" => "Kids",
+			"18" => "Food"
+		];
+		
+		$display = [
+			"any" => "Everything",
+			"true" => "Live videos", // &isLive=true
+			"false" => "VODs" // &isLive=false
+		];
+		
+		// &license=
+		$licenses = [
+			"any" => "Any license",
+			"1" => "Attribution",
+			"2" => "Attribution - Share Alike",
+			"3" => "Attribution - No Derivatives",
+			"4" => "Attribution - Non Commercial",
+			"5" => "Attribution - Non Commercial - Share Alike",
+			"6" => "Attribution - Non Commercial - No Derivatives",
+			"7" => "Public Domain Dedication"
+		];
+		
+		return [
+			"nsfw" => ["display" => "NSFW", "option" => $nsfw],
+			"language" => ["display" => "Language", "option" => $languages],
+			"type" => ["display" => "Result type", "option" => $types],
+			"sort" => ["display" => "Sort by", "option" => $sorts],
+			// &startDate=2025-07-26T04:00:00.000Z
+			"newer" => ["display" => "Newer than", "option" => "_DATE"],
+			"duration" => ["display" => "Duration", "option" => $durations],
+			"category" => ["display" => "Category", "option" => $categories],
+			"display" => ["display" => "Display", "option" => $display],
+			"license" => ["display" => "License", "option" => $licenses]
+		];
+	}
+
+	/*
+		Perform a GET request through the given proxy and return the body.
+		
+		$get is appended to $url as a query string when it is not empty.
+		Throws Exception carrying the cURL error message on failure.
+		
+		No Accept-Encoding header is sent by hand: CURLOPT_ENCODING ""
+		makes cURL advertise exactly the encodings it was built with and
+		decode the reply. The previous hard-coded "gzip, deflate, br, zstd"
+		overrode that list, so a br or zstd reply could not be decoded on
+		builds lacking those decoders.
+	*/
+	private function get($proxy, $url, $get = []){
+		
+		$curlproc = curl_init();
+		
+		if($get !== []){
+			$get = http_build_query($get);
+			$url .= "?" . $get;
+		}
+		
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // every encoding cURL supports
+		
+		curl_setopt(
+			$curlproc,
+			CURLOPT_HTTPHEADER,
+			["User-Agent: " . config::USER_AGENT,
+			"Accept: application/json, text/plain, */*",
+			"Accept-Language: en-US,en;q=0.5",
+			"DNT: 1",
+			"Sec-GPC: 1",
+			"Connection: keep-alive",
+			"Referer: https://sepiasearch.org/search",
+			"Sec-Fetch-Dest: empty",
+			"Sec-Fetch-Mode: cors",
+			"Sec-Fetch-Site: same-origin",
+			"Priority: u=0",
+			"TE: trailers"]
+		);
+		
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+		
+		$this->backend->assign_proxy($curlproc, $proxy);
+		
+		$data = curl_exec($curlproc);
+		
+		if(curl_errno($curlproc)){
+			
+			throw new Exception(curl_error($curlproc));
+		}
+		
+		curl_close($curlproc);
+		return $data;
+	}
+
+	/*
+		Search Sepia Search for videos, playlists or channels.
+		
+		$get carries either "npt" (continuation token) or a fresh search:
+		"s" is the search term, "type" selects the endpoint, and the
+		remaining getfilters() values are only applied to video searches.
+		
+		The result type is stored inside the continuation token and is
+		used, rather than $get["type"], both to parse the response and to
+		build the next token, so a continuation request is always parsed
+		with the parser matching the endpoint that was actually queried.
+		
+		Throws Exception on an unknown type, a failed request,
+		undecodable JSON, API-reported errors or a missing "data" list.
+	*/
+	public function video($get){
+		
+		if($get["npt"]){
+			
+			[$stored, $proxy] =
+				$this->backend
+				->get(
+					$get["npt"],
+					"videos"
+				);
+			
+			$stored = json_decode($stored, true);
+			$type = $stored["type"];
+			$npt = $stored["npt"];
+		}else{
+			
+			$proxy = $this->backend->get_ip();
+			$type = $get["type"];
+			
+			$npt = [
+				"search" => $get["s"],
+				"start" => 0,
+				"count" => 20
+			];
+			
+			if($type == "videos"){
+				
+				//
+				// Parse video filters
+				//
+				switch($get["nsfw"]){
+					
+					case "yes": $npt["nsfw"] = "both"; break;
+					case "no": $npt["nsfw"] = "false"; break;
+				}
+				
+				$npt["boostLanguages[]"] = "en";
+				if($get["language"] != "any"){
+					
+					$npt["languageOneOf[]"] = $get["language"];
+				}
+				
+				if($get["sort"] != "best"){
+					
+					$npt["sort"] = $get["sort"];
+				}
+				
+				if($get["newer"] !== false){
+					
+					// unix timestamp => 2025-07-26T04:00:00.000Z
+					$date = new DateTime("@{$get["newer"]}");
+					$date->setTimezone(new DateTimeZone("UTC"));
+					
+					$npt["startDate"] = $date->format("Y-m-d\TH:i:s.000\Z");
+				}
+				
+				switch($get["duration"]){
+					
+					case "short":
+						$npt["durationMax"] = 240;
+						break;
+					
+					case "medium":
+						$npt["durationMin"] = 240;
+						$npt["durationMax"] = 600;
+						break;
+					
+					case "long":
+						$npt["durationMin"] = 600;
+						break;
+				}
+				
+				if($get["category"] != "any"){
+					
+					$npt["categoryOneOf[]"] = $get["category"];
+				}
+				
+				if($get["display"] != "any"){
+					
+					$npt["isLive"] = $get["display"];
+				}
+				
+				if($get["license"] != "any"){
+					
+					// the API spells it "licence"
+					$npt["licenceOneOf[]"] = $get["license"];
+				}
+			}
+		}
+		
+		switch($type){
+			
+			case "videos":
+				$url = "https://sepiasearch.org/api/v1/search/videos";
+				break;
+			
+			case "channels":
+				$url = "https://sepiasearch.org/api/v1/search/video-channels";
+				break;
+			
+			case "playlists":
+				$url = "https://sepiasearch.org/api/v1/search/video-playlists";
+				break;
+			
+			default:
+				// previously fell through with $url undefined
+				throw new Exception("Unknown result type");
+		}
+		
+		try{
+			
+			$json =
+				$this->get(
+					$proxy,
+					$url,
+					$npt
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to fetch JSON");
+		}
+		
+		$json = json_decode($json, true);
+		
+		if($json === null){
+			
+			throw new Exception("Failed to parse JSON");
+		}
+		
+		if(isset($json["errors"])){
+			
+			$msg = [];
+			foreach($json["errors"] as $error){
+				
+				if(isset($error["msg"])){
+					
+					$msg[] = $error["msg"];
+				}
+			}
+			
+			throw new Exception("Sepia Search returned error(s): " . implode(", ", $msg));
+		}
+		
+		if(!isset($json["data"])){
+			
+			throw new Exception("Sepia Search did not return a data object");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"video" => [],
+			"author" => [],
+			"livestream" => [],
+			"playlist" => [],
+			"reel" => []
+		];
+		
+		// parse according to the endpoint that was actually queried
+		switch($type){
+			
+			case "videos":
+				foreach($json["data"] as $video){
+					
+					$avatar =
+						$this->last_avatar(
+							isset($video["account"]["avatars"]) ?
+							$video["account"]["avatars"] : null
+						);
+					
+					if($video["thumbnailUrl"] === null){
+						
+						$thumb = [
+							"ratio" => null,
+							"url" => null
+						];
+					}else{
+						
+						$thumb = [
+							"ratio" => "16:9",
+							"url" => $video["thumbnailUrl"]
+						];
+					}
+					
+					// live videos go to their own list
+					$append = $video["isLive"] ? "livestream" : "video";
+					
+					$out[$append][] = [
+						"title" => $video["name"],
+						"description" =>
+							$this->limitstrlen(
+								$this->titledots(
+									$video["description"]
+								)
+							),
+						"author" => [
+							"name" => $video["account"]["displayName"] . " ({$video["account"]["name"]})",
+							"url" => $video["account"]["url"],
+							"avatar" => $avatar
+						],
+						"date" => strtotime($video["publishedAt"]),
+						"duration" => $video["isLive"] ? "_LIVE" : $video["duration"],
+						"views" => $video["views"],
+						"thumb" => $thumb,
+						"url" => $video["url"]
+					];
+				}
+				break;
+			
+			case "playlists":
+				foreach($json["data"] as $playlist){
+					
+					$avatar =
+						$this->last_avatar(
+							isset($playlist["ownerAccount"]["avatars"]) ?
+							$playlist["ownerAccount"]["avatars"] : null
+						);
+					
+					if($playlist["thumbnailUrl"] === null){
+						
+						$thumb = [
+							"ratio" => null,
+							"url" => null
+						];
+					}else{
+						
+						$thumb = [
+							"ratio" => "16:9",
+							"url" => $playlist["thumbnailUrl"]
+						];
+					}
+					
+					$out["playlist"][] = [
+						"title" => $playlist["displayName"],
+						"description" =>
+							$this->limitstrlen(
+								$this->titledots(
+									$playlist["description"]
+								)
+							),
+						"author" => [
+							"name" => $playlist["ownerAccount"]["displayName"] . " ({$playlist["ownerAccount"]["name"]})",
+							"url" => $playlist["ownerAccount"]["url"],
+							"avatar" => $avatar
+						],
+						"date" => strtotime($playlist["createdAt"]),
+						"duration" => $playlist["videosLength"],
+						"views" => null,
+						"thumb" => $thumb,
+						"url" => $playlist["url"]
+					];
+				}
+				break;
+			
+			case "channels":
+				foreach($json["data"] as $channel){
+					
+					$avatar =
+						$this->last_avatar(
+							isset($channel["avatars"]) ?
+							$channel["avatars"] : null
+						);
+					
+					if($avatar !== null){
+						
+						$thumb = [
+							"ratio" => "1:1",
+							"url" => $avatar
+						];
+					}else{
+						
+						$thumb = [
+							"ratio" => null,
+							"url" => null
+						];
+					}
+					
+					$out["author"][] = [
+						"title" => $channel["displayName"] . " ({$channel["name"]})",
+						"followers" => $channel["followersCount"],
+						"description" =>
+							$channel["videosCount"] . " videos. " .
+							$this->limitstrlen(
+								$this->titledots(
+									$channel["description"]
+								)
+							),
+						"thumb" => $thumb,
+						"url" => $channel["url"]
+					];
+				}
+				break;
+		}
+		
+		// get next page
+		if(
+			isset($json["total"]) &&
+			$json["total"] - 20 > $npt["start"]
+		){
+			
+			$npt["start"] += 20;
+			
+			$out["npt"] =
+				$this->backend
+				->store(
+					json_encode([
+						"type" => $type,
+						"npt" => $npt
+					]),
+					"videos",
+					$proxy
+				);
+		}
+		
+		return $out;
+	}
+	
+	/*
+		Return the "url" of the last entry of an avatar list, or null when
+		the list is missing, empty, or its last entry has no url.
+	*/
+	private function last_avatar($avatars){
+		
+		if(
+			!is_array($avatars) ||
+			count($avatars) === 0
+		){
+			
+			return null;
+		}
+		
+		$last = end($avatars);
+		
+		return isset($last["url"]) ? $last["url"] : null;
+	}
+
+	/*
+		Remove a trailing ellipsis ("..." or "…") and surrounding
+		whitespace, including non-breaking spaces, from $title.
+		
+		Whitespace is stripped as whole code points. The previous
+		byte-wise trim() listed the two bytes of a non-breaking space
+		(0xC2 0xA0) separately, so it also ate the first byte of a leading
+		"«" or the last byte of a trailing "à", leaving invalid UTF-8
+		behind. null is treated as an empty string.
+	*/
+	private function titledots($title){
+		
+		$title = (string)$title;
+		$tail = substr($title, -3);
+		
+		// "…" is three bytes long in UTF-8, just like "..."
+		if(
+			$tail === "..." ||
+			$tail === "…"
+		){
+			
+			$title = (string)substr($title, 0, -3);
+		}
+		
+		// space, newline, carriage return, tab, vertical tab, NUL, NBSP
+		$edge = '[ \n\r\t\x0B\x00\x{A0}]+';
+		
+		$clean =
+			preg_replace(
+				'/^' . $edge . '|' . $edge . '\z/u',
+				"",
+				$title
+			);
+		
+		if($clean === null){
+			
+			// input is not valid UTF-8: only strip the ASCII characters
+			return trim($title, " \n\r\t\x0B\x00");
+		}
+		
+		return $clean;
+	}
+
+	/*
+		Shorten $text to roughly its first 300 bytes, breaking between
+		words. Line breaks are flattened to spaces first.
+	*/
+	private function limitstrlen($text){
+		
+		$flat =
+			str_replace(
+				["\n\r", "\r\n", "\n", "\r"],
+				" ",
+				$text
+			);
+		
+		// wordwrap inserts "\n" at each wrap point
+		$wrapped = wordwrap($flat, 300, "\n");
+		
+		// only the part before the first wrap point is kept
+		$pieces = explode("\n", $wrapped, 2);
+		
+		return $pieces[0];
+	}
+}
diff --git a/scraper/solofield.php b/scraper/solofield.php
new file mode 100644
index 0000000..4fe10e4
--- /dev/null
+++ b/scraper/solofield.php
@@ -0,0 +1,668 @@
+<?php
+
+class solofield{
+
+ // Declared explicitly: creating these as dynamic properties is
+ // deprecated since PHP 8.2. Kept public, which is the visibility the
+ // dynamic properties had.
+ public $backend;
+ public $fuckhtml;
+
+ /**
+  * Load the shared libraries and create the backend (keyed "solofield")
+  * and the HTML parser used by every method of this scraper.
+  */
+ public function __construct(){
+
+     // include_once: do not re-run the library file when another
+     // scraper already loaded it during the same request
+     include_once "lib/backend.php";
+     $this->backend = new backend("solofield");
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters this scraper exposes. The same single NSFW
+  * toggle is returned for every page type.
+  *
+  * @param string $page Page type (unused).
+  * @return array Filter definitions keyed by filter name.
+  */
+ public function getfilters($page){
+
+     $nsfw = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes",
+             "no" => "No"
+         ]
+     ];
+
+     return ["nsfw" => $nsfw];
+ }
+
+ /**
+  * Fetch a solofield.net page with browser-like headers.
+  *
+  * @param mixed  $proxy Proxy descriptor handed to backend::assign_proxy().
+  * @param string $url   Target URL.
+  * @param array  $get   Query parameters appended to $url when not empty.
+  * @return string|bool  Whatever curl_exec() returned.
+  * @throws Exception    With the cURL error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get = []){
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $headers = [
+         "User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Referer: https://solofield.net",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Cookie: cross-site-cookie=name; lno=35842050",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: same-origin",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     $handle = curl_init();
+
+     curl_setopt_array(
+         $handle,
+         [
+             CURLOPT_URL => $url,
+             CURLOPT_ENCODING => "", // default encoding
+             CURLOPT_HTTPHEADER => $headers,
+             CURLOPT_RETURNTRANSFER => true,
+             CURLOPT_SSL_VERIFYHOST => 2,
+             CURLOPT_SSL_VERIFYPEER => true,
+             CURLOPT_CONNECTTIMEOUT => 30,
+             CURLOPT_TIMEOUT => 30
+         ]
+     );
+
+     $this->backend->assign_proxy($handle, $proxy);
+
+     $body = curl_exec($handle);
+
+     if(curl_errno($handle)){
+
+         throw new Exception(curl_error($handle));
+     }
+
+     curl_close($handle);
+     return $body;
+ }
+
+ /**
+  * Run a web search on solofield.net and convert the result list.
+  *
+  * @param array $get Request parameters: "s" (query), "nsfw" ("yes"/"no")
+  *                   and "npt" (next-page token, falsy for the first page).
+  * @return array Result structure with "web" entries and an optional "npt".
+  * @throws Exception When the page cannot be fetched or parsed.
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // the stored token is the raw query string of the next page
+         [$query, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $url = "https://solofield.net/search?" . $query;
+         $params = [];
+     }else{
+
+         $proxy = $this->backend->get_ip();
+
+         $url = "https://solofield.net/search";
+         $params = [
+             "q" => $get["s"],
+             "ie" => "UTF-8",
+             "oe" => "UTF-8",
+             "hl" => "ja", // changing this doesnt do anything
+             "lr" => "lang_ja", // same here
+             //"ls" => "", // ??
+             "f" => ($get["nsfw"] == "yes" ? "off" : "on")
+         ];
+     }
+
+     try{
+
+         $html = $this->get($proxy, $url, $params);
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // check for errors and load the result div
+     if($this->error_and_load($html)){
+
+         return $out;
+     }
+
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "g0",
+             "li"
+         );
+
+     foreach($items as $item){
+
+         $this->fuckhtml->load($item);
+
+         $title_tag =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "r",
+                 "h3"
+             );
+
+         if(count($title_tag) === 0){
+
+             continue;
+         }
+
+         // the result link lives inside the title heading
+         $this->fuckhtml->load($title_tag[0]);
+
+         $anchors =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "a"
+             );
+
+         if(
+             count($anchors) === 0 ||
+             !isset($anchors[0]["attributes"]["href"])
+         ){
+
+             // a result without a link is useless, skip it
+             continue;
+         }
+
+         $link =
+             $this->fuckhtml
+             ->getTextContent(
+                 $anchors[0]["attributes"]["href"]
+             );
+
+         // back to the whole item for thumbnail and description
+         $this->fuckhtml->load($item);
+
+         $thumb = [
+             "ratio" => null,
+             "url" => null
+         ];
+
+         $shots =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "webshot",
+                 "img"
+             );
+
+         if(
+             count($shots) !== 0 &&
+             isset($shots[0]["attributes"]["src"])
+         ){
+
+             $uri =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $shots[0]["attributes"]["src"]
+                 );
+
+             // "now_printing" marks the placeholder for a missing screenshot
+             if(stripos($uri, "now_printing") === false){
+
+                 $thumb = [
+                     "ratio" => "1:1",
+                     "url" => "https://solofield.net" . $uri
+                 ];
+             }
+         }
+
+         $snippets =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "s",
+                 "div"
+             );
+
+         $description =
+             count($snippets) === 0 ?
+             null :
+             $this->fuckhtml
+             ->getTextContent(
+                 $snippets[0]
+             );
+
+         $out["web"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $title_tag[0]
+                 ),
+             "description" => $description,
+             "url" => $link,
+             "date" => null,
+             "type" => "web",
+             "thumb" => $thumb,
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     // get next page
+     $this->get_npt($html, $proxy, $out, "web");
+
+     return $out;
+ }
+
+
+ /**
+  * Run an image search on solofield.net (single page, no pagination).
+  *
+  * @param array $get Request parameters: "s" (query) and "nsfw" ("yes"/"no").
+  * @return array Result structure with "image" entries; "npt" is always null.
+  * @throws Exception When the page cannot be fetched or parsed.
+  */
+ public function image($get){
+
+     // no pagination
+     try{
+
+         $html =
+             $this->get(
+                 $this->backend->get_ip(),
+                 "https://solofield.net/isearch",
+                 [
+                     "q" => $get["s"],
+                     "ie" => "UTF-8",
+                     "oe" => "UTF-8",
+                     "hl" => "ja", // changing this doesnt do anything
+                     //"lr" => "lang_ja", // same here
+                     "ls" => "", // ??
+                     "f" => ($get["nsfw"] == "yes" ? "off" : "on")
+                 ]
+             );
+     }catch(Exception $error){
+
+         // same message as web() and video()
+         throw new Exception("Failed to fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     // check for errors and load the result div
+     if($this->error_and_load($html)){
+
+         return $out;
+     }
+
+     $images =
+         $this->fuckhtml
+         ->getElementsByTagName(
+             "li"
+         );
+
+     foreach($images as $image){
+
+         $this->fuckhtml->load($image);
+
+         $img =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "img"
+             );
+
+         $anchors =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "a"
+             );
+
+         if(
+             count($img) === 0 ||
+             !isset($img[0]["attributes"]["src"]) ||
+             count($anchors) === 0 ||
+             !isset($anchors[0]["attributes"]["href"])
+         ){
+
+             // ?? invalid: nothing to show or nothing to link to
+             continue;
+         }
+
+         $img = $img[0];
+
+         // the item text is read as "<width>x<height>"; pad so a missing
+         // half becomes 0 instead of an undefined offset
+         $size =
+             array_pad(
+                 explode(
+                     "x",
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $image
+                     ),
+                     2
+                 ),
+                 2,
+                 "0"
+             );
+
+         $out["image"][] = [
+             "title" => null,
+             "source" => [
+                 [
+                     "url" =>
+                         "https://solofield.net/" .
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $img["attributes"]["src"]
+                         ),
+                     "width" => (int)trim($size[0]),
+                     "height" => (int)trim($size[1])
+                 ]
+             ],
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $anchors[0]["attributes"]["href"]
+                 )
+         ];
+     }
+
+     return $out;
+ }
+
+
+ /**
+  * Run a video search on solofield.net and convert the result list.
+  *
+  * @param array $get Request parameters: "s" (query), "nsfw" ("yes"/"no")
+  *                   and "npt" (next-page token, falsy for the first page).
+  * @return array Result structure with "video" entries and an optional "npt".
+  * @throws Exception When the page cannot be fetched or parsed.
+  */
+ public function video($get){
+
+     if($get["npt"]){
+
+         // the stored token is the raw query string of the next page
+         [$query, $proxy] = $this->backend->get($get["npt"], "videos");
+
+         $url = "https://solofield.net/vsearch?" . $query;
+         $params = [];
+     }else{
+
+         $proxy = $this->backend->get_ip();
+
+         $url = "https://solofield.net/vsearch";
+         $params = [
+             "q" => $get["s"],
+             "ie" => "UTF-8",
+             "oe" => "UTF-8",
+             "hl" => "ja", // changing this doesnt do anything
+             //"lr" => "lang_ja", // same here
+             "ls" => "", // ??
+             "f" => ($get["nsfw"] == "yes" ? "off" : "on")
+         ];
+     }
+
+     try{
+
+         $html = $this->get($proxy, $url, $params);
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     // check for errors and load the result div
+     if($this->error_and_load($html)){
+
+         return $out;
+     }
+
+     $items =
+         $this->fuckhtml
+         ->getElementsByTagName(
+             "li"
+         );
+
+     foreach($items as $item){
+
+         $this->fuckhtml->load($item);
+
+         $as =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "a"
+             );
+
+         if(
+             count($as) === 0 ||
+             !isset($as[0]["attributes"]["href"])
+         ){
+
+             continue;
+         }
+
+         $thumb = [
+             "ratio" => null,
+             "url" => null
+         ];
+
+         $imgs =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "img"
+             );
+
+         if(
+             count($imgs) !== 0 &&
+             isset($imgs[0]["attributes"]["src"])
+         ){
+
+             // NOTE(review): src is passed through getTextContent() like
+             // every other attribute read in this class -- confirm it only
+             // decodes entities
+             $thumb = [
+                 "ratio" => "16:9",
+                 "url" =>
+                     "https://solofield.net/" .
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $imgs[0]["attributes"]["src"]
+                     )
+             ];
+         }
+
+         $date =
+             $this->fuckhtml
+             ->getElementsByAttributeValue(
+                 "style",
+                 "font-size: 10px;",
+                 "span"
+             );
+
+         if(count($date) !== 0){
+
+             $date =
+                 $this->unfuckdate(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $date[0]
+                     )
+                 );
+         }else{
+
+             $date = null;
+         }
+
+         $center_td =
+             $this->fuckhtml
+             ->getElementsByAttributeValue(
+                 "align",
+                 "center",
+                 "td"
+             );
+
+         if(count($center_td) === 2){
+
+             // extract the text first, then convert "h:m:s" to seconds
+             // (the calls used to be nested the other way round)
+             $duration =
+                 $this->hms2int(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $center_td[0]
+                     )
+                 );
+         }else{
+
+             $duration = null;
+         }
+
+         // the title is the text of the second link; fall back to the
+         // first one when the item only has a single link
+         $title_link = isset($as[1]) ? $as[1] : $as[0];
+
+         $out["video"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $title_link
+                 ),
+             "description" => null,
+             "author" => [
+                 "name" => null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "date" => $date,
+             "duration" => $duration,
+             "views" => null,
+             "thumb" => $thumb,
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $as[0]["attributes"]["href"]
+                 )
+         ];
+     }
+
+     // get next page
+     $this->get_npt($html, $proxy, $out, "videos");
+
+     return $out;
+ }
+
+
+ /**
+  * Look for a "Next" pagination link in the page and, when found, store
+  * its query string in the backend and write the token to $out["npt"].
+  *
+  * @param string $html  Full HTML of the result page.
+  * @param mixed  $proxy Proxy to associate with the stored token.
+  * @param array  $out   Result structure, modified in place.
+  * @param string $type  Storage bucket passed to backend::store().
+  * @return void
+  */
+ private function get_npt($html, $proxy, &$out, $type){
+
+     // get next page
+     $this->fuckhtml->load($html);
+
+     $pjs =
+         $this->fuckhtml
+         ->getElementById(
+             "pjs"
+         );
+
+     if(!$pjs){
+
+         // no pagination container on this page
+         return;
+     }
+
+     $alnk =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "alnk",
+             "span"
+         );
+
+     foreach($alnk as $lnk){
+
+         if(
+             stripos(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $lnk
+                 ),
+                 "Next"
+             ) === false
+         ){
+
+             continue;
+         }
+
+         $this->fuckhtml->load($lnk);
+
+         $anchors =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "a"
+             );
+
+         if(
+             count($anchors) === 0 ||
+             !isset($anchors[0]["attributes"]["href"])
+         ){
+
+             continue;
+         }
+
+         // NOTE(review): href is passed through getTextContent() like
+         // every other href in this class so "&amp;" does not end up in
+         // the stored query -- confirm it only decodes entities
+         $query =
+             parse_url(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $anchors[0]["attributes"]["href"]
+                 ),
+                 PHP_URL_QUERY
+             );
+
+         if(!is_string($query) || $query === ""){
+
+             continue;
+         }
+
+         $out["npt"] =
+             $this->backend->store(
+                 $query,
+                 $type,
+                 $proxy
+             );
+
+         // one token is enough; do not store one per "Next" link
+         break;
+     }
+ }
+
+ /**
+  * Validate a fetched page and point the parser at its result list.
+  *
+  * @param string $html Raw page HTML.
+  * @return bool  true when the page carries the "nosearch" marker,
+  *               false when the "list" div was found and loaded.
+  * @throws Exception On an empty response, or when neither marker exists.
+  */
+ private function error_and_load($html){
+
+     if(strlen($html) === 0){
+
+         throw new Exception("Solofield blocked the request IP");
+     }
+
+     $parser = $this->fuckhtml;
+     $parser->load($html);
+
+     $list = $parser->getElementById("list", "div");
+
+     if($list !== false){
+
+         // narrow the parser scope down to the result list
+         $parser->load($list);
+         return false;
+     }
+
+     if($parser->getElementById("nosearch", "div")){
+
+         return true;
+     }
+
+     throw new Exception("Failed to grep search list");
+ }
+
+ /**
+  * Convert a "<label>:<date>" string into a unix timestamp.
+  *
+  * Everything after the first ":" has each run of non-digits collapsed
+  * into "-", so e.g. "2023年1月15日" becomes "2023-1-15".
+  *
+  * @param string $date Labelled date text.
+  * @return int|null Timestamp, or null when no date could be parsed.
+  */
+ private function unfuckdate($date){
+
+     $parts = explode(":", (string)$date, 2);
+
+     if(count($parts) !== 2){
+
+         // no label separator, nothing to parse
+         return null;
+     }
+
+     // trim both ends: a leading "-" (from a space after the colon)
+     // would otherwise be read by strtotime() as a negative year
+     $normalized =
+         trim(
+             preg_replace(
+                 '/[^0-9]+/',
+                 "-",
+                 $parts[1]
+             ),
+             "-"
+         );
+
+     if($normalized === ""){
+
+         return null;
+     }
+
+     $timestamp = strtotime($normalized);
+
+     return $timestamp === false ? null : $timestamp;
+ }
+
+ /**
+  * Convert "h:m:s", "m:s" or "s" into a number of seconds.
+  *
+  * @param string $time Colon-separated duration.
+  * @return int Total seconds.
+  */
+ private function hms2int($time){
+
+     // read from the right: seconds, then minutes, then hours
+     $segments = array_reverse(explode(":", $time, 3));
+     $weights = [1, 60, 3600];
+
+     $total = 0;
+
+     foreach($segments as $position => $segment){
+
+         $total += (int)$segment * $weights[$position];
+     }
+
+     return $total;
+ }
+}
diff --git a/scraper/spotify.php b/scraper/spotify.php
new file mode 100644
index 0000000..79f61a6
--- /dev/null
+++ b/scraper/spotify.php
@@ -0,0 +1,726 @@
+<?php
+
+class spotify{
+
+ private const req_web = 0;
+ private const req_api = 1;
+ private const req_clientid = 2;
+
+ // Declared explicitly: creating these as dynamic properties is
+ // deprecated since PHP 8.2. Kept public, which is the visibility the
+ // dynamic properties had.
+ public $backend;
+ public $fuckhtml;
+
+ /**
+  * Load the shared libraries and create the backend (keyed "spotify")
+  * and the HTML parser used by this scraper.
+  */
+ public function __construct(){
+
+     // include_once: do not re-run the library file when another
+     // scraper already loaded it during the same request
+     include_once "lib/backend.php";
+     $this->backend = new backend("spotify");
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters this scraper exposes: a single "category"
+  * selector, identical for every page type.
+  *
+  * @param string $page Page type (unused).
+  * @return array Filter definitions keyed by filter name.
+  */
+ public function getfilters($page){
+
+     $categories = [
+         "any" => "All (no pagination)",
+         "audiobooks" => "Audiobooks",
+         "tracks" => "Songs",
+         "artists" => "Artists",
+         "playlists" => "Playlists",
+         "albums" => "Albums",
+         "podcastAndEpisodes" => "Podcasts & Shows (no pagination)",
+         "episodes" => "Episodes",
+         "users" => "Profiles"
+     ];
+
+     return [
+         "category" => [
+             "display" => "Category",
+             "option" => $categories
+         ]
+     ];
+ }
+
+ /**
+  * Perform an HTTP request against one of Spotify's endpoints.
+  *
+  * @param mixed       $proxy   Proxy descriptor handed to backend::assign_proxy().
+  * @param string      $url     Target URL.
+  * @param array       $get     Query parameters; for self::req_clientid this
+  *                             is JSON-encoded and sent as the POST body.
+  * @param int         $reqtype One of self::req_web, self::req_api or
+  *                             self::req_clientid; selects the header set.
+  * @param string|null $bearer  Access token (req_api only).
+  * @param string|null $token   Client token (req_api only).
+  * @return string|bool         Whatever curl_exec() returned.
+  * @throws Exception           On an unknown $reqtype or a cURL error.
+  */
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web, $bearer = null, $token = null){
+
+     $curlproc = curl_init();
+
+     switch($reqtype){
+
+         case self::req_api:
+             $headers = [
+                 "User-Agent: " . config::USER_AGENT,
+                 "Accept: application/json",
+                 "Accept-Language: en",
+                 "app-platform: WebPlayer",
+                 "authorization: Bearer {$bearer}",
+                 "client-token: {$token}",
+                 "content-type: application/json;charset=UTF-8",
+                 "Origin: https://open.spotify.com",
+                 "Referer: https://open.spotify.com/",
+                 "DNT: 1",
+                 "Connection: keep-alive",
+                 "Sec-Fetch-Dest: empty",
+                 "Sec-Fetch-Mode: cors",
+                 "Sec-Fetch-Site: same-site",
+                 "spotify-app-version: 1.2.27.93.g7aee53d4",
+                 "TE: trailers"
+             ];
+             break;
+
+         case self::req_web:
+             $headers = [
+                 "User-Agent: " . config::USER_AGENT,
+                 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Accept-Encoding: gzip",
+                 "DNT: 1",
+                 "Sec-GPC: 1",
+                 "Connection: keep-alive",
+                 "Upgrade-Insecure-Requests: 1",
+                 "Sec-Fetch-Dest: document",
+                 "Sec-Fetch-Mode: navigate",
+                 "Sec-Fetch-Site: cross-site"
+             ];
+             break;
+
+         case self::req_clientid:
+             // this endpoint takes its parameters as a JSON POST body
+             $get = json_encode($get);
+
+             curl_setopt($curlproc, CURLOPT_POST, true);
+             curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+
+             $headers = [
+                 "User-Agent: " . config::USER_AGENT,
+                 "Accept: application/json",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Accept-Encoding: gzip, deflate, br",
+                 "Referer: https://open.spotify.com/",
+                 "content-type: application/json",
+                 "Content-Length: " . strlen($get),
+                 "Origin: https://open.spotify.com",
+                 "DNT: 1",
+                 "Sec-GPC: 1",
+                 "Connection: keep-alive",
+                 "Sec-Fetch-Dest: empty",
+                 "Sec-Fetch-Mode: cors",
+                 "Sec-Fetch-Site: same-site",
+                 "TE: trailers"
+             ];
+             break;
+
+         default:
+             // without this, $headers would be undefined further down
+             curl_close($curlproc);
+             throw new Exception("Unknown spotify request type");
+     }
+
+     if(
+         $reqtype !== self::req_clientid &&
+         $get !== []
+     ){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /**
+  * Search Spotify and convert the result sections.
+  *
+  * Steps: fetch the web search page, read the access token and client id
+  * from its "session" script, exchange the client id for a client token,
+  * then query the pathfinder API.
+  *
+  * NOTE(review): the API call always uses the "searchDesktop" operation
+  * with offset 0; $category only changes the URL of the initial HTML page.
+  *
+  * @param array $get Request parameters: "s" (query) and "category".
+  * @return array Result structure with "song", "playlist", "album",
+  *               "podcast", "author" and "user" lists; "npt" stays null.
+  * @throws Exception When any request fails or a response cannot be decoded.
+  */
+ public function music($get){
+
+     $search = $get["s"];
+     $ip = $this->backend->get_ip();
+     $category = $get["category"];
+
+     /*
+         audiobooks first and second page decoded
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
+     */
+
+     /*
+         songs
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":0,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":100,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
+     */
+
+     /*
+         artists
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":30,"limit":23,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":53,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+     */
+
+     /*
+         playlists
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":30,"limit":3,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
+     */
+
+     /*
+         albums
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
+     */
+
+     /*
+         podcasts & shows (contains authors, no pagination)
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":0,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
+     */
+
+     /*
+         episodes
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchDesktop&variables={"searchTerm":"asmr","offset":0,"limit":10,"numberOfTopResults":5,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"da03293d92a2cfc5e24597dcdc652c0ad135e1c64a78fddbf1478a7e096bea44"}}
+         ??? https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":60,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
+     */
+
+     /*
+         profiles
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
+         https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
+     */
+
+     // get HTML
+     try{
+
+         $html =
+             $this->get(
+                 $ip,
+                 "https://open.spotify.com/search/" .
+                 rawurlencode($search) .
+                 ($category != "any" ? "/" . $category : ""),
+                 []
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get initial search page");
+     }
+
+     // grep bearer and client ID
+     $this->fuckhtml->load($html);
+
+     $script =
+         $this->fuckhtml
+         ->getElementById(
+             "session",
+             "script"
+         );
+
+     // a miss may come back as false rather than null, so test for any
+     // falsy value
+     if(!$script || !isset($script["innerHTML"])){
+
+         throw new Exception("Failed to grep bearer token");
+     }
+
+     $script =
+         json_decode(
+             $script["innerHTML"],
+             true
+         );
+
+     if(
+         !isset($script["accessToken"]) ||
+         !isset($script["clientId"])
+     ){
+
+         throw new Exception("Failed to grep bearer token");
+     }
+
+     $bearer = $script["accessToken"];
+     $client_id = $script["clientId"];
+
+     // hit client ID endpoint
+     try{
+
+         $token =
+             json_decode(
+                 $this->get(
+                     $ip,
+                     "https://clienttoken.spotify.com/v1/clienttoken",
+                     [ // !! that shit must be sent as json data
+                         "client_data" => [
+                             "client_id" => $client_id,
+                             "client_version" => "1.2.27.93.g7aee53d4",
+                             "js_sdk_data" => [
+                                 "device_brand" => "unknown",
+                                 "device_id" => "4c7ca20117ca12288ea8fc7118a9118c",
+                                 "device_model" => "unknown",
+                                 "device_name" => "computer",
+                                 "os" => "windows",
+                                 "os_version" => "NT 10.0"
+                             ]
+                         ]
+                     ],
+                     self::req_clientid
+                 ),
+                 true
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch token");
+     }
+
+     // covers both undecodable JSON and a response without a token
+     if(!isset($token["granted_token"]["token"])){
+
+         throw new Exception("Failed to decode token");
+     }
+
+     $token = $token["granted_token"]["token"];
+
+     try{
+
+         $payload =
+             $this->get(
+                 $ip,
+                 "https://api-partner.spotify.com/pathfinder/v1/query",
+                 [
+                     "operationName" => "searchDesktop",
+                     "variables" =>
+                         json_encode(
+                             [
+                                 "searchTerm" => $search,
+                                 "offset" => 0,
+                                 "limit" => 10,
+                                 "numberOfTopResults" => 5,
+                                 "includeAudiobooks" => true
+                             ]
+                         ),
+                     "extensions" =>
+                         json_encode(
+                             [
+                                 "persistedQuery" => [
+                                     "version" => 1,
+                                     "sha256Hash" => "21969b655b795601fb2d2204a4243188e75fdc6d3520e7b9cd3f4db2aff9591e" // ?
+                                 ]
+                             ]
+                         )
+                 ],
+                 self::req_api,
+                 $bearer,
+                 $token
+             );
+
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON results");
+     }
+
+     if($payload == "Token expired"){
+
+         throw new Exception("Grepped spotify token has expired");
+     }
+
+     $payload = json_decode($payload, true);
+
+     if($payload === null){
+
+         throw new Exception("Failed to decode JSON results");
+     }
+
+     //$payload = json_decode(file_get_contents("scraper/spotify.json"), true);
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "song" => [],
+         "playlist" => [],
+         "album" => [],
+         "podcast" => [],
+         "author" => [],
+         "user" => []
+     ];
+
+     // get songs
+     foreach($this->section($payload, "tracksV2") as $result){
+
+         if(isset($result["item"])){
+
+             $result = $result["item"];
+         }
+
+         if(isset($result["data"])){
+
+             $result = $result["data"];
+         }
+
+         [$artist, $artist_link] = $this->get_artists($result["artists"] ?? null);
+
+         $out["song"][] = [
+             "title" => $result["name"],
+             "description" => null,
+             "url" => "https://open.spotify.com/track/" . $result["id"],
+             "views" => null,
+             "author" => [
+                 "name" => $artist,
+                 "url" => $artist_link,
+                 "avatar" => null
+             ],
+             "thumb" => $this->get_thumb($result["albumOfTrack"]["coverArt"] ?? null),
+             "date" => null,
+             "duration" => $result["duration"]["totalMilliseconds"] / 1000,
+             "stream" => [
+                 "endpoint" => "spotify",
+                 "url" => "track." . $result["id"]
+             ]
+         ];
+     }
+
+     // get playlists
+     foreach($this->section($payload, "playlists") as $playlist){
+
+         if(isset($playlist["data"])){
+
+             $playlist = $playlist["data"];
+         }
+
+         $avatar = $this->get_thumb($playlist["ownerV2"]["data"]["avatar"] ?? null);
+
+         $out["playlist"][] = [
+             "title" => $playlist["name"],
+             "description" => null,
+             "author" => [
+                 "name" => $playlist["ownerV2"]["data"]["name"],
+                 "url" =>
+                     "https://open.spotify.com/user/" .
+                     $this->uri2id($playlist["ownerV2"]["data"]["uri"] ?? null),
+                 "avatar" => $avatar["url"]
+             ],
+             "thumb" => $this->get_thumb($playlist["images"]["items"][0] ?? null),
+             "date" => null,
+             "duration" => null,
+             "url" =>
+                 "https://open.spotify.com/playlist/" .
+                 $this->uri2id($playlist["uri"] ?? null)
+         ];
+     }
+
+     // get albums
+     foreach($this->section($payload, "albums") as $album){
+
+         if(isset($album["data"])){
+
+             $album = $album["data"];
+         }
+
+         [$artist, $artist_link] = $this->get_artists($album["artists"] ?? null);
+
+         $out["album"][] = [
+             "title" => $album["name"],
+             "description" => null,
+             "author" => [
+                 "name" => $artist,
+                 "url" => $artist_link,
+                 "avatar" => null
+             ],
+             "thumb" => $this->get_thumb($album["coverArt"] ?? null),
+             // only the year is known: use January 1st of that year
+             "date" =>
+                 isset($album["date"]["year"]) ?
+                 mktime(0, 0, 0, 1, 1, (int)$album["date"]["year"]) :
+                 null,
+             "duration" => null,
+             "url" =>
+                 "https://open.spotify.com/album/" .
+                 $this->uri2id($album["uri"] ?? null)
+         ];
+     }
+
+     // get podcasts
+     foreach($this->section($payload, "podcasts") as $podcast){
+
+         if(isset($podcast["data"])){
+
+             $podcast = $podcast["data"];
+         }
+
+         $out["podcast"][] = [
+             "title" => $podcast["name"],
+             "description" => $this->join_field($podcast["topics"]["items"] ?? [], "title"),
+             "author" => [
+                 "name" => $podcast["publisher"]["name"],
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "thumb" => $this->get_thumb($podcast["coverArt"] ?? null),
+             "date" => null,
+             "duration" => null,
+             "url" =>
+                 "https://open.spotify.com/show/" .
+                 $this->uri2id($podcast["uri"] ?? null),
+             "stream" => [
+                 "endpoint" => null,
+                 "url" => null
+             ]
+         ];
+     }
+
+     // get audio books (put in podcasts)
+     foreach($this->section($payload, "audiobooks") as $podcast){
+
+         if(isset($podcast["data"])){
+
+             $podcast = $podcast["data"];
+         }
+
+         $uri = $this->uri2id($podcast["uri"] ?? null);
+
+         $out["podcast"][] = [
+             "title" => $podcast["name"],
+             "description" => $this->join_field($podcast["topics"]["items"] ?? [], "title"),
+             "author" => [
+                 "name" => $this->join_field($podcast["authors"] ?? [], "name"),
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "thumb" => $this->get_thumb($podcast["coverArt"] ?? null),
+             "date" => strtotime($podcast["publishDate"]["isoString"]),
+             "duration" => null,
+             "url" => "https://open.spotify.com/show/" . $uri,
+             "stream" => [
+                 "endpoint" => "spotify",
+                 "url" => "episode." . $uri
+             ]
+         ];
+     }
+
+     // get episodes (and place them in podcasts)
+     foreach($this->section($payload, "episodes") as $podcast){
+
+         if(isset($podcast["data"])){
+
+             $podcast = $podcast["data"];
+         }
+
+         $out["podcast"][] = [
+             "title" => $podcast["name"],
+             "description" => $this->limitstrlen($podcast["description"]),
+             "author" => [
+                 "name" => $podcast["podcastV2"]["data"]["publisher"]["name"] ?? null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "thumb" => $this->get_thumb($podcast["coverArt"] ?? null),
+             "date" => strtotime($podcast["releaseDate"]["isoString"]),
+             "duration" => $podcast["duration"]["totalMilliseconds"] / 1000,
+             "url" =>
+                 "https://open.spotify.com/show/" .
+                 $this->uri2id($podcast["uri"] ?? null),
+             "stream" => [
+                 "endpoint" => null,
+                 "url" => null
+             ]
+         ];
+     }
+
+     // get authors
+     foreach($this->section($payload, "artists") as $user){
+
+         if(isset($user["data"])){
+
+             $user = $user["data"];
+         }
+
+         $avatar = $this->get_thumb($user["visuals"]["avatarImage"] ?? null);
+
+         $out["author"][] = [
+             "title" =>
+                 (
+                     $user["profile"]["verified"] === true ?
+                     "✓ " : ""
+                 ) .
+                 $user["profile"]["name"],
+             "followers" => null,
+             "description" => null,
+             "thumb" => $avatar,
+             "url" =>
+                 "https://open.spotify.com/artist/" .
+                 $this->uri2id($user["uri"] ?? null)
+         ];
+     }
+
+     // get users
+     foreach($this->section($payload, "users") as $user){
+
+         if(isset($user["data"])){
+
+             $user = $user["data"];
+         }
+
+         $avatar = $this->get_thumb($user["avatar"] ?? null);
+
+         $out["user"][] = [
+             "title" => $user["displayName"] . " (@{$user["id"]})",
+             "followers" => null,
+             "description" => null,
+             "thumb" => $avatar,
+             "url" => "https://open.spotify.com/user/" . $user["id"]
+         ];
+     }
+
+     return $out;
+ }
+
+ // Return the "items" list of one searchV2 section, or [] when absent.
+ private function section($payload, $name){
+
+     $items = $payload["data"]["searchV2"][$name]["items"] ?? [];
+
+     return is_array($items) ? $items : [];
+ }
+
+ // Return the third ":"-separated part of a URI string, or null when missing.
+ private function uri2id($uri){
+
+     $parts = explode(":", (string)$uri, 3);
+
+     return isset($parts[2]) ? $parts[2] : null;
+ }
+
+ // Join one field of every row with ", "; null when nothing is left.
+ private function join_field($rows, $field){
+
+     $values = [];
+
+     foreach($rows as $row){
+
+         $values[] = $row[$field];
+     }
+
+     $values = implode(", ", $values);
+
+     return $values == "" ? null : $values;
+ }
+
+ /**
+  * Build a display name and a profile link from an artist list.
+  *
+  * @param array|null $artists Structure holding the artists under "items".
+  * @return array [string|null $names, string|null $link]: names joined with
+  *               ", " and the URL of the first artist. Both are null when
+  *               there are no names; the link alone is null when the first
+  *               artist has no usable "uri".
+  */
+ private function get_artists($artists){
+
+     $items =
+         isset($artists["items"]) && is_array($artists["items"]) ?
+         $artists["items"] :
+         [];
+
+     $names = [];
+
+     foreach($items as $artist){
+
+         $names[] = $artist["profile"]["name"];
+     }
+
+     $names = implode(", ", $names);
+
+     if($names == ""){
+
+         return [null, null];
+     }
+
+     // The link always points at the first artist. The previous null test
+     // looked at the leftover loop variable and could never be true.
+     $artist_link = null;
+
+     if(isset($items[0]["uri"])){
+
+         $parts = explode(":", $items[0]["uri"]);
+
+         if(isset($parts[2])){
+
+             $artist_link = "https://open.spotify.com/artist/" . $parts[2];
+         }
+     }
+
+     return [$names, $artist_link];
+ }
+
+ /**
+  * Pick the widest image out of a cover's "sources" list.
+  *
+  * @param array|null $cover Cover structure, or null when there is none.
+  * @return array ["url" => string|null, "ratio" => "1:1"|null]
+  */
+ private function get_thumb($cover){
+
+     $best = null;
+
+     if($cover !== null){
+
+         foreach($cover["sources"] as $candidate){
+
+             // keep the current pick unless this one is strictly wider
+             if(
+                 $best !== null &&
+                 !((int)$candidate["width"] > $best["width"])
+             ){
+
+                 continue;
+             }
+
+             $best = $candidate;
+         }
+     }
+
+     $found = $best !== null;
+
+     return [
+         "url" => $found ? $best["url"] : null,
+         "ratio" => $found ? "1:1" : null
+     ];
+ }
+
+ /**
+  * Turn line breaks into spaces and return the first line of the text
+  * after word-wrapping it at 300 columns.
+  *
+  * @param string $text
+  * @return string
+  */
+ private function limitstrlen($text){
+
+     $single_line =
+         str_replace(
+             ["\n\r", "\r\n", "\n", "\r"],
+             " ",
+             $text
+         );
+
+     // the first chunk of the wrapped text is the excerpt
+     [$excerpt] =
+         explode(
+             "\n",
+             wordwrap($single_line, 300, "\n"),
+             2
+         );
+
+     return $excerpt;
+ }
+}
diff --git a/scraper/startpage.php b/scraper/startpage.php
new file mode 100644
index 0000000..e48a429
--- /dev/null
+++ b/scraper/startpage.php
@@ -0,0 +1,1584 @@
+<?php
+
+class startpage{
+
+ /**
+  * Load the shared helper libraries and create the helper objects used by
+  * this scraper.
+  */
+ public function __construct(){
+
+     // include_once instead of include: including a class file a second
+     // time in the same request is a fatal "Cannot declare class" error
+     include_once "lib/backend.php";
+     $this->backend = new backend("startpage");
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the search filters available for a result page type.
+  *
+  * Each filter maps its name to an array with an "option" list
+  * (value => label) and, usually, a "display" label.
+  *
+  * @param string $page One of "web", "images", "videos", "news"
+  * @return array|null Filter definitions, or null for any other page type
+  */
+ public function getfilters($page){
+
+     // identical on every page type
+     $nsfw = [ // qadf
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes", // qadf=none
+             "no" => "No" // qadf=heavy
+         ]
+     ];
+
+     switch($page){
+         case "web":
+             return [
+                 "country" => [
+                     "display" => "Country",
+                     "option" => [
+                         "any" => "All Regions",
+                         "es_AR" => "Argentina",
+                         "en_AU" => "Australia",
+                         "de_AT" => "Austria",
+                         "ru_BY" => "Belarus",
+                         "fr_BE" => "Belgium (FR)",
+                         "nl_BE" => "Belgium (NL)",
+                         "bg_BG" => "Bulgaria",
+                         "en_CA" => "Canada (EN)",
+                         "fr_CA" => "Canada (FR)",
+                         "es_CL" => "Chile",
+                         "es_CO" => "Colombia",
+                         "cs_CZ" => "Czech Republic",
+                         "da_DK" => "Denmark",
+                         "ar_EG" => "Egypt",
+                         "et_EE" => "Estonia",
+                         "fi_FI" => "Finland",
+                         "fr_FR" => "France",
+                         "de_DE" => "Germany",
+                         "el_GR" => "Greece",
+                         "hu_HU" => "Hungary",
+                         "hi_IN" => "India (HI)",
+                         "en_IN" => "India (EN)",
+                         "id_ID" => "Indonesia (ID)",
+                         "en_ID" => "Indonesia (EN)",
+                         "en_IE" => "Ireland",
+                         "it_IT" => "Italy",
+                         "ja_JP" => "Japan",
+                         "ko_KR" => "Korea",
+                         "ms_MY" => "Malaysia (MS)",
+                         "en_MY" => "Malaysia (EN)",
+                         "es_MX" => "Mexico",
+                         "nl_NL" => "Netherlands",
+                         "en_NZ" => "New Zealand",
+                         "no_NO" => "Norway",
+                         "es_PE" => "Peru",
+                         "fil_PH" => "Philippines (FIL)",
+                         "en_PH" => "Philippines (EN)",
+                         "pl_PL" => "Poland",
+                         "pt_PT" => "Portugal",
+                         "ro_RO" => "Romania",
+                         "ru_RU" => "Russia",
+                         "ms_SG" => "Singapore (MS)",
+                         "en_SG" => "Singapore (EN)",
+                         "es_ES" => "Spain (ES)",
+                         "ca_ES" => "Spain (CA)",
+                         "sv_SE" => "Sweden",
+                         "de_CH" => "Switzerland (DE)",
+                         "fr_CH" => "Switzerland (FR)",
+                         "it_CH" => "Switzerland (IT)",
+                         "tr_TR" => "Turkey",
+                         "uk_UA" => "Ukraine",
+                         "en_US" => "US (EN)",
+                         "es_US" => "US (ES)",
+                         "es_UY" => "Uruguay",
+                         "es_VE" => "Venezuela",
+                         "vi_VN" => "Vietnam (VI)",
+                         "en_VN" => "Vietnam (EN)",
+                         "en_ZA" => "South Africa"
+                     ]
+                 ],
+                 "nsfw" => $nsfw,
+                 "time" => [ // with_date
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "d" => "Past 24 hours",
+                         "w" => "Past week",
+                         "m" => "Past month",
+                         "y" => "Past year",
+                     ]
+                 ],
+                 "extendedsearch" => [
+                     // undefined display, so it wont show in frontend
+                     "option" => [
+                         "yes" => "Yes",
+                         "no" => "No"
+                     ]
+                 ]
+             ];
+
+         case "images":
+             return [
+                 "nsfw" => $nsfw,
+                 "size" => [ // flimgsize
+                     "display" => "Size",
+                     "option" => [
+                         "any" => "Any size",
+                         "Small" => "Small",
+                         "Medium" => "Medium",
+                         "Large" => "Large",
+                         "Wallpaper" => "Wallpaper",
+                         // from here, image-size-select, var prefix = isz:lt,islt:
+                         // (this key used to be listed twice; a repeated key
+                         // just overwrites the first entry)
+                         // NOTE(review): "qsvgs" looks like a typo for
+                         // "qsvga" - confirm against the upstream form
+                         "qsvgs" => "Larger than 400x300",
+                         "vga" => "Larger than 640x480",
+                         "svga" => "Larger than 800x600",
+                         "xga" => "Larger than 1024x768",
+                         "2mp" => "Larger than 2 MP (1600x1200)",
+                         "4mp" => "Larger than 4 MP (2272x1704)",
+                         "6mp" => "Larger than 6 MP (2816x2112)",
+                         "8mp" => "Larger than 8 MP (3264x2448)",
+                         "10mp" => "Larger than 10 MP (3648x2736)",
+                         "12mp" => "Larger than 12 MP (4096x3072)",
+                         "15mp" => "Larger than 15 MP (4480x3360)",
+                         "20mp" => "Larger than 20 MP (5120x3840)",
+                         "40mp" => "Larger than 40 MP (7216x5412)",
+                         "70mp" => "Larger than 70 MP (9600x7200)"
+                     ]
+                 ],
+                 "color" => [ // flimgcolor
+                     "display" => "Color",
+                     "option" => [
+                         "any" => "Any color",
+                         // from here, var prefix = ic:
+                         "color" => "Color only",
+                         "bnw" => "Black & white", // set to "gray"
+                         // from here, var prefix = ic:specific,isc:
+                         "red" => "Red",
+                         "orange" => "Orange",
+                         "yellow" => "Yellow",
+                         "green" => "Green",
+                         "teal" => "Teal",
+                         "blue" => "Blue",
+                         "purple" => "Purple",
+                         "pink" => "Pink",
+                         "white" => "White",
+                         "gray" => "Gray",
+                         "black" => "Black",
+                         "brown" => "Brown"
+                     ]
+                 ],
+                 "type" => [ // flimgtype
+                     "display" => "Type",
+                     "option" => [
+                         "any" => "Any type",
+                         "AnimatedGif" => "Animated GIF",
+                         "Clipart" => "Clip Art",
+                         "Line" => "Line Drawing",
+                         "Photo" => "Photograph",
+                         "Transparent" => "Transparent Background"
+                     ]
+                 ],
+                 "license" => [ // flimglicense
+                     "display" => "License",
+                     "option" => [
+                         "any" => "Any license",
+                         "p" => "Public domain",
+                         "s" => "Free to share",
+                         "sc" => "Free to share commercially",
+                         "m" => "Free to modify",
+                         "mc" => "Free to modify commercially"
+                     ]
+                 ]
+             ];
+
+         case "videos":
+             return [
+                 "nsfw" => $nsfw,
+                 "sort" => [
+                     "display" => "Sort by",
+                     "option" => [
+                         "relevance" => "Most relevant",
+                         "popular" => "Most popular",
+                         "recent" => "Most recent"
+                     ]
+                 ],
+                 "duration" => [ // with_duration
+                     "display" => "Duration",
+                     "option" => [
+                         "any" => "Any duration",
+                         "short" => "Short",
+                         "medium" => "Medium",
+                         "long" => "Long"
+                     ]
+                 ]
+             ];
+
+         case "news":
+             return [
+                 "nsfw" => $nsfw,
+                 "time" => [ // with_date
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "d" => "Past 24 hours",
+                         "w" => "Past week",
+                         "m" => "Past month"
+                     ]
+                 ]
+             ];
+
+         //preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEazerbaijaniN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:21:58 GMT; Secure; Path=/
+         //preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:22:52 GMT; Secure; Path=/
+     }
+ }
+
+ /**
+  * Perform one HTTP request against Startpage and return the response body.
+  *
+  * Three header profiles are used: JSON XHR ($is_xhr), form POST ($post)
+  * and plain page load (neither).
+  *
+  * @param mixed        $proxy  Proxy descriptor handed to backend->assign_proxy()
+  * @param string       $url    Target URL
+  * @param array|string $get    Query parameters (array, appended to the URL)
+  *                             or, when $post is true, the raw request body
+  * @param bool         $post   Send $get as a POST body
+  * @param bool         $is_xhr Use the JSON/XHR header profile
+  * @return string|bool Response body as returned by curl_exec()
+  * @throws Exception With the cURL error message when the transfer fails
+  */
+ private function get($proxy, $url, $get = [], $post = false, $is_xhr = false){
+
+     $curlproc = curl_init();
+
+     if($post === true){
+
+         curl_setopt($curlproc, CURLOPT_POST, true);
+         curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+
+     }elseif($get !== []){
+
+         $get = http_build_query($get);
+         $url .= "?" . $get;
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     // preferences cookie, shared by all three header profiles
+     $cookie = "Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius";
+
+     if($is_xhr === true){
+
+         $headers =
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: application/json",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "Referer: https://www.startpage.com/",
+             "Content-Type: application/json",
+             "Content-Length: " . strlen($get),
+             "Origin: https://www.startpage.com/",
+             "DNT: 1",
+             "Connection: keep-alive",
+             $cookie,
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-origin",
+             "TE: trailers"];
+
+     }elseif($post === true){
+
+         $headers =
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "Referer: https://www.startpage.com/",
+             "Content-Type: application/x-www-form-urlencoded",
+             "Content-Length: " . strlen($get),
+             "DNT: 1",
+             "Connection: keep-alive",
+             $cookie,
+             "Upgrade-Insecure-Requests: 1",
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: none",
+             "Sec-Fetch-User: ?1",
+             "Priority: u=0, i",
+             "TE: trailers"];
+     }else{
+
+         $headers =
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "DNT: 1",
+             "Connection: keep-alive",
+             $cookie,
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: none",
+             "Sec-Fetch-User: ?1",
+             "Priority: u=0, i",
+             "TE: trailers"];
+     }
+
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     try{
+
+         $data = curl_exec($curlproc);
+
+         if(curl_errno($curlproc)){
+
+             throw new Exception(curl_error($curlproc));
+         }
+
+         return $data;
+
+     }finally{
+
+         // release the handle on the error path too
+         curl_close($curlproc);
+     }
+ }
+
+ /**
+  * Run a web search (or fetch a stored next page) and normalise the result.
+  *
+  * Reads $get["npt"], $get["s"], $get["nsfw"], $get["country"],
+  * $get["time"] and $get["extendedsearch"].
+  *
+  * When "extendedsearch" is "yes" and the NSFW filter is not set to "no",
+  * a second request fetches the knowledge panel; a failure of that request
+  * is ignored and the plain results are returned.
+  *
+  * @param array $get Request parameters
+  * @return array Result with the keys status, spelling, npt, answer, web,
+  *               image, video, news, related
+  * @throws Exception On empty query, failed fetch, captcha/redirect page or
+  *                   when the embedded JSON cannot be located or decoded
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         [$post, $proxy] = $this->backend->get($get["npt"], "web");
+
+         try{
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $post,
+                 true
+             );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+         $get_instant_answer = false;
+
+     }else{
+
+         // same guard as image(), video() and news()
+         if(strlen((string)$get["s"]) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         $params = [
+             "query" => $get["s"],
+             "cat" => "web",
+             "pl" => "opensearch"
+         ];
+
+         if($get["nsfw"] == "no"){
+
+             $params["qadf"] = "heavy";
+             $get_instant_answer = false;
+         }else{
+
+             $get_instant_answer = true;
+         }
+
+         if($get["country"] !== "any"){
+
+             $params["qsr"] = $get["country"];
+         }
+
+         if($get["time"] !== "any"){
+
+             $params["with_date"] = $get["time"];
+         }
+
+         try{
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $params
+             );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+         //$html = file_get_contents("scraper/startpage.html");
+     }
+
+     $this->detect_captcha($html);
+
+     // the result data is embedded as the JSON argument of a
+     // React.createElement() call
+     if(
+         preg_match(
+             '/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),?$/m',
+             $html,
+             $matches
+         ) === 0
+     ){
+
+         throw new Exception("Failed to grep JSON object");
+     }
+
+     $json = json_decode($matches[1], true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     //print_r($json);
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // get npt
+     $out["npt"] = $this->parse_npt($json, "web", $proxy);
+
+     foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
+
+         if(!isset($category["display_type"])){
+
+             continue;
+         }
+
+         switch($category["display_type"]){
+
+             case "web-google":
+                 foreach($category["results"] as $result){
+
+                     $sublinks = [];
+
+                     // siteLinks is read defensively; a missing key yields no sublinks
+                     foreach(($result["siteLinks"] ?? []) as $sublink){
+
+                         $sublinks[] = [
+                             "title" => $sublink["title"],
+                             "description" => null,
+                             "url" => $sublink["clickUrl"]
+                         ];
+                     }
+
+                     // a description may start with "<date> ... <text>"
+                     $description =
+                         explode(
+                             "...",
+                             $this->titledots(
+                                 html_entity_decode(
+                                     $this->fuckhtml->getTextContent($result["description"])
+                                 )
+                             ),
+                             2
+                         );
+
+                     $date = strtotime(trim($description[0]));
+
+                     if(
+                         $date === false ||
+                         count($description) !== 2 ||
+                         strlen($description[0]) > 14
+                     ){
+
+                         // no date found
+                         $description = implode(" ... ", $description);
+                         $date = null;
+                     }else{
+
+                         // date found
+                         $description = ltrim($description[1]);
+                     }
+
+                     $out["web"][] = [
+                         "title" =>
+                             $this->titledots(
+                                 html_entity_decode(
+                                     $this->fuckhtml->getTextContent($result["title"])
+                                 )
+                             ),
+                         "description" => $description,
+                         "url" => $result["clickUrl"],
+                         "date" => $date,
+                         "type" => "web",
+                         "thumb" => [
+                             "url" => null,
+                             "ratio" => null
+                         ],
+                         "sublink" => $sublinks,
+                         "table" => []
+                     ];
+                 }
+                 break;
+
+             case "images-qi-top":
+                 foreach($category["results"] as $result){
+
+                     $out["image"][] = [
+                         "title" =>
+                             $this->titledots(
+                                 html_entity_decode(
+                                     $this->fuckhtml->getTextContent($result["title"])
+                                 )
+                             ),
+                         "source" => [
+                             [
+                                 "url" => $result["rawImageUrl"],
+                                 "width" => (int)$result["width"],
+                                 "height" => (int)$result["height"]
+                             ],
+                             [
+                                 "url" => $this->unshitimage($result["mdThumbnailUrl"]),
+                                 "width" => (int)$result["mdThumbnailWidth"],
+                                 "height" => (int)$result["mdThumbnailHeight"]
+                             ]
+                         ],
+                         "url" => $result["altClickUrl"]
+                     ];
+                 }
+                 break;
+
+             case "spellsuggest-google":
+                 $out["spelling"] =
+                     [
+                         "type" => "including",
+                         "using" => $json["render"]["query"],
+                         "correction" => $category["results"][0]["query"]
+                     ];
+                 break;
+
+             case "dictionary-qi":
+                 foreach($category["results"] as $result){
+
+                     $answer = [
+                         "title" => $result["word"],
+                         "description" => [],
+                         "url" => null,
+                         "thumb" => null,
+                         "table" => [],
+                         "sublink" => []
+                     ];
+
+                     foreach(($result["lexical_categories"] ?? []) as $lexic_type => $definitions){
+
+                         $answer["description"][] = [
+                             "type" => "title",
+                             "value" => $lexic_type
+                         ];
+
+                         $i = 0;
+
+                         foreach($definitions as $definition){
+
+                             // example/synonyms are read defensively:
+                             // implode() on null is a TypeError in PHP 8
+                             $text_definition = trim((string)($definition["definition"] ?? ""));
+                             $text_example = trim((string)($definition["example"] ?? ""));
+                             $text_synonyms = implode(", ", (array)($definition["synonyms"] ?? []));
+
+                             if($text_definition != ""){
+
+                                 $i++;
+
+                                 // consecutive definitions are merged into one text node
+                                 $c = count($answer["description"]) - 1;
+                                 if(
+                                     $c !== 0 &&
+                                     $answer["description"][$c]["type"] == "text"
+                                 ){
+
+                                     $answer["description"][$c]["value"] .=
+                                         "\n\n" . $i . ". " . $text_definition;
+
+                                 }else{
+
+                                     $answer["description"][] = [
+                                         "type" => "text",
+                                         "value" => $i . ". " . $text_definition
+                                     ];
+                                 }
+                             }
+
+                             if($text_example != ""){
+
+                                 $answer["description"][] = [
+                                     "type" => "quote",
+                                     "value" => $text_example
+                                 ];
+                             }
+
+                             if($text_synonyms != ""){
+
+                                 $answer["description"][] = [
+                                     "type" => "text",
+                                     "value" => "Synonyms: " . $text_synonyms
+                                 ];
+                             }
+                         }
+                     }
+
+                     $out["answer"][] = $answer;
+                 }
+                 break;
+         }
+     }
+
+     // parse instant answers
+     if(
+         $get["extendedsearch"] == "yes" &&
+         $get_instant_answer === true
+     ){
+
+         // https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
+         try{
+             $post = [
+                 "se" => "n0vze2y9dqwy",
+                 "q" => $json["render"]["query"],
+                 "results" => [], // populate
+                 "enableKnowledgePanel" => true,
+                 "enableMediaThumbBar" => false,
+                 "enableSearchSuggestions" => false,
+                 "enableTripadvisorProperties" => [],
+                 "enableTripadvisorPlaces" => [],
+                 "enableTripadvisorPlacesForLocations" => [],
+                 "enableWebProducts" => false,
+                 "tripadvisorPartnerId" => null,
+                 "tripadvisorMapColorMode" => "light",
+                 "tripadvisorDisablesKnowledgePanel" => false,
+                 "instantAnswers" => [
+                     "smartAnswers",
+                     "youtube",
+                     "tripadvisor"
+                 ],
+                 "iaType" => null,
+                 "forceEnhancedKnowledgePanel" => false,
+                 "shoppingOnly" => false,
+                 "allowAdultProducts" => true,
+                 "lang" => "en",
+                 "browserLang" => "en-US",
+                 "browserTimezone" => "America/New_York",
+                 "market" => null,
+                 "userLocation" => null,
+                 "userDate" => date("Y-m-d"),
+                 "userAgentType" => "unknown"
+             ];
+
+             foreach($out["web"] as $result){
+
+                 $post["results"][] = [
+                     "url" => $result["url"],
+                     "title" => $result["title"]
+                 ];
+             }
+
+             $post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
+
+             $additional_data =
+                 $this->get(
+                     $proxy,
+                     "https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
+                     $post,
+                     true,
+                     true
+                 );
+
+             $additional_data = json_decode($additional_data, true);
+
+             if($additional_data === null){
+
+                 throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
+             }
+
+             if(!isset($additional_data["knowledgePanel"])){
+
+                 throw new Exception("Response has missing data (knowledgePanel)");
+             }
+
+             $additional_data = $additional_data["knowledgePanel"];
+
+             $answer = [
+                 "title" => $additional_data["meta"]["title"],
+                 "description" => [
+                     [
+                         "type" => "quote",
+                         "value" => $additional_data["meta"]["description"]
+                     ]
+                 ],
+                 "url" => $additional_data["meta"]["origWikiUrl"],
+                 "thumb" => $additional_data["meta"]["image"],
+                 "table" => [],
+                 "sublink" => []
+             ];
+
+             // parse html for instant answer
+             $this->fuckhtml->load($additional_data["html"]);
+
+             $div = $this->fuckhtml->getElementsByTagName("div");
+
+             // get description
+             $description =
+                 $this->fuckhtml
+                 ->getElementsByClassName(
+                     "sx-kp-short-extract sx-kp-short-extract-complete",
+                     $div
+                 );
+
+             if(count($description) !== 0){
+
+                 $answer["description"][] = [
+                     "type" => "text",
+                     "value" =>
+                         html_entity_decode(
+                             $this->fuckhtml->getTextContent($description[0])
+                         )
+                 ];
+             }
+
+             // get socials
+             $socials =
+                 $this->fuckhtml
+                 ->getElementsByClassName(
+                     "sx-wiki-social-link",
+                     "a"
+                 );
+
+             foreach($socials as $social){
+
+                 $title = $this->fuckhtml->getTextContent($social["attributes"]["title"]);
+                 $url = $this->fuckhtml->getTextContent($social["attributes"]["href"]);
+
+                 if($title == "Official Website"){
+
+                     $title = "Website";
+                 }
+
+                 $answer["sublink"][$title] = $url;
+             }
+
+             // get videos
+             $videos =
+                 $this->fuckhtml
+                 ->getElementsByClassName(
+                     "sx-kp-video-grid-item",
+                     $div
+                 );
+
+             foreach($videos as $video){
+
+                 $this->fuckhtml->load($video);
+
+                 $as = $this->fuckhtml->getElementsByTagName("a");
+
+                 if(count($as) === 0){
+
+                     // ?? invalid
+                     continue;
+                 }
+
+                 $image =
+                     $this->fuckhtml
+                     ->getElementsByAttributeName(
+                         "data-sx-src",
+                         "img"
+                     );
+
+                 if(count($image) !== 0){
+
+                     $thumb = [
+                         "ratio" => "16:9",
+                         "url" =>
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $image[0]["attributes"]["data-sx-src"]
+                             )
+                     ];
+                 }else{
+
+                     $thumb = [
+                         "ratio" => null,
+                         "url" => null
+                     ];
+                 }
+
+                 $out["video"][] = [
+                     "title" => $this->fuckhtml->getTextContent($as[0]["attributes"]["title"]),
+                     "description" => null,
+                     "date" => null,
+                     "duration" => null,
+                     "views" => null,
+                     "thumb" => $thumb,
+                     "url" => $this->fuckhtml->getTextContent($as[0]["attributes"]["href"])
+                 ];
+             }
+
+             // reset
+             $this->fuckhtml->load($additional_data["html"]);
+
+             // get table elements
+             $table =
+                 $this->fuckhtml
+                 ->getElementsByClassName(
+                     "sx-infobox",
+                     "table"
+                 );
+
+             if(count($table) !== 0){
+
+                 $trs = $this->fuckhtml->getElementsByTagName("tr");
+
+                 foreach($trs as $tr){
+
+                     $this->fuckhtml->load($tr);
+
+                     // ok so startpage devs cant fucking code a table
+                     // td = content
+                     // th (AAAHH) = title
+                     $tds = $this->fuckhtml->getElementsByTagName("td");
+                     $ths = $this->fuckhtml->getElementsByTagName("th");
+
+                     if(
+                         count($ths) === 1 &&
+                         count($tds) === 1
+                     ){
+
+                         $title = $this->fuckhtml->getTextContent($ths[0]);
+
+                         $description = [];
+
+                         $this->fuckhtml->load($tds[0]);
+
+                         $lis = $this->fuckhtml->getElementsByTagName("li");
+
+                         if(count($lis) !== 0){
+
+                             // list cells are flattened to "a, b, c"
+                             foreach($lis as $li){
+
+                                 $description[] = $this->fuckhtml->getTextContent($li);
+                             }
+
+                             $description = implode(", ", $description);
+                         }else{
+
+                             $description = $this->fuckhtml->getTextContent($tds[0]);
+                         }
+
+                         $answer["table"][$title] = $description;
+                     }
+                 }
+             }
+
+             $out["answer"][] = $answer;
+
+         }catch(Exception $error){
+
+             // do nothing
+             //echo "error!";
+         }
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run an image search (or fetch a stored next page) and normalise the result.
+  *
+  * Reads $get["npt"], $get["s"], $get["nsfw"], $get["size"],
+  * $get["color"], $get["type"] and $get["license"].
+  *
+  * @param array $get Request parameters
+  * @return array Result with the keys status, npt, image
+  * @throws Exception On empty query, failed fetch, captcha/redirect page or
+  *                   when the embedded JSON cannot be located or decoded
+  */
+ public function image($get){
+
+     if($get["npt"]){
+
+         [$post, $proxy] = $this->backend->get($get["npt"], "images");
+
+         try{
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $post,
+                 true
+             );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         // one try block is enough: the nested one only re-threw the
+         // same message
+         try{
+
+             $proxy = $this->backend->get_ip();
+
+             $params = [
+                 "query" => $get["s"],
+                 "cat" => "images",
+                 "pl" => "opensearch"
+             ];
+
+             if($get["nsfw"] == "no"){
+
+                 $params["qadf"] = "heavy";
+             }
+
+             if($get["size"] != "any"){
+
+                 if(
+                     $get["size"] == "Small" ||
+                     $get["size"] == "Medium" ||
+                     $get["size"] == "Large" ||
+                     $get["size"] == "Wallpaper"
+                 ){
+
+                     $params["flimgsize"] = $get["size"];
+                 }else{
+
+                     $params["image-size-select"] = "isz:lt,islt:" . $get["size"];
+                 }
+             }
+
+             if($get["color"] != "any"){
+
+                 if($get["color"] == "color"){
+
+                     $params["flimgcolor"] = "ic:color";
+                 }elseif($get["color"] == "bnw"){
+
+                     $params["flimgcolor"] = "ic:gray";
+                 }else{
+
+                     $params["flimgcolor"] = "ic:specific,isc:" . $get["color"];
+                 }
+             }
+
+             if($get["type"] != "any"){
+
+                 $params["flimgtype"] = $get["type"];
+             }
+
+             if($get["license"] != "any"){
+
+                 $params["flimglicense"] = $get["license"];
+             }
+
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $params
+             );
+             //$html = file_get_contents("scraper/startpage.html");
+
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+     }
+
+     $this->detect_captcha($html);
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(
+         preg_match(
+             '/React\.createElement\(UIStartpage\.AppSerpImages, ?(.+)\),?$/m',
+             $html,
+             $matches
+         ) === 0
+     ){
+
+         throw new Exception("Failed to grep JSON object");
+     }
+
+     $json = json_decode($matches[1], true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON object");
+     }
+
+     // get npt
+     $out["npt"] = $this->parse_npt($json, "images", $proxy);
+
+     // get images
+     foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
+
+         if(
+             !isset($category["display_type"]) ||
+             $category["display_type"] != "images-bing"
+         ){
+
+             // ignore ads and !! suggestions !! @todo
+             continue;
+         }
+
+         foreach($category["results"] as $image){
+
+             // source[0] is the full image, source[1] the thumbnail
+             $out["image"][] = [
+                 "title" => $this->titledots($image["title"]),
+                 "source" => [
+                     [
+                         "url" => $this->unshitimage($image["clickUrl"]),
+                         "width" => (int)$image["width"],
+                         "height" => (int)$image["height"]
+                     ],
+                     [
+                         "url" => $this->unshitimage($image["thumbnailUrl"]),
+                         "width" => (int)$image["thumbnailWidth"],
+                         "height" => (int)$image["thumbnailHeight"]
+                     ]
+                 ],
+                 "url" => $image["altClickUrl"]
+             ];
+         }
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run a video search (or fetch a stored next page) and normalise the result.
+  *
+  * Reads $get["npt"], $get["s"], $get["nsfw"], $get["sort"] and
+  * $get["duration"].
+  *
+  * @param array $get Request parameters
+  * @return array Result with the keys status, npt, video, author,
+  *               livestream, playlist, reel (only "video" is populated)
+  * @throws Exception On empty query, failed fetch, captcha/redirect page or
+  *                   when the embedded JSON cannot be located or decoded
+  */
+ public function video($get){
+
+     if($get["npt"]){
+
+         [$post, $proxy] = $this->backend->get($get["npt"], "videos");
+
+         try{
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $post,
+                 true
+             );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         // one try block is enough: the nested one only re-threw the
+         // same message
+         try{
+
+             $proxy = $this->backend->get_ip();
+
+             $params = [
+                 "query" => $get["s"],
+                 "cat" => "video",
+                 "pl" => "opensearch"
+             ];
+
+             if($get["nsfw"] == "no"){
+
+                 $params["qadf"] = "heavy";
+             }
+
+             if($get["sort"] != "relevance"){
+
+                 $params["sort_by"] = $get["sort"];
+             }
+
+             if($get["duration"] != "any"){
+
+                 $params["with_duration"] = $get["duration"];
+             }
+
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $params
+             );
+             //$html = file_get_contents("scraper/startpage.html");
+
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+     }
+
+     $this->detect_captcha($html);
+
+     if(
+         preg_match(
+             '/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),?$/m',
+             $html,
+             $matches
+         ) === 0
+     ){
+
+         throw new Exception("Failed to get JSON object");
+     }
+
+     $json = json_decode($matches[1], true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON object");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     // get npt
+     // NOTE(review): the token is stored under "video" but fetched above
+     // under "videos" - confirm the backend treats both the same
+     $out["npt"] = $this->parse_npt($json, "video", $proxy);
+
+     // get results
+     foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
+
+         if(
+             !isset($category["display_type"]) ||
+             !preg_match(
+                 '/^video-/i',
+                 $category["display_type"]
+             )
+         ){
+
+             continue;
+         }
+
+         foreach($category["results"] as $video){
+
+             if(
+                 isset($video["thumbnailUrl"]) &&
+                 $video["thumbnailUrl"] !== null
+             ){
+
+                 $thumb = [
+                     "ratio" => "16:9",
+                     "url" => $this->unshitimage($video["thumbnailUrl"])
+                 ];
+             }else{
+
+                 $thumb = [
+                     "ratio" => null,
+                     "url" => null
+                 ];
+             }
+
+             $out["video"][] = [
+                 // same marker stripping as news(), through the shared helper
+                 "title" => $this->remove_penguins($video["title"]),
+                 "description" => $this->limitstrlen($video["description"]),
+                 "author" => [
+                     "name" => $video["channelTitle"],
+                     "url" => null,
+                     "avatar" => null
+                 ],
+                 "date" => strtotime($video["publishDate"]),
+                 // every type except video-youtube has its duration divided by 1000
+                 "duration" => $this->hms2int($category["display_type"] == "video-youtube" ? $video["duration"] : $video["duration"] / 1000),
+                 "views" => (int)$video["viewCount"],
+                 "thumb" => $thumb,
+                 "url" => $video["clickUrl"]
+             ];
+         }
+     }
+
+     return $out;
+ }
+
+ /**
+  * Run a news search (or fetch a stored next page) and normalise the result.
+  *
+  * Reads $get["npt"], $get["s"], $get["nsfw"] and $get["time"].
+  *
+  * @param array $get Request parameters
+  * @return array Result with the keys status, npt, news
+  * @throws Exception On empty query, failed fetch, captcha/redirect page or
+  *                   when the embedded JSON cannot be located or decoded
+  */
+ public function news($get){
+
+     if($get["npt"]){
+
+         [$post, $proxy] = $this->backend->get($get["npt"], "news");
+
+         try{
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $post,
+                 true
+             );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         // one try block is enough: the nested one only re-threw the
+         // same message
+         try{
+
+             $proxy = $this->backend->get_ip();
+
+             $params = [
+                 "query" => $get["s"],
+                 "cat" => "news",
+                 "pl" => "opensearch"
+             ];
+
+             if($get["nsfw"] == "no"){
+
+                 $params["qadf"] = "heavy";
+             }
+
+             if($get["time"] != "any"){
+
+                 $params["with_date"] = $get["time"];
+             }
+
+             $html = $this->get(
+                 $proxy,
+                 "https://www.startpage.com/sp/search",
+                 $params
+             );
+             //$html = file_get_contents("scraper/startpage.html");
+
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch search page");
+         }
+     }
+
+     $this->detect_captcha($html);
+
+     if(
+         preg_match(
+             '/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),?$/m',
+             $html,
+             $matches
+         ) === 0
+     ){
+
+         throw new Exception("Failed to get JSON object");
+     }
+
+     $json = json_decode($matches[1], true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON object");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     // get npt
+     $out["npt"] = $this->parse_npt($json, "news", $proxy);
+
+     foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
+
+         if(
+             !isset($category["display_type"]) ||
+             $category["display_type"] != "news-bing"
+         ){
+
+             // unsupported category
+             continue;
+         }
+
+         foreach($category["results"] as $news){
+
+             if(
+                 isset($news["thumbnailUrl"]) &&
+                 $news["thumbnailUrl"] !== null
+             ){
+
+                 $thumb = [
+                     "ratio" => "16:9",
+                     "url" => $this->unshitimage($news["thumbnailUrl"])
+                 ];
+             }else{
+
+                 $thumb = [
+                     "ratio" => null,
+                     "url" => null
+                 ];
+             }
+
+             $out["news"][] = [
+                 "title" => $this->titledots($this->remove_penguins($news["title"])),
+                 "author" => $news["source"],
+                 "description" => $this->titledots($this->remove_penguins($news["description"])),
+                 // drops the last three digits of the timestamp
+                 // (presumably milliseconds -> seconds)
+                 "date" => (int)substr((string)$news["date"], 0, -3),
+                 "thumb" => $thumb,
+                 "url" => $news["clickUrl"]
+             ];
+         }
+     }
+
+     return $out;
+ }
+
+ /**
+  * Find the "Next" pagination link and store the parameters needed to
+  * request that page.
+  *
+  * @param array  $json     Decoded page data
+  * @param string $pagetype Used both as the "cat" request parameter and as
+  *                         the page type passed to backend->store()
+  * @param mixed  $proxy    Proxy to store alongside the parameters
+  * @return mixed Value returned by backend->store(), or null when there is
+  *               no usable "Next" link
+  */
+ private function parse_npt($json, $pagetype, $proxy){
+
+     $pages = $json["render"]["presenter"]["pagination"]["pages"] ?? [];
+
+     if(!is_array($pages)){
+
+         return null;
+     }
+
+     foreach($pages as $page){
+
+         if(
+             !isset($page["name"]) ||
+             $page["name"] != "Next"
+         ){
+
+             continue;
+         }
+
+         // only the query string of the link is of interest
+         $url_parts =
+             explode(
+                 "?",
+                 (string)($page["url"] ?? ""),
+                 2
+             );
+
+         if(!isset($url_parts[1])){
+
+             return null;
+         }
+
+         parse_str($url_parts[1], $str);
+
+         // http_build_query() leaves out null values, so a missing key
+         // is simply omitted from the stored request
+         return
+             $this->backend->store(
+                 http_build_query(
+                     [
+                         "lui" => "english",
+                         "language" => "english",
+                         "query" => $str["q"] ?? null,
+                         "cat" => $pagetype,
+                         "sc" => $str["sc"] ?? null,
+                         "t" => "device",
+                         "segment" => "startpage.udog",
+                         "page" => $str["page"] ?? null
+                     ]
+                 ),
+                 $pagetype,
+                 $proxy
+             );
+     }
+
+     return null;
+ }
+
+ /**
+  * Unwrap an image URL whose query string carries the real address in a
+  * "piurl" parameter.
+  *
+  * For gstatic.com, bing.net and bing.com addresses everything from the
+  * first "&" onwards is cut off; other addresses are returned unchanged.
+  *
+  * @param string $url Image URL
+  * @return string The decoded "piurl" value, or $url itself when there is
+  *                no usable "piurl" parameter
+  */
+ private function unshitimage($url){
+
+     $query_string = parse_url((string)$url, PHP_URL_QUERY);
+
+     if(!is_string($query_string)){
+
+         // no query string (or unparsable URL)
+         return $url;
+     }
+
+     parse_str($query_string, $query);
+
+     if(
+         !isset($query["piurl"]) ||
+         !is_string($query["piurl"])
+     ){
+
+         return $url;
+     }
+
+     $piurl = $query["piurl"];
+
+     foreach(["gstatic.com/", "bing.net/", "bing.com/"] as $host){
+
+         // compare against false explicitly: a match at offset 0 is
+         // still a match
+         if(strpos($piurl, $host) !== false){
+
+             return
+                 explode(
+                     "&",
+                     $piurl,
+                     2
+                 )[0];
+         }
+     }
+
+     return $piurl;
+ }
+
+ /**
+  * Collapse a text onto one line and return only the first line produced
+  * by word-wrapping it at width 300.
+  *
+  * @param string $text
+  * @return string
+  */
+ private function limitstrlen($text){
+
+     // line breaks of any flavour become spaces
+     $flattened =
+         str_replace(
+             ["\n\r", "\r\n", "\n", "\r"],
+             " ",
+             $text
+         );
+
+     // wrap on word boundaries, then keep the first line only
+     $wrapped = wordwrap($flattened, 300, "\n");
+     $lines = explode("\n", $wrapped, 2);
+
+     return $lines[0];
+ }
+
+ /**
+  * Strip whitespace, dots, NUL bytes and ellipsis characters from both ends
+  * of a title.
+  *
+  * @param string $title
+  * @return string
+  */
+ private function titledots($title){
+
+     $title = (string)$title;
+
+     // trim() works on single bytes: with the multibyte ellipsis in its
+     // character list it would also strip the bytes 0xE2/0x80/0xA6 from
+     // other UTF-8 characters at either end and leave broken text behind.
+     // a unicode-aware pattern removes whole characters only.
+     $trimmed =
+         preg_replace(
+             '/^[ .\t\n\r\x00\x0B\x{2026}]+|[ .\t\n\r\x00\x0B\x{2026}]+$/u',
+             "",
+             $title
+         );
+
+     if($trimmed === null){
+
+         // input is not valid UTF-8: fall back to a byte-safe ASCII trim
+         return trim($title, " .\t\n\r\0\x0B");
+     }
+
+     return $trimmed;
+ }
+
+ /**
+  * Convert a "h:m:s", "m:s" or "s" time string into a number of seconds.
+  *
+  * @param string|int|float $time
+  * @return int
+  */
+ private function hms2int($time){
+
+     $seconds = 0;
+
+     // at most three fields; every field to the left is worth sixty
+     // times the one to its right
+     foreach(explode(":", $time, 3) as $field){
+
+         $seconds = ($seconds * 60) + (int)$field;
+     }
+
+     return $seconds;
+ }
+
+ /**
+  * Remove the highlight marker characters U+E000 and U+E001 from a text.
+  *
+  * NOTE(review): the two search strings were empty as found, which made
+  * this function a no-op. They are restored as the private-use markers
+  * Bing wraps around matched terms - confirm against real responses.
+  *
+  * @param string $text
+  * @return string
+  */
+ private function remove_penguins($text){
+
+     // written as escapes so the invisible characters cannot get lost
+     // when the file is copied or re-encoded
+     return str_replace(
+         ["\u{E000}", "\u{E001}"],
+         "",
+         (string)$text
+     );
+ }
+
+ /**
+  * Abort when the fetched page is a "Redirecting..." page instead of
+  * search results.
+  *
+  * @param string $html Fetched page
+  * @return void
+  * @throws Exception When the page is titled "Redirecting..."; the message
+  *                   says whether a link to the captcha page was found
+  */
+ private function detect_captcha($html){
+
+     $this->fuckhtml->load($html);
+
+     $titles = $this->fuckhtml->getElementsByTagName("title");
+
+     $is_redirect =
+         count($titles) !== 0 &&
+         $titles[0]["innerHTML"] == "Redirecting...";
+
+     if(!$is_redirect){
+
+         // regular page, nothing to do
+         return;
+     }
+
+     // check if it's a captcha
+     $anchors = $this->fuckhtml->getElementsByTagName("a");
+
+     foreach($anchors as $anchor){
+
+         $link_text =
+             $this->fuckhtml
+             ->getTextContent(
+                 $anchor["innerHTML"]
+             );
+
+         if(strpos($link_text, "https://www.startpage.com/sp/captcha") === false){
+
+             continue;
+         }
+
+         throw new Exception("Startpage returned a captcha");
+     }
+
+     throw new Exception("Startpage redirected the scraper to an unhandled page");
+ }
+}
diff --git a/scraper/vimeo.php b/scraper/vimeo.php
new file mode 100644
index 0000000..50bb21b
--- /dev/null
+++ b/scraper/vimeo.php
@@ -0,0 +1,754 @@
+<?php
+
+class vimeo{
+
+ /**
+  * Load the helper classes and create the backend and HTML parser
+  * instances used by the other methods.
+  */
+ public function __construct(){
+
+     // include_once: a plain include would fatally redeclare the class
+     // if the file was already loaded earlier in the same request
+     include_once "lib/backend.php";
+     $this->backend = new backend("vimeo");
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ public function getfilters($page){
+ // Returns the filter definitions as filter name => [display label, option value => option label]. $page is not consulted. Trailing comments name the API query parameter each filter maps to.
+ return [
+ "time" => [
+ "display" => "Date uploaded", // &filter_uploaded=
+ "option" => [
+ "any" => "Any time",
+ "today" => "Last 24 hours",
+ "this-week" => "Last 7 days",
+ "this-month" => "Last 30 days",
+ "this-year" => "Last 365 days",
+ ]
+ ],
+ "display" => [ // selects the result type; video() only applies the sort and filter options for "video"
+ "display" => "Display",
+ "option" => [
+ "video" => "Videos",
+ "ondemand" => "On-Demand ($$)",
+ "people" => "People",
+ "channel" => "Channels",
+ "group" => "Groups"
+ ]
+ ],
+ "sort" => [
+ "display" => "Sort by",
+ "option" => [
+ "relevance" => "Relevance", // no param
+ "recent" => "Newest", // &sort=latest&direction=desc
+ "popular" => "Most popular", // &sort=popularity&direction=desc
+ "a_z" => "Title, A to Z", // &sort=alphabetical&direction=asc
+ "z_a" => "Title, Z to A", // &sort=alphabetical&direction=desc
+ "longest" => "Longest", // &sort=duration&direction=desc
+ "shortest" => "Shortest", // &sort=duration&direction=asc
+ ]
+ ],
+ "duration" => [
+ "display" => "Duration", // &filter_duration=
+ "option" => [
+ "any" => "Any duration",
+ "short" => "Short (less than 4 minutes)",
+ "medium" => "Medium (4-10 minutes)",
+ "long" => "Long (over 10 minutes)"
+ ]
+ ],
+ "resolution" => [
+ "display" => "Resolution",
+ "option" => [
+ "any" => "Any resolution",
+ "4k" => "4K" // &filter_resolution=4k
+ ]
+ ],
+ "category" => [
+ "display" => "Category", // &filter_category=
+ "option" => [
+ "any" => "Any category",
+ "animation" => "Animation",
+ "comedy" => "Comedy",
+ "music" => "Music",
+ "experimental" => "Experimental",
+ "documentary" => "Documentary",
+ "identsandanimatedlogos" => "Idents and Animated Logos",
+ "industry" => "Industry",
+ "instructionals" => "Instructionals",
+ "narrative" => "Narrative",
+ "personal" => "Personal"
+ ]
+ ],
+ "live" => [
+ "display" => "Live events",
+ "option" => [
+ "any" => "Any",
+ "yes" => "Live now" // &filter_live=now
+ ]
+ ],
+ "hdr" => [
+ "display" => "HDR", // &filter_hdr=
+ "option" => [
+ "any" => "Any",
+ "hdr" => "Any HDR",
+ "dolby_vision" => "Dolby Vision",
+ "hdr10" => "HDR10",
+ "hdr10+" => "HDR10+"
+ ]
+ ],
+ "vimeo_360" => [
+ "display" => "Vimeo 360°", // &filter_vimeo_360=
+ "option" => [
+ "any" => "Any",
+ "spatial" => "Spatial",
+ "360" => "360°"
+ ]
+ ],
+ "price" => [ // &filter_price=
+ "display" => "Price",
+ "option" => [
+ "any" => "Any price",
+ "free" => "Free",
+ "paid" => "Paid"
+ ]
+ ],
+ "collection" => [
+ "display" => "Vimeo collections",
+ "option" => [
+ "any" => "Any collection",
+ "staff_pick" => "Staff picks" // &filter_staffpicked=true
+ ]
+ ],
+ "license" => [ // &filter_license=
+ "display" => "License",
+ "option" => [
+ "any" => "Any license",
+ "by-nc-nd" => "CC BY-NC-ND",
+ "by" => "CC BY",
+ "by-nc" => "CC BY-NC",
+ "by-nc-sa" => "CC BY-NC-SA",
+ "by-nd" => "CC BY-ND",
+ "by-sa" => "CC BY-SA",
+ "cc0" => "CC0"
+ ]
+ ]
+ ];
+ }
+
+ /**
+  * Perform a GET request through the given proxy.
+  *
+  * @param mixed $proxy Proxy passed on to backend->assign_proxy()
+  * @param string $url Request URL without query string
+  * @param array $get Query parameters, appended to $url when not empty
+  * @param string|false $jwt Token for the Authorization header; false
+  *                          sends the XMLHttpRequest header set instead
+  * @return string|bool Whatever curl_exec() returned
+  * @throws Exception With the cURL error message when the transfer failed
+  */
+ private function get($proxy, $url, $get = [], $jwt = false){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     if($jwt === false){
+
+         curl_setopt(
+             $curlproc,
+             CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: */*",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip, deflate, br, zstd",
+             "Referer: https://vimeo.com/search",
+             "X-Requested-With: XMLHttpRequest",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-origin",
+             "Priority: u=4"]
+         );
+
+     }else{
+
+         curl_setopt(
+             $curlproc,
+             CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: application/vnd.vimeo.*+json;version=3.3",
+             "Accept-Language: en",
+             "Accept-Encoding: gzip, deflate, br, zstd",
+             "Referer: https://vimeo.com/",
+             "Content-Type: application/json",
+             "Authorization: jwt $jwt",
+             "Vimeo-Page: /search/[[...slug]]",
+             "Origin: https://vimeo.com",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-site",
+             "Priority: u=4"]
+         );
+     }
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     // read the error state first, then release the handle on both the
+     // success and the failure path
+     $failed = curl_errno($curlproc) !== 0;
+     $message = curl_error($curlproc);
+
+     curl_close($curlproc);
+
+     if($failed){
+
+         throw new Exception($message);
+     }
+
+     return $data;
+ }
+
+ /**
+  * Run a Vimeo search (or continue one from a next-page token) and
+  * return the normalised result set.
+  *
+  * @param array $get Request parameters: "npt" (next-page token or falsy),
+  *                   "s" (search terms) and one entry per filter name
+  *                   returned by getfilters()
+  * @return array Keys "status", "npt", "video", "author", "livestream",
+  *               "playlist" and "reel"; only "video" and "author" are filled here
+  * @throws Exception When the request fails, the JSON cannot be parsed,
+  *                   or the API answers with an error or without "data"
+  */
+ public function video($get){
+
+     // parse shit
+     if($get["npt"]){
+
+         [$npt, $proxy] =
+             $this->backend
+             ->get(
+                 $get["npt"],
+                 "videos"
+             );
+
+         // the stored token holds the API path of the next page and the
+         // result type it belongs to
+         $npt = json_decode($npt, true);
+         $pagetype = $npt["pagetype"];
+         $npt = $npt["npt"];
+
+         $jwt = $this->get_jwt($proxy);
+
+         try{
+
+             $json =
+                 $this->get(
+                     $proxy,
+                     "https://api.vimeo.com" . $npt,
+                     [],
+                     $jwt
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch JSON");
+         }
+
+     }else{
+
+         $proxy = null;
+         $jwt = $this->get_jwt($proxy); // this gives us a proxy by reference
+
+         // parse filters
+         $npt = [
+             "query" => $get["s"],
+             "page" => 1,
+             "per_page" => 24,
+             "facets" => "type"
+         ];
+
+         switch($get["display"]){
+
+             case "video":
+                 $npt["filter_type"] = "clip";
+                 $npt["fields"] = "clip.name,stats.plays,clip.pictures,clip.user.name,clip.user.link,clip.user.pictures.sizes,clip.uri,clip.stats.plays,clip.duration,clip.created_time,clip.link,clip.description";
+                 break;
+
+             case "ondemand":
+                 $npt["filter_type"] = "ondemand";
+                 $npt["sizes"] = "296x744";
+                 $npt["fields"] = "ondemand.link,ondemand.name,ondemand.pictures.sizes,ondemand.metadata.interactions.buy,ondemand.metadata.interactions.rent,ondemand.uri";
+                 break;
+
+             case "people":
+                 $npt["filter_type"] = "people";
+                 $npt["fetch_user_profile"] = "1";
+                 $npt["fields"] = "people.name,people.location_details.formatted_address,people.metadata.public_videos.total,people.pictures.sizes,people.link,people.metadata.connections.followers.total,people.skills.name,people.skills.uri,people.background_video,people.uri";
+                 break;
+
+             case "channel":
+                 $npt["filter_type"] = "channel";
+                 $npt["fields"] = "channel.name,channel.metadata.connections.users.total,channel.metadata.connections.videos.total,channel.pictures.sizes,channel.link,channel.uri";
+                 break;
+
+             case "group":
+                 $npt["filter_type"] = "group";
+                 $npt["fields"] = "group.name,group.metadata.connections.users.total,group.metadata.connections.videos.total,group.pictures.sizes,group.link,group.uri";
+                 break;
+         }
+
+         // only apply filters if we're searching for videos
+         if($get["display"] == "video"){
+
+             switch($get["sort"]){
+
+                 case "relevance": break; // do nothing
+
+                 case "recent":
+                     $npt["sort"] = "latest";
+                     $npt["direction"] = "desc";
+                     break;
+
+                 case "popular":
+                     $npt["sort"] = "popularity";
+                     $npt["direction"] = "desc";
+                     break;
+
+                 case "a_z":
+                     $npt["sort"] = "alphabetical";
+                     $npt["direction"] = "asc";
+                     break;
+
+                 case "z_a":
+                     $npt["sort"] = "alphabetical";
+                     $npt["direction"] = "desc";
+                     break;
+
+                 case "longest":
+                     $npt["sort"] = "duration";
+                     $npt["direction"] = "desc";
+                     break;
+
+                 case "shortest":
+                     $npt["sort"] = "duration";
+                     $npt["direction"] = "asc";
+                     break;
+             }
+
+             if($get["time"] != "any"){
+
+                 $npt["filter_uploaded"] = $get["time"];
+             }
+
+             if($get["duration"] != "any"){
+
+                 $npt["filter_duration"] = $get["duration"];
+             }
+
+             if($get["resolution"] != "any"){
+
+                 $npt["filter_resolution"] = $get["resolution"];
+             }
+
+             if($get["category"] != "any"){
+
+                 $npt["filter_category"] = $get["category"];
+             }
+
+             if($get["live"] != "any"){
+
+                 $npt["filter_live"] = "now";
+             }
+
+             if($get["hdr"] != "any"){
+
+                 $npt["filter_hdr"] = $get["hdr"];
+             }
+
+             if($get["vimeo_360"] != "any"){
+
+                 $npt["filter_vimeo_360"] = $get["vimeo_360"];
+             }
+
+             if($get["price"] != "any"){
+
+                 $npt["filter_price"] = $get["price"];
+             }
+
+             if($get["collection"] == "staff_pick"){
+
+                 $npt["filter_staffpicked"] = "true";
+             }
+
+             if($get["license"] != "any"){
+
+                 $npt["filter_license"] = $get["license"];
+             }
+         }
+
+         $pagetype = $npt["filter_type"];
+
+         try{
+
+             $json =
+                 $this->get(
+                     $proxy,
+                     "https://api.vimeo.com/search",
+                     $npt,
+                     $jwt
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch JSON");
+         }
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to parse JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     if(isset($json["error"])){
+
+         $error = $json["error"];
+         if(isset($json["developer_message"])){
+
+             $error .= " ({$json["developer_message"]})";
+         }
+
+         throw new Exception("Vimeo returned an error: " . $error);
+     }
+
+     if(!isset($json["data"])){
+
+         throw new Exception("Vimeo did not return a data object");
+     }
+
+     switch($pagetype){
+
+         case "clip":
+             foreach($json["data"] as $video){
+
+                 $video = $video["clip"];
+
+                 // take the last listed avatar size; an empty list would
+                 // otherwise index element -1
+                 if(
+                     isset($video["user"]["pictures"]["sizes"]) &&
+                     count($video["user"]["pictures"]["sizes"]) !== 0
+                 ){
+
+                     $avatar = $video["user"]["pictures"]["sizes"][count($video["user"]["pictures"]["sizes"]) - 1]["link"];
+                 }else{
+
+                     $avatar = null;
+                 }
+
+                 $out["video"][] = [
+                     "title" => $video["name"],
+                     "description" =>
+                         $this->limitstrlen(
+                             // a missing or null description becomes an empty string
+                             $video["description"] ?? ""
+                         ),
+                     "author" => [
+                         "name" => $video["user"]["name"],
+                         "url" => $video["user"]["link"],
+                         "avatar" => $avatar
+                     ],
+                     "date" => strtotime($video["created_time"]),
+                     "duration" => (int)$video["duration"],
+                     "views" => (int)$video["stats"]["plays"],
+                     "thumb" => [
+                         "ratio" => "16:9",
+                         "url" => $video["pictures"]["base_link"]
+                     ],
+                     "url" => $video["link"]
+                 ];
+             }
+             break;
+
+         case "ondemand":
+             foreach($json["data"] as $video){
+
+                 $video = $video["ondemand"];
+
+                 $description = [];
+                 if(isset($video["metadata"]["interactions"]["rent"]["display_price"])){
+
+                     $description[] = "Rent for " . $video["metadata"]["interactions"]["rent"]["display_price"];
+                 }
+
+                 if(isset($video["metadata"]["interactions"]["buy"]["display_price"])){
+
+                     $description[] = "Buy for " . $video["metadata"]["interactions"]["buy"]["display_price"];
+                 }
+
+                 $description = implode(", ", $description);
+
+                 $out["video"][] = [
+                     "title" => $video["name"],
+                     "description" => $description,
+                     "author" => [
+                         "name" => null,
+                         "url" => null,
+                         "avatar" => null
+                     ],
+                     "date" => null,
+                     "duration" => null,
+                     "views" => null,
+                     "thumb" => [
+                         "ratio" => "9:16",
+                         "url" => $video["pictures"]["sizes"][0]["link"]
+                     ],
+                     "url" => $video["link"]
+                 ];
+             }
+             break;
+
+         case "people":
+             foreach($json["data"] as $user){
+
+                 $user = $user["people"];
+
+                 if(
+                     isset($user["pictures"]["sizes"]) &&
+                     count($user["pictures"]["sizes"]) !== 0
+                 ){
+
+                     $thumb = [
+                         "ratio" => "1:1",
+                         "url" => $user["pictures"]["sizes"][count($user["pictures"]["sizes"]) - 1]["link"]
+                     ];
+                 }else{
+
+                     $thumb = [
+                         "ratio" => null,
+                         "url" => null
+                     ];
+                 }
+
+                 $out["author"][] = [
+                     "title" => $user["name"],
+                     "followers" => (int)$user["metadata"]["connections"]["followers"]["total"],
+                     "description" => $user["metadata"]["public_videos"]["total"] . " videos.",
+                     "thumb" => $thumb,
+                     "url" => $user["link"]
+                 ];
+             }
+             break;
+
+         case "channel":
+         case "group":
+             foreach($json["data"] as $channel){
+
+                 // index by $pagetype: on next-page requests $npt is the
+                 // paging path (a string), not the parameter array
+                 $channel = $channel[$pagetype];
+
+                 if(
+                     isset($channel["pictures"]["sizes"]) &&
+                     count($channel["pictures"]["sizes"]) !== 0
+                 ){
+
+                     $thumb = [
+                         "ratio" => "16:9",
+                         "url" => $channel["pictures"]["sizes"][count($channel["pictures"]["sizes"]) - 1]["link"]
+                     ];
+                 }else{
+
+                     $thumb = [
+                         "ratio" => null,
+                         "url" => null
+                     ];
+                 }
+
+                 $out["author"][] = [
+                     "title" => $channel["name"],
+                     "followers" => (int)$channel["metadata"]["connections"]["users"]["total"],
+                     "description" => $channel["metadata"]["connections"]["videos"]["total"] . " videos.",
+                     "thumb" => $thumb,
+                     "url" => $channel["link"]
+                 ];
+             }
+             break;
+     }
+
+     //
+     // get next page
+     //
+     if(
+         isset($json["paging"]["next"]) &&
+         is_string($json["paging"]["next"])
+     ){
+
+         $out["npt"] =
+             $this->backend
+             ->store(
+                 json_encode([
+                     "npt" => $json["paging"]["next"],
+                     "pagetype" => $pagetype
+                 ]),
+                 "videos",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ /**
+  * Return a JWT for the Vimeo API, fetching a fresh one when none is cached.
+  *
+  * The token is cached in APCu under "vimeo_jwt" for 300 seconds and is
+  * shared across proxies.
+  *
+  * @param mixed $proxy Passed by reference; when null it is replaced
+  *                     with backend->get_ip()
+  * @return string The token
+  * @throws Exception When the token request fails, returns a captcha
+  *                   page, or does not decode to an object with "token"
+  */
+ private function get_jwt(&$proxy){
+
+     //
+     // get jwt token
+     // it's probably safe to cache this across proxies, cause the jwt doesnt contain an userID
+     // only an appID, whatever shit that is
+     // we can only cache it for 5 minutes though, otherwise vimeo cries about it
+     //
+     if($proxy === null){
+
+         $proxy = $this->backend->get_ip();
+     }
+
+     $cached = apcu_fetch("vimeo_jwt");
+
+     if($cached !== false){
+
+         return $cached;
+     }
+
+     // an earlier revision scraped the token from the "viewer-bootstrap"
+     // JSON embedded in https://vimeo.com/search; the endpoint below
+     // replaced it
+     try{
+
+         $response =
+             $this->get(
+                 $proxy,
+                 "https://vimeo.com/_next/jwt",
+                 [],
+                 false
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JWT token");
+     }
+
+     // a captcha page is HTML, so look at its title before trying JSON
+     $this->fuckhtml->load($response);
+
+     $titles = $this->fuckhtml->getElementsByTagName("title");
+
+     if(count($titles) !== 0){
+
+         $heading = $this->fuckhtml->getTextContent($titles[0]);
+
+         if($heading == "Vimeo / CAPTCHA Challenge"){
+
+             throw new Exception("Vimeo returned a Captcha");
+         }
+     }
+
+     $decoded = json_decode($response, true);
+
+     if($decoded === null){
+
+         throw new Exception("The JWT object could not be decoded");
+     }
+
+     if(!isset($decoded["token"])){
+
+         throw new Exception("Vimeo did not return a JWT");
+     }
+
+     $token = $decoded["token"];
+
+     apcu_store("vimeo_jwt", $token, 300);
+
+     return $token;
+ }
+
+ /**
+  * Remove one trailing "..." or "…" from a title, then trim whitespace,
+  * NUL bytes and non-breaking spaces from both ends.
+  *
+  * @param string $title Raw title
+  * @return string Cleaned title
+  */
+ private function titledots($title){
+
+     $tail = substr($title, -3);
+
+     // "…" is three bytes in UTF-8, so one 3-byte window covers both spellings
+     if(
+         $tail == "..." ||
+         $tail == "…"
+     ){
+
+         $title = substr($title, 0, -3);
+     }
+
+     // trim() works on bytes: listing "\xc2\xa0" in its mask strips lone C2
+     // and A0 bytes and breaks characters such as "à" (C3 A0) or "©" (C2 A9).
+     // A /u regex removes whole characters only.
+     $trimmed =
+         preg_replace(
+             '/^[ \n\r\t\x0B\x00\x{A0}]+|[ \n\r\t\x0B\x00\x{A0}]+\z/u',
+             "",
+             (string)$title
+         );
+
+     if($trimmed === null){
+
+         // not valid UTF-8: only the single-byte characters are safe to trim
+         return trim((string)$title, " \n\r\t\v\x00");
+     }
+
+     return $trimmed;
+ }
+
+ /**
+  * Flatten line breaks to spaces and keep only the first line that
+  * wordwrap() produces at a width of 300 (over-long words are not split).
+  *
+  * @param string|null $text Text to shorten; null is treated as ""
+  * @return string First wrapped line of the flattened text
+  */
+ private function limitstrlen($text){
+
+     // video() passes the API's description field straight in; a null there
+     // would otherwise trigger the PHP 8.1 null-to-string deprecation
+     $text = (string)$text;
+
+     return
+         explode(
+             "\n",
+             wordwrap(
+                 str_replace(
+                     ["\n\r", "\r\n", "\n", "\r"],
+                     " ",
+                     $text
+                 ),
+                 300,
+                 "\n"
+             ),
+             2
+         )[0];
+ }
+}
diff --git a/scraper/vsco.php b/scraper/vsco.php
new file mode 100644
index 0000000..8a7f057
--- /dev/null
+++ b/scraper/vsco.php
@@ -0,0 +1,257 @@
+<?php
+
+class vsco{
+
+ /**
+  * Load the backend class and create the backend instance for "vsco".
+  */
+ public function __construct(){
+
+     // include_once: a plain include would fatally redeclare the class
+     // if the file was already loaded earlier in the same request
+     include_once "lib/backend.php";
+     $this->backend = new backend("vsco");
+ }
+
+ /**
+  * This scraper exposes no filters.
+  *
+  * @param string $page Page type (ignored)
+  * @return array Always an empty array
+  */
+ public function getfilters($page){
+
+     $filters = [];
+
+     return $filters;
+ }
+
+ /**
+  * Perform a GET request over HTTP/2 through the given proxy.
+  *
+  * @param mixed $proxy Proxy passed on to backend->assign_proxy()
+  * @param string $url Request URL without query string
+  * @param array $get Query parameters, appended to $url when not empty;
+  *                   "query" is also used for the Referer of API requests
+  * @param string|null $bearer Bearer token; null sends the document
+  *                            (page load) header set instead
+  * @return string|bool Whatever curl_exec() returned
+  * @throws Exception With the cURL error message when the transfer failed
+  */
+ private function get($proxy, $url, $get = [], $bearer = null){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+     if($bearer === null){
+
+         curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+             "Accept-Language: en-US,en;q=0.5",
+             "Accept-Encoding: gzip",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Upgrade-Insecure-Requests: 1",
+             "Sec-Fetch-Dest: document",
+             "Sec-Fetch-Mode: navigate",
+             "Sec-Fetch-Site: same-origin",
+             "Sec-Fetch-User: ?1",
+             "Priority: u=0, i",
+             "TE: trailers"]
+         );
+     }else{
+
+         curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+             ["User-Agent: " . config::USER_AGENT,
+             "Accept: */*",
+             "Accept-Language: en-US",
+             "Accept-Encoding: gzip",
+             // a call without a "query" parameter falls back to an empty search path
+             "Referer: https://vsco.co/search/images/" . urlencode($get["query"] ?? ""),
+             "authorization: Bearer " . $bearer,
+             "content-type: application/json",
+             "x-client-build: 1",
+             "x-client-platform: web",
+             "DNT: 1",
+             "Sec-GPC: 1",
+             "Connection: keep-alive",
+             "Sec-Fetch-Dest: empty",
+             "Sec-Fetch-Mode: cors",
+             "Sec-Fetch-Site: same-origin",
+             "Priority: u=0",
+             "TE: trailers"]
+         );
+     }
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     // http2 bypass
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     // read the error state first, then release the handle on both the
+     // success and the failure path
+     $failed = curl_errno($curlproc) !== 0;
+     $message = curl_error($curlproc);
+
+     curl_close($curlproc);
+
+     if($failed){
+
+         throw new Exception($message);
+     }
+
+     return $data;
+ }
+
+ /**
+  * Search VSCO images (or continue a search from a next-page token).
+  *
+  * A fresh search first loads https://vsco.co/feed to pull the bearer
+  * token out of the page, then queries the image search API with it.
+  * The token and the pagination state are stored in the next-page token.
+  *
+  * @param array $get Request parameters: "npt" (next-page token or falsy)
+  *                   and "s" (search terms)
+  * @return array Keys "status", "npt" and "image"
+  * @throws Exception On an empty search term, a failed request, a missing
+  *                   bearer token, or a response without "results"
+  */
+ public function image($get){
+
+     if($get["npt"]){
+
+         [$data, $proxy] =
+             $this->backend->get(
+                 $get["npt"], "images"
+             );
+
+         $data = json_decode($data, true);
+
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+
+         // get bearer token
+         try{
+
+             $html =
+                 $this->get(
+                     $proxy,
+                     "https://vsco.co/feed"
+                 );
+
+         }catch(Exception $error){
+
+             throw new Exception("Failed to fetch feed page");
+         }
+
+         preg_match(
+             '/"tkn":"([A-z0-9]+)"/',
+             $html,
+             $bearer
+         );
+
+         if(!isset($bearer[1])){
+
+             throw new Exception("Failed to grep bearer token");
+         }
+
+         $data = [
+             "pagination" => [
+                 "query" => $search,
+                 "page" => 0,
+                 "size" => 100
+             ],
+             "bearer" => $bearer[1]
+         ];
+     }
+
+     try{
+
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://vsco.co/api/2.0/search/images",
+                 $data["pagination"],
+                 $data["bearer"]
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(!isset($json["results"])){
+
+         throw new Exception("Failed to access results object");
+     }
+
+     foreach($json["results"] as $image){
+
+         // without an image URL there is nothing to show, and the host
+         // split below would be handed an empty delimiter
+         if(!isset($image["responsive_url"])){
+
+             continue;
+         }
+
+         // responsive_url comes without a scheme: keep the part after the host
+         $image_domain = parse_url("https://" . $image["responsive_url"], PHP_URL_HOST);
+         $thumbnail = explode($image_domain, $image["responsive_url"], 2)[1];
+
+         if(substr($thumbnail, 0, 3) != "/1/"){
+
+             // drop the first path segment
+             $thumbnail =
+                 preg_replace(
+                     '/^\/[^\/]+/',
+                     "",
+                     $thumbnail
+                 );
+         }
+
+         $thumbnail = "https://img.vsco.co/cdn-cgi/image/width=480,height=360" . $thumbnail;
+         $size =
+             $this->image_ratio(
+                 (int)$image["dimensions"]["width"],
+                 (int)$image["dimensions"]["height"]
+             );
+
+         $out["image"][] = [
+             "title" => $image["description"],
+             "source" => [
+                 [
+                     "url" => "https://" . $image["responsive_url"],
+                     "width" => (int)$image["dimensions"]["width"],
+                     "height" => (int)$image["dimensions"]["height"]
+                 ],
+                 [
+                     "url" => $thumbnail,
+                     "width" => $size[0],
+                     "height" => $size[1]
+                 ]
+             ],
+             "url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
+         ];
+     }
+
+     // get NPT
+     // page count follows the page size that was actually requested
+     $max_page = ceil(($json["total"] ?? 0) / $data["pagination"]["size"]);
+     $data["pagination"]["page"]++;
+
+     if($max_page > $data["pagination"]["page"]){
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($data),
+                 "images",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scale a size so that it fits a 480x360 box while keeping its aspect ratio.
+  *
+  * @param int $width Source width
+  * @param int $height Source height
+  * @return array [width, height] of the scaled size, as floats from floor();
+  *               the box itself when either source dimension is not positive
+  */
+ private function image_ratio($width, $height){
+
+     // a dimension of 0 would otherwise raise a DivisionByZeroError on PHP 8
+     if(
+         $width <= 0 ||
+         $height <= 0
+     ){
+
+         return [480.0, 360.0];
+     }
+
+     // the smaller factor makes both sides fit
+     $ratio = min(480 / $width, 360 / $height);
+
+     return [
+         floor($width * $ratio),
+         floor($height * $ratio)
+     ];
+ }
+}
diff --git a/scraper/wiby.php b/scraper/wiby.php
new file mode 100644
index 0000000..59f723c
--- /dev/null
+++ b/scraper/wiby.php
@@ -0,0 +1,246 @@
+<?php
+
+class wiby{
+
+ /**
+  * Load the backend class and create the backend instance for "wiby".
+  */
+ public function __construct(){
+
+     // include_once: a plain include would fatally redeclare the class
+     // if the file was already loaded earlier in the same request
+     include_once "lib/backend.php";
+     $this->backend = new backend("wiby");
+ }
+
+ /**
+  * Return the filter definitions for a page type.
+  *
+  * @param string $page Page type; only "web" has filters
+  * @return array Filter name => [display label, options]; empty for other pages
+  */
+ public function getfilters($page){
+
+     $filters = [];
+
+     if($page == "web"){
+
+         $filters["nsfw"] = [
+             "display" => "NSFW",
+             "option" => [
+                 "yes" => "Yes",
+                 "no" => "No"
+             ]
+         ];
+
+         $filters["date"] = [
+             "display" => "Time posted",
+             "option" => [
+                 "any" => "Any time",
+                 "day" => "Past day",
+                 "week" => "Past week",
+                 "month" => "Past month",
+                 "year" => "Past year",
+             ]
+         ];
+     }
+
+     return $filters;
+ }
+
+ /**
+  * Perform a GET request through the given proxy.
+  *
+  * @param mixed $proxy Proxy passed on to backend->assign_proxy()
+  * @param string $url Request URL without query string
+  * @param array $get Query parameters, appended to $url when not empty
+  * @param string $nsfw Value sent in the "ws" cookie
+  * @return string|bool Whatever curl_exec() returned
+  * @throws Exception With the cURL error message when the transfer failed
+  */
+ // $get lost its "= []" default: a default in front of the required $nsfw
+ // could never be used and is reported as deprecated since PHP 8.0
+ private function get($proxy, $url, $get, $nsfw){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+         ["User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Cookie: ws={$nsfw}",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"]
+     );
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     // read the error state first, then release the handle on both the
+     // success and the failure path
+     $failed = curl_errno($curlproc) !== 0;
+     $message = curl_error($curlproc);
+
+     curl_close($curlproc);
+
+     if($failed){
+
+         throw new Exception($message);
+     }
+
+     return $data;
+ }
+
+ /**
+  * Search wiby.me (or continue a search from a next-page token) and
+  * scrape the result list out of the returned HTML.
+  *
+  * @param array $get Request parameters: "npt" (next-page token or falsy),
+  *                   "s" (search terms), "date" and "nsfw"
+  * @return array Result set; only "npt" and "web" are filled here
+  * @throws Exception On an empty search term or a failed request
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         [$q, $proxy] = $this->backend->get($get["npt"], "web");
+         $q = json_decode($q, true);
+
+         // the cookie value travels inside the token but is not a query parameter
+         $nsfw = $q["nsfw"];
+         unset($q["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $date = $get["date"];
+         $nsfw = $get["nsfw"] == "yes" ? "0" : "1";
+
+         // str_replace() works through this list in order, so the
+         // three-character tokens must come before their two-character
+         // prefixes: stripping "!g" first would turn "!gi" into a stray "i"
+         $search =
+             str_replace(
+                 [
+                     "!gi",
+                     "!gv",
+                     "!gm",
+                     "!bi",
+                     "!bv",
+                     "!bm",
+                     "!td",
+                     "!tw",
+                     "!tm",
+                     "!ty",
+                     "&gi",
+                     "&gv",
+                     "&gm",
+                     "&bi",
+                     "&bv",
+                     "&bm",
+                     "&td",
+                     "&tw",
+                     "&tm",
+                     "&ty",
+                     "!g",
+                     "!b",
+                     "&g",
+                     "&b",
+                 ],
+                 "",
+                 $search
+             );
+
+         // the date filter is sent as a prefix token on the query
+         switch($date){
+
+             case "day": $search = "!td " . $search; break;
+             case "week": $search = "!tw " . $search; break;
+             case "month": $search = "!tm " . $search; break;
+             case "year": $search = "!ty " . $search; break;
+         }
+
+         $q = [
+             "q" => $search
+         ];
+     }
+
+     try{
+         $html = $this->get(
+             $proxy,
+             "https://wiby.me/",
+             $q,
+             $nsfw
+         );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     // the "Find more..." link carries the number of the next page
+     preg_match(
+         '/<p class="pin"><blockquote>(?:<\/p>)?<br><a class="more" href="\/\?q=[^"]+&p=([0-9]+)">Find more\.\.\.<\/a><\/blockquote>/',
+         $html,
+         $nextpage
+     );
+
+     if(count($nextpage) === 0){
+
+         $nextpage = null;
+     }else{
+
+         $nextpage =
+             $this->backend->store(
+                 json_encode([
+                     "q" => $q["q"],
+                     "p" => (int)$nextpage[1],
+                     "nsfw" => $nsfw
+                 ]),
+                 "web",
+                 $proxy
+             );
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => $nextpage,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // capture groups: 1 = href, 2 = link text, 3 = description paragraph
+     preg_match_all(
+         '/<blockquote>[\s]*<a .* href="(.*)">(.*)<\/a>.*<p>(.*)<\/p>[\s]*<\/blockquote>/Ui',
+         $html,
+         $links
+     );
+
+     for($i=0; $i<count($links[0]); $i++){
+
+         $out["web"][] = [
+             "title" => $this->unescapehtml(trim($links[2][$i])),
+             "description" => $this->unescapehtml(trim(strip_tags($links[3][$i]), ".\n\r ")),
+             "url" => trim($links[1][$i]),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Turn the listed <br> spellings into newlines and decode entities
+  * (ENT_QUOTES | ENT_XML1, UTF-8).
+  *
+  * @param string $str HTML fragment
+  * @return string Decoded text
+  */
+ private function unescapehtml($str){
+
+     // exact spellings only: other casings of the tag are left untouched
+     $breaks = ["<br>", "<br/>", "</br>", "<BR>", "<BR/>", "</BR>"];
+
+     $with_newlines = str_replace($breaks, "\n", $str);
+
+     return html_entity_decode($with_newlines, ENT_QUOTES | ENT_XML1, 'UTF-8');
+ }
+}
diff --git a/scraper/yandex.php b/scraper/yandex.php
new file mode 100644
index 0000000..f73c3fd
--- /dev/null
+++ b/scraper/yandex.php
@@ -0,0 +1,1248 @@
+<?php
+
+class yandex{
+
+ /*
+ curl functions
+ */
+ /**
+  * Load the helper classes and create the HTML parser.
+  */
+ public function __construct(){
+
+     // include_once: a plain include would fatally redeclare the class
+     // if the file was already loaded earlier in the same request
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     // only the class is loaded here; the scraper functions create
+     // their own backend instance
+     include_once "lib/backend.php";
+ }
+
+ /**
+  * Perform a GET request through the given proxy.
+  *
+  * $get_cookie selects the mode:
+  *  - int 0: send no cookie, read the Set-Cookie headers and return
+  *           the value of the "i" cookie instead of the body
+  *  - int 1: send a generated "yp" cookie that carries the $nsfw level
+  *  - anything else: send the value as the "i" cookie
+  *
+  * @param mixed $proxy Proxy passed on to backend->assign_proxy()
+  * @param string $url Request URL without query string
+  * @param array $get Query parameters, appended to $url when not empty
+  * @param string|false $nsfw "yes", "maybe" or "no"; only used in mode 1
+  * @param int|string $get_cookie Mode selector or cookie value, see above
+  * @return string|bool Response body, or the "i" cookie value in mode 0
+  * @throws Exception On a cURL error, or when mode 0 received no "i" cookie
+  */
+ // $get lost its "= []" default: a default in front of the required $nsfw
+ // could never be used and is reported as deprecated since PHP 8.0
+ private function get($proxy, $url, $get, $nsfw, $get_cookie = 1){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     // extract "i" cookie
+     $cookies_tmp = [];
+
+     if($get_cookie === 0){
+
+         curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+             // cURL expects the number of bytes handled
+             $length = strlen($header);
+
+             $header = explode(":", $header, 2);
+
+             if(
+                 isset($header[1]) &&
+                 trim(strtolower($header[0])) == "set-cookie"
+             ){
+
+                 $cookie_tmp = explode("=", trim($header[1]), 2);
+
+                 // a cookie without "=" has no value to store
+                 if(isset($cookie_tmp[1])){
+
+                     $cookies_tmp[trim($cookie_tmp[0])] =
+                         explode(";", $cookie_tmp[1], 2)[0];
+                 }
+             }
+
+             return $length;
+         });
+     }
+
+     switch($nsfw){
+         case "yes": $nsfw = "0"; break;
+         case "maybe": $nsfw = "1"; break;
+         case "no": $nsfw = "2"; break;
+     }
+
+     // strict comparisons: with a loose switch a cookie string could be
+     // taken for mode 0 or 1 (on PHP 7 every non-numeric string equals 0)
+     if($get_cookie === 0){
+
+         $cookie = null;
+     }elseif($get_cookie === 1){
+
+         $cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
+     }else{
+
+         $cookie = "Cookie: i=" . $get_cookie;
+     }
+
+     $headers =
+         ["User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Encoding: gzip",
+         "Accept-Language: en-US,en;q=0.5",
+         "DNT: 1"];
+
+     // leave the header out entirely instead of sending an empty line
+     if($cookie !== null){
+
+         $headers[] = $cookie;
+     }
+
+     $headers[] = "Referer: https://yandex.com/images/search";
+     $headers[] = "Connection: keep-alive";
+     $headers[] = "Upgrade-Insecure-Requests: 1";
+     $headers[] = "Sec-Fetch-Dest: document";
+     $headers[] = "Sec-Fetch-Mode: navigate";
+     $headers[] = "Sec-Fetch-Site: cross-site";
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     // read the error state first, then release the handle in every mode
+     $failed = curl_errno($curlproc) !== 0;
+     $message = curl_error($curlproc);
+
+     curl_close($curlproc);
+
+     if($get_cookie === 0){
+
+         if(isset($cookies_tmp["i"])){
+
+             return $cookies_tmp["i"];
+         }
+
+         // report the transport error when there was one, it is the real cause
+         throw new Exception(
+             $failed ?
+             $message :
+             "Failed to get Yandex clearance cookie"
+         );
+     }
+
+     if($failed){
+
+         throw new Exception($message);
+     }
+
+     return $data;
+ }
+
+ public function getfilters($pagetype){
+ // Returns the filter definitions for "web", "images" or "videos" as filter name => [display label, options]. An "option" of "_DATE" stands in for a list of choices on the date filters.
+ switch($pagetype){
+
+ case "web":
+ return [
+ "lang" => [
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "en" => "English",
+ "ru" => "Russian",
+ "be" => "Belorussian",
+ "fr" => "French",
+ "de" => "German",
+ "id" => "Indonesian",
+ "kk" => "Kazakh",
+ "tt" => "Tatar",
+ "tr" => "Turkish",
+ "uk" => "Ukrainian"
+ ]
+ ],
+ "newer" => [
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ]
+ ];
+ break; // never reached, the case returns above
+
+ case "images":
+ return
+ [
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes",
+ "maybe" => "Maybe",
+ "no" => "No"
+ ]
+ ],
+ "time" => [
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "week" => "Last week"
+ ]
+ ],
+ "size" => [
+ "display" => "Size",
+ "option" => [
+ "any" => "Any size",
+ "small" => "Small",
+ "medium" => "Medium",
+ "large" => "Large",
+ "wallpaper" => "Wallpaper"
+ ]
+ ],
+ "color" => [
+ "display" => "Colors",
+ "option" => [
+ "any" => "All colors",
+ "color" => "Color images only",
+ "gray" => "Black and white",
+ "red" => "Red",
+ "orange" => "Orange",
+ "yellow" => "Yellow",
+ "cyan" => "Cyan",
+ "green" => "Green",
+ "blue" => "Blue",
+ "violet" => "Purple",
+ "white" => "White",
+ "black" => "Black"
+ ]
+ ],
+ "type" => [
+ "display" => "Type",
+ "option" => [
+ "any" => "All types",
+ "photo" => "Photos",
+ "clipart" => "White background",
+ "lineart" => "Drawings and sketches",
+ "face" => "People",
+ "demotivator" => "Demotivators"
+ ]
+ ],
+ "layout" => [
+ "display" => "Layout",
+ "option" => [
+ "any" => "All layouts",
+ "horizontal" => "Horizontal",
+ "vertical" => "Vertical",
+ "square" => "Square"
+ ]
+ ],
+ "format" => [
+ "display" => "Format",
+ "option" => [
+ "any" => "Any format",
+ "jpeg" => "JPEG",
+ "png" => "PNG",
+ "gif" => "GIF"
+ ]
+ ]
+ ];
+ break; // never reached, the case returns above
+
+ case "videos":
+ return [
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes",
+ "maybe" => "Maybe",
+ "no" => "No"
+ ]
+ ],
+ "time" => [
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "9" => "Recently"
+ ]
+ ],
+ "duration" => [
+ "display" => "Duration",
+ "option" => [
+ "any" => "Any duration",
+ "short" => "Short"
+ ]
+ ]
+ ];
+ break; // never reached, the case returns above
+ } // no default case: any other $pagetype falls out of the switch and the function returns null
+ }
+
+ /**
+  * Search Yandex (or continue a search from a next-page token) and
+  * scrape the result list out of the returned HTML.
+  *
+  * A clearance cookie is fetched once and kept in APCu under
+  * "yandexweb_cookie"; every search request sends it.
+  *
+  * @param array $get Request parameters: "npt" (next-page token or falsy),
+  *                   "s" (search terms), "lang", "newer" and "older"
+  * @return array Result set; only "npt" and "web" are filled here
+  * @throws Exception On an empty search term, a failed request, or a
+  *                   page whose title is "403"
+  */
+ public function web($get){
+
+     $this->backend = new backend("yandex_w");
+
+     // has captcha
+     // https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
+
+     // https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
+     // &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
+
+     // get clearance cookie
+     // NOTE(review): stored without a TTL, so a stale cookie stays until the cache is cleared
+     if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
+
+         $proxy = $this->backend->get_ip();
+
+         $cookie =
+             $this->get(
+                 $proxy,
+                 "https://yandex.ru/support2/smart-captcha/ru/",
+                 [],
+                 false,
+                 0
+             );
+
+         apcu_store("yandexweb_cookie", $cookie);
+     }
+
+     if($get["npt"]){
+
+         // the token holds the path and query of the next page
+         [$npt, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $html =
+             $this->get(
+                 $proxy,
+                 "https://yandex.com" . $npt,
+                 [],
+                 "yes",
+                 $cookie
+             );
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         // reuse the proxy that fetched the cookie, if one was picked above
+         $proxy = !isset($proxy) ? $this->backend->get_ip() : $proxy;
+         $lang = $get["lang"];
+         $older = $get["older"];
+         $newer = $get["newer"];
+
+         $params = [
+             "text" => $search,
+             "web" => "1",
+             "frame" => "1",
+             "searchid" => "3131712"
+         ];
+
+         if($lang != "any"){
+
+             $params["lang"] = $lang;
+         }
+
+         // the date range needs both ends: an open start becomes
+         // timestamp 0, an open end becomes now
+         if(
+             $newer === false &&
+             $older !== false
+         ){
+
+             $newer = 0;
+         }
+
+         if($newer !== false){
+
+             $params["from_day"] = date("j", $newer);
+             $params["from_month"] = date("n", $newer);
+             $params["from_year"] = date("Y", $newer);
+
+             if($older === false){
+
+                 $older = time();
+             }
+
+             $params["to_day"] = date("j", $older);
+             $params["to_month"] = date("n", $older);
+             $params["to_year"] = date("Y", $older);
+         }
+
+         try{
+             $html =
+                 $this->get(
+                     $proxy,
+                     "https://yandex.com/search/site/",
+                     $params,
+                     "yes",
+                     $cookie
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Could not get search page");
+         }
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $this->fuckhtml->load($html);
+
+     // Scrape page blocked error
+     $title =
+         $this->fuckhtml
+         ->getElementsByTagName("title");
+
+     if(
+         count($title) !== 0 &&
+         $title[0]["innerHTML"] == "403"
+     ){
+
+         throw new Exception("Yandex blocked this proxy or 4get instance.");
+     }
+
+     // get nextpage
+     $npt =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "b-pager__next",
+             "a"
+         );
+
+     if(count($npt) !== 0){
+
+         $out["npt"] =
+             $this->backend->store(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $npt
+                     [0]
+                     ["attributes"]
+                     ["href"]
+                 ),
+                 "web",
+                 $proxy
+             );
+     }
+
+     // get items
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "b-serp-item",
+             "li"
+         );
+
+     foreach($items as $item){
+
+         $this->fuckhtml->load($item);
+
+         $links =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "b-serp-item__title-link",
+                 "a"
+             );
+
+         // an item without a title link has no url to offer: skip it
+         // instead of reading element 0 of an empty list
+         if(count($links) === 0){
+
+             continue;
+         }
+
+         $link = $links[0];
+
+         $texts =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "b-serp-item__text",
+                 "div"
+             );
+
+         // the description is optional
+         $description = null;
+
+         if(count($texts) !== 0){
+
+             $description =
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $texts[0]
+                     )
+                 );
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $link
+                     )
+                 ),
+             "description" => $description,
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $link
+                     ["attributes"]
+                     ["href"]
+                 ),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scrape Yandex image search.
+  *
+  * @param array $get Request parameters. Reads "npt" (next-page token) or,
+  *                   for a first page, "s", "nsfw", "time", "size", "color",
+  *                   "type", "layout" and "format".
+  * @return array ["status" => "ok", "npt" => token|null, "image" => list of images]
+  * @throws Exception On an empty search term, a failed request, undecodable
+  *                   JSON, a captcha response, or a response that carries
+  *                   no result payload.
+  */
+ public function image($get){
+
+     $this->backend = new backend("yandex_i");
+
+     if($get["npt"]){
+
+         // continuing a search: the token holds the complete request
+         [$request, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "images"
+             );
+
+         $request = json_decode($request, true);
+
+         // nsfw is stored with the request but handed to get() separately
+         $nsfw = $request["nsfw"];
+         unset($request["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $time = $get["time"];
+         $size = $get["size"];
+         $color = $get["color"];
+         $type = $get["type"];
+         $layout = $get["layout"];
+         $format = $get["format"];
+
+         // Query parameters observed on Yandex image search for each filter:
+         //   isize   = large | medium | small | wallpaper
+         //             (wallpaper also sent wp=wh16x9_1920x1080)
+         //   iorient = horizontal | vertical | square
+         //   type    = photo | clipart (white background) |
+         //             lineart (drawings and sketches) | face (people) |
+         //             demotivator
+         //   icolor  = color | gray | red | orange | yellow | cyan | green |
+         //             blue | violet (purple) | white | black
+         //   itype   = jpg | png | gifan (gif)
+         //   recent  = 7D
+         // Every observed request also carried text=<query> and
+         // uinfo=sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080
+
+         $request = [
+             "format" => "json",
+             "request" => [
+                 "blocks" => [
+                     [
+                         "block" => "extra-content",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "i-global__params:ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "search2:ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "preview__isWallpaper",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "content_type_search",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "serp-controller",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "cookies_ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     [
+                         "block" => "advanced-search-block",
+                         "params" => (object)[],
+                         "version" => 2
+                     ]
+                 ],
+                 "metadata" => [
+                     "bundles" => [
+                         "lb" => "AS?(E<X120"
+                     ],
+                     "assets" => [
+                         // las base
+                         "las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
+
+                         // las default
+                         //"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
+                     ],
+                     "extraContent" => [
+                         "names" => [
+                             "i-react-ajax-adapter"
+                         ]
+                     ]
+                 ]
+             ]
+         ];
+
+         /*
+             Apply filters
+         */
+         if($time == "week"){
+
+             $request["recent"] = "7D";
+         }
+
+         if($size != "any"){
+
+             $request["isize"] = $size;
+         }
+
+         if($type != "any"){
+
+             $request["type"] = $type;
+         }
+
+         if($color != "any"){
+
+             $request["icolor"] = $color;
+         }
+
+         if($layout != "any"){
+
+             $request["iorient"] = $layout;
+         }
+
+         if($format != "any"){
+
+             $request["itype"] = $format;
+         }
+
+         $request["text"] = $search;
+         $request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";
+
+         // the block description travels as a JSON string inside the query
+         $request["request"] = json_encode($request["request"]);
+     }
+
+     try{
+         $json = $this->get(
+             $proxy,
+             "https://yandex.com/images/search",
+             $request,
+             $nsfw,
+             "yandex_i"
+         );
+     }catch(Exception $err){
+
+         throw new Exception("Failed to get JSON");
+     }
+
+     /*
+     $handle = fopen("scraper/yandex.json", "r");
+     $json = fread($handle, filesize("scraper/yandex.json"));
+     fclose($handle);*/
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(
+         isset($json["type"]) &&
+         $json["type"] == "captcha"
+     ){
+
+         throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     // stitch the HTML fragments together and note whether any block
+     // advertises a next page
+     $html = "";
+     $has_next = false;
+
+     foreach($json["blocks"] ?? [] as $block){
+
+         if(isset($block["html"])){
+
+             $html .= $block["html"];
+         }
+
+         if(!empty($block["params"]["nextPageUrl"])){
+
+             $has_next = true;
+         }
+     }
+
+     // store the token once, so the page counter advances by exactly one
+     // even when several blocks carry nextPageUrl
+     if($has_next){
+
+         $request["nsfw"] = $nsfw;
+         $request["p"] = isset($request["p"]) ? $request["p"] + 1 : 1;
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($request),
+                 "images",
+                 $proxy
+             );
+     }
+
+     $this->fuckhtml->load($html);
+
+     // the results live in the data-state attribute of a div.Root element
+     $data = null;
+
+     foreach(
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "Root",
+             "div"
+         ) as $div
+     ){
+
+         if(!isset($div["attributes"]["data-state"])){
+
+             continue;
+         }
+
+         $tmp = json_decode(
+             $this->fuckhtml
+             ->getTextContent(
+                 $div["attributes"]["data-state"]
+             ),
+             true
+         );
+
+         if(isset($tmp["initialState"]["serpList"])){
+
+             $data = $tmp;
+             break;
+         }
+     }
+
+     if($data === null){
+
+         throw new Exception("Failed to extract JSON");
+     }
+
+     foreach($data["initialState"]["serpList"]["items"]["entities"] as $image){
+
+         // title is "<title>: <text>" when the snippet has a text part
+         $title = [html_entity_decode($image["snippet"]["title"], ENT_QUOTES | ENT_HTML5)];
+
+         if(isset($image["snippet"]["text"])){
+
+             $title[] = html_entity_decode($image["snippet"]["text"], ENT_QUOTES | ENT_HTML5);
+         }
+
+         $tmp = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $this->titledots(
+                         implode(": ", $title)
+                     )
+                 ),
+             "source" => [],
+             "url" => htmlspecialchars_decode($image["snippet"]["url"])
+         ];
+
+         // add preview URL
+         $tmp["source"][] = [
+             "url" => htmlspecialchars_decode($image["viewerData"]["preview"][0]["url"]),
+             "width" => (int)$image["viewerData"]["preview"][0]["w"],
+             "height" => (int)$image["viewerData"]["preview"][0]["h"],
+         ];
+
+         // duplicates are optional; tolerate their absence
+         foreach($image["viewerData"]["dups"] ?? [] as $dup){
+
+             $tmp["source"][] = [
+                 "url" => htmlspecialchars_decode($dup["url"]),
+                 "width" => (int)$dup["w"],
+                 "height" => (int)$dup["h"],
+             ];
+         }
+
+         // thumbnail goes last; give a protocol-relative URL an https scheme
+         $tmp["source"][] = [
+             "url" =>
+                 preg_replace(
+                     '/^\/\//',
+                     "https://",
+                     htmlspecialchars_decode($image["viewerData"]["thumb"]["url"])
+                 ),
+             "width" => (int)$image["viewerData"]["thumb"]["w"],
+             "height" => (int)$image["viewerData"]["thumb"]["h"]
+         ];
+
+         $out["image"][] = $tmp;
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scrape Yandex video search.
+  *
+  * @param array $get Request parameters. Reads "npt" (next-page token) or,
+  *                   for a first page, "s", "nsfw", "time" and "duration".
+  * @return array Result set with "status", "npt", "video", "author",
+  *               "livestream", "playlist" and "reel" keys; this method
+  *               only fills "npt" and "video".
+  * @throws Exception On an empty search term, a failed request, undecodable
+  *                   JSON, or a response without a "blocks" key.
+  */
+ public function video($get){
+
+     $this->backend = new backend("yandex_v");
+
+     if($get["npt"]){
+
+         // continuing a search: the token holds the complete parameter set
+         [$params, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "video"
+             );
+
+         $params = json_decode($params, true);
+
+         // nsfw is stored with the parameters but handed to get() separately
+         $nsfw = $params["nsfw"];
+         unset($params["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $time = $get["time"];
+         $duration = $get["duration"];
+
+         // Mirrors the request captured from https://yandex.com/video/search.
+         // The captured request also carried yu, from=tabbar, reqid and
+         // suggest_reqid, which are not sent here.
+         $params = [
+             "tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
+             "format" => "json",
+             "request" => json_encode([
+                 "blocks" => [
+                     (object)[
+                         "block" => "extra-content",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "i-global__params:ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "search2:ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "vital-incut",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "content_type_search",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "serp-controller",
+                         "params" => (object)[],
+                         "version" => 2
+                     ],
+                     (object)[
+                         "block" => "cookies_ajax",
+                         "params" => (object)[],
+                         "version" => 2
+                     ]
+                 ],
+                 "metadata" => (object)[
+                     "bundles" => (object)[
+                         "lb" => "^G]!q<X120"
+                     ],
+                     "assets" => (object)[
+                         "las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
+                     ],
+                     "extraContent" => (object)[
+                         "names" => [
+                             "i-react-ajax-adapter"
+                         ]
+                     ]
+                 ]
+             ]),
+             "text" => $search
+         ];
+
+         if($duration != "any"){
+
+             $params["duration"] = $duration;
+         }
+
+         if($time != "any"){
+
+             $params["within"] = $time;
+         }
+     }
+     /*
+     $handle = fopen("scraper/yandex-video.json", "r");
+     $json = fread($handle, filesize("scraper/yandex-video.json"));
+     fclose($handle);
+     */
+     try{
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://yandex.com/video/search",
+                 $params,
+                 $nsfw,
+                 "yandex_v"
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Could not parse JSON");
+     }
+
+     if(!isset($json["blocks"])){
+
+         throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     // stitch the HTML fragments of all blocks together
+     $html = "";
+     foreach($json["blocks"] as $block){
+
+         if(isset($block["html"])){
+
+             $html .= $block["html"];
+         }
+     }
+
+     $this->fuckhtml->load($html);
+
+     $div =
+         $this->fuckhtml
+         ->getElementsByTagName("div");
+
+     /*
+         Get nextpage
+     */
+     $npt =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "more more_direction_next i-bem",
+             $div
+         );
+
+     if(count($npt) !== 0){
+
+         // advance from whatever page this request was for; the first page
+         // carries no "p", so its successor is "1"
+         $params["p"] = (string)((int)($params["p"] ?? 0) + 1);
+         $params["nsfw"] = $nsfw;
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($params),
+                 "video",
+                 $proxy
+             );
+     }
+
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "serp-item",
+             $div
+         );
+
+     foreach($items as $item){
+
+         // the video metadata is a JSON blob in the data-video attribute;
+         // skip elements that do not carry a usable one
+         if(!isset($item["attributes"]["data-video"])){
+
+             continue;
+         }
+
+         $data =
+             json_decode(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $item["attributes"]["data-video"]
+                 ),
+                 true
+             );
+
+         if(!is_array($data)){
+
+             continue;
+         }
+
+         $this->fuckhtml->load($item);
+
+         $thumb =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "thumb-image__image",
+                 "img"
+             );
+
+         if(count($thumb) === 0){
+
+             $thumb = [
+                 "url" => null,
+                 "ratio" => null
+             ];
+         }else{
+
+             $thumb = [
+                 // only a leading "//" is rewritten; str_replace's fourth
+                 // argument is an output count, not a replacement limit
+                 "url" =>
+                     preg_replace(
+                         '/^\/\//',
+                         "https://",
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $thumb
+                             [0]
+                             ["attributes"]
+                             ["src"]
+                         )
+                     ),
+                 "ratio" => "16:9"
+             ];
+         }
+
+         $smallinfos =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "serp-item__sitelinks-item",
+                 "div"
+             );
+
+         $date = null;
+         $views = null;
+         $first = true;
+
+         foreach($smallinfos as $info){
+
+             // the first sitelinks entry is neither a date nor a view count
+             if($first){
+
+                 $first = false;
+                 continue;
+             }
+
+             $info =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $info
+                 );
+
+             // anything strtotime() understands is a date, the rest is
+             // treated as a view count
+             if($temp_date = strtotime($info)){
+
+                 $date = $temp_date;
+             }else{
+
+                 $views = $this->parseviews($info);
+             }
+         }
+
+         $description =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "serp-item__text serp-item__text_visibleText_always",
+                 "div"
+             );
+
+         if(count($description) === 0){
+
+             $description = null;
+         }else{
+
+             $description =
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $description[0]
+                     )
+                 );
+         }
+
+         $out["video"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $this->titledots(
+                         $data["title"]
+                     )
+                 ),
+             "description" => $description,
+             "author" => [
+                 "name" => null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "date" => $date,
+             "duration" =>
+                 (int)$data
+                 ["counters"]
+                 ["toHostingLoaded"]
+                 ["stredParams"]
+                 ["duration"],
+             "views" => $views,
+             "thumb" => $thumb,
+             // upgrade the scheme only; "http://" inside the query string
+             // must stay untouched
+             "url" =>
+                 preg_replace(
+                     '/^http:\/\//',
+                     "https://",
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $data["counters"]
+                         ["toHostingLoaded"]
+                         ["postfix"]
+                         ["href"]
+                     )
+                 )
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Turn a view-count label such as "1.5 mln." into a number.
+  *
+  * The text is split on spaces: the first word is read as a float and the
+  * second, when present, selects a multiplier ("bln.", "mln." or "thsd.").
+  * Any other second word, or none at all, leaves the number unscaled.
+  *
+  * @param string $text Label text.
+  * @return float Parsed count.
+  */
+ private function parseviews($text){
+
+     $parts = explode(" ", $text);
+
+     $num = (float)$parts[0];
+
+     // a label without a space has no multiplier word
+     $mod = $parts[1] ?? "";
+
+     switch($mod){
+
+         case "bln.": $num = $num * 1000000000; break;
+         case "mln.": $num = $num * 1000000; break;
+         case "thsd.": $num = $num * 1000; break;
+     }
+
+     return $num;
+ }
+
+ /**
+  * Trim a string and drop a trailing ellipsis.
+  *
+  * The last three bytes are compared against "..." and "…"; when either
+  * matches, those three bytes are removed before trimming.
+  * NOTE(review): matching "…" on a three-byte tail assumes this file and
+  * the input are UTF-8 - confirm.
+  *
+  * @param string $title Text to clean up.
+  * @return string Trimmed text without the trailing ellipsis.
+  */
+ private function titledots($title){
+
+     $tail = substr($title, -3);
+     $ends_with_dots = $tail == "..." || $tail == "…";
+
+     return trim(
+         $ends_with_dots ?
+             substr($title, 0, -3) :
+             $title
+     );
+ }
+}
diff --git a/scraper/yep.php b/scraper/yep.php
new file mode 100644
index 0000000..bfe347f
--- /dev/null
+++ b/scraper/yep.php
@@ -0,0 +1,741 @@
+<?php
+
+class yep{
+
+ /**
+  * Load the helper classes and create the backend ("yep") and HTML parser
+  * instances used by this scraper.
+  */
+ public function __construct(){
+
+     // class definitions first
+     include "lib/backend.php";
+     include "lib/fuckhtml.php";
+
+     // then the instances built from them
+     $this->backend = new backend("yep");
+     $this->fuckhtml = new fuckhtml();
+ }
+
+ /**
+  * Describe the filters this scraper offers.
+  *
+  * @param string $page Requested page type; not consulted, the same filters
+  *                     are returned for every page.
+  * @return array Filter name => ["display" => label, "option" => value => label].
+  */
+ public function getfilters($page){
+
+     return [
+         "country" => [
+             "display" => "Country",
+             "option" => [
+                 "all" => "All regions",
+                 "af" => "Afghanistan",
+                 "al" => "Albania",
+                 "dz" => "Algeria",
+                 "as" => "American Samoa",
+                 "ad" => "Andorra",
+                 "ao" => "Angola",
+                 "ai" => "Anguilla",
+                 "ag" => "Antigua and Barbuda",
+                 "ar" => "Argentina",
+                 "am" => "Armenia",
+                 "aw" => "Aruba",
+                 "au" => "Australia",
+                 "at" => "Austria",
+                 "az" => "Azerbaijan",
+                 "bs" => "Bahamas",
+                 "bh" => "Bahrain",
+                 "bd" => "Bangladesh",
+                 "bb" => "Barbados",
+                 "by" => "Belarus",
+                 "be" => "Belgium",
+                 "bz" => "Belize",
+                 "bj" => "Benin",
+                 "bt" => "Bhutan",
+                 "bo" => "Bolivia",
+                 "ba" => "Bosnia and Herzegovina",
+                 "bw" => "Botswana",
+                 "br" => "Brazil",
+                 "bn" => "Brunei Darussalam",
+                 "bg" => "Bulgaria",
+                 "bf" => "Burkina Faso",
+                 "bi" => "Burundi",
+                 "cv" => "Cabo Verde",
+                 "kh" => "Cambodia",
+                 "cm" => "Cameroon",
+                 "ca" => "Canada",
+                 "ky" => "Cayman Islands",
+                 "cf" => "Central African Republic",
+                 "td" => "Chad",
+                 "cl" => "Chile",
+                 "cn" => "China",
+                 "co" => "Colombia",
+                 "cg" => "Congo",
+                 "cd" => "Congo, Democratic Republic",
+                 "ck" => "Cook Islands",
+                 "cr" => "Costa Rica",
+                 "hr" => "Croatia",
+                 "cu" => "Cuba",
+                 "cy" => "Cyprus",
+                 "cz" => "Czechia",
+                 "ci" => "Côte d'Ivoire",
+                 "dk" => "Denmark",
+                 "dj" => "Djibouti",
+                 "dm" => "Dominica",
+                 "do" => "Dominican Republic",
+                 "ec" => "Ecuador",
+                 "eg" => "Egypt",
+                 "sv" => "El Salvador",
+                 "gq" => "Equatorial Guinea",
+                 "ee" => "Estonia",
+                 "et" => "Ethiopia",
+                 "fo" => "Faroe Islands",
+                 "fj" => "Fiji",
+                 "fi" => "Finland",
+                 "fr" => "France",
+                 "gf" => "French Guiana",
+                 "pf" => "French Polynesia",
+                 "ga" => "Gabon",
+                 "gm" => "Gambia",
+                 "ge" => "Georgia",
+                 "de" => "Germany",
+                 "gh" => "Ghana",
+                 "gi" => "Gibraltar",
+                 "gr" => "Greece",
+                 "gl" => "Greenland",
+                 "gd" => "Grenada",
+                 "gp" => "Guadeloupe",
+                 "gu" => "Guam",
+                 "gt" => "Guatemala",
+                 "gg" => "Guernsey",
+                 "gn" => "Guinea",
+                 "gy" => "Guyana",
+                 "ht" => "Haiti",
+                 "hn" => "Honduras",
+                 "hk" => "Hong Kong",
+                 "hu" => "Hungary",
+                 "is" => "Iceland",
+                 "in" => "India",
+                 "id" => "Indonesia",
+                 "iq" => "Iraq",
+                 "ie" => "Ireland",
+                 "im" => "Isle of Man",
+                 "il" => "Israel",
+                 "it" => "Italy",
+                 "jm" => "Jamaica",
+                 "jp" => "Japan",
+                 "je" => "Jersey",
+                 "jo" => "Jordan",
+                 "kz" => "Kazakhstan",
+                 "ke" => "Kenya",
+                 "ki" => "Kiribati",
+                 "kw" => "Kuwait",
+                 "kg" => "Kyrgyzstan",
+                 "la" => "Lao People's Democratic Republic",
+                 "lv" => "Latvia",
+                 "lb" => "Lebanon",
+                 "ls" => "Lesotho",
+                 "ly" => "Libya",
+                 "li" => "Liechtenstein",
+                 "lt" => "Lithuania",
+                 "lu" => "Luxembourg",
+                 "mk" => "Macedonia",
+                 "mg" => "Madagascar",
+                 "mw" => "Malawi",
+                 "my" => "Malaysia",
+                 "mv" => "Maldives",
+                 "ml" => "Mali",
+                 "mt" => "Malta",
+                 "mq" => "Martinique",
+                 "mr" => "Mauritania",
+                 "mu" => "Mauritius",
+                 "yt" => "Mayotte",
+                 "mx" => "Mexico",
+                 "fm" => "Micronesia, Federated States of",
+                 "md" => "Moldova",
+                 "mc" => "Monaco",
+                 "mn" => "Mongolia",
+                 "me" => "Montenegro",
+                 "ms" => "Montserrat",
+                 "ma" => "Morocco",
+                 "mz" => "Mozambique",
+                 "mm" => "Myanmar",
+                 "na" => "Namibia",
+                 "nr" => "Nauru",
+                 "np" => "Nepal",
+                 "nl" => "Netherlands",
+                 "nc" => "New Caledonia",
+                 "nz" => "New Zealand",
+                 "ni" => "Nicaragua",
+                 "ne" => "Niger",
+                 "ng" => "Nigeria",
+                 "nu" => "Niue",
+                 "no" => "Norway",
+                 "om" => "Oman",
+                 "pk" => "Pakistan",
+                 "ps" => "Palestine, State of",
+                 "pa" => "Panama",
+                 "pg" => "Papua New Guinea",
+                 "py" => "Paraguay",
+                 "pe" => "Peru",
+                 "ph" => "Philippines",
+                 "pn" => "Pitcairn",
+                 "pl" => "Poland",
+                 "pt" => "Portugal",
+                 "pr" => "Puerto Rico",
+                 "qa" => "Qatar",
+                 "ro" => "Romania",
+                 "ru" => "Russian Federation",
+                 "rw" => "Rwanda",
+                 "re" => "Réunion",
+                 "sh" => "Saint Helena",
+                 "kn" => "Saint Kitts and Nevis",
+                 "lc" => "Saint Lucia",
+                 "vc" => "Saint Vincent and the Grenadines",
+                 "ws" => "Samoa",
+                 "sm" => "San Marino",
+                 "st" => "Sao Tome and Principe",
+                 "sa" => "Saudi Arabia",
+                 "sn" => "Senegal",
+                 "rs" => "Serbia",
+                 "sc" => "Seychelles",
+                 "sl" => "Sierra Leone",
+                 "sg" => "Singapore",
+                 "sk" => "Slovakia",
+                 "si" => "Slovenia",
+                 "sb" => "Solomon Islands",
+                 "so" => "Somalia",
+                 "kr" => "South Korea",
+                 "za" => "South Africa",
+                 "es" => "Spain",
+                 "lk" => "Sri Lanka",
+                 "sr" => "Suriname",
+                 "se" => "Sweden",
+                 "ch" => "Switzerland",
+                 "tw" => "Taiwan",
+                 "tj" => "Tajikistan",
+                 "tz" => "Tanzania",
+                 "th" => "Thailand",
+                 "tl" => "Timor-Leste",
+                 "tg" => "Togo",
+                 "tk" => "Tokelau",
+                 "to" => "Tonga",
+                 "tt" => "Trinidad and Tobago",
+                 "tn" => "Tunisia",
+                 "tr" => "Turkey",
+                 "tm" => "Turkmenistan",
+                 "ug" => "Uganda",
+                 "ua" => "Ukraine",
+                 "ae" => "United Arab Emirates",
+                 "gb" => "United Kingdom",
+                 "us" => "United States",
+                 "uy" => "Uruguay",
+                 "uz" => "Uzbekistan",
+                 "vu" => "Vanuatu",
+                 "ve" => "Venezuela",
+                 "vn" => "Vietnam",
+                 "vg" => "Virgin Islands, British",
+                 "vi" => "Virgin Islands, U.S.",
+                 "ye" => "Yemen",
+                 "zm" => "Zambia",
+                 "zw" => "Zimbabwe"
+             ]
+         ],
+         "nsfw" => [
+             "display" => "NSFW",
+             "option" => [
+                 "yes" => "Yes",
+                 "maybe" => "Maybe",
+                 "no" => "No"
+             ]
+         ]
+     ];
+ }
+
+ /**
+  * Perform a GET request through the given proxy and return the body.
+  *
+  * @param mixed  $proxy Proxy descriptor handed to backend->assign_proxy().
+  * @param string $url   Target URL without a query string.
+  * @param array  $get   Query parameters; appended to $url when non-empty.
+  * @return string|bool Response body as returned by curl_exec().
+  * @throws Exception With curl's error message when the transfer fails.
+  */
+ private function get($proxy, $url, $get = []){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+         $get = http_build_query($get);
+         $url .= "?" . $get;
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     // use http2
+     curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+     // set ciphers
+     curl_setopt(
+         $curlproc,
+         CURLOPT_SSL_CIPHER_LIST,
+         "aes_128_gcm_sha_256,chacha20_poly1305_sha_256,aes_256_gcm_sha_384,ecdhe_ecdsa_aes_128_gcm_sha_256,ecdhe_rsa_aes_128_gcm_sha_256,ecdhe_ecdsa_chacha20_poly1305_sha_256,ecdhe_rsa_chacha20_poly1305_sha_256,ecdhe_ecdsa_aes_256_gcm_sha_384,ecdhe_rsa_aes_256_gcm_sha_384,ecdhe_ecdsa_aes_256_sha,ecdhe_ecdsa_aes_128_sha,ecdhe_rsa_aes_128_sha,ecdhe_rsa_aes_256_sha,rsa_aes_128_gcm_sha_256,rsa_aes_256_gcm_sha_384,rsa_aes_128_sha,rsa_aes_256_sha"
+     );
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+         ["User-Agent: " . config::USER_AGENT,
+         "Accept: */*",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip, deflate, br, zstd",
+         "Referer: https://yep.com/",
+         "Origin: https://yep.com",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Sec-Fetch-Dest: empty",
+         "Sec-Fetch-Mode: cors",
+         "Sec-Fetch-Site: same-site",
+         "Priority: u=4",
+         "TE: trailers"]
+     );
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message first, then release the handle on this path too
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+
+
+ /**
+  * Scrape Yep web search.
+  *
+  * Organic results fill "web", featured news fill "news" and featured
+  * images fill "image". No next-page token is produced.
+  *
+  * @param array $get Request parameters. Reads "s", "country" and "nsfw".
+  * @return array Result set with "status", "spelling", "npt", "answer",
+  *               "web", "image", "video", "news" and "related" keys.
+  * @throws Exception On an empty search term, a failed request or
+  *                   undecodable JSON. detect_cf() is defined outside this
+  *                   block and may throw as well - NOTE(review): confirm.
+  */
+ public function web($get){
+
+     $search = $get["s"];
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     $country = $get["country"];
+     $nsfw = $get["nsfw"];
+
+     // translate our nsfw setting into Yep's safeSearch values
+     switch($nsfw){
+
+         case "yes": $nsfw = "off"; break;
+         case "maybe": $nsfw = "moderate"; break;
+         case "no": $nsfw = "strict"; break;
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     try{
+
+         // https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
+         $json =
+             $this->get(
+                 $this->backend->get_ip(),
+                 "https://api.yep.com/fs/2/search",
+                 [
+                     "client" => "web",
+                     // "all" is sent as-is, country codes are upper-cased
+                     "gl" => $country == "all" ? $country : strtoupper($country),
+                     "limit" => "99999",
+                     "no_correct" => "false",
+                     "q" => $search,
+                     "safeSearch" => $nsfw,
+                     "type" => "web"
+                 ]
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch JSON");
+     }
+
+     $this->detect_cf($json);
+
+     $json = json_decode($json, true);
+     //$json = json_decode(file_get_contents("scraper/yep.json"), true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     // the payload of interest is the second element of the response
+     if(isset($json[1]["correction"])){
+
+         $out["spelling"] = [
+             "type" => "not_many",
+             "using" => $search,
+             "correction" => $json[1]["correction"][1]
+         ];
+     }
+
+     if(isset($json[1]["results"])){
+         foreach($json[1]["results"] as $item){
+
+             switch(strtolower($item["type"])){
+
+                 case "organic":
+                     $sublinks = [];
+
+                     if(isset($item["sitelinks"]["full"])){
+
+                         foreach($item["sitelinks"]["full"] as $link){
+
+                             $sublinks[] = [
+                                 "title" => $link["title"],
+                                 "date" => null,
+                                 "description" =>
+                                     $this->titledots(
+                                         strip_tags(
+                                             html_entity_decode(
+                                                 $link["snippet"] ?? ""
+                                             )
+                                         )
+                                     ),
+                                 "url" => $link["url"]
+                             ];
+                         }
+                     }
+
+                     // a missing or unparsable first_seen becomes null,
+                     // the same "no date" value the sublinks use
+                     $date =
+                         isset($item["first_seen"]) ?
+                         strtotime($item["first_seen"]) :
+                         false;
+
+                     $out["web"][] = [
+                         "title" => $item["title"],
+                         "description" =>
+                             $this->titledots(
+                                 strip_tags(
+                                     html_entity_decode(
+                                         $item["snippet"] ?? ""
+                                     )
+                                 )
+                             ),
+                         "url" => $item["url"],
+                         "date" => $date === false ? null : $date,
+                         "type" => "web",
+                         "thumb" => [
+                             "url" => null,
+                             "ratio" => null
+                         ],
+                         "sublink" => $sublinks,
+                         "table" => []
+                     ];
+                     break;
+             }
+         }
+     }
+
+     if(isset($json[1]["featured_news"])){
+
+         foreach($json[1]["featured_news"] as $news){
+
+             $date =
+                 isset($news["first_seen"]) ?
+                 strtotime($news["first_seen"]) :
+                 false;
+
+             $out["news"][] = [
+                 "title" => $news["title"],
+                 "description" =>
+                     $this->titledots(
+                         strip_tags(
+                             html_entity_decode(
+                                 $news["snippet"] ?? ""
+                             )
+                         )
+                     ),
+                 "date" => $date === false ? null : $date,
+                 "thumb" =>
+                     isset($news["img"]) ?
+                     [
+                         "url" => $this->unshiturl($news["img"]),
+                         "ratio" => "16:9"
+                     ] :
+                     [
+                         "url" => null,
+                         "ratio" => null
+                     ],
+                 "url" => $news["url"]
+             ];
+         }
+     }
+
+     if(isset($json[1]["featured_images"])){
+
+         foreach($json[1]["featured_images"] as $image){
+
+             // dimensions are only usable when both are present and
+             // positive; this also keeps the division below safe
+             if(
+                 isset($image["width"], $image["height"]) &&
+                 $image["width"] > 0 &&
+                 $image["height"] > 0
+             ){
+
+                 // thumbnail is capped at 260px wide, height scaled to match
+                 $thumb_width = $image["width"] >= 260 ? 260 : $image["width"];
+                 $thumb_height = (int)ceil($image["height"] * ($thumb_width / $image["width"]));
+
+                 $width = $image["width"];
+                 $height = $image["height"];
+             }else{
+
+                 $thumb_width = null;
+                 $thumb_height = null;
+                 $width = null;
+                 $height = null;
+             }
+
+             $out["image"][] = [
+                 "title" => $image["title"],
+                 "source" => [
+                     [
+                         "url" => $image["image_id"],
+                         "width" => $width,
+                         "height" => $height
+                     ],
+                     [
+                         "url" => $image["src"],
+                         "width" => $thumb_width,
+                         "height" => $thumb_height
+                     ]
+                 ],
+                 "url" => $image["host_page"]
+             ];
+         }
+     }
+
+     return $out;
+ }
+
+
+
+    /**
+     * Image search through the Yep JSON API.
+     *
+     * Reads "s" (query), "country" and "nsfw" from $get. No next-page token
+     * is produced by this method, so "npt" is always null.
+     *
+     * @param array $get Request parameters ("s", "country", "nsfw")
+     * @return array ["status" => "ok", "npt" => null, "image" => [...]]
+     * @throws Exception On empty query, transport failure, Cloudflare block
+     *                   page or undecodable JSON
+     */
+    public function image($get){
+
+        $search = $get["s"];
+        if(strlen($search) === 0){
+
+            throw new Exception("Search term is empty!");
+        }
+
+        $country = $get["country"];
+        $nsfw = $get["nsfw"];
+
+        // translate our nsfw setting into the API's safeSearch value
+        switch($nsfw){
+
+            case "yes": $nsfw = "off"; break;
+            case "maybe": $nsfw = "moderate"; break;
+            case "no": $nsfw = "strict"; break;
+        }
+
+        $out = [
+            "status" => "ok",
+            "npt" => null,
+            "image" => []
+        ];
+
+        try{
+
+            $json =
+                $this->get(
+                    $this->backend->get_ip(), // no nextpage!
+                    "https://api.yep.com/fs/2/search",
+                    [
+                        "client" => "web",
+                        "gl" => $country == "all" ? $country : strtoupper($country),
+                        "no_correct" => "false",
+                        "q" => $search,
+                        "safeSearch" => $nsfw,
+                        "type" => "images"
+                    ]
+                );
+        }catch(Exception $error){
+
+            // keep the transport error reachable through getPrevious()
+            throw new Exception("Failed to fetch JSON", 0, $error);
+        }
+
+        $this->detect_cf($json);
+
+        $json = json_decode($json, true);
+
+        if($json === null){
+
+            throw new Exception("Failed to decode JSON");
+        }
+
+        if(
+            !isset($json[1]["results"]) ||
+            !is_array($json[1]["results"])
+        ){
+
+            return $out;
+        }
+
+        foreach($json[1]["results"] as $item){
+
+            // A missing, null or zero dimension must not reach the ratio
+            // computation below: it would divide by zero.
+            $width = isset($item["width"]) ? (int)$item["width"] : 0;
+            $height = isset($item["height"]) ? (int)$item["height"] : 0;
+
+            if(
+                $width > 0 &&
+                $height > 0
+            ){
+
+                // thumbnails are capped at 260px wide, aspect ratio preserved
+                $thumb_width = $width >= 260 ? 260 : $width;
+                $thumb_height = ceil($height * ($thumb_width / $width));
+            }else{
+
+                $thumb_width = null;
+                $thumb_height = null;
+                $width = null;
+                $height = null;
+            }
+
+            $out["image"][] = [
+                "title" => $item["title"],
+                "source" => [
+                    [
+                        "url" => $item["image_id"],
+                        "width" => $width,
+                        "height" => $height
+                    ],
+                    [
+                        "url" => $item["src"],
+                        "width" => $thumb_width,
+                        "height" => $thumb_height
+                    ]
+                ],
+                "url" => $item["host_page"]
+            ];
+        }
+
+        return $out;
+    }
+
+
+    /**
+     * News search through the Yep JSON API.
+     *
+     * @param array $get Request parameters ("s", "country", "nsfw")
+     * @return array ["status" => "ok", "npt" => null, "news" => [...]]
+     * @throws Exception On empty query, transport failure, Cloudflare block
+     *                   page or undecodable JSON
+     */
+    public function news($get){
+
+        $search = $get["s"];
+        if(strlen($search) === 0){
+
+            throw new Exception("Search term is empty!");
+        }
+
+        $country = $get["country"];
+        $nsfw = $get["nsfw"];
+
+        // map our nsfw setting onto the API's safeSearch values
+        if($nsfw == "yes"){
+
+            $nsfw = "off";
+        }elseif($nsfw == "maybe"){
+
+            $nsfw = "moderate";
+        }elseif($nsfw == "no"){
+
+            $nsfw = "strict";
+        }
+
+        $params = [
+            "client" => "web",
+            "gl" => $country == "all" ? $country : strtoupper($country),
+            "limit" => "99999",
+            "no_correct" => "false",
+            "q" => $search,
+            "safeSearch" => $nsfw,
+            "type" => "news"
+        ];
+
+        try{
+
+            $payload =
+                $this->get(
+                    $this->backend->get_ip(),
+                    "https://api.yep.com/fs/2/search",
+                    $params
+                );
+        }catch(Exception $error){
+
+            throw new Exception("Failed to fetch JSON");
+        }
+
+        $this->detect_cf($payload);
+
+        $decoded = json_decode($payload, true);
+
+        if($decoded === null){
+
+            throw new Exception("Failed to decode JSON");
+        }
+
+        $response = [
+            "status" => "ok",
+            "npt" => null,
+            "news" => []
+        ];
+
+        $articles =
+            isset($decoded[1]["results"]) ?
+            $decoded[1]["results"] :
+            [];
+
+        foreach($articles as $article){
+
+            // thumbnail is optional; fall back to an empty thumb
+            if(isset($article["img"])){
+
+                $thumb = [
+                    "url" => $this->unshiturl($article["img"]),
+                    "ratio" => "16:9"
+                ];
+            }else{
+
+                $thumb = [
+                    "url" => null,
+                    "ratio" => null
+                ];
+            }
+
+            $snippet =
+                strip_tags(
+                    html_entity_decode(
+                        $article["snippet"]
+                    )
+                );
+
+            $response["news"][] = [
+                "title" => $article["title"],
+                "author" => null,
+                "description" => $this->titledots($snippet),
+                "date" => strtotime($article["first_seen"]),
+                "thumb" => $thumb,
+                "url" => $article["url"]
+            ];
+        }
+
+        return $response;
+    }
+
+
+    /**
+     * Throw when the response body is a Cloudflare block page.
+     *
+     * The payload is loaded into the HTML parser and searched for a
+     * <div class="cf-wrapper"> element.
+     *
+     * @param string $payload Raw response body
+     * @throws Exception When a cf-wrapper div is present
+     */
+    private function detect_cf($payload){
+
+        $this->fuckhtml->load($payload);
+
+        $wrappers =
+            $this->fuckhtml
+            ->getElementsByClassName(
+                "cf-wrapper",
+                "div"
+            );
+
+        if(count($wrappers) === 0){
+
+            // no block page detected
+            return;
+        }
+
+        throw new Exception("Blocked by Cloudflare. Please follow curl-impersonate installation instructions");
+    }
+
+
+    /**
+     * Remove one trailing ellipsis ("..." or "…") and surrounding whitespace.
+     *
+     * Only the ellipsis itself and the whitespace around it are removed. The
+     * previous fixed 4-byte cut also ate the character before the dots
+     * ("abc..." became "ab") and could split a multibyte character in half.
+     *
+     * @param string|null $title Text to clean up
+     * @return string Trimmed text without the trailing ellipsis
+     */
+    private function titledots($title){
+
+        $title = (string)$title;
+
+        // byte-wise pattern on purpose: invalid UTF-8 in a snippet must not
+        // make preg_replace() fail
+        $stripped =
+            preg_replace(
+                '/\s*(?:\.{3}|…)\s*$/',
+                "",
+                $title
+            );
+
+        if($stripped === null){
+
+            // regex engine error: fall back to the untouched text
+            $stripped = $title;
+        }
+
+        return trim($stripped);
+    }
+
+    /**
+     * Unwrap an image URL carried in another URL's "url" query parameter.
+     *
+     * @param string $url Possibly wrapped URL
+     * @return string The decoded "url" parameter when present, otherwise
+     *                $url unchanged
+     */
+    private function unshiturl($url){
+
+        $query = parse_url((string)$url, PHP_URL_QUERY);
+
+        // parse_url() yields null (no query) or false (malformed URL);
+        // neither may be handed to parse_str()
+        if(
+            !is_string($query) ||
+            $query === ""
+        ){
+
+            return $url;
+        }
+
+        parse_str($query, $params);
+
+        // "url[]=..." would decode to an array, which is not a usable link
+        if(
+            isset($params["url"]) &&
+            is_string($params["url"])
+        ){
+
+            return $params["url"];
+        }
+
+        return $url;
+    }
+}
diff --git a/scraper/yt.php b/scraper/yt.php
new file mode 100644
index 0000000..a27fd82
--- /dev/null
+++ b/scraper/yt.php
@@ -0,0 +1,1727 @@
+<?php
+
+//$yt = new youtube();
+//header("Content-Type: application/json");
+//echo json_encode($yt->video("minecraft", null, "today", "any", "any", "live", "relevance"));
+
+class yt{
+
+    // Declared explicitly: creating properties dynamically is deprecated
+    // as of PHP 8.2. Kept public, which is what dynamic properties were.
+    public $backend;
+    public $out;
+
+    /**
+     * Load the shared backend helper and bind it to the "yt" scraper name.
+     */
+    public function __construct(){
+
+        // include_once: a second inclusion of the file that declares the
+        // backend class would be a fatal "cannot redeclare class" error
+        include_once "lib/backend.php";
+        $this->backend = new backend("yt");
+    }
+
+    /**
+     * Describe the search filters offered for a result page.
+     *
+     * Only the "videos" page has filters. Option keys are the values later
+     * handed to ytfilter(); option values are the labels shown to the user.
+     *
+     * @param string $page Page identifier
+     * @return array Filter definitions, or [] for any page but "videos"
+     */
+    public function getfilters($page){
+
+        if($page != "videos"){
+
+            return [];
+        }
+
+        return [
+            "date" => [
+                "display" => "Time posted",
+                "option" => [
+                    "any" => "Any time",
+                    "hour" => "Last hour",
+                    "today" => "Today",
+                    "week" => "This week",
+                    "month" => "This month",
+                    "year" => "This year"
+                ]
+            ],
+            "type" => [
+                "display" => "Type",
+                "option" => [
+                    "video" => "Video",
+                    "channel" => "Channel",
+                    "playlist" => "Playlist",
+                    // NOTE(review): this key is capitalised while ytfilter()
+                    // switches on "movie"; kept as-is so the accepted request
+                    // values do not change
+                    "Movie" => "Movie"
+                ]
+            ],
+            "duration" => [
+                "display" => "Duration",
+                "option" => [
+                    "any" => "Any duration",
+                    // labels previously had the comparison signs inverted
+                    "short" => "Short (<4min)",
+                    "medium" => "Medium (4-20min)",
+                    "long" => "Long (>20min)"
+                ]
+            ],
+            "feature" => [
+                "display" => "Feature",
+                "option" => [
+                    "any" => "No features",
+                    "live" => "Live",
+                    "4k" => "4K",
+                    "hd" => "HD",
+                    "subtitles" => "Subtitles/CC",
+                    "creativecommons" => "Creative Commons",
+                    "360" => "VR 360°",
+                    "vr180" => "VR 180°",
+                    "3d" => "3D",
+                    "hdr" => "HDR"
+                ]
+            ],
+            "sort" => [
+                "display" => "Sort by",
+                "option" => [
+                    "relevance" => "Relevance",
+                    "upload_date" => "Upload date",
+                    "view_count" => "View count",
+                    "rating" => "Rating"
+                ]
+            ]
+        ];
+    }
+
+    /**
+     * Build the value of YouTube's "sp" search parameter.
+     *
+     * The parameter is a small binary message, base64-encoded and then
+     * URL-encoded. Layout, as observed from YouTube's own URLs:
+     *
+     *   08 xx          sort order (optional)
+     *   12 LL <bytes>  filter group, LL = byte length of <bytes>
+     *
+     * Sort order (08 xx):
+     *   relevance 08 00 | rating 08 01 | upload date 08 02 | view count 08 03
+     *
+     * Filter group members, always in this order:
+     *   upload date  08 01 hour | 08 02 today | 08 03 week | 08 04 month | 08 05 year
+     *   type         10 01 video | 10 02 channel | 10 03 playlist | 10 04 movie
+     *   duration     18 01 under 4min | 18 03 4-20min | 18 02 over 20min
+     *   feature      20 01 HD | 28 01 subtitles | 30 01 creative commons
+     *                38 01 3D | 40 01 live | 70 01 4K | 78 01 360
+     *                c8 01 01 HDR | d0 01 01 VR180
+     *
+     * Worked examples:
+     *   video + under 4 minutes + HD
+     *     08 00 12 06 10 01 18 01 20 01
+     *   this week + video + over 20 minutes + live + rating
+     *     08 01 12 08 08 03 10 01 18 02 40 01
+     *
+     * To inspect an existing value:
+     *   bin2hex(base64_decode(urldecode(urldecode($sp))))
+     *
+     * Incompatible combinations are resolved before encoding:
+     *   - channel/playlist searches drop duration, feature and sort
+     *   - movie searches drop the live/subtitles/creative commons/3d features
+     *   - a date filter forces a channel/playlist search back to "video"
+     *
+     * @param string $date     any|hour|today|week|month|year
+     * @param string $type     video|channel|playlist|movie (case-insensitive)
+     * @param string $duration any|short|medium|long
+     * @param string $feature  any or one of the feature keys
+     * @param string $sort     relevance|upload_date|view_count|rating
+     * @return string|null Encoded "sp" value, or null when nothing needs
+     *                     to be sent
+     */
+    private function ytfilter($date, $type, $duration, $feature, $sort){
+
+        // getfilters() exposes the movie type under the key "Movie"
+        $type = strtolower((string)$type);
+
+        switch($type){
+
+            case "channel":
+            case "playlist":
+                $duration = "any";
+                $feature = "any";
+                $sort = "any"; // "any" emits no sort prefix at all
+                break;
+
+            case "movie":
+                if(
+                    in_array(
+                        $feature,
+                        [
+                            "live",
+                            "subtitles",
+                            "creativecommons",
+                            "3d"
+                        ],
+                        true
+                    )
+                ){
+
+                    $feature = "any";
+                }
+                break;
+        }
+
+        // duration and feature were already reset for channel/playlist, so
+        // the date filter is the only remaining conflict with those types
+        if(
+            $date != "any" &&
+            (
+                $type == "channel" ||
+                $type == "playlist"
+            )
+        ){
+
+            $type = "video";
+        }
+
+        if(
+            $date == "any" &&
+            $type == "video" &&
+            $duration == "any" &&
+            $feature == "any" &&
+            $sort == "relevance"
+        ){
+
+            // default search, no parameter needed
+            return null;
+        }
+
+        // sort prefix
+        $hex = "";
+
+        switch($sort){
+
+            case "relevance": $hex .= "0800"; break;
+            case "upload_date": $hex .= "0802"; break;
+            case "view_count": $hex .= "0803"; break;
+            case "rating": $hex .= "0801"; break;
+        }
+
+        // filter group body
+        $group = "";
+
+        switch($date){
+
+            case "hour": $group .= "0801"; break;
+            case "today": $group .= "0802"; break;
+            case "week": $group .= "0803"; break;
+            case "month": $group .= "0804"; break;
+            case "year": $group .= "0805"; break;
+        }
+
+        switch($type){
+
+            case "video": $group .= "1001"; break;
+            case "channel": $group .= "1002"; break;
+            case "playlist": $group .= "1003"; break;
+            case "movie": $group .= "1004"; break;
+        }
+
+        switch($duration){
+
+            case "short": $group .= "1801"; break;
+            case "medium": $group .= "1803"; break;
+            case "long": $group .= "1802"; break;
+        }
+
+        switch($feature){
+
+            case "live": $group .= "4001"; break;
+            case "4k": $group .= "7001"; break;
+            case "hd": $group .= "2001"; break;
+            case "subtitles": $group .= "2801"; break;
+            case "creativecommons": $group .= "3001"; break;
+            case "360": $group .= "7801"; break;
+            case "vr180": $group .= "d00101"; break;
+            case "3d": $group .= "3801"; break;
+            case "hdr": $group .= "c80101"; break;
+        }
+
+        if($group !== ""){
+
+            // The length byte is derived from what was actually written, so
+            // an unrecognised value can no longer desynchronise header and body.
+            $hex .=
+                "12" .
+                sprintf("%02x", intdiv(strlen($group), 2)) .
+                $group;
+        }
+
+        if($hex === ""){
+
+            return null;
+        }
+
+        return urlencode(base64_encode(hex2bin($hex)));
+    }
+
+ // me reading youtube's json
+ // https://imgur.com/X9hVlFX
+
+ const req_web = 0;
+ const req_xhr = 1;
+
+    /**
+     * Perform an HTTP request against YouTube through the assigned proxy.
+     *
+     * req_web sends a browser-like GET. req_xhr sends a JSON POST whose body
+     * is $continuation.
+     *
+     * @param mixed       $proxy        Proxy handed to backend->assign_proxy()
+     * @param string      $url          Target URL without query string
+     * @param array       $get          Query parameters appended to $url
+     * @param int         $reqtype      self::req_web or self::req_xhr
+     * @param string|null $continuation JSON request body for req_xhr
+     * @return string|bool Response body as returned by curl_exec()
+     * @throws Exception On an unknown request type or a curl error
+     */
+    private function get($proxy, $url, $get = [], $reqtype = self::req_web, $continuation = null){
+
+        if($get !== []){
+
+            $url .= "?" . http_build_query($get);
+        }
+
+        switch($reqtype){
+
+            case self::req_web:
+                $headers =
+                    ["User-Agent: " . config::USER_AGENT,
+                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                    "Accept-Language: en-US,en;q=0.5",
+                    "Accept-Encoding: gzip",
+                    "Cookie: PREF=tz=America.New_York",
+                    "DNT: 1",
+                    "Connection: keep-alive",
+                    "Upgrade-Insecure-Requests: 1",
+                    "Sec-Fetch-Dest: document",
+                    "Sec-Fetch-Mode: navigate",
+                    "Sec-Fetch-Site: none",
+                    "Sec-Fetch-User: ?1"];
+                break;
+
+            case self::req_xhr:
+                $continuation = (string)$continuation;
+
+                $headers =
+                    ["User-Agent: " . config::USER_AGENT,
+                    "Accept: */*",
+                    "Accept-Language: en-US,en;q=0.5",
+                    "Accept-Encoding: gzip",
+                    "Cookie: PREF=tz=America.New_York",
+                    "Referer: https://www.youtube.com/", // was "youtube.com.com"
+                    "Content-Type: application/json",
+                    "Content-Length: " . strlen($continuation),
+                    "DNT: 1",
+                    "Connection: keep-alive",
+                    "Sec-Fetch-Dest: empty",
+                    "Sec-Fetch-Mode: same-origin",
+                    "Sec-Fetch-Site: same-origin"];
+                break;
+
+            default:
+                // without this, $headers would be undefined further down
+                throw new Exception("Unknown request type");
+        }
+
+        $curlproc = curl_init();
+
+        try{
+
+            curl_setopt($curlproc, CURLOPT_URL, $url);
+
+            if($reqtype === self::req_xhr){
+
+                curl_setopt($curlproc, CURLOPT_POST, true);
+                curl_setopt($curlproc, CURLOPT_POSTFIELDS, $continuation);
+            }
+
+            curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+            curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+            curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+            curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+            curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+            $this->backend->assign_proxy($curlproc, $proxy);
+
+            $data = curl_exec($curlproc);
+
+            if(curl_errno($curlproc)){
+
+                throw new Exception(curl_error($curlproc));
+            }
+
+            return $data;
+
+        }finally{
+
+            // release the handle on the error path too
+            curl_close($curlproc);
+        }
+    }
+
+    /**
+     * Search YouTube for videos, channels, playlists, livestreams and shorts.
+     *
+     * First page: fetches /results, extracts ytcfg and ytInitialData from
+     * the HTML, parses the primary results plus the optional watch card in
+     * the secondary column, and prepares the POST body needed to request
+     * the continuation.
+     *
+     * Next page ($get["npt"] set): restores that POST body from the backend
+     * store and replays it against the youtubei search endpoint.
+     *
+     * @param array $get Request parameters ("s", "npt", "date", "type",
+     *                   "duration", "feature", "sort")
+     * @return array Result set with keys status, npt, video, author,
+     *               livestream, playlist, reel
+     * @throws Exception On empty query, fetch failure or unparseable response
+     */
+    public function video($get){
+
+        $this->out = [
+            "status" => "ok",
+            "npt" => null,
+            "video" => [],
+            "author" => [],
+            "livestream" => [],
+            "playlist" => [],
+            "reel" => []
+        ];
+
+        if($get["npt"]){
+
+            //
+            // next page
+            //
+            [$npt, $proxy] =
+                $this->backend->get(
+                    $get["npt"],
+                    "videos"
+                );
+
+            $npt = json_decode($npt, true);
+
+            if(
+                !is_array($npt) ||
+                !isset($npt["key"], $npt["post"])
+            ){
+
+                throw new Exception("Invalid next page token");
+            }
+
+            try{
+                $json = $this->get(
+                    $proxy,
+                    "https://www.youtube.com/youtubei/v1/search",
+                    [
+                        "key" => $npt["key"],
+                        "prettyPrint" => "false"
+                    ],
+                    self::req_xhr,
+                    json_encode($npt["post"])
+                );
+            }catch(Exception $error){
+
+                throw new Exception("Could not fetch results page", 0, $error);
+            }
+
+            $json = json_decode($json);
+
+            if(!is_object($json)){
+
+                throw new Exception("Could not decode results page");
+            }
+
+            // [0] carries the results, [1] the next continuation token
+            $items =
+                $json
+                ->onResponseReceivedCommands[0]
+                ->appendContinuationItemsAction
+                ->continuationItems
+                ?? [];
+
+            foreach(
+                $items[0]->itemSectionRenderer->contents ?? []
+                as $video
+            ){
+
+                $this->parsevideoobject($video);
+            }
+
+            $token =
+                $items[1]
+                ->continuationItemRenderer
+                ->continuationEndpoint
+                ->continuationCommand
+                ->token
+                ?? null;
+
+            if($token === null){
+
+                // last page
+                $npt = null;
+            }else{
+
+                // prepare nextpage for later..
+                $npt["post"]["continuation"] = $token;
+            }
+
+            $this->out["npt"] = $npt;
+
+        }else{
+
+            //
+            // first page
+            //
+            $search = $get["s"];
+            if(strlen($search) === 0){
+
+                throw new Exception("Search term is empty!");
+            }
+
+            $proxy = $this->backend->get_ip();
+
+            $params = [
+                "search_query" => $search
+            ];
+
+            $filter =
+                $this->ytfilter(
+                    $get["date"],
+                    $get["type"],
+                    $get["duration"],
+                    $get["feature"],
+                    $get["sort"]
+                );
+
+            if($filter !== null){
+
+                $params["sp"] = $filter;
+            }
+
+            try{
+                $html = $this->get(
+                    $proxy,
+                    "https://www.youtube.com/results",
+                    $params
+                );
+            }catch(Exception $error){
+
+                throw new Exception("Could not fetch results page", 0, $error);
+            }
+
+            if(
+                !preg_match(
+                    '/ytcfg\.set\(({".*})\); *window\.ytcfg/',
+                    $html,
+                    $match
+                )
+            ){
+
+                throw new Exception("Could not get ytcfg");
+            }
+
+            $ytconfig = json_decode($match[1]);
+
+            if(
+                !is_object($ytconfig) ||
+                !isset($ytconfig->INNERTUBE_CONTEXT->client) ||
+                !isset($ytconfig->INNERTUBE_CONTEXT->request)
+            ){
+
+                throw new Exception("Could not decode ytcfg");
+            }
+
+            if(
+                !preg_match(
+                    '/ytInitialData *= *({.*});<\/script>/',
+                    $html,
+                    $match
+                )
+            ){
+
+                throw new Exception("Could not get ytInitialData");
+            }
+
+            $json = json_decode($match[1]);
+
+            if(!is_object($json)){
+
+                throw new Exception("Could not decode ytInitialData");
+            }
+
+            // generate POST data for nextpage
+            // ($context is an object handle: writes land in $ytconfig)
+            $context = $ytconfig->INNERTUBE_CONTEXT;
+
+            $context->client->screenWidthPoints = 1239;
+            $context->client->screenHeightPoints = 999;
+            $context->client->screenPixelDensity = 1;
+            $context->client->screenDensityFloat = 1;
+            $context->client->utcOffsetMinutes = -240;
+            $context->request->internalExperimentFlags = [];
+            $context->request->consistencyTokenJars = [];
+
+            $context->client->mainAppWebInfo = [
+                "graftUrl" => $context->client->originalUrl ?? null,
+                "webDisplayMode" => "WEB_DISPLAY_MODE_BROWSER",
+                "isWebNativeShareAvailable" => false
+            ];
+
+            // browser environment signals, sent as a list of key/value pairs
+            $signals = [
+                "dt" => (string)($ytconfig->TIME_CREATED_MS ?? ""),
+                "flash" => "0",
+                "frm" => "0",
+                "u_tz" => "-240",
+                "u_his" => "3",
+                "u_h" => "1080",
+                "u_w" => "1920",
+                "u_ah" => "1080",
+                "u_cd" => "24",
+                "bc" => "31",
+                "bih" => "999",
+                "biw" => "1239",
+                "brdim" => "0,0,0,0,1920,0,1920,1061,1239,999",
+                "vis" => "1",
+                "wgl" => "true",
+                "ca_type" => "image"
+            ];
+
+            $signal_params = [];
+            foreach($signals as $key => $value){
+
+                $signal_params[] = [
+                    "key" => $key,
+                    "value" => $value
+                ];
+            }
+
+            $context->adSignalsInfo = [
+                "params" => $signal_params
+            ];
+
+            // primary column: search results
+            $sections =
+                $json
+                ->contents
+                ->twoColumnSearchResultsRenderer
+                ->primaryContents
+                ->sectionListRenderer
+                ->contents
+                ?? [];
+
+            foreach(
+                $sections[0]->itemSectionRenderer->contents ?? []
+                as $video
+            ){
+
+                $this->parsevideoobject($video);
+            }
+
+            // secondary column: optional watch card
+            $card =
+                $json
+                ->contents
+                ->twoColumnSearchResultsRenderer
+                ->secondaryContents
+                ->secondarySearchContainerRenderer
+                ->contents[0]
+                ->universalWatchCardRenderer
+                ?? null;
+
+            if($card !== null){
+
+                $header = $card->header->watchCardRichHeaderRenderer;
+
+                $author = [
+                    "name" =>
+                        $header
+                        ->title
+                        ->simpleText,
+                    "url" =>
+                        "https://www.youtube.com/channel/" .
+                        $header
+                        ->titleNavigationEndpoint
+                        ->browseEndpoint
+                        ->browseId,
+                    "avatar" =>
+                        $header
+                        ->avatar
+                        ->thumbnails[0]
+                        ->url
+                        ?? null
+                ];
+
+                // add video in callToAction if present
+                if(
+                    isset(
+                        $card
+                        ->callToAction
+                        ->watchCardHeroVideoRenderer
+                        ->lengthText
+                    )
+                ){
+
+                    $hero =
+                        $card
+                        ->callToAction
+                        ->watchCardHeroVideoRenderer;
+
+                    // the code reads field [1] as view count, [2] as date
+                    $subtitle =
+                        explode(
+                            "•",
+                            $hero->subtitle->simpleText
+                        );
+
+                    array_push(
+                        $this->out["video"],
+                        [
+                            "title" => $hero->title->simpleText,
+                            "description" => null,
+                            "author" => $author,
+                            "date" =>
+                                isset($subtitle[2]) ?
+                                $this->textualdate2unix(
+                                    trim($subtitle[2])
+                                ) :
+                                null,
+                            "duration" =>
+                                $this->hms2int(
+                                    $hero->lengthText->simpleText
+                                ),
+                            "views" =>
+                                isset($subtitle[1]) ?
+                                $this->truncatedcount2int(
+                                    trim($subtitle[1])
+                                ) :
+                                null,
+                            "thumb" => [
+                                "url" =>
+                                    $hero
+                                    ->heroImage
+                                    ->singleHeroImageRenderer
+                                    ->thumbnail
+                                    ->thumbnails[0]
+                                    ->url,
+                                "ratio" => "16:9"
+                            ],
+                            "url" =>
+                                "https://www.youtube.com/watch?v=" .
+                                $hero
+                                ->navigationEndpoint
+                                ->watchEndpoint
+                                ->videoId
+                        ]
+                    );
+                }
+
+                // get all playlists, ignore videos: take the first section
+                // that holds a horizontal card list
+                $cards = [];
+
+                foreach($card->sections ?? [] as $section){
+
+                    $list =
+                        $section
+                        ->watchCardSectionSequenceRenderer
+                        ->lists[0]
+                        ->horizontalCardListRenderer
+                        ->cards
+                        ?? null;
+
+                    if($list !== null){
+
+                        $cards = $list;
+                        break;
+                    }
+                }
+
+                foreach($cards as $entry){
+
+                    if(!isset($entry->searchRefinementCardRenderer)){
+
+                        continue;
+                    }
+
+                    $refinement = $entry->searchRefinementCardRenderer;
+
+                    array_push(
+                        $this->out["playlist"],
+                        [
+                            "title" =>
+                                $refinement
+                                ->query
+                                ->runs[0]
+                                ->text,
+                            "description" => null,
+                            "author" => $author,
+                            "date" => null,
+                            "duration" => null,
+                            "views" => null,
+                            "thumb" => [
+                                "url" =>
+                                    $refinement
+                                    ->thumbnail
+                                    ->thumbnails[0]
+                                    ->url,
+                                "ratio" => "1:1"
+                            ],
+                            "url" =>
+                                "https://www.youtube.com" .
+                                $refinement
+                                ->searchEndpoint
+                                ->commandMetadata
+                                ->webCommandMetadata
+                                ->url
+                        ]
+                    );
+                }
+            }
+
+            // locate the continuation token among the primary sections
+            foreach($sections as $cont){
+
+                if(isset($cont->continuationItemRenderer)){
+
+                    $this->out["npt"] = [
+                        "key" => $ytconfig->INNERTUBE_API_KEY,
+                        "post" => [
+                            "context" => $context,
+                            "continuation" =>
+                                $cont
+                                ->continuationItemRenderer
+                                ->continuationEndpoint
+                                ->continuationCommand
+                                ->token
+                        ]
+                    ];
+                    break;
+                }
+            }
+        }
+
+        if($this->out["npt"] !== null){
+
+            // swap the raw continuation data for an opaque stored token,
+            // bound to the proxy used for this request
+            $this->out["npt"] =
+                $this->backend->store(
+                    json_encode(
+                        $this->out["npt"]
+                    ),
+                    "videos",
+                    $proxy
+                );
+        }
+
+        return $this->out;
+    }
+
+    /**
+     * Convert one YouTube result renderer into entries of $this->out.
+     *
+     * Handled renderers: videoRenderer (video, livestream or reel),
+     * watchCardCompactVideoRenderer, reelShelfRenderer, channelRenderer,
+     * shelfRenderer (recurses into its items), radioRenderer and
+     * playlistRenderer. Anything else is silently ignored.
+     *
+     * @param object $video A single decoded item from a results list
+     * @return void
+     */
+    private function parsevideoobject($video){
+
+        if(isset($video->videoRenderer)){
+
+            $renderer = $video->videoRenderer;
+
+            $description = null;
+
+            if(isset($renderer->detailedMetadataSnippets)){
+
+                foreach(
+                    $renderer
+                    ->detailedMetadataSnippets[0]
+                    ->snippetText
+                    ->runs
+                    as $description_part
+                ){
+
+                    $description .= $description_part->text;
+                }
+            }
+
+            $badge =
+                $renderer
+                ->badges[0]
+                ->metadataBadgeRenderer
+                ->icon
+                ->iconType
+                ?? null;
+
+            if($badge === "LIVE"){
+
+                $type = "livestream";
+                $date = null;
+                $duration = "_LIVE";
+
+                // live entries are read from viewCountText->runs
+                $viewtext =
+                    $renderer
+                    ->viewCountText
+                    ->runs[0]
+                    ->text
+                    ?? null;
+            }else{
+
+                $type = "video";
+
+                $published =
+                    $renderer
+                    ->publishedTimeText
+                    ->simpleText
+                    ?? null;
+
+                $date =
+                    $published !== null ?
+                    $this->textualdate2unix($published) :
+                    null;
+
+                $length =
+                    $renderer
+                    ->lengthText
+                    ->simpleText
+                    ?? null;
+
+                $duration =
+                    $length !== null ?
+                    $this->hms2int($length) :
+                    null;
+
+                $viewtext =
+                    $renderer
+                    ->viewCountText
+                    ->simpleText
+                    ?? null;
+            }
+
+            $views =
+                $viewtext !== null ?
+                $this->views2int($viewtext) :
+                null;
+
+            $pagetype =
+                $renderer
+                ->navigationEndpoint
+                ->commandMetadata
+                ->webCommandMetadata
+                ->webPageType
+                ?? null;
+
+            if($pagetype == "WEB_PAGE_TYPE_SHORTS"){
+
+                // shorts come back as regular video renderers
+                $type = "reel";
+            }
+
+            $avatar =
+                $renderer
+                ->channelThumbnailSupportedRenderers
+                ->channelThumbnailWithLinkRenderer
+                ->thumbnail
+                ->thumbnails[0]
+                ->url
+                ?? null;
+
+            array_push(
+                $this->out[$type],
+                [
+                    "title" =>
+                        $renderer
+                        ->title
+                        ->runs[0]
+                        ->text,
+                    // null when there is no snippet, like every other
+                    // entry type that has no description
+                    "description" =>
+                        $description !== null ?
+                        $this->titledots($description) :
+                        null,
+                    "author" => [
+                        "name" =>
+                            $renderer
+                            ->longBylineText
+                            ->runs[0]
+                            ->text,
+                        "url" =>
+                            "https://www.youtube.com/channel/" .
+                            $renderer
+                            ->longBylineText
+                            ->runs[0]
+                            ->navigationEndpoint
+                            ->browseEndpoint
+                            ->browseId,
+                        "avatar" =>
+                            $avatar !== null ?
+                            $this->checkhttpspresence($avatar) :
+                            null
+                    ],
+                    "date" => $date,
+                    "duration" => $duration,
+                    "views" => $views,
+                    "thumb" => [
+                        "url" =>
+                            $renderer
+                            ->thumbnail
+                            ->thumbnails[0]
+                            ->url,
+                        "ratio" => "16:9"
+                    ],
+                    "url" =>
+                        "https://www.youtube.com/watch?v=" .
+                        $renderer
+                        ->videoId
+                ]
+            );
+
+        }elseif(isset($video->watchCardCompactVideoRenderer)){
+
+            $renderer = $video->watchCardCompactVideoRenderer;
+
+            // the code reads the part before "•" as views, the rest as date
+            $subtitle =
+                explode(
+                    "•",
+                    $renderer->subtitle->simpleText,
+                    2
+                );
+
+            array_push(
+                $this->out["video"],
+                [
+                    "title" =>
+                        $renderer
+                        ->title
+                        ->simpleText,
+                    "description" => null,
+                    "author" => [
+                        "name" =>
+                            $renderer
+                            ->byline
+                            ->runs[0]
+                            ->text,
+                        "url" =>
+                            "https://www.youtube.com/channel/" .
+                            $renderer
+                            ->byline
+                            ->runs[0]
+                            ->navigationEndpoint
+                            ->browseEndpoint
+                            ->browseId,
+                        "avatar" => null
+                    ],
+                    "date" =>
+                        isset($subtitle[1]) ?
+                        $this->textualdate2unix(
+                            trim($subtitle[1])
+                        ) :
+                        null,
+                    "duration" =>
+                        $this->hms2int(
+                            $renderer
+                            ->lengthText
+                            ->simpleText
+                        ),
+                    "views" =>
+                        $this->truncatedcount2int(
+                            trim($subtitle[0])
+                        ),
+                    "thumb" => [
+                        "url" =>
+                            $renderer
+                            ->thumbnail
+                            ->thumbnails[0]
+                            ->url,
+                        "ratio" => "16:9"
+                    ],
+                    "url" =>
+                        "https://www.youtube.com/watch?v=" .
+                        $renderer
+                        ->navigationEndpoint
+                        ->watchEndpoint
+                        ->videoId
+                ]
+            );
+
+        }elseif(isset($video->reelShelfRenderer)){
+
+            foreach(
+                $video
+                ->reelShelfRenderer
+                ->items
+                ?? []
+                as $item
+            ){
+
+                if(!isset($item->shortsLockupViewModel)){
+
+                    // some other kind of shelf item, nothing to read
+                    continue;
+                }
+
+                $reel = $item->shortsLockupViewModel;
+
+                array_push(
+                    $this->out["reel"],
+                    [
+                        "title" =>
+                            $reel
+                            ->overlayMetadata
+                            ->primaryText
+                            ->content,
+                        "description" => null,
+                        "author" => [
+                            "name" => null,
+                            "url" => null,
+                            "avatar" => null
+                        ],
+                        "date" => null,
+                        "duration" => null,
+                        "views" => null,
+                        "thumb" => [
+                            "url" =>
+                                $reel
+                                ->thumbnail
+                                ->sources[0]
+                                ->url,
+                            "ratio" => "9:16"
+                        ],
+                        "url" =>
+                            "https://www.youtube.com/watch?v=" .
+                            $reel
+                            ->onTap
+                            ->innertubeCommand
+                            ->reelWatchEndpoint
+                            ->videoId
+                    ]
+                );
+            }
+
+        }elseif(isset($video->channelRenderer)){
+
+            $renderer = $video->channelRenderer;
+
+            $description = null;
+
+            if(isset($renderer->descriptionSnippet)){
+
+                foreach(
+                    $renderer
+                    ->descriptionSnippet
+                    ->runs
+                    as $description_part
+                ){
+
+                    $description .= $description_part->text;
+                }
+            }
+
+            // NOTE(review): "followers" is read from videoCountText;
+            // presumably YouTube puts the subscriber count there. Confirm.
+            $counttext =
+                $renderer
+                ->videoCountText
+                ->simpleText
+                ?? null;
+
+            $thumbs = $renderer->thumbnail->thumbnails;
+
+            array_push(
+                $this->out["author"],
+                [
+                    "title" =>
+                        $renderer
+                        ->title
+                        ->simpleText,
+                    "followers" =>
+                        $counttext !== null ?
+                        $this->truncatedcount2int($counttext) :
+                        0,
+                    "description" =>
+                        $description !== null ?
+                        $this->titledots($description) :
+                        null,
+                    "thumb" => [
+                        "url" =>
+                            $this->checkhttpspresence(
+                                // last entry of the thumbnail list
+                                $thumbs[count($thumbs) - 1]->url
+                            ),
+                        "ratio" => "1:1"
+                    ],
+                    "url" =>
+                        "https://www.youtube.com/channel/" .
+                        $renderer
+                        ->channelId
+                ]
+            );
+
+        }elseif(isset($video->shelfRenderer)){
+
+            // only vertical shelves are unpacked
+            if(
+                !isset(
+                    $video
+                    ->shelfRenderer
+                    ->content
+                    ->verticalListRenderer
+                    ->items
+                )
+            ){
+
+                return;
+            }
+
+            foreach(
+                $video
+                ->shelfRenderer
+                ->content
+                ->verticalListRenderer
+                ->items
+                as $shelfvideo
+            ){
+
+                $this->parsevideoobject($shelfvideo);
+            }
+
+        }elseif(isset($video->radioRenderer)){
+
+            $renderer = $video->radioRenderer;
+
+            $description =
+                $renderer
+                ->videoCountText
+                ->runs[0]
+                ->text
+                . ".";
+
+            $titles = [];
+            foreach($renderer->videos as $childvideo){
+
+                $titles[] =
+                    $childvideo
+                    ->childVideoRenderer
+                    ->title
+                    ->simpleText;
+            }
+
+            if(count($titles) !== 0){
+
+                $description .=
+                    " " . implode(", ", $titles);
+            }
+
+            $thumbs = $renderer->thumbnail->thumbnails;
+
+            array_push(
+                $this->out["playlist"],
+                [
+                    "title" =>
+                        $renderer
+                        ->title
+                        ->simpleText,
+                    "description" => $description,
+                    "author" => [
+                        "name" =>
+                            $renderer
+                            ->longBylineText
+                            ->simpleText,
+                        "url" => null,
+                        "avatar" => null
+                    ],
+                    "date" => null,
+                    "duration" => null,
+                    "views" => null,
+                    "thumb" => [
+                        "url" => $thumbs[count($thumbs) - 1]->url,
+                        "ratio" => "16:9"
+                    ],
+                    "url" =>
+                        "https://www.youtube.com/watch?v=" .
+                        $renderer
+                        ->videos[0]
+                        ->childVideoRenderer
+                        ->videoId .
+                        "&list=" .
+                        $renderer
+                        ->playlistId .
+                        "&start_radio=1"
+                ]
+            );
+
+        }elseif(isset($video->playlistRenderer)){
+
+            $renderer = $video->playlistRenderer;
+
+            $description = $renderer->videoCount . " videos.";
+
+            $titles = [];
+            foreach($renderer->videos as $childvideo){
+
+                $titles[] =
+                    $childvideo
+                    ->childVideoRenderer
+                    ->title
+                    ->simpleText;
+            }
+
+            if(count($titles) !== 0){
+
+                $description .=
+                    " " . implode(", ", $titles);
+            }
+
+            // here "thumbnails" is a list of thumbnail sets
+            $thumbs = $renderer->thumbnails[0]->thumbnails;
+
+            array_push(
+                $this->out["playlist"],
+                [
+                    "title" =>
+                        $renderer
+                        ->title
+                        ->simpleText,
+                    "description" => $description,
+                    "author" => [
+                        "name" =>
+                            $renderer
+                            ->longBylineText
+                            ->runs[0]
+                            ->text,
+                        "url" =>
+                            "https://www.youtube.com/channel/" .
+                            $renderer
+                            ->longBylineText
+                            ->runs[0]
+                            ->navigationEndpoint
+                            ->browseEndpoint
+                            ->browseId,
+                        // was "picture"; every other author uses "avatar"
+                        "avatar" => null
+                    ],
+                    "date" => null,
+                    "duration" => null,
+                    "views" => null,
+                    "thumb" => [
+                        "url" => $thumbs[count($thumbs) - 1]->url,
+                        "ratio" => "16:9"
+                    ],
+                    "url" =>
+                        "https://www.youtube.com/watch?v=" .
+                        $renderer
+                        ->videos[0]
+                        ->childVideoRenderer
+                        ->videoId .
+                        "&list=" .
+                        $renderer
+                        ->playlistId .
+                        "&start_radio=1"
+                ]
+            );
+        }
+    }
+
+    /**
+     * Convert a relative date such as "3 weeks ago" into a unix timestamp.
+     *
+     * The first "<count> <unit>" pair found anywhere in the text is used, so
+     * prefixed forms ("Streamed 2 years ago") are handled as well. Months
+     * and years use average lengths (2629746s and 31556952s).
+     *
+     * @param string|null $number Relative date text
+     * @return int Timestamp; the current time when nothing can be parsed
+     */
+    private function textualdate2unix($number){
+
+        $unit_seconds = [
+            "second" => 1,
+            "minute" => 60,
+            "hour" => 3600,
+            "day" => 86400,
+            "week" => 604800,
+            "month" => 2629746,
+            "year" => 31556952
+        ];
+
+        if(
+            preg_match(
+                '/([0-9]+)\s*(second|minute|hour|day|week|month|year)/i',
+                (string)$number,
+                $match
+            )
+        ){
+
+            return
+                time() -
+                (
+                    (int)$match[1] *
+                    $unit_seconds[strtolower($match[2])]
+                );
+        }
+
+        // unparseable: same fallback as before (zero offset)
+        return time();
+    }
+
+    /**
+     * Turn a protocol-relative link ("//host/path") into an https:// one.
+     *
+     * @param string $link URL to check
+     * @return string $link prefixed with "https:" when it starts with "//",
+     *                otherwise unchanged
+     */
+    private function checkhttpspresence($link){
+
+        $is_relative = strncmp($link, "//", 2) === 0;
+
+        return $is_relative ? "https:" . $link : $link;
+    }
+
+    /**
+     * Convert a spelled-out duration ("1 hour 2 minutes 3 seconds") into
+     * seconds.
+     *
+     * When the text holds several " - " separated segments, the
+     * second-to-last one is the one that gets parsed.
+     *
+     * @param string $number Duration text
+     * @return int Total number of seconds, 0 when nothing matches
+     */
+    private function textualtime2int($number){
+
+        $segments = explode(" - ", $number);
+
+        // second-to-last segment, or the only one
+        $index = max(count($segments) - 2, 0);
+
+        // squeeze out spaces and singularize units: "2 minutes" => "2minute"
+        $compact =
+            str_replace(
+                [
+                    " ",
+                    "seconds",
+                    "minutes",
+                    "hours",
+                ],
+                [
+                    "",
+                    "second",
+                    "minute",
+                    "hour"
+                ],
+                $segments[$index]
+            );
+
+        preg_match_all(
+            '/([0-9]+)(second|minute|hour)/',
+            $compact,
+            $pairs,
+            PREG_SET_ORDER
+        );
+
+        $unit_seconds = [
+            "second" => 1,
+            "minute" => 60,
+            "hour" => 3600
+        ];
+
+        $total = 0;
+
+        foreach($pairs as $pair){
+
+            $total += (int)$pair[1] * $unit_seconds[$pair[2]];
+        }
+
+        return $total;
+    }
+
+    /**
+     * Parse a full view count such as "1,234,567 views" into an integer.
+     *
+     * Only the text before the first space is considered; commas are
+     * dropped before the cast.
+     *
+     * @param string $views View count text
+     * @return int Parsed count, 0 when the first word is not numeric
+     */
+    private function views2int($views){
+
+        $words = explode(" ", $views, 2);
+        $digits = str_replace(",", "", $words[0]);
+
+        return (int)$digits;
+    }
+
+    /**
+     * Convert a "H:MM:SS", "M:SS" or "S" timestamp into seconds.
+     *
+     * @param string $time Colon-separated duration
+     * @return int Duration in seconds
+     */
+    private function hms2int($time){
+
+        // at most three fields; walked right to left they are
+        // seconds, minutes, hours
+        $fields = array_reverse(explode(":", $time, 3));
+        $weights = [1, 60, 3600];
+
+        $seconds = 0;
+
+        foreach($fields as $position => $field){
+
+            $seconds += (int)$field * $weights[$position];
+        }
+
+        return $seconds;
+    }
+
+    /**
+     * Parse an abbreviated count such as "1.2M subscribers" or "15K views".
+     *
+     * Only the first word is read. Thousands separators are ignored, any
+     * number of decimals is accepted, and the optional K/M/B suffix
+     * (case-insensitive) scales the value.
+     *
+     * @param string|null $number Count text
+     * @return int Parsed count, 0 when the first word is not numeric
+     */
+    private function truncatedcount2int($number){
+
+        $word = explode(" ", trim((string)$number), 2)[0];
+        $word = strtolower(str_replace(",", "", $word));
+
+        if(
+            !preg_match(
+                '/^([0-9]+(?:\.[0-9]+)?)([kmb])?/',
+                $word,
+                $match
+            )
+        ){
+
+            return 0;
+        }
+
+        $multipliers = [
+            "k" => 1000,
+            "m" => 1000000,
+            "b" => 1000000000
+        ];
+
+        // the suffix group is absent from $match when it did not take part
+        $unit = $match[2] ?? "";
+        $multiplier = $multipliers[$unit] ?? 1;
+
+        // round() absorbs float noise (e.g. 1.2 * 1000000)
+        return (int)round((float)$match[1] * $multiplier);
+    }
+
+    /**
+     * Remove one trailing ellipsis ("..." or "…") and trim the text.
+     *
+     * Trimming covers spaces, tabs, newlines, vertical tabs, NUL and the
+     * non-breaking space. It works on whole characters: the previous
+     * byte-wise trim() listed \xc2 and \xa0 separately and therefore cut
+     * the last byte off characters such as "à" (C3 A0), leaving invalid
+     * UTF-8 behind.
+     *
+     * @param string|null $title Text to clean up
+     * @return string Cleaned text ("" for null input)
+     */
+    private function titledots($title){
+
+        $title = (string)$title;
+
+        // "…" is three bytes in UTF-8, same length as "..."
+        $tail = substr($title, -3);
+
+        if(
+            $tail === "..." ||
+            $tail === "…"
+        ){
+
+            $title = substr($title, 0, -3);
+        }
+
+        $trimmed =
+            preg_replace(
+                '/^[ \n\r\t\x0B\x00\x{00A0}]+|[ \n\r\t\x0B\x00\x{00A0}]+$/u',
+                "",
+                $title
+            );
+
+        if($trimmed === null){
+
+            // input is not valid UTF-8: ASCII-only trim cannot do harm
+            return trim($title, " \n\r\t\v\0");
+        }
+
+        return $trimmed;
+    }
+}