about summary refs log tree commit diff stats
path: root/scraper/yandex.php
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/yandex.php')
-rw-r--r-- scraper/yandex.php 1248
1 files changed, 1248 insertions, 0 deletions
diff --git a/scraper/yandex.php b/scraper/yandex.php
new file mode 100644
index 0000000..f73c3fd
--- /dev/null
+++ b/scraper/yandex.php
@@ -0,0 +1,1248 @@
+<?php
+
+class yandex{
+
+ /*
+ curl functions
+ */
+ /**
+  * Load the library files the scraper depends on and create the HTML
+  * parser instance shared by web(), image() and video().
+  *
+  * include_once is used instead of include: including a file that declares
+  * a class a second time (scraper constructed twice in one request, or the
+  * library already loaded by other code) is a fatal "cannot redeclare" error.
+  */
+ public function __construct(){
+
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     // only the file is loaded here; the backend object itself is created
+     // inside each scraper function
+     include_once "lib/backend.php";
+ }
+
+ /**
+  * Perform a GET request against Yandex through the given proxy.
+  *
+  * @param mixed  $proxy      Proxy value handed to backend::assign_proxy().
+  * @param string $url        Endpoint URL without a query string.
+  * @param array  $get        Query parameters; [] leaves the URL untouched.
+  *                           (Previously declared with a default although a
+  *                           required parameter follows it, which PHP treats
+  *                           as required anyway and reports as deprecated.)
+  * @param mixed  $nsfw       "yes", "maybe" or "no"; only used to build the
+  *                           "yp" cookie in cookie mode 1.
+  * @param mixed  $get_cookie Cookie mode:
+  *                           0     - send no cookie and return the value of the
+  *                                   "i" cookie set by the response
+  *                           1     - send the "yp" cookie carrying the family
+  *                                   filter level derived from $nsfw
+  *                           other - send the given value as the "i" cookie
+  * @return string Response body, or the "i" cookie value in mode 0.
+  * @throws Exception On a transport error, or when mode 0 yields no "i" cookie.
+  */
+ private function get($proxy, $url, $get, $nsfw, $get_cookie = 1){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     // extract "i" cookie
+     $cookies_tmp = [];
+     if($get_cookie === 0){
+
+         curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($handle, $header) use (&$cookies_tmp){
+
+             // curl expects the number of bytes consumed, so remember it
+             // before the line is taken apart
+             $length = strlen($header);
+
+             $header = explode(":", $header, 2);
+
+             if(
+                 count($header) === 2 &&
+                 trim(strtolower($header[0])) == "set-cookie"
+             ){
+
+                 $cookie_tmp = explode("=", trim($header[1]), 2);
+
+                 // ignore malformed cookies that carry no "name=value" pair
+                 if(count($cookie_tmp) === 2){
+
+                     $cookies_tmp[trim($cookie_tmp[0])] =
+                         explode(";", $cookie_tmp[1], 2)[0];
+                 }
+             }
+
+             return $length;
+         });
+     }
+
+     // translate the filter value into the level stored in the "yp" cookie
+     switch($nsfw){
+         case "yes": $nsfw = "0"; break;
+         case "maybe": $nsfw = "1"; break;
+         case "no": $nsfw = "2"; break;
+     }
+
+     // strict checks: a cookie value must never be mistaken for mode 0 or 1
+     if($get_cookie === 0){
+
+         $cookie = null;
+     }elseif($get_cookie === 1){
+
+         $cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
+     }else{
+
+         $cookie = "Cookie: i=" . $get_cookie;
+     }
+
+     $headers =
+         ["User-Agent: " . config::USER_AGENT,
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Encoding: gzip",
+         "Accept-Language: en-US,en;q=0.5",
+         "DNT: 1"];
+
+     // no cookie in mode 0: do not hand curl an empty header line
+     if($cookie !== null){
+
+         $headers[] = $cookie;
+     }
+
+     $headers[] = "Referer: https://yandex.com/images/search";
+     $headers[] = "Connection: keep-alive";
+     $headers[] = "Upgrade-Insecure-Requests: 1";
+     $headers[] = "Sec-Fetch-Dest: document";
+     $headers[] = "Sec-Fetch-Mode: navigate";
+     $headers[] = "Sec-Fetch-Site: cross-site";
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $this->backend->assign_proxy($curlproc, $proxy);
+
+     $data = curl_exec($curlproc);
+
+     // read the error state, then release the handle on every code path
+     $error = curl_errno($curlproc) ? curl_error($curlproc) : null;
+     curl_close($curlproc);
+
+     if($get_cookie === 0){
+
+         // the cookie arrives with the headers, so it is usable even if the
+         // body transfer failed afterwards
+         if(isset($cookies_tmp["i"])){
+
+             return $cookies_tmp["i"];
+         }
+
+         throw new Exception(
+             $error === null ?
+                 "Failed to get Yandex clearance cookie" :
+                 "Failed to get Yandex clearance cookie (" . $error . ")"
+         );
+     }
+
+     if($error !== null){
+
+         throw new Exception($error);
+     }
+
+     return $data;
+ }
+
+ /**
+  * Describe the filters offered for a page type.
+  *
+  * @param string $pagetype "web", "images" or "videos".
+  * @return array|null Filter name => ["display" => label, "option" => choices];
+  *                    "option" is either a value => label map or the marker
+  *                    string "_DATE". null for any other page type.
+  */
+ public function getfilters($pagetype){
+
+     // images and videos offer the very same NSFW selector
+     $nsfw = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes",
+             "maybe" => "Maybe",
+             "no" => "No"
+         ]
+     ];
+
+     switch($pagetype){
+
+         case "web":
+             $filters = [
+                 "lang" => [
+                     "display" => "Language",
+                     "option" => [
+                         "any" => "Any language",
+                         "en" => "English",
+                         "ru" => "Russian",
+                         "be" => "Belorussian",
+                         "fr" => "French",
+                         "de" => "German",
+                         "id" => "Indonesian",
+                         "kk" => "Kazakh",
+                         "tt" => "Tatar",
+                         "tr" => "Turkish",
+                         "uk" => "Ukrainian"
+                     ]
+                 ],
+                 "newer" => ["display" => "Newer than", "option" => "_DATE"],
+                 "older" => ["display" => "Older than", "option" => "_DATE"]
+             ];
+             break;
+
+         case "images":
+             $filters = [
+                 "nsfw" => $nsfw,
+                 "time" => [
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "week" => "Last week"
+                     ]
+                 ],
+                 "size" => [
+                     "display" => "Size",
+                     "option" => [
+                         "any" => "Any size",
+                         "small" => "Small",
+                         "medium" => "Medium",
+                         "large" => "Large",
+                         "wallpaper" => "Wallpaper"
+                     ]
+                 ],
+                 "color" => [
+                     "display" => "Colors",
+                     "option" => [
+                         "any" => "All colors",
+                         "color" => "Color images only",
+                         "gray" => "Black and white",
+                         "red" => "Red",
+                         "orange" => "Orange",
+                         "yellow" => "Yellow",
+                         "cyan" => "Cyan",
+                         "green" => "Green",
+                         "blue" => "Blue",
+                         "violet" => "Purple",
+                         "white" => "White",
+                         "black" => "Black"
+                     ]
+                 ],
+                 "type" => [
+                     "display" => "Type",
+                     "option" => [
+                         "any" => "All types",
+                         "photo" => "Photos",
+                         "clipart" => "White background",
+                         "lineart" => "Drawings and sketches",
+                         "face" => "People",
+                         "demotivator" => "Demotivators"
+                     ]
+                 ],
+                 "layout" => [
+                     "display" => "Layout",
+                     "option" => [
+                         "any" => "All layouts",
+                         "horizontal" => "Horizontal",
+                         "vertical" => "Vertical",
+                         "square" => "Square"
+                     ]
+                 ],
+                 "format" => [
+                     "display" => "Format",
+                     "option" => [
+                         "any" => "Any format",
+                         "jpeg" => "JPEG",
+                         "png" => "PNG",
+                         "gif" => "GIF"
+                     ]
+                 ]
+             ];
+             break;
+
+         case "videos":
+             $filters = [
+                 "nsfw" => $nsfw,
+                 "time" => [
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "9" => "Recently"
+                     ]
+                 ],
+                 "duration" => [
+                     "display" => "Duration",
+                     "option" => [
+                         "any" => "Any duration",
+                         "short" => "Short"
+                     ]
+                 ]
+             ];
+             break;
+
+         default:
+             // unknown page type: nothing to offer
+             $filters = null;
+     }
+
+     return $filters;
+ }
+
+ /**
+  * Scrape one page of Yandex web results through the site-search frame.
+  *
+  * @param array $get Request values: "npt" (next-page token, falsy for a
+  *                   first page), "s" (search term), "lang", "newer" and
+  *                   "older" (timestamps, or false when unset).
+  * @return array Result set with "status", "spelling", "npt", "answer",
+  *               "web", "image", "video", "news" and "related"; only "web"
+  *               and "npt" are filled in by this scraper.
+  * @throws Exception On an empty search term, a failed request, or when
+  *                   Yandex answers with its "403" page.
+  */
+ public function web($get){
+
+     $this->backend = new backend("yandex_w");
+
+     // has captcha
+     // https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
+
+     // https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
+     // &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
+
+     // get clearance cookie
+     if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
+
+         $proxy = $this->backend->get_ip();
+
+         $cookie =
+             $this->get(
+                 $proxy,
+                 "https://yandex.ru/support2/smart-captcha/ru/",
+                 [],
+                 false,
+                 0
+             );
+
+         apcu_store("yandexweb_cookie", $cookie);
+     }
+
+     if($get["npt"]){
+
+         // the token resolves to the relative URL of the next page and the
+         // proxy that served the previous one
+         [$npt, $proxy] = $this->backend->get($get["npt"], "web");
+
+         $url = "https://yandex.com" . $npt;
+         $params = [];
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         // reuse the proxy that fetched the cookie, if one was picked above
+         if(!isset($proxy)){
+
+             $proxy = $this->backend->get_ip();
+         }
+
+         $lang = $get["lang"];
+         $older = $get["older"];
+         $newer = $get["newer"];
+
+         $url = "https://yandex.com/search/site/";
+         $params = [
+             "text" => $search,
+             "web" => "1",
+             "frame" => "1",
+             "searchid" => "3131712"
+         ];
+
+         if($lang != "any"){
+
+             $params["lang"] = $lang;
+         }
+
+         // a range needs both ends: an open start becomes the unix epoch
+         if(
+             $newer === false &&
+             $older !== false
+         ){
+
+             $newer = 0;
+         }
+
+         if($newer !== false){
+
+             $params["from_day"] = date("j", $newer);
+             $params["from_month"] = date("n", $newer);
+             $params["from_year"] = date("Y", $newer);
+
+             // ...and an open end becomes "now"
+             if($older === false){
+
+                 $older = time();
+             }
+
+             $params["to_day"] = date("j", $older);
+             $params["to_month"] = date("n", $older);
+             $params["to_year"] = date("Y", $older);
+         }
+     }
+
+     // first page and next page share the same error handling
+     try{
+         $html =
+             $this->get(
+                 $proxy,
+                 $url,
+                 $params,
+                 "yes",
+                 $cookie
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Could not get search page");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $this->fuckhtml->load($html);
+
+     // Scrape page blocked error
+     $title =
+         $this->fuckhtml
+         ->getElementsByTagName("title");
+
+     if(
+         count($title) !== 0 &&
+         $title[0]["innerHTML"] == "403"
+     ){
+
+         // the cookie is cached without expiry; it may be what got rejected,
+         // so drop it and let the next request fetch a fresh one
+         apcu_delete("yandexweb_cookie");
+
+         throw new Exception("Yandex blocked this proxy or 4get instance.");
+     }
+
+     // get nextpage
+     $npt =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "b-pager__next",
+             "a"
+         );
+
+     if(
+         count($npt) !== 0 &&
+         isset($npt[0]["attributes"]["href"])
+     ){
+
+         $out["npt"] =
+             $this->backend->store(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $npt[0]["attributes"]["href"]
+                 ),
+                 "web",
+                 $proxy
+             );
+     }
+
+     // get items
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "b-serp-item",
+             "li"
+         );
+
+     foreach($items as $item){
+
+         $this->fuckhtml->load($item);
+
+         $links =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "b-serp-item__title-link",
+                 "a"
+             );
+
+         // an entry without a title link is not a usable result
+         if(
+             count($links) === 0 ||
+             !isset($links[0]["attributes"]["href"])
+         ){
+
+             continue;
+         }
+
+         $link = $links[0];
+
+         // the snippet is optional
+         $snippet =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "b-serp-item__text",
+                 "div"
+             );
+
+         if(count($snippet) === 0){
+
+             $description = null;
+         }else{
+
+             $description =
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $snippet[0]
+                     )
+                 );
+         }
+
+         $out["web"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $link
+                     )
+                 ),
+             "description" => $description,
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $link["attributes"]["href"]
+                 ),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scrape one page of Yandex image results from the JSON search endpoint.
+  *
+  * @param array $get Request values: "npt" (next-page token, falsy for a
+  *                   first page), "s" (search term) and the filters "nsfw",
+  *                   "time", "size", "color", "type", "layout", "format".
+  * @return array ["status" => "ok", "npt" => token|null, "image" => list of
+  *               ["title", "source" => list of ["url", "width", "height"], "url"]].
+  * @throws Exception On an empty search term, a failed request, undecodable
+  *                   JSON, a captcha response, or a missing result state.
+  */
+ public function image($get){
+
+     $this->backend = new backend("yandex_i");
+
+     if($get["npt"]){
+
+         [$request, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "images"
+             );
+
+         // the token stores the previous query plus the nsfw setting, which
+         // travels in a cookie and not in the query string
+         $request = json_decode($request, true);
+
+         $nsfw = $request["nsfw"];
+         unset($request["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $time = $get["time"];
+         $size = $get["size"];
+         $color = $get["color"];
+         $type = $get["type"];
+         $layout = $get["layout"];
+         $format = $get["format"];
+
+         // Query parameters observed on yandex.com/images for each filter
+         // (all sent next to text= and uinfo=):
+         //   size    isize=small|medium|large
+         //           isize=wallpaper together with wp=wh16x9_1920x1080
+         //   layout  iorient=horizontal|vertical|square
+         //   type    type=photo|clipart|lineart|face|demotivator
+         //   color   icolor=color|gray|red|orange|yellow|cyan|green|blue|violet|white|black
+         //   format  itype=jpg|png|gifan
+         //   recent  recent=7D
+
+         $blocks = [];
+         foreach(
+             [
+                 "extra-content",
+                 "i-global__params:ajax",
+                 "search2:ajax",
+                 "preview__isWallpaper",
+                 "content_type_search",
+                 "serp-controller",
+                 "cookies_ajax",
+                 "advanced-search-block"
+             ] as $name
+         ){
+
+             $blocks[] = [
+                 "block" => $name,
+                 "params" => (object)[], // must encode as {} and not []
+                 "version" => 2
+             ];
+         }
+
+         $request = [
+             "format" => "json",
+             "request" => json_encode([
+                 "blocks" => $blocks,
+                 "metadata" => [
+                     "bundles" => [
+                         "lb" => "AS?(E<X120"
+                     ],
+                     "assets" => [
+                         // base asset list; the site's default additionally appends
+                         // "227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
+                         "las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
+                     ],
+                     "extraContent" => [
+                         "names" => [
+                             "i-react-ajax-adapter"
+                         ]
+                     ]
+                 ]
+             ])
+         ];
+
+         /*
+             Apply filters
+         */
+         if($time == "week"){
+
+             $request["recent"] = "7D";
+         }
+
+         if($size != "any"){
+
+             $request["isize"] = $size;
+
+             if($size == "wallpaper"){
+
+                 // the wallpaper filter is only complete with the resolution
+                 $request["wp"] = "wh16x9_1920x1080";
+             }
+         }
+
+         if($type != "any"){
+
+             $request["type"] = $type;
+         }
+
+         if($color != "any"){
+
+             $request["icolor"] = $color;
+         }
+
+         if($layout != "any"){
+
+             $request["iorient"] = $layout;
+         }
+
+         if($format != "any"){
+
+             // the filter keys shown to the user differ from the values the
+             // endpoint was observed to accept
+             $itype = [
+                 "jpeg" => "jpg",
+                 "gif" => "gifan"
+             ];
+
+             $request["itype"] = $itype[$format] ?? $format;
+         }
+
+         $request["text"] = $search;
+         $request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";
+     }
+
+     try{
+         // default cookie mode: sends the "yp" cookie that carries $nsfw.
+         // (Passing the pool name here was sent verbatim as the "i" cookie
+         // and silently disabled the NSFW filter.)
+         $json = $this->get(
+             $proxy,
+             "https://yandex.com/images/search",
+             $request,
+             $nsfw
+         );
+     }catch(Exception $err){
+
+         throw new Exception("Failed to get JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(
+         isset($json["type"]) &&
+         $json["type"] == "captcha"
+     ){
+
+         throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     // get html
+     $html = "";
+     $has_nextpage = false;
+
+     foreach(($json["blocks"] ?? []) as $block){
+
+         if(isset($block["html"])){
+
+             $html .= $block["html"];
+         }
+
+         if(!empty($block["params"]["nextPageUrl"])){
+
+             $has_nextpage = true;
+         }
+     }
+
+     // get next page: advance the counter once per response, no matter how
+     // many blocks advertise a next page
+     if($has_nextpage){
+
+         $request["nsfw"] = $nsfw;
+         $request["p"] = isset($request["p"]) ? $request["p"] + 1 : 1;
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($request),
+                 "images",
+                 $proxy
+             );
+     }
+
+     $this->fuckhtml->load($html);
+
+     // get search results: they live as JSON in the data-state attribute of
+     // the "Root" div that holds the serp list
+     $data = null;
+
+     foreach(
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "Root",
+             "div"
+         ) as $div
+     ){
+
+         if(!isset($div["attributes"]["data-state"])){
+
+             continue;
+         }
+
+         $tmp = json_decode(
+             $this->fuckhtml
+             ->getTextContent(
+                 $div["attributes"]["data-state"]
+             ),
+             true
+         );
+
+         if(isset($tmp["initialState"]["serpList"])){
+
+             $data = $tmp;
+             break;
+         }
+     }
+
+     if($data === null){
+
+         throw new Exception("Failed to extract JSON");
+     }
+
+     foreach(($data["initialState"]["serpList"]["items"]["entities"] ?? []) as $image){
+
+         // skip entities that lack the link or the preview image
+         if(
+             !isset(
+                 $image["snippet"]["url"],
+                 $image["viewerData"]["preview"][0]["url"]
+             )
+         ){
+
+             continue;
+         }
+
+         // title and optional text are joined as "title: text"
+         $title = [];
+         foreach(["title", "text"] as $part){
+
+             if(isset($image["snippet"][$part])){
+
+                 $title[] = html_entity_decode($image["snippet"][$part], ENT_QUOTES | ENT_HTML5);
+             }
+         }
+
+         $preview = $image["viewerData"]["preview"][0];
+
+         $tmp = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $this->titledots(
+                         implode(": ", $title)
+                     )
+                 ),
+             "source" => [
+                 // add preview URL
+                 [
+                     "url" => htmlspecialchars_decode($preview["url"]),
+                     "width" => (int)($preview["w"] ?? 0),
+                     "height" => (int)($preview["h"] ?? 0)
+                 ]
+             ],
+             "url" => htmlspecialchars_decode($image["snippet"]["url"])
+         ];
+
+         foreach(($image["viewerData"]["dups"] ?? []) as $dup){
+
+             $tmp["source"][] = [
+                 "url" => htmlspecialchars_decode($dup["url"]),
+                 "width" => (int)$dup["w"],
+                 "height" => (int)$dup["h"]
+             ];
+         }
+
+         // the thumbnail goes last; its URL is protocol-relative
+         if(isset($image["viewerData"]["thumb"]["url"])){
+
+             $thumb = $image["viewerData"]["thumb"];
+
+             $tmp["source"][] = [
+                 "url" =>
+                     preg_replace(
+                         '/^\/\//',
+                         "https://",
+                         htmlspecialchars_decode($thumb["url"])
+                     ),
+                 "width" => (int)($thumb["w"] ?? 0),
+                 "height" => (int)($thumb["h"] ?? 0)
+             ];
+         }
+
+         $out["image"][] = $tmp;
+     }
+
+     return $out;
+ }
+
+ /**
+  * Scrape one page of Yandex video results from the JSON search endpoint.
+  *
+  * @param array $get Request values: "npt" (next-page token, falsy for a
+  *                   first page), "s" (search term) and the filters "nsfw",
+  *                   "time" and "duration".
+  * @return array ["status" => "ok", "npt" => token|null, "video" => list of
+  *               results, plus "author", "livestream", "playlist" and "reel",
+  *               which this scraper leaves empty].
+  * @throws Exception On an empty search term, a failed request, undecodable
+  *                   JSON, or a response without "blocks".
+  */
+ public function video($get){
+
+     $this->backend = new backend("yandex_v");
+
+     if($get["npt"]){
+
+         [$params, $proxy] =
+             $this->backend->get(
+                 $get["npt"],
+                 "video"
+             );
+
+         // the token stores the previous query plus the nsfw setting, which
+         // travels in a cookie and not in the query string
+         $params = json_decode($params, true);
+
+         $nsfw = $params["nsfw"];
+         unset($params["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $proxy = $this->backend->get_ip();
+         $nsfw = $get["nsfw"];
+         $time = $get["time"];
+         $duration = $get["duration"];
+
+         // Request modelled on the one captured from the site:
+         // https://yandex.com/video/search
+         //   ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
+         //   &format=json
+         //   &request={"blocks":[{"block":...,"params":{},"version":2},...],"metadata":{...}}
+         //   &yu=... &from=tabbar &reqid=... &suggest_reqid=...
+         //   &text=minecraft
+
+         $blocks = [];
+         foreach(
+             [
+                 "extra-content",
+                 "i-global__params:ajax",
+                 "search2:ajax",
+                 "vital-incut",
+                 "content_type_search",
+                 "serp-controller",
+                 "cookies_ajax"
+             ] as $name
+         ){
+
+             $blocks[] = [
+                 "block" => $name,
+                 "params" => (object)[], // must encode as {} and not []
+                 "version" => 2
+             ];
+         }
+
+         $params = [
+             "tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
+             "format" => "json",
+             "request" => json_encode([
+                 "blocks" => $blocks,
+                 "metadata" => [
+                     "bundles" => [
+                         "lb" => "^G]!q<X120"
+                     ],
+                     "assets" => [
+                         "las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
+                     ],
+                     "extraContent" => [
+                         "names" => [
+                             "i-react-ajax-adapter"
+                         ]
+                     ]
+                 ]
+             ]),
+             "text" => $search
+         ];
+
+         if($duration != "any"){
+
+             $params["duration"] = $duration;
+         }
+
+         if($time != "any"){
+
+             $params["within"] = $time;
+         }
+     }
+
+     try{
+         // default cookie mode: sends the "yp" cookie that carries $nsfw.
+         // (Passing the pool name here was sent verbatim as the "i" cookie
+         // and silently disabled the NSFW filter.)
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://yandex.com/video/search",
+                 $params,
+                 $nsfw
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Could not fetch JSON");
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Could not parse JSON");
+     }
+
+     if(!isset($json["blocks"])){
+
+         throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     $html = "";
+     foreach($json["blocks"] as $block){
+
+         if(isset($block["html"])){
+
+             $html .= $block["html"];
+         }
+     }
+
+     $this->fuckhtml->load($html);
+
+     $div =
+         $this->fuckhtml
+         ->getElementsByTagName("div");
+
+     /*
+         Get nextpage
+     */
+     $npt =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "more more_direction_next i-bem",
+             $div
+         );
+
+     if(count($npt) !== 0){
+
+         // advance from the page just fetched; a constant 1 here would make
+         // every "next page" after the second request page 2 again
+         $params["p"] = isset($params["p"]) ? (int)$params["p"] + 1 : 1;
+         $params["nsfw"] = $nsfw;
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($params),
+                 "video",
+                 $proxy
+             );
+     }
+
+     $items =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "serp-item",
+             $div
+         );
+
+     foreach($items as $item){
+
+         // the result metadata is JSON stored in the data-video attribute
+         if(!isset($item["attributes"]["data-video"])){
+
+             continue;
+         }
+
+         $data =
+             json_decode(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $item["attributes"]["data-video"]
+                 ),
+                 true
+             );
+
+         $hosting = $data["counters"]["toHostingLoaded"] ?? [];
+
+         // without a title and a target link there is nothing to show
+         if(
+             !isset(
+                 $data["title"],
+                 $hosting["postfix"]["href"]
+             )
+         ){
+
+             continue;
+         }
+
+         $this->fuckhtml->load($item);
+
+         $thumb =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "thumb-image__image",
+                 "img"
+             );
+
+         if(
+             count($thumb) === 0 ||
+             !isset($thumb[0]["attributes"]["src"])
+         ){
+
+             $thumb = [
+                 "url" => null,
+                 "ratio" => null
+             ];
+         }else{
+
+             $thumb = [
+                 // only a leading "//" is turned into "https://". The 4th
+                 // argument of str_replace is an output counter, not a
+                 // limit, so it rewrote every "//", including the one
+                 // inside an absolute "https://" URL.
+                 "url" =>
+                     preg_replace(
+                         '/^\/\//',
+                         "https://",
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $thumb[0]["attributes"]["src"]
+                         )
+                     ),
+                 "ratio" => "16:9"
+             ];
+         }
+
+         $smallinfos =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "serp-item__sitelinks-item",
+                 "div"
+             );
+
+         $date = null;
+         $views = null;
+         $first = true;
+
+         foreach($smallinfos as $info){
+
+             // the first entry is neither date nor view count
+             if($first){
+
+                 $first = false;
+                 continue;
+             }
+
+             $info =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $info
+                 );
+
+             // whatever strtotime cannot read as a date is taken as views
+             if($temp_date = strtotime($info)){
+
+                 $date = $temp_date;
+             }else{
+
+                 $views = $this->parseviews($info);
+             }
+         }
+
+         $description =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "serp-item__text serp-item__text_visibleText_always",
+                 "div"
+             );
+
+         if(count($description) === 0){
+
+             $description = null;
+         }else{
+
+             $description =
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $description[0]
+                     )
+                 );
+         }
+
+         $out["video"][] = [
+             "title" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $this->titledots(
+                         $data["title"]
+                     )
+                 ),
+             "description" => $description,
+             "author" => [
+                 "name" => null,
+                 "url" => null,
+                 "avatar" => null
+             ],
+             "date" => $date,
+             "duration" => (int)($hosting["stredParams"]["duration"] ?? 0),
+             "views" => $views,
+             "thumb" => $thumb,
+             // upgrade the scheme only; "http://" appearing later in the URL
+             // (for example inside a query string) is left alone
+             "url" =>
+                 preg_replace(
+                     '/^http:\/\//',
+                     "https://",
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $hosting["postfix"]["href"]
+                     )
+                 )
+         ];
+     }
+
+     return $out;
+ }
+
+ /**
+  * Convert a view counter such as "1.5 mln. views" into a number.
+  *
+  * The first space-separated token is read as the amount and the second as
+  * an optional magnitude ("thsd.", "mln." or "bln."); any other or missing
+  * second token leaves the amount unscaled.
+  *
+  * @param string $text Counter text.
+  * @return float Parsed amount; 0.0 when the text does not start with a number.
+  */
+ private function parseviews($text){
+
+     $text = explode(" ", trim($text));
+
+     $num = (float)$text[0];
+
+     // a bare number has no unit token: do not read a missing index
+     $mod = $text[1] ?? "";
+
+     switch($mod){
+
+         case "bln.": $num = $num * 1000000000; break;
+         case "mln.": $num = $num * 1000000; break;
+         case "thsd.": $num = $num * 1000; break;
+     }
+
+     return $num;
+ }
+
+ /**
+  * Strip one trailing truncation marker ("..." or "…") and surrounding
+  * whitespace from a title or snippet.
+  *
+  * @param string $title Text to clean.
+  * @return string Trimmed text without the trailing marker.
+  */
+ private function titledots($title){
+
+     // "…" is three bytes long in UTF-8, so either marker occupies exactly
+     // the last three bytes and both are cut off the same way
+     $tail = substr($title, -3);
+
+     foreach(["...", "…"] as $marker){
+
+         if($tail == $marker){
+
+             return trim(substr($title, 0, -3));
+         }
+     }
+
+     return trim($title);
+ }
+}