aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/pinterest.php
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/pinterest.php')
-rw-r--r--scraper/pinterest.php439
1 files changed, 439 insertions, 0 deletions
diff --git a/scraper/pinterest.php b/scraper/pinterest.php
new file mode 100644
index 0000000..4188bce
--- /dev/null
+++ b/scraper/pinterest.php
@@ -0,0 +1,439 @@
+<?php
+
+class pinterest{
+
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("pinterest");
+ }
+
+ public function getfilters($page){
+
+ return [];
+ }
+
+ private function get($proxy, $url, $get = [], &$cookies, $header_data_post = null){
+
+ $curlproc = curl_init();
+
+ if($header_data_post === null){
+
+ // handling GET
+
+ // extract cookies
+ $cookies_tmp = [];
+ curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+ $length = strlen($header);
+
+ $header = explode(":", $header, 2);
+
+ if(trim(strtolower($header[0])) == "set-cookie"){
+
+ $cookie_tmp = explode("=", trim($header[1]), 2);
+
+ $cookies_tmp[trim($cookie_tmp[0])] =
+ explode(";", $cookie_tmp[1], 2)[0];
+ }
+
+ return $length;
+ });
+
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: application/json, text/javascript, */*, q=0.01",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "Referer: https://ca.pinterest.com/",
+ "X-Requested-With: XMLHttpRequest",
+ "X-APP-VERSION: 78f8764",
+ "X-Pinterest-AppState: active",
+ "X-Pinterest-Source-Url: /",
+ "X-Pinterest-PWS-Handler: www/index.js",
+ "screen-dpr: 1",
+ "is-preload-enabled: 1",
+ "DNT: 1",
+ "Sec-GPC: 1",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-origin",
+ "Connection: keep-alive",
+ "Alt-Used: ca.pinterest.com",
+ "Priority: u=0",
+ "TE: trailers"]
+ );
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+ }else{
+
+ // handling POST (pagination)
+ $get = http_build_query($get);
+
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: application/json, text/javascript, */*, q=0.01",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "Content-Type: application/x-www-form-urlencoded",
+ "Content-Length: " . strlen($get),
+ "Referer: https://ca.pinterest.com/",
+ "X-Requested-With: XMLHttpRequest",
+ "X-APP-VERSION: 78f8764",
+ "X-CSRFToken: " . $cookies["csrf"],
+ "X-Pinterest-AppState: active",
+ "X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post),
+ "X-Pinterest-PWS-Handler: www/search/[scope].js",
+ "screen-dpr: 1",
+ "is-preload-enabled: 1",
+ "Origin: https://ca.pinterest.com",
+ "DNT: 1",
+ "Sec-GPC: 1",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-origin",
+ "Connection: keep-alive",
+ "Alt-Used: ca.pinterest.com",
+ "Cookie: " . $cookies["cookie"],
+ "TE: trailers"]
+ );
+
+ curl_setopt($curlproc, CURLOPT_POST, true);
+ curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+ // http2 bypass
+ curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ throw new Exception(curl_error($curlproc));
+ }
+
+ if($header_data_post === null){
+
+ if(!isset($cookies_tmp["csrftoken"])){
+
+ throw new Exception("Failed to grep CSRF token");
+ }
+
+ $cookies = "";
+
+ foreach($cookies_tmp as $cookie_name => $cookie_value){
+
+ $cookies .= $cookie_name . "=" . $cookie_value . "; ";
+ }
+
+ $cookies = [
+ "csrf" => $cookies_tmp["csrftoken"],
+ "cookie" => rtrim($cookies, " ;")
+ ];
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ public function image($get){
+
+ if($get["npt"]){
+
+ [$data, $proxy] =
+ $this->backend->get(
+ $get["npt"], "images"
+ );
+
+ $data = json_decode($data, true);
+
+ $search = $data["q"];
+ $cookies = $data["cookies"];
+
+ try{
+ $json =
+ $this->get(
+ $proxy,
+ "https://ca.pinterest.com/resource/BaseSearchResource/get/",
+ [
+ "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
+ "data" => json_encode(
+ [
+ "options" => [
+ "applied_unified_filters" => null,
+ "appliedProductFilters" => "---",
+ "article" => null,
+ "auto_correction_disabled" => false,
+ "corpus" => null,
+ "customized_rerank_type" => null,
+ "domains" => null,
+ "dynamicPageSizeExpGroup" => null,
+ "filters" => null,
+ "journey_depth" => null,
+ "page_size" => null,
+ "price_max" => null,
+ "price_min" => null,
+ "query_pin_sigs" => null,
+ "query" => $data["q"],
+ "redux_normalize_feed" => true,
+ "request_params" => null,
+ "rs" => "typed",
+ "scope" => "pins",
+ "selected_one_bar_modules" => null,
+ "source_id" => null,
+ "source_module_id" => null,
+ "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
+ "top_pin_id" => null,
+ "top_pin_ids" => null,
+ "bookmarks" => [
+ $data["bookmark"]
+ ]
+ ],
+ "context" => []
+ ],
+ JSON_UNESCAPED_SLASHES
+ )
+ ],
+ $cookies,
+ $search
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON");
+ }
+
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ // https://ca.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac&data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D&_=1736116313987
+ // source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac
+ // &data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D
+ // &_=1736116313987
+
+ $source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search);
+
+ $filter = [
+ "source_url" => $source_url,
+ "rs" => "typed",
+ "data" =>
+ json_encode(
+ [
+ "options" => [
+ "applied_unified_filters" => null,
+ "appliedProductFilters" => "---",
+ "article" => null,
+ "corpus" => null,
+ "customized_rerank_type" => null,
+ "domains" => null,
+ "dynamicPageSizeExpGroup" => null,
+ "filters" => null,
+ "journey_depth" => null,
+ "page_size" => null,
+ "price_max" => null,
+ "price_min" => null,
+ "query_pin_sigs" => null,
+ "query" => $search,
+ "redux_normalize_feed" => true,
+ "request_params" => null,
+ "rs" => "ac",
+ "scope" => "pins", // pins, boards, videos,
+ "selected_one_bar_modules" => null,
+ "source_id" => null,
+ "source_module_id" => null,
+ "source_url" => $source_url,
+ "top_pin_id" => null,
+ "top_pin_ids" => null
+ ],
+ "context" => []
+ ]
+ ),
+ "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
+ ];
+
+ $proxy = $this->backend->get_ip();
+ $cookies = [];
+
+ try{
+ $json =
+ $this->get(
+ $proxy,
+ "https://ca.pinterest.com/resource/BaseSearchResource/get/",
+ $filter,
+ $cookies,
+ null
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON");
+ }
+ }
+
+ $json = json_decode($json, true);
+
+ if($json === null){
+
+ throw new Exception("Failed to decode JSON");
+ }
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "image" => []
+ ];
+
+ if(
+ !isset(
+ $json["resource_response"]
+ ["status"]
+ )
+ ){
+
+ throw new Exception("Unknown API failure");
+ }
+
+ if($json["resource_response"]["status"] != "success"){
+
+ $status = "Got non-OK response: " . $json["resource_response"]["status"];
+
+ if(
+ isset(
+ $json["resource_response"]["message"]
+ )
+ ){
+
+ $status .= " - " . $json["resource_response"]["message"];
+ }
+
+ throw new Exception($status);
+ }
+
+ if(
+ isset(
+ $json["resource_response"]["sensitivity"]
+ ["notices"][0]["description"]["text"]
+ )
+ ){
+
+ throw new Exception(
+ "Pinterest returned a notice: " .
+ $json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"]
+ );
+ }
+
+ // get NPT
+ if(isset($json["resource_response"]["bookmark"])){
+
+ $out["npt"] =
+ $this->backend->store(
+ json_encode([
+ "q" => $search,
+ "bookmark" => $json["resource_response"]["bookmark"],
+ "cookies" => $cookies
+ ]),
+ "images",
+ $proxy
+ );
+ }
+
+ foreach(
+ $json
+ ["resource_response"]
+ ["data"]
+ ["results"]
+ as $item
+ ){
+
+ switch($item["type"]){
+
+ case "pin":
+ case "board":
+
+ /*
+ Handle image object
+ */
+ $images = array_values($item["images"]);
+ $image = &$images[count($images) - 1]; // original
+ $thumb = &$images[1]; // 236x
+
+ $title = [];
+
+ if(
+ isset($item["grid_title"]) &&
+ trim($item["grid_title"]) != ""
+ ){
+
+ $title[] = $item["grid_title"];
+ }
+
+ if(
+ isset($item["description"]) &&
+ trim($item["description"]) != ""
+ ){
+
+ $title[] = $item["description"];
+ }
+
+ $title = implode(": ", $title);
+
+ if(
+ $title == "" &&
+ isset($item["board"]["name"]) &&
+ trim($item["board"]["name"]) != ""
+ ){
+
+ $title = $item["board"]["name"];
+ }
+
+ if($title == ""){
+
+ $title = null;
+ }
+
+ $out["image"][] = [
+ "title" => $title,
+ "source" => [
+ [
+ "url" => $image["url"],
+ "width" => (int)$image["width"],
+ "height" => (int)$image["height"]
+ ],
+ [
+ "url" => $thumb["url"],
+ "width" => (int)$thumb["width"],
+ "height" => (int)$thumb["height"]
+ ]
+ ],
+ "url" =>
+ $item["link"] === null ?
+ "https://ca.pinterest.com/pin/" . $item["id"] :
+ $item["link"]
+ ];
+ break;
+ }
+ }
+
+ return $out;
+ }
+}