aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/vsco.php
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2025-08-11 01:55:15 +0000
committer lolcat <will@lolcat.ca> 2025-08-11 01:55:15 +0000
commit cdf958d29333d448f4521f4d2faa2592b58e9b27 (patch)
tree 528f2a0ffa789a6f4279d9f54a4a2aaf391f390f /scraper/vsco.php
downloadshittyweb-search-cdf958d29333d448f4521f4d2faa2592b58e9b27.tar.gz
shittyweb-search-cdf958d29333d448f4521f4d2faa2592b58e9b27.tar.bz2
shittyweb-search-cdf958d29333d448f4521f4d2faa2592b58e9b27.zip
fix wikipedia crash (grafted)
Diffstat (limited to 'scraper/vsco.php')
-rw-r--r--scraper/vsco.php257
1 files changed, 257 insertions, 0 deletions
diff --git a/scraper/vsco.php b/scraper/vsco.php
new file mode 100644
index 0000000..8a7f057
--- /dev/null
+++ b/scraper/vsco.php
@@ -0,0 +1,257 @@
+<?php
+
+/**
+ * VSCO image-search scraper.
+ *
+ * Fetches a bearer token from the public feed page, queries the image
+ * search API with it, and converts the response into this project's
+ * result array. Pagination state is persisted through the backend
+ * helper as an opaque "next page token" (NPT).
+ */
+class vsco{
+
+    /** Number of results requested per search page. */
+    private const PAGE_SIZE = 100;
+
+    /** Width of the thumbnail bounding box, in pixels. */
+    private const THUMB_WIDTH = 480;
+
+    /** Height of the thumbnail bounding box, in pixels. */
+    private const THUMB_HEIGHT = 360;
+
+    /**
+     * Backend helper providing proxy selection and NPT storage.
+     *
+     * Declared explicitly because creating it as a dynamic property is
+     * deprecated since PHP 8.2. Kept public, which is the visibility the
+     * dynamic property had.
+     *
+     * @var backend
+     */
+    public $backend;
+
+    public function __construct(){
+
+        // include_once: a plain include would fatally redeclare the
+        // backend class if the file had already been loaded
+        include_once "lib/backend.php";
+        $this->backend = new backend("vsco");
+    }
+
+    /**
+     * Returns the search filters available for a page type.
+     *
+     * @param mixed $page Page type (unused, this scraper has no filters)
+     * @return array Always an empty array
+     */
+    public function getfilters($page){
+
+        return [];
+    }
+
+    /**
+     * Performs a GET request and returns the raw response body.
+     *
+     * Without a bearer token the request is sent with document-navigation
+     * headers; with one it is sent with API (CORS) headers, including the
+     * authorization header.
+     *
+     * @param mixed $proxy Proxy descriptor handed to backend::assign_proxy()
+     * @param string $url Target URL, without query string
+     * @param array $get Query parameters appended to $url
+     * @param string|null $bearer Bearer token, or null for a page request
+     * @return string Response body
+     * @throws Exception When curl reports a transfer error
+     */
+    private function get($proxy, $url, $get = [], $bearer = null){
+
+        if($get !== []){
+
+            $url .= "?" . http_build_query($get);
+        }
+
+        if($bearer === null){
+
+            $headers = [
+                "User-Agent: " . config::USER_AGENT,
+                "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                "Accept-Language: en-US,en;q=0.5",
+                "Accept-Encoding: gzip",
+                "DNT: 1",
+                "Sec-GPC: 1",
+                "Connection: keep-alive",
+                "Upgrade-Insecure-Requests: 1",
+                "Sec-Fetch-Dest: document",
+                "Sec-Fetch-Mode: navigate",
+                "Sec-Fetch-Site: same-origin",
+                "Sec-Fetch-User: ?1",
+                "Priority: u=0, i",
+                "TE: trailers"
+            ];
+        }else{
+
+            $headers = [
+                "User-Agent: " . config::USER_AGENT,
+                "Accept: */*",
+                "Accept-Language: en-US",
+                "Accept-Encoding: gzip",
+                // the query is a path segment here, so spaces must be
+                // encoded as %20 (rawurlencode), not "+"
+                "Referer: https://vsco.co/search/images/" . rawurlencode((string)($get["query"] ?? "")),
+                "authorization: Bearer " . $bearer,
+                "content-type: application/json",
+                "x-client-build: 1",
+                "x-client-platform: web",
+                "DNT: 1",
+                "Sec-GPC: 1",
+                "Connection: keep-alive",
+                "Sec-Fetch-Dest: empty",
+                "Sec-Fetch-Mode: cors",
+                "Sec-Fetch-Site: same-origin",
+                "Priority: u=0",
+                "TE: trailers"
+            ];
+        }
+
+        $curlproc = curl_init();
+
+        try{
+
+            curl_setopt($curlproc, CURLOPT_URL, $url);
+            curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+            curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+            curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+            curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+            curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+            // http2 bypass
+            curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
+
+            $this->backend->assign_proxy($curlproc, $proxy);
+
+            $data = curl_exec($curlproc);
+
+            if($data === false || curl_errno($curlproc)){
+
+                throw new Exception(curl_error($curlproc));
+            }
+
+            return $data;
+
+        }finally{
+
+            // release the handle on the error path as well
+            curl_close($curlproc);
+        }
+    }
+
+    /**
+     * Runs an image search, or continues one from a stored NPT.
+     *
+     * @param array $get Request parameters: "s" is the search term,
+     *                   "npt" an optional next-page token
+     * @return array Array with keys "status", "npt" (token or null) and
+     *               "image" (list of results, each with "title",
+     *               "source" and "url")
+     * @throws Exception On empty search term, network failure, or an
+     *                   unexpected response
+     */
+    public function image($get){
+
+        if(!empty($get["npt"])){
+
+            [$data, $proxy] =
+                $this->backend->get(
+                    $get["npt"], "images"
+                );
+
+            $data = json_decode($data, true);
+
+            if(
+                !isset($data["pagination"], $data["bearer"]) ||
+                !is_array($data["pagination"])
+            ){
+
+                throw new Exception("Failed to decode NPT");
+            }
+
+        }else{
+
+            $search = $get["s"] ?? "";
+            if(!is_scalar($search) || strlen((string)$search) === 0){
+
+                throw new Exception("Search term is empty!");
+            }
+
+            $proxy = $this->backend->get_ip();
+
+            // get bearer token
+            try{
+
+                $html =
+                    $this->get(
+                        $proxy,
+                        "https://vsco.co/feed"
+                    );
+
+            }catch(Exception $error){
+
+                throw new Exception("Failed to fetch feed page", 0, $error);
+            }
+
+            // NOTE(review): the range A-z also matches [ \ ] ^ _ and `.
+            // Left as-is because tightening it could reject tokens that
+            // currently match -- confirm the token alphabet first.
+            preg_match(
+                '/"tkn":"([A-z0-9]+)"/',
+                $html,
+                $bearer
+            );
+
+            if(!isset($bearer[1])){
+
+                throw new Exception("Failed to grep bearer token");
+            }
+
+            $data = [
+                "pagination" => [
+                    "query" => $search,
+                    "page" => 0,
+                    "size" => self::PAGE_SIZE
+                ],
+                "bearer" => $bearer[1]
+            ];
+        }
+
+        try{
+
+            $json =
+                $this->get(
+                    $proxy,
+                    "https://vsco.co/api/2.0/search/images",
+                    $data["pagination"],
+                    $data["bearer"]
+                );
+        }catch(Exception $error){
+
+            throw new Exception("Failed to fetch JSON", 0, $error);
+        }
+
+        $json = json_decode($json, true);
+
+        if($json === null){
+
+            throw new Exception("Failed to decode JSON");
+        }
+
+        $out = [
+            "status" => "ok",
+            "npt" => null,
+            "image" => []
+        ];
+
+        if(
+            !isset($json["results"]) ||
+            !is_array($json["results"])
+        ){
+
+            throw new Exception("Failed to access results object");
+        }
+
+        foreach($json["results"] as $image){
+
+            // skip entries that lack the fields needed to build URLs,
+            // rather than emitting warnings and broken links
+            if(
+                !isset(
+                    $image["responsive_url"],
+                    $image["grid"]["domain"],
+                    $image["imageId"]
+                )
+            ){
+
+                continue;
+            }
+
+            $responsive_url = (string)$image["responsive_url"];
+            $image_domain = parse_url("https://" . $responsive_url, PHP_URL_HOST);
+
+            if(
+                !is_string($image_domain) ||
+                $image_domain === ""
+            ){
+
+                // explode() rejects an empty separator
+                continue;
+            }
+
+            // path portion of the image URL (everything after the host)
+            $thumbnail = explode($image_domain, $responsive_url, 2)[1] ?? "";
+
+            if(substr($thumbnail, 0, 3) !== "/1/"){
+
+                // drop the leading path segment
+                $thumbnail =
+                    preg_replace(
+                        '/^\/[^\/]+/',
+                        "",
+                        $thumbnail
+                    );
+            }
+
+            $thumbnail =
+                "https://img.vsco.co/cdn-cgi/image/width=" . self::THUMB_WIDTH .
+                ",height=" . self::THUMB_HEIGHT . $thumbnail;
+
+            $width = (int)($image["dimensions"]["width"] ?? 0);
+            $height = (int)($image["dimensions"]["height"] ?? 0);
+
+            $size = $this->image_ratio($width, $height);
+
+            $out["image"][] = [
+                "title" => $image["description"] ?? null,
+                "source" => [
+                    [
+                        "url" => "https://" . $responsive_url,
+                        "width" => $width,
+                        "height" => $height
+                    ],
+                    [
+                        "url" => $thumbnail,
+                        "width" => $size[0],
+                        "height" => $size[1]
+                    ]
+                ],
+                "url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
+            ];
+        }
+
+        // get NPT
+        // derive the page count from the page size actually requested
+        $page_size = max(1, (int)($data["pagination"]["size"] ?? self::PAGE_SIZE));
+        $max_page = (int)ceil((int)($json["total"] ?? 0) / $page_size);
+        $data["pagination"]["page"] = (int)($data["pagination"]["page"] ?? 0) + 1;
+
+        if($max_page > $data["pagination"]["page"]){
+
+            $out["npt"] =
+                $this->backend->store(
+                    json_encode($data),
+                    "images",
+                    $proxy
+                );
+        }
+
+        return $out;
+    }
+
+    /**
+     * Scales dimensions to fit the thumbnail bounding box while keeping
+     * the aspect ratio. Images smaller than the box are scaled up.
+     *
+     * Uses integer arithmetic so the limiting side is always exactly the
+     * box size, with no float rounding error.
+     *
+     * @param int $width Source width in pixels
+     * @param int $height Source height in pixels
+     * @return int[] [width, height] of the scaled image; the bounding box
+     *               itself when either source dimension is not positive
+     */
+    private function image_ratio($width, $height){
+
+        $width = (int)$width;
+        $height = (int)$height;
+
+        if(
+            $width <= 0 ||
+            $height <= 0
+        ){
+
+            // unknown dimensions: avoid dividing by zero
+            return [self::THUMB_WIDTH, self::THUMB_HEIGHT];
+        }
+
+        // same test as (480 / width) < (360 / height), cross-multiplied
+        if(self::THUMB_WIDTH * $height < self::THUMB_HEIGHT * $width){
+
+            // width is the limiting side
+            return [
+                self::THUMB_WIDTH,
+                intdiv($height * self::THUMB_WIDTH, $width)
+            ];
+        }
+
+        // height is the limiting side
+        return [
+            intdiv($width * self::THUMB_HEIGHT, $height),
+            self::THUMB_HEIGHT
+        ];
+    }
+}