From cdf958d29333d448f4521f4d2faa2592b58e9b27 Mon Sep 17 00:00:00 2001 From: lolcat Date: Sun, 10 Aug 2025 21:55:15 -0400 Subject: fix wikipedia crash --- data/config.php | 178 ++++++++++++++++++++++++++++++++++++++++++++++++ data/fonts/captcha.ttf | Bin 0 -> 125972 bytes data/proxies/.gitignore | 3 + data/proxies/onion.txt | 13 ++++ 4 files changed, 194 insertions(+) create mode 100644 data/config.php create mode 100644 data/fonts/captcha.ttf create mode 100644 data/proxies/.gitignore create mode 100644 data/proxies/onion.txt (limited to 'data') diff --git a/data/config.php b/data/config.php new file mode 100644 index 0000000..2fd47aa --- /dev/null +++ b/data/config.php @@ -0,0 +1,178 @@ + tag on home page + const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck."; + + // Will be shown in server list ping (null for no description) + const SERVER_LONG_DESCRIPTION = null; + + // Add your own themes in "static/themes". Set to "Dark" for default theme. + // Eg. To use "static/themes/Cream.css", specify "Cream". + const DEFAULT_THEME = "Dark"; + + // Enable the API? + const API_ENABLED = true; + + // + // BOT PROTECTION + // + + // 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users need a pass) + // VERY useful against a targeted attack + const BOT_PROTECTION = 0; + + // if BOT_PROTECTION is set to 1, specify the available datasets here + // images should be named from 1.png to X.png, and be 100x100 in size + // Eg. data/captcha/birds/1.png up to 2263.png + const CAPTCHA_DATASET = [ + // example: + //["birds", 2263], + //["fumo_plushies", 1006], + //["minecraft", 848] + ]; + + // If this regex matches on the user agent, it blocks the request + // Not useful at all against a targeted attack + const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i'; + + // Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!) 
+ // Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"]; + // Useful for blocking *some* proxies used for botting + const FILTERED_HEADER_KEYS = [ + //"x-forwarded-for", + //"x-cluster-client-ip", + //"x-client-ip", + //"x-real-ip", + //"client-ip", + //"real-ip", + //"forwarded-for", + //"forwarded-for-ip", + //"forwarded", + //"proxy-connection", + //"remote-addr", + //"via" + ]; + + // Block SSL ciphers used by CLI tools used for botting + // Basically a primitive version of Cloudflare's browser integrity check + // ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config ** + // https://git.lolcat.ca/lolcat/4get/docs/apache2.md + const DISALLOWED_SSL = [ + // "TLS_AES_256_GCM_SHA384" // used by WGET and CURL + ]; + + // Maximum number of searches per captcha key/pass issued. Counter gets + // reset on every APCU cache clear (should happen once a day). + // Only useful when BOT_PROTECTION is NOT set to 0 + const MAX_SEARCHES = 100; + + // List of domains that point to your servers. Include your tor/i2p + // addresses here! Must be a valid URL. Won't affect links placed on + // the homepage. + const ALT_ADDRESSES = [ + //"https://4get.alt-tld", + //"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion" + ]; + + // Known 4get instances. MUST use the https protocol if your instance uses + // it. Is used to generate a distributed list of instances. + // To appear in the list of an instance, contact the host and if everyone added + // each other, your server should appear everywhere. 
+ const INSTANCES = [ + "https://4get.ca", + "https://4get.zzls.xyz", + "https://4getus.zzls.xyz", + "https://4get.silly.computer", + "https://4get.konakona.moe", + "https://4get.lvkaszus.pl", + "https://4g.ggtyler.dev", + "https://4get.perennialte.ch", + "https://4get.sijh.net", + "https://4get.hbubli.cc", + "https://4get.plunked.party", + "https://4get.etenie.pl", + "https://4get.lunar.icu", + "https://4get.dcs0.hu", + "https://4get.kizuki.lol", + "https://4get.psily.garden", + "https://search.milivojevic.in.rs", + "https://4get.snine.nl", + "https://4get.datura.network", + "https://4get.neco.lol", + "https://4get.lol", + "https://4get.ch", + "https://4get.edmateo.site", + "https://4get.sudovanilla.org", + "https://search.mint.lgbt" + ]; + + // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages + // Changing this might break things. + const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0"; + + // Proxy pool assignments for each scraper + // false = Use server's raw IP + // string = will load a proxy list from data/proxies + // Eg. 
"onion" will load data/proxies/onion.txt + const PROXY_DDG = false; // duckduckgo + const PROXY_BRAVE = false; + const PROXY_FB = false; // facebook + const PROXY_GOOGLE = false; + const PROXY_GOOGLE_API = false; + const PROXY_GOOGLE_CSE = false; + const PROXY_STARTPAGE = false; + const PROXY_QWANT = false; + const PROXY_BAIDU = false; + const PROXY_COCCOC = false; + const PROXY_GHOSTERY = false; + const PROXY_MARGINALIA = false; + const PROXY_MOJEEK = false; + const PROXY_SC = false; // soundcloud + const PROXY_SPOTIFY = false; + const PROXY_SOLOFIELD = false; + const PROXY_WIBY = false; + const PROXY_CURLIE = false; + const PROXY_YT = false; // youtube + const PROXY_SEPIASEARCH = false; + const PROXY_ODYSEE = false; + const PROXY_VIMEO = false; + const PROXY_YEP = false; + const PROXY_PINTEREST = false; + const PROXY_SANKAKUCOMPLEX = false; + const PROXY_FLICKR = false; + const PROXY_FIVEHPX = false; + const PROXY_VSCO = false; + const PROXY_SEZNAM = false; + const PROXY_NAVER = false; + const PROXY_GREPPR = false; + const PROXY_CROWDVIEW = false; + const PROXY_MWMBL = false; + const PROXY_FTM = false; // findthatmeme + const PROXY_IMGUR = false; + const PROXY_CARA = false; + const PROXY_YANDEX_W = false; // yandex web + const PROXY_YANDEX_I = false; // yandex images + const PROXY_YANDEX_V = false; // yandex videos + + // + // Scraper-specific parameters + // + + // GOOGLE CSE & GOOGLE API + const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0"; + + // MARGINALIA + // Use "null" to default out to HTML scraping OR specify a string to + // use the API (Eg: "public"). API has fewer filters. 
+ const MARGINALIA_API_KEY = null; +} diff --git a/data/fonts/captcha.ttf b/data/fonts/captcha.ttf new file mode 100644 index 0000000..13f5dc7 Binary files /dev/null and b/data/fonts/captcha.ttf differ diff --git a/data/proxies/.gitignore b/data/proxies/.gitignore new file mode 100644 index 0000000..68f8e48 --- /dev/null +++ b/data/proxies/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!onion.txt diff --git a/data/proxies/onion.txt b/data/proxies/onion.txt new file mode 100644 index 0000000..28ab436 --- /dev/null +++ b/data/proxies/onion.txt @@ -0,0 +1,13 @@ +# Specify proxies by following this format: +# protocol:address:port:username:password
+# +# Examples: +# https:1.3.3.7:6969:abcd:efg +# socks4:1.2.3.4:8080:: +# raw_ip:::: +# +# Available protocols: +# raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname + +# Local tor proxy +socks5:localhost:9050:: -- cgit v1.2.3