From cdf958d29333d448f4521f4d2faa2592b58e9b27 Mon Sep 17 00:00:00 2001
From: lolcat <will@lolcat.ca>
Date: Sun, 10 Aug 2025 21:55:15 -0400
Subject: fix wikipedia crash

---
 data/config.php         | 178 ++++++++++++++++++++++++++++++++++++++++++++++++
 data/fonts/captcha.ttf  | Bin 0 -> 125972 bytes
 data/proxies/.gitignore |   3 +
 data/proxies/onion.txt  |  13 ++++
 4 files changed, 194 insertions(+)
 create mode 100644 data/config.php
 create mode 100644 data/fonts/captcha.ttf
 create mode 100644 data/proxies/.gitignore
 create mode 100644 data/proxies/onion.txt

(limited to 'data')
diff --git a/data/config.php b/data/config.php
new file mode 100644
index 0000000..2fd47aa
--- /dev/null
+++ b/data/config.php
@@ -0,0 +1,178 @@
+<?php
+class config{
+	// Welcome to the 4get configuration file
+	// When updating your instance, please make sure this file isn't missing
+	// any parameters.
+	
+	// 4get version. Please keep this updated
+	const VERSION = 8;
+	
+	// Will be shown pretty much everywhere.
+	const SERVER_NAME = "4get";
+	
+	// Will be shown in <meta> tag on home page
+	const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
+	
+	// Will be shown in server list ping (null for no description)
+	const SERVER_LONG_DESCRIPTION = null;
+	
+	// Add your own themes in "static/themes". Set to "Dark" for default theme.
+	// Eg. To use "static/themes/Cream.css", specify "Cream".
+	const DEFAULT_THEME = "Dark";
+	
+	// Enable the API?
+	const API_ENABLED = true;
+	
+	//
+	// BOT PROTECTION
+	//
+	
+	// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users need a pass)
+	// VERY useful against a targeted attack
+	const BOT_PROTECTION = 0;
+	
+	// If BOT_PROTECTION is set to 1, specify the available datasets here.
+	// Images should be named from 1.png to X.png, and be 100x100 in size
+	// Eg. data/captcha/birds/1.png up to 2263.png
+	const CAPTCHA_DATASET = [
+		// example:
+		//["birds", 2263],
+		//["fumo_plushies", 1006],
+		//["minecraft", 848]
+	];
+	
+	// If this regular expression matches the user agent, the request is blocked
+	// Not useful at all against a targeted attack
+	const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
+	
+	// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
+	// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
+	// Useful for blocking *some* proxies used for botting
+	const FILTERED_HEADER_KEYS = [
+		//"x-forwarded-for",
+		//"x-cluster-client-ip",
+		//"x-client-ip",
+		//"x-real-ip",
+		//"client-ip",
+		//"real-ip",
+		//"forwarded-for",
+		//"forwarded-for-ip",
+		//"forwarded",
+		//"proxy-connection",
+		//"remote-addr",
+		//"via"
+	];
+	
+	// Block SSL ciphers used by CLI tools used for botting
+	// Basically a primitive version of Cloudflare's browser integrity check
+	// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
+	// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
+	const DISALLOWED_SSL = [
+		// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
+	];
+	
+	// Maximum number of searches per captcha key/pass issued. Counter gets
+	// reset on every APCU cache clear (should happen once a day).
+	// Only useful when BOT_PROTECTION is NOT set to 0
+	const MAX_SEARCHES = 100;
+	
+	// List of domains that point to your servers. Include your tor/i2p
+	// addresses here! Must be a valid URL. Won't affect links placed on
+	// the homepage.
+	const ALT_ADDRESSES = [
+		//"https://4get.alt-tld",
+		//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
+	];
+	
+	// Known 4get instances. MUST use the https protocol if your instance uses
+	// it. Is used to generate a distributed list of instances.
+	// To appear in an instance's list, contact its host; once everyone has added
+	// each other, your server should appear everywhere.
+	const INSTANCES = [
+		"https://4get.ca",
+		"https://4get.zzls.xyz",
+		"https://4getus.zzls.xyz",
+		"https://4get.silly.computer",
+		"https://4get.konakona.moe",
+		"https://4get.lvkaszus.pl",
+		"https://4g.ggtyler.dev",
+		"https://4get.perennialte.ch",
+		"https://4get.sijh.net",
+		"https://4get.hbubli.cc",
+		"https://4get.plunked.party",
+		"https://4get.etenie.pl",
+		"https://4get.lunar.icu",
+		"https://4get.dcs0.hu",
+		"https://4get.kizuki.lol",
+		"https://4get.psily.garden",
+		"https://search.milivojevic.in.rs",
+		"https://4get.snine.nl",
+		"https://4get.datura.network",
+		"https://4get.neco.lol",
+		"https://4get.lol",
+		"https://4get.ch",
+		"https://4get.edmateo.site",
+		"https://4get.sudovanilla.org",
+		"https://search.mint.lgbt"
+	];
+	
+	// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
+	// Changing this might break things.
+	const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0";
+	
+	// Proxy pool assignments for each scraper
+	// false = Use server's raw IP
+	// string = will load a proxy list from data/proxies
+	// Eg. "onion" will load data/proxies/onion.txt
+	const PROXY_DDG = false; // duckduckgo
+	const PROXY_BRAVE = false;
+	const PROXY_FB = false; // facebook
+	const PROXY_GOOGLE = false;
+	const PROXY_GOOGLE_API = false;
+	const PROXY_GOOGLE_CSE = false;
+	const PROXY_STARTPAGE = false;
+	const PROXY_QWANT = false;
+	const PROXY_BAIDU = false;
+	const PROXY_COCCOC = false;
+	const PROXY_GHOSTERY = false;
+	const PROXY_MARGINALIA = false;
+	const PROXY_MOJEEK = false;
+	const PROXY_SC = false; // soundcloud
+	const PROXY_SPOTIFY = false;
+	const PROXY_SOLOFIELD = false;
+	const PROXY_WIBY = false;
+	const PROXY_CURLIE = false;
+	const PROXY_YT = false; // youtube
+	const PROXY_SEPIASEARCH = false;
+	const PROXY_ODYSEE = false;
+	const PROXY_VIMEO = false;
+	const PROXY_YEP = false;
+	const PROXY_PINTEREST = false;
+	const PROXY_SANKAKUCOMPLEX = false;
+	const PROXY_FLICKR = false;
+	const PROXY_FIVEHPX = false;
+	const PROXY_VSCO = false;
+	const PROXY_SEZNAM = false;
+	const PROXY_NAVER = false;
+	const PROXY_GREPPR = false;
+	const PROXY_CROWDVIEW = false;
+	const PROXY_MWMBL = false;
+	const PROXY_FTM = false; // findthatmeme
+	const PROXY_IMGUR = false;
+	const PROXY_CARA = false;
+	const PROXY_YANDEX_W = false; // yandex web
+	const PROXY_YANDEX_I = false; // yandex images
+	const PROXY_YANDEX_V = false; // yandex videos
+	
+	//
+	// Scraper-specific parameters
+	//
+	
+	// GOOGLE CSE & GOOGLE API
+	const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
+	
+	// MARGINALIA
+	// Use null to fall back to HTML scraping, OR specify a string to
+	// use the API (Eg: "public"). The API has fewer filters.
+	const MARGINALIA_API_KEY = null;
+}
diff --git a/data/fonts/captcha.ttf b/data/fonts/captcha.ttf
new file mode 100644
index 0000000..13f5dc7
Binary files /dev/null and b/data/fonts/captcha.ttf differ
diff --git a/data/proxies/.gitignore b/data/proxies/.gitignore
new file mode 100644
index 0000000..68f8e48
--- /dev/null
+++ b/data/proxies/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!onion.txt
diff --git a/data/proxies/onion.txt b/data/proxies/onion.txt
new file mode 100644
index 0000000..28ab436
--- /dev/null
+++ b/data/proxies/onion.txt
@@ -0,0 +1,13 @@
+# Specify proxies by following this format:
+#  <protocol>:<address>:<port>:<username>:<password>
+#
+# Examples:
+#  https:1.3.3.7:6969:abcd:efg
+#  socks4:1.2.3.4:8080::
+#  raw_ip::::
+#
+# Available protocols:
+#  raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname
+
+# Local tor proxy
+socks5:localhost:9050::
-- 
cgit v1.2.3