Feat: Add async garbage collection to SurrealDB rate limits

This commit is contained in:
2026-02-23 10:20:45 -08:00
parent 406dd41258
commit d763f06415

View File

@@ -3,94 +3,82 @@ export async function onRequest(context) {
const url = new URL(request.url); const url = new URL(request.url);
const path = params.path?.join("/") || ""; const path = params.path?.join("/") || "";
// Serve static assets for root or standard files
if (!path || path === "index.html" || path === "favicon.ico" || path === "robots.txt" || path === "limit.webp" || path === "bad.webp") { if (!path || path === "index.html" || path === "favicon.ico" || path === "robots.txt" || path === "limit.webp" || path === "bad.webp") {
return env.ASSETS.fetch(request); return env.ASSETS.fetch(request);
} }
// Reject literal dots and slashes (bot probes like info.php or wp-admin/setup-config.php)
// We check the raw pathname (excluding leading slash and trailing slashes) to allow encoded dots (%2E) and slashes (%2F)
const rawQueryPart = url.pathname.slice(1).replace(/\/+$/, ""); const rawQueryPart = url.pathname.slice(1).replace(/\/+$/, "");
if (rawQueryPart.includes(".") || rawQueryPart.includes("/")) { if (rawQueryPart.includes(".") || rawQueryPart.includes("/")) {
const badReq = new Request(new URL("/bad.webp", url.origin)); return env.ASSETS.fetch(new Request(new URL("/bad.webp", url.origin)));
return env.ASSETS.fetch(badReq);
} }
const query = normalizeQuery(path); const query = normalizeQuery(path);
if (!query) { if (!query) return jsonResponse(400, { error: "Empty query" });
return jsonResponse(400, { error: "Empty query" }); if (query.length > 200) return jsonResponse(400, { error: "Query too long (max 200 characters)" });
}
// Max query length: 200 chars after normalization
if (query.length > 200) {
return jsonResponse(400, { error: "Query too long (max 200 characters)" });
}
const cacheKey = query; const cacheKey = query;
const r2Key = await sha256(query); const r2Key = await sha256(query);
// 1. Check KV cache
const cached = await env.DIRECT_IMG_CACHE.get(cacheKey, "json"); const cached = await env.DIRECT_IMG_CACHE.get(cacheKey, "json");
if (cached) { if (cached) {
const obj = await env.R2_IMAGES.get(r2Key); const obj = await env.R2_IMAGES.get(r2Key);
if (obj) { if (obj) {
const nowSec = Math.floor(Date.now() / 1000); const nowSec = Math.floor(Date.now() / 1000);
const thirtyDaysSec = 30 * 24 * 60 * 60; const remainingSec = Math.max(0, (cached.t + 2592000) - nowSec);
const remainingSec = Math.max(0, (cached.t + thirtyDaysSec) - nowSec); return new Response(obj.body, { headers: imageHeaders(cached.ct, remainingSec * 1000) });
return new Response(obj.body, {
headers: imageHeaders(cached.ct, remainingSec * 1000),
});
} }
} }
// 2. Cache miss — check rate limit via KV list
const ip = request.headers.get("cf-connecting-ip") || "unknown"; const ip = request.headers.get("cf-connecting-ip") || "unknown";
const today = new Date().toISOString().slice(0, 10); const today = new Date().toISOString().slice(0, 10);
const ratePrefix = `${ip}:${today}:`; const rateId = await sha256(`${ip}:${today}`);
let count = 1;
const rateList = await env.DIRECT_IMG_RATE.list({ prefix: ratePrefix }); if (env.SURREAL_URL && env.SURREAL_USER && env.SURREAL_PASS) {
const count = rateList.keys.length; const auth = btoa(`${env.SURREAL_USER}:${env.SURREAL_PASS}`);
// Atomic upsert + increment, while recording the timestamp
const sql = `UPDATE rate:\`${rateId}\` SET count += 1, updated_at = time::now() RETURN count;`;
if (count >= 15) { try {
context.waitUntil(notify(env, { const dbRes = await fetch(`${env.SURREAL_URL}/sql`, {
title: "Rate Limit Hit", method: "POST",
message: `IP ${ip} reached limit for: ${query}`, headers: { "Accept": "application/json", "Authorization": `Basic ${auth}`, "NS": "direct_img", "DB": "rate_limit" },
tags: "warning,no_entry", body: sql
priority: 2
}));
const limitReq = new Request(new URL("/limit.webp", url.origin));
return env.ASSETS.fetch(limitReq);
}
// Write a unique rate key BEFORE doing the search (claim the slot)
// TTL of 25 hours is enough to cover the remainder of the UTC day
const rateEntryKey = `${ratePrefix}${Date.now()}-${crypto.randomUUID()}`;
await env.DIRECT_IMG_RATE.put(rateEntryKey, "1", {
expirationTtl: 25 * 60 * 60,
}); });
// Notify of a new search (Cache Miss) if (dbRes.ok) {
context.waitUntil(notify(env, { const data = await dbRes.json();
title: "New Search", if (data[0]?.status === "OK" && data[0]?.result?.length > 0) count = data[0].result[0].count;
message: `Query: ${query} (Search #${count + 1} for ${ip})\n${url.origin}/${path}`, }
tags: "mag",
priority: 3 // Background cleanup: ~5% chance to sweep records older than 25h asynchronously
})); if (Math.random() < 0.05) {
context.waitUntil(
fetch(`${env.SURREAL_URL}/sql`, {
method: "POST",
headers: { "Accept": "application/json", "Authorization": `Basic ${auth}`, "NS": "direct_img", "DB": "rate_limit" },
body: `DELETE rate WHERE updated_at < time::now() - 25h;`
}).catch(() => {})
);
}
} catch (err) {
console.error("SurrealDB fetch failed:", err);
}
}
if (count > 15) {
context.waitUntil(notify(env, { title: "Rate Limit Hit", message: `IP ${ip} hit limit for: ${query}`, tags: "warning,no_entry", priority: 2 }));
return env.ASSETS.fetch(new Request(new URL("/limit.webp", url.origin)));
}
context.waitUntil(notify(env, { title: "New Search", message: `Query: ${query} (Search #${count} for ${ip})\n${url.origin}/${path}`, tags: "mag", priority: 3 }));
// 3. Fetch from Brave Image Search (returns array of potential URLs)
const imageUrls = await braveImageSearch(query, env.BRAVE_API_KEY); const imageUrls = await braveImageSearch(query, env.BRAVE_API_KEY);
if (!imageUrls || imageUrls.length === 0) { if (!imageUrls || imageUrls.length === 0) {
context.waitUntil(notify(env, { context.waitUntil(notify(env, { title: "Search Failed", message: `No results for: ${query}`, tags: "question", priority: 3 }));
title: "Search Failed",
message: `No results found for: ${query}`,
tags: "question",
priority: 3
}));
return jsonResponse(404, { error: "No image found for query" }); return jsonResponse(404, { error: "No image found for query" });
} }
// 4. Robust Fetch: Try all results with a 20s global deadline
const GLOBAL_DEADLINE = Date.now() + 20000; const GLOBAL_DEADLINE = Date.now() + 20000;
let imgResult = null; let imgResult = null;
@@ -102,73 +90,32 @@ export async function onRequest(context) {
} }
if (!imgResult) { if (!imgResult) {
context.waitUntil(notify(env, { context.waitUntil(notify(env, { title: "Fetch Error (502)", message: `All sources failed for: ${query}`, tags: "boom,x", priority: 4 }));
title: "Fetch Error (502)",
message: `All sources failed for: ${query}`,
tags: "boom,x",
priority: 4
}));
return jsonResponse(502, { error: "Failed to fetch image from all available sources" }); return jsonResponse(502, { error: "Failed to fetch image from all available sources" });
} }
const { buffer: imgBuffer, contentType: finalContentType } = imgResult; const { buffer: imgBuffer, contentType: finalContentType } = imgResult;
await env.R2_IMAGES.put(r2Key, imgBuffer, { httpMetadata: { contentType: finalContentType } });
// 5. Store in R2 const TTL_SECONDS = 2592000; // 30 days
await env.R2_IMAGES.put(r2Key, imgBuffer, { await env.DIRECT_IMG_CACHE.put(cacheKey, JSON.stringify({ t: Math.floor(Date.now() / 1000), ct: finalContentType }), { expirationTtl: TTL_SECONDS });
httpMetadata: { contentType: finalContentType },
});
// 6. Store in KV cache (TTL 30 days) return new Response(imgBuffer, { headers: imageHeaders(finalContentType, TTL_SECONDS * 1000) });
const nowSec = Math.floor(Date.now() / 1000);
const TTL_SECONDS = 30 * 24 * 60 * 60;
await env.DIRECT_IMG_CACHE.put(cacheKey, JSON.stringify({ t: nowSec, ct: finalContentType }), {
expirationTtl: TTL_SECONDS,
});
return new Response(imgBuffer, {
headers: imageHeaders(finalContentType, TTL_SECONDS * 1000),
});
} }
/**
 * Sends a notification to the ntfy endpoint configured in `env.NTFY_URL`.
 *
 * Does nothing when `env.NTFY_URL` is unset. Delivery is best-effort: failures
 * are logged and never thrown, so the function is safe to hand to
 * `context.waitUntil` without affecting the response.
 *
 * @param {object} env - Environment bindings; only `NTFY_URL` is read.
 * @param {object} notification
 * @param {string} notification.title - Sent as the `Title` header.
 * @param {string} notification.message - Sent as the request body.
 * @param {string} notification.tags - Sent as the `Tags` header.
 * @param {number} notification.priority - Sent (stringified) as the `Priority` header.
 * @returns {Promise<void>}
 */
async function notify(env, { title, message, tags, priority }) {
  if (!env.NTFY_URL) return;

  // The variable may be configured without a scheme; default to HTTPS.
  const endpoint = env.NTFY_URL.startsWith("http") ? env.NTFY_URL : `https://${env.NTFY_URL}`;

  try {
    const res = await fetch(endpoint, {
      method: "POST",
      body: message,
      headers: {
        "Title": title,
        "Tags": tags,
        "Priority": String(priority),
      },
    });
    // A non-2xx reply is also a failed delivery; surface it in the logs.
    if (res?.ok === false) {
      console.error("Notification failed with status", res.status);
    }
  } catch (err) {
    // Never let a notification problem break the request, but do not hide it.
    console.error("Notification failed", err);
  }
}
/**
 * Turns the raw request path into the canonical search query used for cache
 * keys and upstream searches.
 *
 * Steps: treat `+` as a space and percent-decode; lower-case; trim; drop ASCII
 * control characters; strip trailing slashes; collapse whitespace runs to a
 * single space; trim again so that slash stripping cannot leave a dangling
 * space (e.g. "cats%20/" yields "cats", not "cats ").
 *
 * @param {string} path - Request path without the leading slash.
 * @returns {string} The normalized query; may be empty.
 */
function normalizeQuery(path) {
  let decoded;
  try {
    decoded = decodeURIComponent(path.replace(/\+/g, " "));
  } catch {
    // Malformed percent-encoding (e.g. "100%"): fall back to the raw path
    // unchanged rather than rejecting the request.
    decoded = path;
  }

  return decoded
    .toLowerCase()
    .trim()
    .replace(/[\x00-\x1f]/g, "")
    .replace(/\/+$/, "")
    .replace(/\s+/g, " ")
    .trim();
}
@@ -178,72 +125,38 @@ async function sha256(str) {
} }
/**
 * Queries the Brave image search API and returns candidate image URLs.
 *
 * For each result the full-size URL (`properties.url`) is preferred and the
 * thumbnail (`thumbnail.src`) is used as a fallback; results with neither are
 * dropped.
 *
 * @param {string} query - Search query; it is URL-encoded here.
 * @param {string} apiKey - Sent as the `X-Subscription-Token` header.
 * @returns {Promise<string[] | null>} A non-empty list of URLs, or `null` when
 *   the request fails, the reply is not usable, or there are no results.
 */
async function braveImageSearch(query, apiKey) {
  const searchUrl = `https://api.search.brave.com/res/v1/images/search?q=${encodeURIComponent(query)}&count=50&safesearch=off`;

  try {
    const res = await fetch(searchUrl, {
      headers: {
        "Accept": "application/json",
        "X-Subscription-Token": apiKey,
      },
    });
    if (!res.ok) return null;

    const data = await res.json();
    if (!Array.isArray(data?.results)) return null;

    const urls = data.results
      .map((r) => r?.properties?.url || r?.thumbnail?.src)
      .filter(Boolean);

    // Keep "nothing found" a single value (null) so callers need one check.
    return urls.length > 0 ? urls : null;
  } catch (err) {
    // Network failure or invalid JSON: report as "no results" instead of
    // letting the rejection escape to the request handler.
    console.error("Brave image search failed:", err);
    return null;
  }
}
/**
 * Downloads one candidate image, enforcing a timeout, an `image/*` content
 * type and a 10 MiB size cap.
 *
 * Any failure (network error, timeout, non-2xx status, wrong type, too large)
 * yields `null` so the caller can simply move on to the next candidate.
 *
 * NOTE(review): every `image/*` type is accepted, including `image/svg+xml`,
 * and the bytes are later re-served by this service — confirm that is intended.
 *
 * @param {string} imageUrl - URL of the image to download.
 * @param {number} [timeoutMs=5000] - Abort the request after this many milliseconds.
 * @returns {Promise<{ buffer: ArrayBuffer, contentType: string } | null>}
 */
async function fetchImage(imageUrl, timeoutMs = 5000) {
  const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10485760

  try {
    const res = await fetch(imageUrl, {
      headers: {
        "User-Agent": "Mozilla/5.0",
        "Accept": "image/avif,image/webp,image/*,*/*;q=0.8",
      },
      redirect: "follow",
      signal: AbortSignal.timeout(timeoutMs),
      cf: { cacheTtl: 0 },
    });
    if (!res.ok) return null;

    // Read the header once; MIME types are case-insensitive.
    const contentType = res.headers.get("content-type") || "";
    if (!contentType.toLowerCase().startsWith("image/")) return null;

    // Cheap pre-check on the declared size before buffering the body.
    const declaredSize = res.headers.get("content-length");
    if (declaredSize && Number.parseInt(declaredSize, 10) > MAX_IMAGE_BYTES) return null;

    // The header can be absent or wrong, so verify the real size as well.
    const buffer = await res.arrayBuffer();
    if (buffer.byteLength > MAX_IMAGE_BYTES) return null;

    return { buffer, contentType };
  } catch {
    // Deliberate best-effort: a failed candidate is simply skipped.
    return null;
  }
}
/**
 * Builds the response headers used for every served image.
 *
 * @param {string} contentType - Value for the `Content-Type` header.
 * @param {number} maxAgeMs - Remaining cache lifetime in milliseconds. It is
 *   floored to whole seconds; negative or non-finite values become 0.
 * @returns {Record<string, string>} Header name/value pairs.
 */
function imageHeaders(contentType, maxAgeMs) {
  const wholeSeconds = Math.floor(maxAgeMs / 1000);
  // Guard against NaN/Infinity so the header never reads "max-age=NaN".
  const maxAgeSec = Number.isFinite(wholeSeconds) ? Math.max(0, wholeSeconds) : 0;

  return {
    "Content-Type": contentType,
    "Cache-Control": `public, max-age=${maxAgeSec}`,
    "Access-Control-Allow-Origin": "*",
    "X-Content-Type-Options": "nosniff",
  };
}
/**
 * Builds a JSON response with a permissive CORS header.
 *
 * @param {number} status - HTTP status code of the response.
 * @param {*} body - Value serialized with `JSON.stringify` as the response body.
 * @returns {Response}
 */
function jsonResponse(status, body) {
  return new Response(JSON.stringify(body), {
    status,
    headers: {
      "Content-Type": "application/json",
      "Access-Control-Allow-Origin": "*",
    },
  });
}