mirror of
https://github.com/multipleof4/lynchmark.git
synced 2026-01-13 16:17:54 +00:00
73 lines
3.6 KiB
HTML
73 lines
3.6 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Lynchmark – LLM Benchmark</title>
  <meta name="description" content="Lynchmark is an LLM benchmark focused on JavaScript code generation, including importing a library from a CDN by its URL path.">
  <!-- Open connections to the font hosts early so the stylesheet and the font files it references start loading sooner. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=DM+Serif+Display:ital@0;1&family=IBM+Plex+Mono:wght@400;500&display=swap" rel="stylesheet">
  <!-- Tailwind loaded from its CDN script; the utility classes used in the markup and in the module script depend on it. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <style>
    /* Base typeface for the page; .mono is used for model names, timings and the "Last updated" badge. */
    body{font-family:Inter,system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif}
    .mono{font-family:"IBM Plex Mono",ui-monospace,SFMono-Regular,Menlo,monospace}
  </style>
</head>
|
||
<body class="bg-gray-50 text-gray-800">
|
||
<main class="max-w-2xl mx-auto flex flex-col min-h-screen p-6 lg:p-8">
|
||
<header class="text-center mb-10">
|
||
<div class="relative inline-block">
|
||
<h1 class="text-4xl font-bold text-gray-900 mb-2">Lynchmark</h1>
|
||
<span class="mono pointer-events-none absolute -top-2 -right-3 inline-flex items-center rounded-full border border-green-200 bg-green-50 text-green-700 text-[10px] leading-none font-medium px-1.5 py-0.5 shadow-sm">
|
||
Last updated <time id="last-updated" class="ml-1"></time>
|
||
</span>
|
||
</div>
|
||
<p class="text-base text-gray-600 max-w-lg mx-auto">This benchmark focuses on JavaScript code generation. Each model must also import a library from a CDN, so it has to know the library's URL path.</p>
|
||
</header>
|
||
|
||
<div id="results-container" class="flex flex-col gap-6 flex-grow">
|
||
</div>
|
||
</main>
|
||
<script type="module">
|
||
// DOM nodes this script writes to: the list of result cards and the "Last updated" badge.
const container = document.getElementById('results-container');
const updatedEl = document.getElementById('last-updated');

// The badge shows the month and year at page-load time.
const now = new Date();
updatedEl.textContent = now.toLocaleDateString('en-US', { month: 'short', year: 'numeric' });

// Build the machine-readable date from local-time parts so it always agrees with the visible
// label above; toISOString() reports UTC and can fall on a different day (or month) locally.
updatedEl.dateTime = [now.getFullYear(), now.getMonth() + 1, now.getDate()]
  .map(part => String(part).padStart(2, '0'))
  .join('-');
|
||
|
||
/**
 * Renders one result card per model and runs every benchmark test against that model's output.
 *
 * The model list is read from between the MODELS_START / MODELS_END markers in ./README and the
 * generation times from ./results.json. For each model/test pair the test harness and the
 * model's generated module are imported, the test is run, and a ✅ / ❌ row with the generation
 * time is appended to the model's card. If the README or results file cannot be loaded, the
 * error is logged and a message is shown in the results container.
 *
 * @returns {Promise<void>} resolves once every row has been rendered (or the load error shown).
 */
const run = async () => {
  const tests = ['1_dijkstra', '2_convex_hull', '3_lis', '4_determinant'];

  // fetch() only rejects on network failure, so HTTP error statuses are turned into exceptions here.
  const load = async (url) => {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`Failed to load ${url}: HTTP ${res.status}`);
    return res;
  };

  // Small element factory. Text goes through textContent so strings read from the README are
  // never parsed as markup.
  const el = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  };

  let models;
  let genTimes;
  try {
    // The two files are independent, so request them in parallel.
    const [readme, times] = await Promise.all([
      load('./README').then(r => r.text()),
      load('./results.json').then(r => r.json()),
    ]);
    // Accept both LF and CRLF line endings around the markers and between model names.
    const block = readme.match(/<!-- MODELS_START -->\r?\n([\s\S]+?)\r?\n<!-- MODELS_END -->/);
    if (!block) throw new Error('MODELS_START / MODELS_END markers not found in README');
    models = block[1].split(/\r?\n/).map(line => line.trim()).filter(Boolean);
    genTimes = times ?? {};
  } catch (e) {
    console.error('Unable to load benchmark data: ', e);
    container.append(el('p', 'text-sm text-red-700', 'Unable to load benchmark data.'));
    return;
  }

  for (const model of models) {
    // Model names contain "/" and ":"; output files use "_" in their place.
    const sModel = model.replace(/[\/:]/g, '_');

    const card = el('section', 'rounded-2xl border border-gray-200 bg-white shadow-sm overflow-hidden');
    const cardHeader = el('div', 'bg-gray-50 px-5 py-3 border-b border-gray-200');
    // A heading gives each card's <section> an accessible name; the classes keep the same look.
    cardHeader.append(el('h2', 'mono text-sm text-gray-700 font-medium', model));
    const list = el('ul', 'p-4 space-y-2');
    list.id = `list-${sModel}`;
    card.append(cardHeader, list);
    // Attach the card before running tests so rows appear as each test finishes.
    container.appendChild(card);

    for (const test of tests) {
      let passed = false;
      try {
        const tMod = await import(`./tests/${test}/test.js`);
        const lMod = await import(`./tests/${test}/outputs/${sModel}.js`);
        await tMod.default.runTest(lMod.default);
        passed = true;
      } catch (e) {
        // A missing module and a failing test are both reported as a failed row.
        console.error(`${model} - ${test}: `, e);
      }

      // Only append the unit when there is a number to show, so a missing time reads "N/A", not "N/As".
      const seconds = genTimes[model]?.[test];
      const timeLabel = Number.isFinite(seconds) ? `${seconds.toFixed(3)}s` : 'N/A';

      const item = el('li', 'flex items-center gap-3 text-sm');
      item.append(
        passed ? '✅ ' : '❌ ',
        el('span', 'font-medium text-gray-800', test),
        el('span', 'mono text-gray-500 ml-auto', timeLabel),
      );
      list.append(item);
    }
  }
};

run();
|
||
</script>
|
||
</body>
|
||
</html>
|