Files
lynchmark/index.html

156 lines
8.1 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Lynchmark LLM Benchmark</title>
<meta property="og:title" content="Lynchmark LLM Benchmark">
<meta property="og:site_name" content="Lynchmark">
<meta name="description" content="Lynchmark tests LLMs by requiring correct CDN imports and library-specific implementations to solve challenging browser-based JavaScript tasks.">
<meta property="og:description" content="Lynchmark tests LLMs by requiring correct CDN imports and library-specific implementations to solve challenging browser-based JavaScript tasks.">
<meta property="og:type" content="website">
<meta property="og:url" content="https://lynchmark.com/">
<link rel="canonical" href="https://lynchmark.com/">
<script type="application/ld+json">
{
"@context":"https://schema.org",
"@type":"WebSite",
"name":"Lynchmark",
"url":"https://lynchmark.com/",
"description":"Lynchmark is an automated benchmark for LLM coding abilities in a real browser+CDN environment."
}
</script>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=DM+Serif+Display:ital@0;1&family=IBM+Plex+Mono:wght@400;500&display=swap" rel="stylesheet">
<script src="https://cdn.tailwindcss.com"></script>
<style>
@font-face{font-family:"Stain";src:url("https://cdn.jsdelivr.net/gh/multipleof4/stain.otf@master/dist/Stain.otf") format("opentype")}
body{font-family:Inter,system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif}
.mono{font-family:"IBM Plex Mono",ui-monospace,SFMono-Regular,Menlo,monospace}
</style>
</head>
<body class="bg-gray-50 text-gray-800">
<main class="max-w-2xl mx-auto flex flex-col min-h-screen p-6 lg:p-8">
<header class="text-center mb-10">
<div class="relative inline-block">
<h1 class="text-4xl font-bold text-gray-900 mb-2">Lynchmark</h1>
<span class="mono pointer-events-none absolute -top-2 -right-3 inline-flex items-center rounded-full border border-green-200 bg-green-50 text-green-700 text-xs leading-none font-medium px-2 py-1 shadow-sm">
Last updated <time id="last-updated" class="ml-1"></time>
</span>
</div>
<p class="text-base text-gray-600 max-w-lg mx-auto" style="font-family:Stain,sans-serif">
This benchmark tests a model's knowledge by tasking it with importing the right library from the right CDN URL path and drawing on pre-existing, library-specific knowledge to correctly implement a solution to each challenging problem in the browser using JavaScript.
</p>
</header>
<div id="results-container" class="flex flex-col gap-6 flex-grow">
</div>
<div class="mt-12 text-center space-y-2">
<a href="/blog/gemini-optimal-temperature.html" class="block text-sm text-blue-500 hover:text-blue-700 font-medium mono">blog/gemini-optimal-temperature</a>
<a href="/blog/lynchmark-newsletter-experiment.html" class="block text-sm text-blue-500 hover:text-blue-700 font-medium mono">blog/lynchmark-newsletter-experiment</a>
</div>
<footer class="mt-10 flex justify-center">
<a
href="https://github.com/multipleof4/lynchmark"
class="inline-flex items-center gap-2 text-gray-600 hover:text-gray-900"
target="_blank"
rel="noopener noreferrer"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
aria-hidden="true"
class="w-5 h-5 fill-current"
>
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38
0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52
0-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95
0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0
1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15
0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2
0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
</svg>
<span class="mono text-xs font-medium">@multipleof4/lynchmark</span>
</a>
</footer>
</main>
<script type="module">
// Shorthand for document.getElementById.
const get=id=>document.getElementById(id);
// Mount point for the per-model result cards.
const container=get('results-container');
// <time> element inside the "Last updated" badge in the header.
const updatedEl=get('last-updated');
// NOTE(review): this is the visitor's current date, not a recorded publish date — confirm that is the intended meaning of "Last updated".
const now=new Date();
updatedEl.textContent=now.toLocaleDateString('en-US',{month:'short',year:'numeric'});
// Build the machine-readable date from local-time getters so it always agrees with the
// local-time label above; toISOString() is UTC and can fall in a different month (or year)
// than the label when the page is viewed near a month boundary.
const pad2=n=>String(n).padStart(2,'0');
updatedEl.dateTime=`${now.getFullYear()}-${pad2(now.getMonth()+1)}-${pad2(now.getDate())}`;
// Grade floors, ordered from the highest threshold down; the order matters because
// gradeOf takes the first floor the ratio reaches.
const grades=[
  [.97,'A+'],[.93,'A'],[.9,'A-'],
  [.87,'B+'],[.83,'B'],[.8,'B-'],
  [.77,'C+'],[.73,'C'],[.7,'C-'],
  [.6,'D'],
  [0,'F']
];
/**
 * Converts a pass ratio into a letter grade.
 * @param {number} ratio Fraction of passed tests.
 * @returns {string} Label of the first entry in `grades` whose floor is <= ratio.
 * @throws {TypeError} If no floor is reached (a negative or NaN ratio).
 */
const gradeOf=ratio=>{
  const [,letter]=grades.find(([minimum])=>minimum<=ratio);
  return letter;
};
const run=async()=>{
const readme=await fetch('./README').then(r=>r.text());
const models=readme.match(/<!-- MODELS_START -->\n([\s\S]+?)\n<!-- MODELS_END -->/)[1].trim().split('\n');
const testsRes=await fetch('https://api.github.com/repos/multipleof4/lynchmark/contents/tests');
const testsData=await testsRes.json();
const tests=testsData.filter(d=>d.type==='dir').map(d=>d.name).sort();
for(const model of models){
const sModel=model.replace(/[\/:]/g,'_');
const card=document.createElement('section');
card.className='rounded-2xl border border-gray-200 bg-white shadow-sm overflow-hidden';
card.innerHTML=`
<div class="bg-gray-50 px-5 py-3 border-b border-gray-200">
<p class="mono text-sm text-gray-700 font-medium">${model}</p>
</div>
<ul class="p-4 space-y-2" id="list-${sModel}"></ul>`;
container.appendChild(card);
const list=get(`list-${sModel}`);
let passed=0;
let ran=0;
for(const test of tests){
const li=document.createElement('li');
li.className='flex items-center gap-3 text-sm';
list.appendChild(li);
const outUrl=`./tests/${test}/outputs/${sModel}.js`;
const srcP=fetch(outUrl).then(r=>{
if(!r.ok) throw new Error('404');
return r.text();
}).catch(()=>null);
li.innerHTML=`<svg class="animate-spin h-4 w-4 text-gray-400" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24"><circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle><path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path></svg><span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">...</span>`;
const src=await srcP;
if(src===null){
li.innerHTML=`— <span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">N/A</span>`;
continue;
}
ran++;
const resMatch=src.match(/\/\/ Result: (PASS|FAIL)/);
const status=resMatch?(resMatch[1]==='PASS'?'✅':'❌'):'❓';
if(status==='✅')passed++;
const fTime=src.match(/\/\/ Generation time: ([\d\.]+)s/)?.[1];
const timeStr=fTime?`${parseFloat(fTime).toFixed(3)}s`:'N/A';
li.innerHTML=`${status} <span class="font-medium text-gray-800">${test}</span><span class="mono text-gray-500 ml-auto">${timeStr}</span>`;
}
const ratio=ran?passed/ran:0;
const li=document.createElement('li');
li.className='mt-3 pt-3 border-t border-gray-200 flex items-center text-sm justify-between';
const grade=gradeOf(ratio);
li.innerHTML=`
<span class="text-gray-600">Score</span>
<span class="flex items-center gap-3">
<span class="mono text-gray-900 font-semibold">${passed}/${ran}</span>
<span class="inline-flex items-center rounded-full bg-gray-100 px-2 py-0.5 text-xs font-semibold text-gray-800">${grade}</span>
</span>`;
list.appendChild(li);
}
};
run();
</script>
</body>
</html>