From 743b66f039220ad89b3eca7400f3b76040aa79dd Mon Sep 17 00:00:00 2001
From: multipleof4
Date: Wed, 3 Dec 2025 10:08:45 -0800
Subject: [PATCH] Delete blog/benchmark-analysis-2024.html
---
 blog/benchmark-analysis-2024.html | 165 ------------------------------
 1 file changed, 165 deletions(-)
 delete mode 100644 blog/benchmark-analysis-2024.html

diff --git a/blog/benchmark-analysis-2024.html b/blog/benchmark-analysis-2024.html
deleted file mode 100644
index d2c773e..0000000
--- a/blog/benchmark-analysis-2024.html
+++ /dev/null
@@ -1,165 +0,0 @@
- - - - - LLM Benchmark Analysis 2024 - Lynchmark - - - - - - - - - - - - - - - - -
- - -
-
-
Data Analysis
-

LLM Benchmark Analysis 2024

-

- 231 automated tests reveal clear performance tiers, surprising failures, and critical insights for production use. -

-
- -
-
-

Executive Summary

-
-
-

Overall Performance Ranking

-
-
- 1. Claude Opus 4.5 (TEMP 0.7) -
- 10/11 Tests Passed -
-
- 2. Gemini 3 Pro (TEMP 0.35) -
- 10/11 Tests Passed -
-
- 3. Claude Sonnet 4.5 (TEMP 0.7) -
- 9/11 Tests Passed -
-
- 4. GPT-5.1 Codex - 9/11 Tests Passed -
-
- 5. DeepSeek V3.2 - 8/11 Tests Passed -
-
-
-
- -
-

Critical Failure Analysis

-
-
-
-
-
-
-
-
-
-
-

- Scrypt Hash Test: 4 models failed due to incorrect library imports or parameter handling.

-
-
- -
-
-

The CDN Import Challenge

-

- The scrypt test proved particularly challenging, with only 4 of 8 models passing. The failures reveal a critical gap in LLM knowledge: correct library import paths for browser environments. -

-
-
-

Library-Specific Knowledge

-

- Models that used cdn.skypack.dev or incorrect version paths consistently failed. -

-

-
- -
-

Performance Insights

-
-
-
-
-
-
-
-
-
-
- Claude Opus - Gemini 3 Pro - Claude Sonnet - GPT-5.1 Codex -
-
-
- -
-

Key Findings

-
-
- - Temperature matters: Gemini at 0.35 outperformed default settings.
-
- - Claude Opus demonstrated superior library knowledge and implementation accuracy.
-
-
- - Grok-4 and Minimax M2 showed significant weaknesses in complex implementations.
-
- - -
-

- For production-grade code generation: Claude Opus 4.5 at TEMP 0.7 remains the most reliable choice across diverse coding challenges.

- -
- -
- -
- -