diff --git a/scripts/run-benchmark.js b/scripts/run-benchmark.js
index 925d608..5b5bedd 100644
--- a/scripts/run-benchmark.js
+++ b/scripts/run-benchmark.js
@@ -2,17 +2,12 @@
 import { promises as fs } from 'fs';
 import path from 'path';
 import { pathToFileURL } from 'url';
 import axios from 'axios';
-import { exec } from 'child_process';
-import { promisify } from 'util';
-import { performance } from 'perf_hooks';
-const execPromise = promisify(exec);
 const CWD = process.cwd();
 const README_PATH = path.join(CWD, 'README');
 const TESTS_DIR = path.join(CWD, 'tests');
-const TEMP_FILE = path.join(CWD, 'scripts', 'temp_test.js');
 
-const getLlmCode = async (prompt, model) => {
+const getLlmCode = async (prompt, model, functionName) => {
   try {
     const res = await axios.post(
       'https://openrouter.ai/api/v1/chat/completions',
@@ -21,27 +16,13 @@ const getLlmCode = async (prompt, model) => {
     );
     const content = res.data.choices[0].message.content;
     const code = content.match(/```(?:javascript|js)?\n([\s\S]+?)\n```/)?.[1].trim() ?? content.trim();
-    return code.replace(/^export\s+(default\s+)?/, '');
+    return `${code.replace(/^export\s+(default\s+)?/, '')}\nexport default ${functionName};`;
   } catch (error) {
     console.error(`API Error for ${model}: ${error.message}`);
     return null;
   }
 };
 
-const runTest = async (code) => {
-  const start = performance.now();
-  let passed = false;
-  try {
-    await fs.writeFile(TEMP_FILE, code);
-    await execPromise(`node ${TEMP_FILE}`);
-    passed = true;
-  } catch (error) { // eslint-disable-line no-empty
-  } finally {
-    await fs.unlink(TEMP_FILE).catch(() => {});
-  }
-  return { passed, duration: (performance.now() - start) / 1000 };
-};
-
 const main = async () => {
   const readme = await fs.readFile(README_PATH, 'utf-8');
   const models = readme.match(/\n([\s\S]+?)\n/)[1].trim().split('\n');
@@ -55,38 +36,21 @@ const main = async () => {
   const testsToRun = allTestDirs.slice(0, Math.ceil(allTestDirs.length * (percentage / 100)));
 
 
-  const results = [];
   for (const model of models) {
-    results.push(`**${model}**`);
-    for (const dir of allTestDirs) {
-      if (!testsToRun.includes(dir)) {
-        results.push(`- ${dir}: ⚪ Not Run`);
-        continue;
-      }
+    for (const dir of testsToRun) {
       const testModule = await import(pathToFileURL(path.join(TESTS_DIR, dir, 'test.js')));
-      const { prompt, harness } = testModule.default;
-      console.log(`Running ${dir} for ${model}...`);
-      const llmCode = await getLlmCode(prompt, model);
-      if (llmCode) {
-        const outDir = path.join(TESTS_DIR, dir, 'outputs');
-        await fs.mkdir(outDir, { recursive: true });
-        const fname = `${model.replace(/[\/:]/g, '_')}_${new Date().toISOString().replace(/:/g, '-')}.js`;
-        await fs.writeFile(path.join(outDir, fname), llmCode);
-        const { passed, duration } = await runTest(`${llmCode}\n${harness}`);
-        results.push(`- ${dir}: ${passed ? '✅ Pass' : '❌ Fail'} (${duration.toFixed(3)}s)`);
-      } else {
-        results.push(`- ${dir}: ❌ API Error`);
-      }
-    }
-    results.push('');
-  }
+      const { prompt, functionName } = testModule.default;
+      console.log(`Generating ${dir} for ${model}...`);
+      const llmCode = await getLlmCode(prompt, model, functionName);
+      if (!llmCode) continue;
 
-  const newReadme = readme.replace(
-    /[\s\S]*/,
-    `\n${results.join('\n').trim()}\n`
-  );
-  await fs.writeFile(README_PATH, newReadme);
-  console.log('Benchmark complete. README updated.');
+      const outDir = path.join(TESTS_DIR, dir, 'outputs');
+      await fs.mkdir(outDir, { recursive: true });
+      const fname = `${model.replace(/[\/:]/g, '_')}.js`;
+      await fs.writeFile(path.join(outDir, fname), llmCode);
+    }
+  }
+  console.log('Code generation complete.');
 };
 
 main().catch(console.error);
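
For reviewers: a minimal sketch of the contract the slimmed-down script now assumes. Only `prompt` and `functionName` are known from the diff; the module path, the prompt text, and the `isPalindrome` name below are illustrative, and the downstream test runner is out of scope for this change.

// tests/example/test.js — hypothetical test module; the generation script
// reads only `prompt` and `functionName` from the default export.
export default {
  // Sent verbatim to the model via the OpenRouter chat completions API.
  prompt: 'Write a JavaScript function isPalindrome(s) that returns true when s reads the same forwards and backwards.',
  // getLlmCode appends `export default isPalindrome;` to the generated code,
  // so each file saved under tests/example/outputs/ is a valid ES module
  // with the candidate solution as its default export.
  functionName: 'isPalindrome',
};

Because every output file now ends in `export default <functionName>;`, a separate harness can load and exercise it with a plain dynamic import, e.g. `const { default: fn } = await import(pathToFileURL(outputPath));`.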