|
|
|
@@ -1,21 +1,40 @@
|
|
|
|
|
import type { GameState, ModelsState, TrainedModel, ModelCapabilities } from '@ai-tycoon/shared';
|
|
|
|
|
import { uuid, VRAM_REQUIREMENTS_BY_GENERATION } from '@ai-tycoon/shared';
|
|
|
|
|
import type {
|
|
|
|
|
GameState, ModelsState, BaseModel, ModelCapabilities, SafetyProfile,
|
|
|
|
|
TrainingPipeline, TrainingEvent, TrainingEventType,
|
|
|
|
|
ModelVariant, VariantCreationJob, EvalJob, BenchmarkResult,
|
|
|
|
|
BenchmarkDefinition,
|
|
|
|
|
} from '@ai-tycoon/shared';
|
|
|
|
|
import { BENCHMARKS } from '../data/benchmarks';
|
|
|
|
|
import {
|
|
|
|
|
uuid, VRAM_REQUIREMENTS_BY_GENERATION,
|
|
|
|
|
SFT_TIME_FRACTION, SFT_COMPUTE_FRACTION,
|
|
|
|
|
ALIGNMENT_TIME_FRACTION, ALIGNMENT_COMPUTE_FRACTION,
|
|
|
|
|
MOE_CAPABILITY_MULTIPLIER, MOE_SPEED_MULTIPLIER,
|
|
|
|
|
EVENT_BASE_PROBABILITY,
|
|
|
|
|
LOSS_SPIKE_DELAY_MIN, LOSS_SPIKE_DELAY_MAX,
|
|
|
|
|
INSTABILITY_PROGRESS_LOSS_MIN, INSTABILITY_PROGRESS_LOSS_MAX,
|
|
|
|
|
BREAKTHROUGH_CAPABILITY_BONUS_MIN, BREAKTHROUGH_CAPABILITY_BONUS_MAX,
|
|
|
|
|
EMERGENT_CAPABILITY_THRESHOLDS,
|
|
|
|
|
ALIGNMENT_METHODS,
|
|
|
|
|
SFT_SPECIALIZATION_BONUSES,
|
|
|
|
|
QUANTIZATION_CONFIGS,
|
|
|
|
|
DISTILLATION_BASE_RETENTION,
|
|
|
|
|
QUANTIZATION_TICKS,
|
|
|
|
|
} from '@ai-tycoon/shared';
|
|
|
|
|
|
|
|
|
|
/**
 * Result of advancing the models subsystem by one tick via `processModels`.
 */
export interface ModelTickResult {
  /** Models slice of the game state after the tick. */
  modelsState: ModelsState;
  /** Trained model produced by this tick, or `null` when none was produced. */
  modelCompleted: TrainedModel | null;
  /** Base models whose training pipeline finished during this tick. */
  completedModels: BaseModel[];
  /** Player-facing messages raised while processing the tick. */
  notifications: Array<{
    title: string;
    message: string;
    type: 'success' | 'warning' | 'info';
  }>;
}
|
|
|
|
|
|
|
|
|
|
export function processModels(state: GameState): ModelTickResult {
|
|
|
|
|
const active = state.models.activeTraining;
|
|
|
|
|
if (!active) {
|
|
|
|
|
return { modelsState: state.models, modelCompleted: null };
|
|
|
|
|
}
|
|
|
|
|
const completedModels: BaseModel[] = [];
|
|
|
|
|
const notifications: ModelTickResult['notifications'] = [];
|
|
|
|
|
let baseModels = [...state.models.baseModels];
|
|
|
|
|
let families = [...state.models.families];
|
|
|
|
|
|
|
|
|
|
const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[active.generation] ?? 0;
|
|
|
|
|
if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) {
|
|
|
|
|
return { modelsState: state.models, modelCompleted: null };
|
|
|
|
|
}
|
|
|
|
|
const totalTrainingFlops = state.compute.totalTrainingFlops * state.compute.trainingAllocation;
|
|
|
|
|
|
|
|
|
|
const researcherBoost = state.talent.departments.research.headcount *
|
|
|
|
|
state.talent.departments.research.effectiveness;
|
|
|
|
@@ -23,82 +42,487 @@ export function processModels(state: GameState): ModelTickResult {
|
|
|
|
|
state.talent.departments.engineering.effectiveness;
|
|
|
|
|
const speedMultiplier = 1 + (researcherBoost + engineerBoost) * 0.05;
|
|
|
|
|
|
|
|
|
|
const newProgress = active.progressTicks + speedMultiplier;
|
|
|
|
|
const updatedPipelines: TrainingPipeline[] = [];
|
|
|
|
|
|
|
|
|
|
if (newProgress >= active.totalTicks) {
|
|
|
|
|
const model = createTrainedModel(active.modelName, active.generation, active.allocatedCompute, active.allocatedDataTokens, state);
|
|
|
|
|
for (const pipeline of state.models.activeTrainingPipelines) {
|
|
|
|
|
if (pipeline.status !== 'active') {
|
|
|
|
|
updatedPipelines.push(pipeline);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
modelsState: {
|
|
|
|
|
...state.models,
|
|
|
|
|
trainedModels: [...state.models.trainedModels, model],
|
|
|
|
|
activeTraining: null,
|
|
|
|
|
},
|
|
|
|
|
modelCompleted: model,
|
|
|
|
|
};
|
|
|
|
|
const generation = families.find(f => f.id === pipeline.familyId)?.generation ?? 1;
|
|
|
|
|
const requiredVram = VRAM_REQUIREMENTS_BY_GENERATION[generation] ?? 0;
|
|
|
|
|
if (requiredVram > 0 && state.compute.totalVramGB < requiredVram) {
|
|
|
|
|
updatedPipelines.push({ ...pipeline, status: 'stalled' });
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const effectiveFlops = totalTrainingFlops * pipeline.allocatedComputeFraction;
|
|
|
|
|
let updated = { ...pipeline, events: [...pipeline.events] };
|
|
|
|
|
|
|
|
|
|
if (pipeline.currentStage === 'pretraining') {
|
|
|
|
|
const stage = { ...pipeline.stages.pretraining };
|
|
|
|
|
const newProgress = stage.progressTicks + speedMultiplier;
|
|
|
|
|
|
|
|
|
|
const events = generateTrainingEvents(pipeline, state);
|
|
|
|
|
let tickDelay = 0;
|
|
|
|
|
let progressLost = 0;
|
|
|
|
|
for (const event of events) {
|
|
|
|
|
updated.events.push(event);
|
|
|
|
|
if (event.type === 'loss_spike') {
|
|
|
|
|
tickDelay += event.impact.ticksDelayed ?? 0;
|
|
|
|
|
notifications.push({ title: 'Loss Spike', message: `${pipeline.modelName}: Training loss spiked! Delayed ${event.impact.ticksDelayed} ticks.`, type: 'warning' });
|
|
|
|
|
} else if (event.type === 'instability') {
|
|
|
|
|
progressLost += event.impact.progressLost ?? 0;
|
|
|
|
|
notifications.push({ title: 'Training Instability', message: `${pipeline.modelName}: Rolled back to checkpoint. Lost ${Math.round((event.impact.progressLost ?? 0) * 100)}% progress.`, type: 'warning' });
|
|
|
|
|
} else if (event.type === 'breakthrough') {
|
|
|
|
|
notifications.push({ title: 'Breakthrough!', message: `${pipeline.modelName}: Unexpected capability jump in ${event.impact.capabilityDomain}!`, type: 'success' });
|
|
|
|
|
} else if (event.type === 'hardware_failure') {
|
|
|
|
|
tickDelay += event.impact.ticksDelayed ?? 0;
|
|
|
|
|
notifications.push({ title: 'Hardware Failure', message: `${pipeline.modelName}: GPU failure during training. Recovering from checkpoint.`, type: 'warning' });
|
|
|
|
|
} else if (event.type === 'data_contamination') {
|
|
|
|
|
notifications.push({ title: 'Data Contamination', message: `${pipeline.modelName}: Copyright concerns detected in training data.`, type: 'warning' });
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const effectiveProgress = Math.max(0, newProgress - tickDelay - (stage.totalTicks * progressLost));
|
|
|
|
|
stage.progressTicks = effectiveProgress;
|
|
|
|
|
stage.computeAllocated = effectiveFlops;
|
|
|
|
|
stage.lossValue = Math.max(0.01, 10 * Math.exp(-stage.progressTicks / stage.totalTicks * 3));
|
|
|
|
|
|
|
|
|
|
if (stage.progressTicks >= stage.totalTicks) {
|
|
|
|
|
stage.isComplete = true;
|
|
|
|
|
stage.progressTicks = stage.totalTicks;
|
|
|
|
|
|
|
|
|
|
if (updated.stages.sft) {
|
|
|
|
|
updated.currentStage = 'sft';
|
|
|
|
|
notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to supervised fine-tuning.`, type: 'info' });
|
|
|
|
|
} else if (updated.stages.alignment) {
|
|
|
|
|
updated.currentStage = 'alignment';
|
|
|
|
|
notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
|
|
|
|
|
} else {
|
|
|
|
|
const model = createBaseModel(updated, state);
|
|
|
|
|
baseModels = [...baseModels, model];
|
|
|
|
|
families = families.map(f =>
|
|
|
|
|
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
|
|
|
|
);
|
|
|
|
|
completedModels.push(model);
|
|
|
|
|
updated.status = 'completed';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
updated = { ...updated, stages: { ...updated.stages, pretraining: stage } };
|
|
|
|
|
} else if (pipeline.currentStage === 'sft' && pipeline.stages.sft) {
|
|
|
|
|
const stage = { ...pipeline.stages.sft };
|
|
|
|
|
stage.progressTicks += speedMultiplier;
|
|
|
|
|
|
|
|
|
|
if (stage.progressTicks >= stage.totalTicks) {
|
|
|
|
|
stage.isComplete = true;
|
|
|
|
|
stage.progressTicks = stage.totalTicks;
|
|
|
|
|
|
|
|
|
|
if (updated.stages.alignment) {
|
|
|
|
|
updated.currentStage = 'alignment';
|
|
|
|
|
notifications.push({ title: 'SFT Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
|
|
|
|
|
} else {
|
|
|
|
|
const model = createBaseModel(updated, state);
|
|
|
|
|
baseModels = [...baseModels, model];
|
|
|
|
|
families = families.map(f =>
|
|
|
|
|
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
|
|
|
|
);
|
|
|
|
|
completedModels.push(model);
|
|
|
|
|
updated.status = 'completed';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
updated = { ...updated, stages: { ...updated.stages, sft: stage } };
|
|
|
|
|
} else if (pipeline.currentStage === 'alignment' && pipeline.stages.alignment) {
|
|
|
|
|
const stage = { ...pipeline.stages.alignment };
|
|
|
|
|
stage.progressTicks += speedMultiplier;
|
|
|
|
|
|
|
|
|
|
if (stage.progressTicks >= stage.totalTicks) {
|
|
|
|
|
stage.isComplete = true;
|
|
|
|
|
stage.progressTicks = stage.totalTicks;
|
|
|
|
|
|
|
|
|
|
const model = createBaseModel(updated, state);
|
|
|
|
|
baseModels = [...baseModels, model];
|
|
|
|
|
families = families.map(f =>
|
|
|
|
|
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
|
|
|
|
);
|
|
|
|
|
completedModels.push(model);
|
|
|
|
|
updated.status = 'completed';
|
|
|
|
|
}
|
|
|
|
|
updated = { ...updated, stages: { ...updated.stages, alignment: stage } };
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
updatedPipelines.push(updated);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const updatedVariantJobs = processVariantJobs(state, speedMultiplier);
|
|
|
|
|
for (const variant of updatedVariantJobs.newVariants) {
|
|
|
|
|
variant.createdAtTick = state.meta.tickCount;
|
|
|
|
|
families = families.map(f =>
|
|
|
|
|
f.id === variant.familyId ? { ...f, variants: [...f.variants, variant] } : f,
|
|
|
|
|
);
|
|
|
|
|
notifications.push({
|
|
|
|
|
title: 'Variant Created',
|
|
|
|
|
message: `${variant.name} (${variant.variantType}) is ready!`,
|
|
|
|
|
type: 'success',
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const updatedEvalJobs = processEvalJobs(state);
|
|
|
|
|
|
|
|
|
|
const allDeployed = [
|
|
|
|
|
...baseModels.filter(m => m.isDeployed),
|
|
|
|
|
...families.flatMap(f => f.variants.filter(v => v.isDeployed)),
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
const bestDeployedModelScore = allDeployed.reduce((best, m) =>
|
|
|
|
|
Math.max(best, 'rawCapability' in m ? m.rawCapability : computeVariantScore(m)), 0);
|
|
|
|
|
|
|
|
|
|
const bestDeployedSafetyScore = allDeployed.reduce((best, m) =>
|
|
|
|
|
Math.max(best, m.safetyProfile.overallSafety), 0);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
modelsState: {
|
|
|
|
|
...state.models,
|
|
|
|
|
activeTraining: { ...active, progressTicks: newProgress },
|
|
|
|
|
baseModels,
|
|
|
|
|
families,
|
|
|
|
|
activeTrainingPipelines: updatedPipelines,
|
|
|
|
|
variantJobs: updatedVariantJobs.jobs,
|
|
|
|
|
evalJobs: updatedEvalJobs.jobs,
|
|
|
|
|
benchmarkResults: [...state.models.benchmarkResults, ...updatedEvalJobs.newResults],
|
|
|
|
|
bestDeployedModelScore,
|
|
|
|
|
bestDeployedSafetyScore,
|
|
|
|
|
},
|
|
|
|
|
modelCompleted: null,
|
|
|
|
|
completedModels,
|
|
|
|
|
notifications,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function createTrainedModel(
|
|
|
|
|
name: string,
|
|
|
|
|
generation: number,
|
|
|
|
|
compute: number,
|
|
|
|
|
dataTokens: number,
|
|
|
|
|
/**
 * Rolls the training events that occur during one pre-training tick of `pipeline`.
 *
 * Random events share a base probability of
 * `EVENT_BASE_PROBABILITY * log10(max(1, totalParameters))`, which is then
 * scaled per event type. Completed research halves the odds of instability
 * ('interpretability'), data contamination ('data-pipeline') and hardware
 * failure ('redundancy-protocols').
 *
 * Emergent capabilities are not random: one fires for every entry of
 * `EMERGENT_CAPABILITY_THRESHOLDS` whose threshold the parameter count meets
 * while pre-training progress is strictly between 50% and 55%. Each capability
 * is emitted at most once per pipeline; without that check the event would be
 * re-emitted on every tick spent inside the window and its bonus would stack.
 *
 * Neither `pipeline` nor `state` is mutated.
 *
 * @param pipeline - Pipeline being trained; its architecture, pre-training
 *   stage and previously recorded events are read.
 * @param state - Game state; the tick counter, completed research and owned
 *   datasets are read.
 * @returns Events generated this tick (possibly empty), all marked resolved.
 */
function generateTrainingEvents(pipeline: TrainingPipeline, state: GameState): TrainingEvent[] {
  const events: TrainingEvent[] = [];
  const tick = state.meta.tickCount;
  // NOTE(review): the unit of totalParameters is not visible here (the
  // comparisons against 10 and 30 below suggest billions) — confirm.
  const params = pipeline.architecture.totalParameters;
  const baseProbability = EVENT_BASE_PROBABILITY * Math.log10(Math.max(1, params));

  const completedResearch = state.research.completedResearch;
  const hasInterpretability = completedResearch.includes('interpretability');
  const hasDataPipeline = completedResearch.includes('data-pipeline');
  const hasRedundancy = completedResearch.includes('redundancy-protocols');

  // Loss spike: delays training by a number of ticks.
  if (Math.random() < baseProbability * 2.0) {
    const delay = LOSS_SPIKE_DELAY_MIN + Math.floor(Math.random() * (LOSS_SPIKE_DELAY_MAX - LOSS_SPIKE_DELAY_MIN));
    events.push({
      id: uuid(), type: 'loss_spike', tick,
      severity: delay > 15 ? 'major' : delay > 10 ? 'moderate' : 'minor',
      description: `Training loss spiked to ${(Math.random() * 5 + 2).toFixed(2)}`,
      resolved: true,
      impact: { ticksDelayed: delay },
    });
  }

  // Instability: only for models above 10 parameters (unit as noted above);
  // costs a fraction of stage progress.
  if (params > 10 && Math.random() < baseProbability * (hasInterpretability ? 0.25 : 0.5)) {
    const loss = INSTABILITY_PROGRESS_LOSS_MIN + Math.random() * (INSTABILITY_PROGRESS_LOSS_MAX - INSTABILITY_PROGRESS_LOSS_MIN);
    events.push({
      id: uuid(), type: 'instability', tick,
      severity: loss > 0.12 ? 'major' : 'moderate',
      description: 'Training run became unstable. Rolling back to last checkpoint.',
      resolved: true,
      impact: { progressLost: loss },
    });
  }

  // Breakthrough: needs a large model and a high data-to-parameter ratio.
  const chinchillaRatio = pipeline.stages.pretraining.chinchillaRatio;
  if (params > 30 && chinchillaRatio > 15 && Math.random() < baseProbability * 0.3) {
    const capDomains: (keyof ModelCapabilities)[] = ['reasoning', 'coding', 'creative', 'math', 'knowledge', 'agents'];
    const domain = capDomains[Math.floor(Math.random() * capDomains.length)];
    const bonus = BREAKTHROUGH_CAPABILITY_BONUS_MIN + Math.floor(Math.random() * (BREAKTHROUGH_CAPABILITY_BONUS_MAX - BREAKTHROUGH_CAPABILITY_BONUS_MIN));
    events.push({
      id: uuid(), type: 'breakthrough', tick,
      severity: 'major',
      description: `Unexpected capability jump in ${domain}!`,
      resolved: true,
      impact: { capabilityBonus: bonus, capabilityDomain: domain },
    });
  }

  // Emergent capabilities: the progress ratio is the same for every threshold,
  // so it is computed once outside the loop.
  const pretraining = pipeline.stages.pretraining;
  const progressRatio = pretraining.progressTicks / pretraining.totalTicks;
  if (progressRatio > 0.5 && progressRatio < 0.55) {
    for (const [thresholdStr, capName] of Object.entries(EMERGENT_CAPABILITY_THRESHOLDS)) {
      const threshold = Number(thresholdStr);
      if (!(params >= threshold)) continue;

      const description = `Model developed ${capName} capability!`;
      // The window spans several ticks; emit each capability only once.
      const alreadyEmerged = pipeline.events.some(
        recorded => recorded.type === 'emergent_capability' && recorded.description === description,
      );
      if (alreadyEmerged) continue;

      events.push({
        id: uuid(), type: 'emergent_capability', tick,
        severity: 'major',
        description,
        resolved: true,
        impact: { capabilityBonus: 10, capabilityDomain: 'reasoning' },
      });
    }
  }

  // Data contamination: scaled by the mean legal risk of the owned datasets
  // (zero when no datasets are owned, which disables the event).
  const avgLegalRisk = state.data.ownedDatasets.length > 0
    ? state.data.ownedDatasets.reduce((sum, d) => sum + d.legalRisk, 0) / state.data.ownedDatasets.length
    : 0;
  if (Math.random() < baseProbability * (hasDataPipeline ? 0.25 : 0.5) * avgLegalRisk) {
    events.push({
      id: uuid(), type: 'data_contamination', tick,
      severity: 'moderate',
      description: 'Copyright holders identified content in training data.',
      resolved: true,
      impact: {},
    });
  }

  // Hardware failure: delays training by 10-29 ticks.
  if (Math.random() < baseProbability * (hasRedundancy ? 0.1 : 0.2)) {
    const delay = 10 + Math.floor(Math.random() * 20);
    events.push({
      id: uuid(), type: 'hardware_failure', tick,
      severity: delay > 20 ? 'major' : 'moderate',
      description: 'GPU cluster failure during training. Recovering from checkpoint.',
      resolved: true,
      impact: { ticksDelayed: delay },
    });
  }

  return events;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the finished `BaseModel` for a training pipeline.
 *
 * Raw capability is derived from the pre-training compute and token target
 * plus completed research, capped at 95 (98 after the mixture-of-experts
 * multiplier). Per-domain capabilities are spread from it according to the
 * data mix, then adjusted by:
 *   1. bonuses from recorded breakthrough / emergent-capability events,
 *   2. SFT specialization bonuses with diminishing returns (1.0, 0.7, 0.4),
 *      when the SFT stage completed,
 *   3. the alignment method's capability loss, when alignment completed
 *      (speed and contextUtilization are exempt).
 *
 * The safety baseline includes a random component, so two calls with the same
 * inputs may differ in `safetyProfile`.
 *
 * @param pipeline - Pipeline to turn into a model; read-only.
 * @param state - Game state; research, research-department effectiveness and
 *   the tick counter are read.
 * @returns A new, undeployed base model stamped with the current tick.
 */
function createBaseModel(
  pipeline: TrainingPipeline,
  state: GameState,
): BaseModel {
  const { architecture, dataMix } = pipeline;
  const compute = pipeline.stages.pretraining.computeAllocated;
  const dataTokens = pipeline.stages.pretraining.targetTokens;

  const computeFactor = Math.sqrt(compute) * 5;
  const dataFactor = Math.log10(1 + dataTokens / 1e8) * 10;
  const researchBonus = state.research.completedResearch.length * 3;
  const efficiencyBonus = state.research.completedResearch.filter(r => r.includes('efficiency')).length * 5;

  let rawCapability = Math.min(95, computeFactor + dataFactor + researchBonus + efficiencyBonus);

  if (architecture.type === 'moe') {
    rawCapability = Math.min(98, rawCapability * MOE_CAPABILITY_MULTIPLIER);
  }

  const researcherQuality = state.talent.departments.research.effectiveness;

  const capabilities: ModelCapabilities = {
    reasoning: clamp(rawCapability * (0.6 + dataMix.scientific * 0.5 + dataMix.code * 0.3) * (1 + researcherQuality * 0.2)),
    coding: clamp(rawCapability * (0.5 + dataMix.code * 1.0)),
    creative: clamp(rawCapability * (0.4 + dataMix.books * 0.6 + dataMix.conversation * 0.3)),
    math: clamp(rawCapability * (0.3 + dataMix.scientific * 0.7 + dataMix.code * 0.2)),
    knowledge: clamp(rawCapability * (0.5 + dataMix.web * 0.3 + dataMix.books * 0.3)),
    multimodal: clamp(rawCapability * (dataMix.images * 0.5 + dataMix.video * 0.4 + dataMix.audio * 0.2)),
    agents: clamp(rawCapability * (0.2 + dataMix.code * 0.3 + dataMix.conversation * 0.2)),
    speed: Math.max(1, 100 - architecture.totalParameters * 0.3 + efficiencyBonus * 2 + (architecture.type === 'moe' ? MOE_SPEED_MULTIPLIER * 10 : 0)),
    contextUtilization: Math.min(100, architecture.contextWindow * 0.4),
  };

  // Sum the bonuses per domain first, then apply each sum once.
  const breakthroughBonuses: Partial<Record<keyof ModelCapabilities, number>> = {};
  for (const event of pipeline.events) {
    if ((event.type === 'breakthrough' || event.type === 'emergent_capability') && event.impact.capabilityDomain && event.impact.capabilityBonus) {
      const domain = event.impact.capabilityDomain;
      breakthroughBonuses[domain] = (breakthroughBonuses[domain] ?? 0) + event.impact.capabilityBonus;
    }
  }
  for (const [domain, bonus] of Object.entries(breakthroughBonuses)) {
    const key = domain as keyof ModelCapabilities;
    capabilities[key] = clamp(capabilities[key] + bonus);
  }

  const completedStages: ('pretraining' | 'sft' | 'alignment')[] = ['pretraining'];

  if (pipeline.stages.sft?.isComplete) {
    completedStages.push('sft');
    const sft = pipeline.stages.sft;
    for (let i = 0; i < sft.specializations.length; i++) {
      const spec = sft.specializations[i];
      const bonuses = SFT_SPECIALIZATION_BONUSES[spec];
      if (!bonuses) continue;
      // First specialization counts fully, the second at 70%, the rest at 40%.
      const diminishing = i === 0 ? 1.0 : i === 1 ? 0.7 : 0.4;
      for (const [cap, value] of Object.entries(bonuses)) {
        const key = cap as keyof ModelCapabilities;
        capabilities[key] = clamp(capabilities[key] + value * diminishing);
      }
    }
  }

  // Safety baseline: 30, plus 15 per safety-related research item, plus up to
  // 10 of random variation.
  const safetyResearch = state.research.completedResearch.filter(
    r => r.includes('alignment') || r.includes('interpretability') || r.includes('constitutional'),
  ).length;
  let overallSafety = Math.min(100, 30 + safetyResearch * 15 + Math.random() * 10);
  let refusalRate = overallSafety > 60 ? 0.1 : 0.03;

  if (pipeline.stages.alignment?.isComplete) {
    completedStages.push('alignment');
    const alignment = pipeline.stages.alignment;
    const methodConfig = ALIGNMENT_METHODS[alignment.method];
    if (methodConfig) {
      const safetyGain = methodConfig.safetyGain * alignment.safetyWeight;
      overallSafety = Math.min(100, overallSafety + safetyGain);
      refusalRate = methodConfig.baseRefusal * Math.pow(alignment.safetyWeight, 1.5);
      // Alignment trades capability for safety everywhere except the two
      // performance-style metrics.
      const capLoss = methodConfig.capabilityLoss * alignment.safetyWeight * 0.5;
      for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) {
        if (key !== 'speed' && key !== 'contextUtilization') {
          capabilities[key] = clamp(capabilities[key] - capLoss);
        }
      }
    }
  }

  const safetyProfile: SafetyProfile = {
    overallSafety,
    refusalRate,
    harmAvoidance: overallSafety,
    instructionFollowing: capabilities.reasoning * 0.8,
    honesty: overallSafety * 0.9,
  };

  return {
    id: uuid(),
    familyId: pipeline.familyId,
    name: pipeline.modelName,
    architecture,
    dataMix,
    capabilities,
    safetyProfile,
    rawCapability,
    isDeployed: false,
    trainedAtTick: state.meta.tickCount,
    trainingCostTotal: compute,
    trainingStagesCompleted: completedStages,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Advances every active variant-creation job by `speedMultiplier` ticks.
 *
 * Jobs that are not active are passed through untouched. A job that reaches
 * its total tick count is marked completed and, when its base model can be
 * found in `state.models.baseModels`, yields a new variant; if the base model
 * is missing the job still completes but produces nothing.
 *
 * @param state - Game state; variant jobs and base models are read.
 * @param speedMultiplier - Progress added to each active job this tick.
 * @returns The updated job list (same order as the input) and the variants
 *   created this tick.
 */
function processVariantJobs(
  state: GameState,
  speedMultiplier: number,
): { jobs: VariantCreationJob[]; newVariants: ModelVariant[] } {
  const newVariants: ModelVariant[] = [];
  const jobs: VariantCreationJob[] = [];

  for (const job of state.models.variantJobs) {
    if (job.status !== 'active') {
      jobs.push(job);
      continue;
    }

    const advanced = job.progressTicks + speedMultiplier;
    if (advanced >= job.totalTicks) {
      const source = state.models.baseModels.find(candidate => candidate.id === job.baseModelId);
      if (source) {
        newVariants.push(createVariant(job, source));
      }
      jobs.push({ ...job, status: 'completed' as const, progressTicks: job.totalTicks });
      continue;
    }

    jobs.push({ ...job, progressTicks: advanced });
  }

  return { jobs, newVariants };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Derives a model variant from `base` according to a finished variant job.
 *
 * - Distillation scales every capability by a retention factor
 *   (`DISTILLATION_BASE_RETENTION + sizeRatio * 0.25`, capped at 1 so that
 *   "distilling" into a larger model can never add capability), shrinks the
 *   architecture to the target size and derives cost/speed multipliers from
 *   the size ratio.
 * - Fine-tuning adds the bonuses of the chosen specialization.
 * - Quantization scales every capability except speed by the level's quality
 *   retention and applies the level's speed and cost multipliers.
 *
 * A job whose type and config shape do not match any branch yields a copy of
 * the base capabilities under the base model's name.
 *
 * @param job - Completed variant-creation job; read-only.
 * @param base - Base model the variant derives from; read-only (capabilities,
 *   architecture and safety profile are copied).
 * @returns The new, undeployed variant. `createdAtTick` is left at 0 for the
 *   caller to stamp.
 */
function createVariant(job: VariantCreationJob, base: BaseModel): ModelVariant {
  const caps = { ...base.capabilities };
  let costMultiplier = 1.0;
  let speedMultiplier = 1.0;
  let variantName = base.name;
  let arch = { ...base.architecture };
  let distillationRetention: number | undefined;

  if (job.jobType === 'distillation' && 'targetParameters' in job.config) {
    const config = job.config;
    const sizeRatio = config.targetParameters / base.architecture.totalParameters;
    // Computed once and reused for the returned metadata; never above 100%.
    const retention = Math.min(1, DISTILLATION_BASE_RETENTION + sizeRatio * 0.25);
    distillationRetention = retention;
    for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) {
      caps[key] = clamp(caps[key] * retention);
    }
    costMultiplier = sizeRatio * 0.8;
    speedMultiplier = (1 / sizeRatio) * 0.7;
    arch = { ...arch, totalParameters: config.targetParameters, activeParameters: config.targetParameters };
    variantName = config.variantName;
  } else if (job.jobType === 'fine-tuning' && 'specialization' in job.config) {
    const config = job.config;
    const bonuses = SFT_SPECIALIZATION_BONUSES[config.specialization];
    if (bonuses) {
      for (const [cap, value] of Object.entries(bonuses)) {
        caps[cap as keyof ModelCapabilities] = clamp(caps[cap as keyof ModelCapabilities] + value);
      }
    }
    variantName = config.variantName;
  } else if (job.jobType === 'quantization' && 'level' in job.config) {
    const config = job.config;
    const qConfig = QUANTIZATION_CONFIGS[config.level];
    if (qConfig) {
      for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) {
        // Speed is handled separately: quantization makes the model faster.
        if (key !== 'speed') caps[key] = clamp(caps[key] * qConfig.qualityRetention);
      }
      caps.speed = clamp(caps.speed * qConfig.speedMultiplier);
      costMultiplier = qConfig.costMultiplier;
      speedMultiplier = qConfig.speedMultiplier;
    }
    variantName = config.variantName;
  }

  return {
    id: uuid(),
    familyId: base.familyId,
    baseModelId: base.id,
    name: variantName,
    variantType: job.jobType === 'distillation' ? 'distilled' : job.jobType === 'fine-tuning' ? 'fine-tuned' : 'quantized',
    architecture: arch,
    capabilities: caps,
    safetyProfile: { ...base.safetyProfile },
    isDeployed: false,
    createdAtTick: 0,
    quantization: job.jobType === 'quantization' && 'level' in job.config ? job.config.level : undefined,
    distillationRetention,
    finetuneSpecialization: job.jobType === 'fine-tuning' && 'specialization' in job.config ? job.config.specialization : undefined,
    costMultiplier,
    speedMultiplier,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Advances every active evaluation job by exactly one tick.
 *
 * Inactive jobs are passed through untouched. When a job reaches its total
 * tick count it is marked completed; if its model is found among the base
 * models and family variants in `state`, benchmark scores are computed and
 * attached to the job, otherwise it completes without results.
 *
 * @param state - Game state; eval jobs, base models, family variants and the
 *   tick counter are read.
 * @returns The updated job list (same order as the input) and every benchmark
 *   result produced this tick.
 */
function processEvalJobs(state: GameState): { jobs: EvalJob[]; newResults: BenchmarkResult[] } {
  // Base models come first, then each family's variants in family order.
  const candidates: (BaseModel | ModelVariant)[] = [...state.models.baseModels];
  for (const family of state.models.families) {
    candidates.push(...family.variants);
  }

  const newResults: BenchmarkResult[] = [];
  const jobs: EvalJob[] = [];

  for (const job of state.models.evalJobs) {
    if (job.status !== 'active') {
      jobs.push(job);
      continue;
    }

    const advanced = job.progressTicks + 1;
    if (advanced >= job.totalTicks) {
      const completedJob = { ...job, status: 'completed' as const, progressTicks: job.totalTicks };
      const target = candidates.find(candidate => candidate.id === job.modelId);
      if (!target) {
        jobs.push(completedJob);
        continue;
      }
      const results = computeBenchmarkScores(target, job.benchmarkIds, state.meta.tickCount);
      newResults.push(...results);
      jobs.push({ ...completedJob, results });
      continue;
    }

    jobs.push({ ...job, progressTicks: advanced });
  }

  return { jobs, newResults };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scores `model` on each requested benchmark.
 *
 * A benchmark's score is 70% of the model's primary capability plus 30% of the
 * secondary capability (0 when the benchmark has none), plus uniform noise in
 * [-3, 3), clamped by `clamp`. Ids that are not present in `BENCHMARKS` get a
 * score of 0 and consume no random number.
 *
 * @param model - Model or variant whose capabilities are scored.
 * @param benchmarkIds - Benchmarks to run, in output order.
 * @param tick - Tick recorded on every result as `ranAtTick`.
 * @returns One result per requested id, in the same order.
 */
function computeBenchmarkScores(
  model: BaseModel | ModelVariant,
  benchmarkIds: string[],
  tick: number,
): BenchmarkResult[] {
  const definitionsById = new Map(BENCHMARKS.map(definition => [definition.id, definition]));
  const results: BenchmarkResult[] = [];

  for (const benchmarkId of benchmarkIds) {
    const definition = definitionsById.get(benchmarkId);
    let score = 0;

    if (definition) {
      const primaryScore = model.capabilities[definition.primaryCapability] ?? 0;
      const secondaryScore = definition.secondaryCapability
        ? (model.capabilities[definition.secondaryCapability] ?? 0)
        : 0;
      const jitter = (Math.random() - 0.5) * 6;
      score = clamp(primaryScore * 0.7 + secondaryScore * 0.3 + jitter);
    }

    results.push({ benchmarkId, modelId: model.id, score, ranAtTick: tick });
  }

  return results;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collapses a variant's capabilities into a single weighted score.
 *
 * Weights: reasoning 0.25, coding 0.2, creative 0.15, math 0.15,
 * knowledge 0.15, agents 0.1. Other capabilities are ignored.
 *
 * @param variant - Variant whose capabilities are scored.
 * @returns The weighted sum of the six capabilities above.
 */
function computeVariantScore(variant: ModelVariant): number {
  const { reasoning, coding, creative, math, knowledge, agents } = variant.capabilities;

  // Terms are added left to right in this order.
  const weightedTerms = [
    reasoning * 0.25,
    coding * 0.2,
    creative * 0.15,
    math * 0.15,
    knowledge * 0.15,
    agents * 0.1,
  ];

  let total = weightedTerms[0];
  for (let i = 1; i < weightedTerms.length; i++) {
    total = total + weightedTerms[i];
  }
  return total;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Restricts a score to the inclusive range [0, 100].
 *
 * `NaN` is mapped to 0 rather than passed through: `Math.min`/`Math.max`
 * return `NaN` as soon as any argument is `NaN`, so a single undefined score
 * would otherwise propagate into every aggregate computed from it.
 *
 * @param n - Value to clamp.
 * @returns `n` limited to [0, 100], or 0 when `n` is `NaN`.
 */
function clamp(n: number): number {
  if (Number.isNaN(n)) return 0;
  return Math.min(100, Math.max(0, n));
}
|
|
|
|
|