Redesign model lifecycle: upfront SFT/alignment, multi-size families, point releases, quantization-only variants
CI / build-and-push (push) Successful in 45s
CI / build-and-push (push) Successful in 45s
The training pipeline now requires SFT specializations and an alignment method to be configured at the start — there is no longer a mid-training configuration step. Model families support multiple size tiers (Nano/Small/Medium/Large/Flagship) that are trained independently, mimicking the model families of real AI companies. Point releases iterate on deployed models with 40% of the training time and an 8% capability gain. Distillation and fine-tuning variants have been removed — players train smaller size tiers or configure SFT during initial training instead. Quantization is the only remaining variant type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+269
-278
@@ -1,5 +1,5 @@
|
|||||||
import { useState } from 'react';
|
import { useState } from 'react';
|
||||||
import { Play, Rocket, Globe, ChevronDown, ChevronUp, Beaker, Shield, Scissors, Wrench, Zap, BarChart3 } from 'lucide-react';
|
import { Play, Rocket, Globe, ChevronDown, ChevronUp, Beaker, Shield, Zap, BarChart3 } from 'lucide-react';
|
||||||
import { TutorialHint } from '@/components/game/TutorialHint';
|
import { TutorialHint } from '@/components/game/TutorialHint';
|
||||||
import { ConfirmModal } from '@/components/common/ConfirmModal';
|
import { ConfirmModal } from '@/components/common/ConfirmModal';
|
||||||
import { useGameStore } from '@/store';
|
import { useGameStore } from '@/store';
|
||||||
@@ -9,10 +9,14 @@ import {
|
|||||||
ALIGNMENT_METHODS,
|
ALIGNMENT_METHODS,
|
||||||
QUANTIZATION_CONFIGS,
|
QUANTIZATION_CONFIGS,
|
||||||
PARAMETER_OPTIONS,
|
PARAMETER_OPTIONS,
|
||||||
|
SIZE_TIER_MAP,
|
||||||
|
SIZE_TIER_LABELS,
|
||||||
|
SFT_SPECIALIZATION_BONUSES,
|
||||||
} from '@ai-tycoon/shared';
|
} from '@ai-tycoon/shared';
|
||||||
import type {
|
import type {
|
||||||
ModelArchitecture, DataMixAllocation, SFTSpecialization, AlignmentMethod,
|
ModelArchitecture, DataMixAllocation, SFTSpecialization, AlignmentMethod,
|
||||||
DataDomain, QuantizationLevel, BaseModel, ModelVariant, BenchmarkResult,
|
DataDomain, QuantizationLevel, BaseModel, ModelVariant, BenchmarkResult,
|
||||||
|
SizeTier, ModelFamily,
|
||||||
} from '@ai-tycoon/shared';
|
} from '@ai-tycoon/shared';
|
||||||
import { BENCHMARKS } from '@ai-tycoon/game-engine';
|
import { BENCHMARKS } from '@ai-tycoon/game-engine';
|
||||||
|
|
||||||
@@ -56,12 +60,8 @@ export function ModelsPage() {
|
|||||||
const totalData = useGameStore((s) => s.data.totalTrainingTokens);
|
const totalData = useGameStore((s) => s.data.totalTrainingTokens);
|
||||||
const currentEra = useGameStore((s) => s.meta.currentEra);
|
const currentEra = useGameStore((s) => s.meta.currentEra);
|
||||||
const startTrainingPipeline = useGameStore((s) => s.startTrainingPipeline);
|
const startTrainingPipeline = useGameStore((s) => s.startTrainingPipeline);
|
||||||
const configureSFT = useGameStore((s) => s.configureSFT);
|
|
||||||
const configureAlignment = useGameStore((s) => s.configureAlignment);
|
|
||||||
const deployModel = useGameStore((s) => s.deployModel);
|
const deployModel = useGameStore((s) => s.deployModel);
|
||||||
const deployVariant = useGameStore((s) => s.deployVariant);
|
const deployVariant = useGameStore((s) => s.deployVariant);
|
||||||
const createDistillation = useGameStore((s) => s.createDistillation);
|
|
||||||
const createFineTune = useGameStore((s) => s.createFineTune);
|
|
||||||
const createQuantization = useGameStore((s) => s.createQuantization);
|
const createQuantization = useGameStore((s) => s.createQuantization);
|
||||||
const startEvaluation = useGameStore((s) => s.startEvaluation);
|
const startEvaluation = useGameStore((s) => s.startEvaluation);
|
||||||
const setTrainingAllocation = useGameStore((s) => s.setTrainingAllocation);
|
const setTrainingAllocation = useGameStore((s) => s.setTrainingAllocation);
|
||||||
@@ -80,6 +80,15 @@ export function ModelsPage() {
|
|||||||
const [dataMix, setDataMix] = useState<DataMixAllocation>({ ...DEFAULT_DATA_MIX });
|
const [dataMix, setDataMix] = useState<DataMixAllocation>({ ...DEFAULT_DATA_MIX });
|
||||||
const [dataMixPreset, setDataMixPreset] = useState('balanced');
|
const [dataMixPreset, setDataMixPreset] = useState('balanced');
|
||||||
|
|
||||||
|
// New model lifecycle state
|
||||||
|
const [familyMode, setFamilyMode] = useState<'new' | 'existing'>('new');
|
||||||
|
const [selectedFamilyId, setSelectedFamilyId] = useState<string | null>(null);
|
||||||
|
const [isPointRelease, setIsPointRelease] = useState(false);
|
||||||
|
const [sourceModelId, setSourceModelId] = useState<string | null>(null);
|
||||||
|
const [sftSpecs, setSftSpecs] = useState<SFTSpecialization[]>(['general']);
|
||||||
|
const [alignMethod, setAlignMethod] = useState<AlignmentMethod>('rlhf');
|
||||||
|
const [safetyWeight, setSafetyWeight] = useState(0.5);
|
||||||
|
|
||||||
const trainingFlops = totalFlops * trainingAlloc;
|
const trainingFlops = totalFlops * trainingAlloc;
|
||||||
const estimatedTicks = trainingFlops > 0 ? Math.max(30, Math.ceil(180 / (1 + trainingFlops * 0.1))) : Infinity;
|
const estimatedTicks = trainingFlops > 0 ? Math.max(30, Math.ceil(180 / (1 + trainingFlops * 0.1))) : Infinity;
|
||||||
const estimatedCapability = Math.min(95, Math.sqrt(trainingFlops) * 5 + Math.log10(1 + totalData / 1e8) * 10);
|
const estimatedCapability = Math.min(95, Math.sqrt(trainingFlops) * 5 + Math.log10(1 + totalData / 1e8) * 10);
|
||||||
@@ -95,9 +104,26 @@ export function ModelsPage() {
|
|||||||
const currentEraIdx = eraOrder.indexOf(currentEra);
|
const currentEraIdx = eraOrder.indexOf(currentEra);
|
||||||
const availableBenchmarks = BENCHMARKS.filter(b => eraOrder.indexOf(b.unlockedAtEra) <= currentEraIdx);
|
const availableBenchmarks = BENCHMARKS.filter(b => eraOrder.indexOf(b.unlockedAtEra) <= currentEraIdx);
|
||||||
|
|
||||||
|
const hasAlignmentResearch = completedResearch.some(r =>
|
||||||
|
r === 'alignment-research' || r === 'interpretability' || r === 'constitutional-ai',
|
||||||
|
);
|
||||||
|
|
||||||
|
// Computed size tier
|
||||||
|
const sizeTier: SizeTier = SIZE_TIER_MAP[parameterCount] ?? 'small';
|
||||||
|
|
||||||
|
// Model name preview
|
||||||
|
const familyNameForPreview = familyMode === 'new'
|
||||||
|
? (modelName.trim() || `Family ${families.length + 1}`)
|
||||||
|
: (families.find(f => f.id === selectedFamilyId)?.name ?? 'Family');
|
||||||
|
const nextVersion = (() => {
|
||||||
|
if (!isPointRelease || !sourceModelId) return 1.0;
|
||||||
|
const src = baseModels.find(m => m.id === sourceModelId);
|
||||||
|
return src ? Math.round((src.version + 0.1) * 10) / 10 : 1.0;
|
||||||
|
})();
|
||||||
|
const modelNamePreview = `${familyNameForPreview} ${SIZE_TIER_LABELS[sizeTier]} v${nextVersion.toFixed(1)}`;
|
||||||
|
|
||||||
const handleStartTraining = () => {
|
const handleStartTraining = () => {
|
||||||
if (trainingFlops === 0) return;
|
if (trainingFlops === 0) return;
|
||||||
const name = modelName.trim() || `Model v${families.length + 1}`;
|
|
||||||
|
|
||||||
const architecture: ModelArchitecture = {
|
const architecture: ModelArchitecture = {
|
||||||
type: archType,
|
type: archType,
|
||||||
@@ -109,14 +135,23 @@ export function ModelsPage() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
startTrainingPipeline({
|
startTrainingPipeline({
|
||||||
modelName: name,
|
...(familyMode === 'new'
|
||||||
|
? { familyName: modelName.trim() || `Family ${families.length + 1}` }
|
||||||
|
: { familyId: selectedFamilyId! }),
|
||||||
architecture,
|
architecture,
|
||||||
dataMix,
|
dataMix,
|
||||||
allocatedComputeFraction: 1.0,
|
allocatedComputeFraction: 1.0,
|
||||||
targetTokens: totalData,
|
targetTokens: totalData,
|
||||||
totalTicks: estimatedTicks,
|
totalTicks: estimatedTicks,
|
||||||
|
sftSpecializations: sftSpecs,
|
||||||
|
alignmentMethod: alignMethod,
|
||||||
|
alignmentSafetyWeight: safetyWeight,
|
||||||
|
isPointRelease,
|
||||||
|
sourceModelId: sourceModelId ?? undefined,
|
||||||
});
|
});
|
||||||
setModelName('');
|
setModelName('');
|
||||||
|
setIsPointRelease(false);
|
||||||
|
setSourceModelId(null);
|
||||||
};
|
};
|
||||||
|
|
||||||
const handlePresetChange = (presetKey: string) => {
|
const handlePresetChange = (presetKey: string) => {
|
||||||
@@ -137,10 +172,6 @@ export function ModelsPage() {
|
|||||||
setDataMixPreset('custom');
|
setDataMixPreset('custom');
|
||||||
};
|
};
|
||||||
|
|
||||||
const hasAlignmentResearch = completedResearch.some(r =>
|
|
||||||
r === 'alignment-research' || r === 'interpretability' || r === 'constitutional-ai',
|
|
||||||
);
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-6">
|
<div className="space-y-6">
|
||||||
<h2 className="text-2xl font-bold">Models</h2>
|
<h2 className="text-2xl font-bold">Models</h2>
|
||||||
@@ -243,8 +274,8 @@ export function ModelsPage() {
|
|||||||
|
|
||||||
<div className="flex gap-1 mb-2">
|
<div className="flex gap-1 mb-2">
|
||||||
<StageBar label="Pre" active={pipeline.currentStage === 'pretraining'} complete={pipeline.stages.pretraining.isComplete} progress={pipeline.stages.pretraining.progressTicks / pipeline.stages.pretraining.totalTicks} />
|
<StageBar label="Pre" active={pipeline.currentStage === 'pretraining'} complete={pipeline.stages.pretraining.isComplete} progress={pipeline.stages.pretraining.progressTicks / pipeline.stages.pretraining.totalTicks} />
|
||||||
<StageBar label="SFT" active={pipeline.currentStage === 'sft'} complete={pipeline.stages.sft?.isComplete ?? false} progress={pipeline.stages.sft ? pipeline.stages.sft.progressTicks / pipeline.stages.sft.totalTicks : 0} configured={!!pipeline.stages.sft} />
|
<StageBar label="SFT" active={pipeline.currentStage === 'sft'} complete={pipeline.stages.sft.isComplete} progress={pipeline.stages.sft.progressTicks / pipeline.stages.sft.totalTicks} />
|
||||||
<StageBar label="Align" active={pipeline.currentStage === 'alignment'} complete={pipeline.stages.alignment?.isComplete ?? false} progress={pipeline.stages.alignment ? pipeline.stages.alignment.progressTicks / pipeline.stages.alignment.totalTicks : 0} configured={!!pipeline.stages.alignment} />
|
<StageBar label="Align" active={pipeline.currentStage === 'alignment'} complete={pipeline.stages.alignment.isComplete} progress={pipeline.stages.alignment.progressTicks / pipeline.stages.alignment.totalTicks} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="h-2 bg-surface-800 rounded-full overflow-hidden">
|
<div className="h-2 bg-surface-800 rounded-full overflow-hidden">
|
||||||
@@ -259,19 +290,6 @@ export function ModelsPage() {
|
|||||||
: `ETA: ${formatDuration(stage.totalTicks - stage.progressTicks)}`}
|
: `ETA: ${formatDuration(stage.totalTicks - stage.progressTicks)}`}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{pipeline.currentStage === 'pretraining' && !pipeline.stages.sft && !pipeline.stages.alignment && (
|
|
||||||
<div className="mt-2 flex items-center gap-2 text-xs text-warning">
|
|
||||||
<Beaker size={12} />
|
|
||||||
<span>
|
|
||||||
Post-training not configured —{' '}
|
|
||||||
<button onClick={() => setExpandedPipeline(pipeline.id)} className="text-accent hover:text-accent-light underline">
|
|
||||||
configure SFT & Alignment
|
|
||||||
</button>
|
|
||||||
{' '}or they'll be skipped.
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{isExpanded && (
|
{isExpanded && (
|
||||||
<div className="mt-3 pt-3 border-t border-surface-700 space-y-2">
|
<div className="mt-3 pt-3 border-t border-surface-700 space-y-2">
|
||||||
{pipeline.currentStage === 'pretraining' && (
|
{pipeline.currentStage === 'pretraining' && (
|
||||||
@@ -281,10 +299,6 @@ export function ModelsPage() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{pipeline.currentStage === 'pretraining' && !pipeline.stages.pretraining.isComplete && (!pipeline.stages.sft || !pipeline.stages.alignment) && (
|
|
||||||
<PostTrainingConfig pipelineId={pipeline.id} hasAlignmentResearch={hasAlignmentResearch} completedResearch={completedResearch} configureSFT={configureSFT} configureAlignment={configureAlignment} sftConfigured={!!pipeline.stages.sft} alignmentConfigured={!!pipeline.stages.alignment} />
|
|
||||||
)}
|
|
||||||
|
|
||||||
{recentEvents.length > 0 && (
|
{recentEvents.length > 0 && (
|
||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
<span className="text-xs text-surface-500 font-medium">Recent Events</span>
|
<span className="text-xs text-surface-500 font-medium">Recent Events</span>
|
||||||
@@ -357,17 +371,60 @@ export function ModelsPage() {
|
|||||||
{/* Train New Model */}
|
{/* Train New Model */}
|
||||||
{modelsTab === 'train' && <div className="bg-surface-900 border border-surface-700 rounded-xl p-4 space-y-4">
|
{modelsTab === 'train' && <div className="bg-surface-900 border border-surface-700 rounded-xl p-4 space-y-4">
|
||||||
<h3 className="font-semibold">Train New Model</h3>
|
<h3 className="font-semibold">Train New Model</h3>
|
||||||
|
|
||||||
|
{isPointRelease && sourceModelId && (
|
||||||
|
<div className="bg-accent/10 border border-accent/30 rounded-lg px-3 py-2 flex items-center justify-between">
|
||||||
|
<div className="text-sm text-accent-light">
|
||||||
|
Point Release — iterating on <span className="font-mono">{baseModels.find(m => m.id === sourceModelId)?.name ?? 'model'}</span>
|
||||||
|
<span className="text-xs text-surface-400 ml-2">(40% training time)</span>
|
||||||
|
</div>
|
||||||
|
<button onClick={() => { setIsPointRelease(false); setSourceModelId(null); }} className="text-xs text-surface-400 hover:text-surface-200">Cancel</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
<div className="grid grid-cols-2 gap-3">
|
|
||||||
|
{/* Family selector */}
|
||||||
<div>
|
<div>
|
||||||
<label className="block text-xs text-surface-400 mb-1">Model Name</label>
|
<label className="block text-xs text-surface-400 mb-1">Model Family</label>
|
||||||
|
<div className="flex gap-2 mb-2">
|
||||||
|
<button
|
||||||
|
onClick={() => { setFamilyMode('new'); setIsPointRelease(false); setSourceModelId(null); }}
|
||||||
|
className={`flex-1 px-3 py-2 rounded text-sm border transition-colors ${familyMode === 'new' ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-300'}`}
|
||||||
|
>
|
||||||
|
New Family
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setFamilyMode('existing')}
|
||||||
|
disabled={families.length === 0}
|
||||||
|
className={`flex-1 px-3 py-2 rounded text-sm border transition-colors ${familyMode === 'existing' ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-300'} disabled:opacity-50 disabled:cursor-not-allowed`}
|
||||||
|
>
|
||||||
|
Add to Family
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{familyMode === 'new' ? (
|
||||||
<input
|
<input
|
||||||
type="text" value={modelName}
|
type="text" value={modelName}
|
||||||
onChange={(e) => setModelName(e.target.value)}
|
onChange={(e) => setModelName(e.target.value)}
|
||||||
placeholder={`Model v${families.length + 1}`}
|
placeholder={`Family ${families.length + 1}`}
|
||||||
className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
||||||
/>
|
/>
|
||||||
|
) : (
|
||||||
|
<select
|
||||||
|
value={selectedFamilyId ?? ''}
|
||||||
|
onChange={(e) => setSelectedFamilyId(e.target.value || null)}
|
||||||
|
className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
||||||
|
>
|
||||||
|
<option value="">Select a family...</option>
|
||||||
|
{families.map(f => (
|
||||||
|
<option key={f.id} value={f.id}>{f.name} (Gen {f.generation})</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Architecture & Parameters */}
|
||||||
|
<div className="grid grid-cols-2 gap-3">
|
||||||
<div>
|
<div>
|
||||||
<label className="block text-xs text-surface-400 mb-1">Architecture</label>
|
<label className="block text-xs text-surface-400 mb-1">Architecture</label>
|
||||||
<div className="flex gap-2">
|
<div className="flex gap-2">
|
||||||
@@ -385,20 +442,6 @@ export function ModelsPage() {
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
<div className="grid grid-cols-2 gap-3">
|
|
||||||
<div>
|
|
||||||
<label className="block text-xs text-surface-400 mb-1">Parameters (Billions)</label>
|
|
||||||
<select
|
|
||||||
value={parameterCount}
|
|
||||||
onChange={(e) => setParameterCount(Number(e.target.value))}
|
|
||||||
className="w-full bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
|
||||||
>
|
|
||||||
{PARAMETER_OPTIONS.map(p => (
|
|
||||||
<option key={p} value={p}>{p}B</option>
|
|
||||||
))}
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
<div>
|
<div>
|
||||||
<label className="block text-xs text-surface-400 mb-1">Context Window</label>
|
<label className="block text-xs text-surface-400 mb-1">Context Window</label>
|
||||||
<select
|
<select
|
||||||
@@ -413,6 +456,25 @@ export function ModelsPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Parameters with size tier indicator */}
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-surface-400 mb-1">Parameters (Billions)</label>
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<select
|
||||||
|
value={parameterCount}
|
||||||
|
onChange={(e) => setParameterCount(Number(e.target.value))}
|
||||||
|
className="flex-1 bg-surface-800 border border-surface-600 rounded px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-accent/50"
|
||||||
|
>
|
||||||
|
{PARAMETER_OPTIONS.map(p => (
|
||||||
|
<option key={p} value={p}>{p}B</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
<span className="text-xs px-2 py-1 rounded bg-accent/10 text-accent-light border border-accent/30">
|
||||||
|
{SIZE_TIER_LABELS[sizeTier]}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Data Mix */}
|
{/* Data Mix */}
|
||||||
<div>
|
<div>
|
||||||
<div className="flex items-center justify-between mb-2">
|
<div className="flex items-center justify-between mb-2">
|
||||||
@@ -448,6 +510,94 @@ export function ModelsPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* SFT Configuration */}
|
||||||
|
<div>
|
||||||
|
<div className="flex items-center gap-1 mb-2">
|
||||||
|
<Beaker size={12} className="text-surface-400" />
|
||||||
|
<label className="text-xs text-surface-400">SFT Specializations</label>
|
||||||
|
</div>
|
||||||
|
<div className="flex flex-wrap gap-1">
|
||||||
|
{SFT_OPTIONS.map(opt => (
|
||||||
|
<button
|
||||||
|
key={opt.value}
|
||||||
|
onClick={() => setSftSpecs(prev =>
|
||||||
|
prev.includes(opt.value)
|
||||||
|
? prev.filter(s => s !== opt.value)
|
||||||
|
: [...prev, opt.value]
|
||||||
|
)}
|
||||||
|
className={`px-2 py-1 rounded text-xs border transition-colors ${
|
||||||
|
sftSpecs.includes(opt.value)
|
||||||
|
? 'bg-accent/20 border-accent text-accent-light'
|
||||||
|
: 'bg-surface-800 border-surface-600 text-surface-400'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{opt.label}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
{sftSpecs.length > 0 && (
|
||||||
|
<div className="mt-2 text-[10px] text-surface-500">
|
||||||
|
<span className="font-medium text-surface-400">Bonus preview: </span>
|
||||||
|
{sftSpecs.map(spec => {
|
||||||
|
const bonuses = SFT_SPECIALIZATION_BONUSES[spec];
|
||||||
|
if (!bonuses) return null;
|
||||||
|
const positives = Object.entries(bonuses).filter(([, v]) => v > 0).map(([k, v]) => `${k} +${v}`);
|
||||||
|
const negatives = Object.entries(bonuses).filter(([, v]) => v < 0).map(([k, v]) => `${k} ${v}`);
|
||||||
|
return (
|
||||||
|
<span key={spec} className="inline-block mr-2">
|
||||||
|
<span className="text-accent-light">{spec}</span>
|
||||||
|
{positives.length > 0 && <span className="text-success ml-1">{positives.join(', ')}</span>}
|
||||||
|
{negatives.length > 0 && <span className="text-error ml-1">{negatives.join(', ')}</span>}
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Alignment Configuration */}
|
||||||
|
<div>
|
||||||
|
<div className="flex items-center gap-1 mb-2">
|
||||||
|
<Shield size={12} className="text-surface-400" />
|
||||||
|
<label className="text-xs text-surface-400">Alignment</label>
|
||||||
|
</div>
|
||||||
|
{hasAlignmentResearch ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<div className="flex gap-1">
|
||||||
|
{(Object.keys(ALIGNMENT_METHODS) as AlignmentMethod[]).map(method => {
|
||||||
|
const isAvailable = completedResearch.includes(ALIGNMENT_METHODS[method].requiredResearch);
|
||||||
|
return (
|
||||||
|
<button
|
||||||
|
key={method}
|
||||||
|
disabled={!isAvailable}
|
||||||
|
onClick={() => setAlignMethod(method)}
|
||||||
|
className={`px-2 py-1 rounded text-xs border transition-colors ${
|
||||||
|
alignMethod === method ? 'bg-accent/20 border-accent text-accent-light' :
|
||||||
|
!isAvailable ? 'bg-surface-800 border-surface-700 text-surface-600 cursor-not-allowed' :
|
||||||
|
'bg-surface-800 border-surface-600 text-surface-400'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{method.toUpperCase()}
|
||||||
|
</button>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<span className="text-[10px] text-surface-400">Safety</span>
|
||||||
|
<input type="range" min={0} max={100} value={safetyWeight * 100}
|
||||||
|
onChange={(e) => setSafetyWeight(Number(e.target.value) / 100)}
|
||||||
|
className="flex-1 accent-accent h-1" />
|
||||||
|
<span className="text-[10px] text-surface-400">Helpful</span>
|
||||||
|
<span className="text-[10px] font-mono text-surface-500 w-8 text-right">{Math.round(safetyWeight * 100)}%</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="text-xs text-surface-500 flex items-center gap-1">
|
||||||
|
<Shield size={10} /> Requires alignment research — defaults to RLHF
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Stats */}
|
{/* Stats */}
|
||||||
<div className="grid grid-cols-4 gap-3 text-sm">
|
<div className="grid grid-cols-4 gap-3 text-sm">
|
||||||
<div className="bg-surface-800 rounded-lg p-3">
|
<div className="bg-surface-800 rounded-lg p-3">
|
||||||
@@ -471,14 +621,22 @@ export function ModelsPage() {
|
|||||||
Estimated capability: <span className="text-accent-light font-mono">{estimatedCapability.toFixed(1)}/100</span>
|
Estimated capability: <span className="text-accent-light font-mono">{estimatedCapability.toFixed(1)}/100</span>
|
||||||
{archType === 'moe' && <span className="ml-2 text-xs text-accent">(+15% MoE bonus)</span>}
|
{archType === 'moe' && <span className="ml-2 text-xs text-accent">(+15% MoE bonus)</span>}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Model name preview */}
|
||||||
|
<div className="bg-surface-800/50 rounded-lg px-3 py-2 flex items-center gap-2">
|
||||||
|
<span className="text-[10px] text-surface-500">Model name:</span>
|
||||||
|
<span className="text-sm font-mono text-surface-300">{modelNamePreview}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Start button */}
|
||||||
<div>
|
<div>
|
||||||
<button
|
<button
|
||||||
onClick={handleStartTraining}
|
onClick={handleStartTraining}
|
||||||
disabled={trainingFlops === 0}
|
disabled={trainingFlops === 0 || (familyMode === 'existing' && !selectedFamilyId)}
|
||||||
className="flex items-center gap-2 bg-accent hover:bg-accent-dark text-white px-4 py-2 rounded-lg text-sm disabled:opacity-50 disabled:cursor-not-allowed"
|
className="flex items-center gap-2 bg-accent hover:bg-accent-dark text-white px-4 py-2 rounded-lg text-sm disabled:opacity-50 disabled:cursor-not-allowed"
|
||||||
>
|
>
|
||||||
<Play size={16} />
|
<Play size={16} />
|
||||||
Start Pre-Training
|
{isPointRelease ? 'Start Point Release' : 'Start Training'}
|
||||||
</button>
|
</button>
|
||||||
{trainingFlops === 0 && totalFlops === 0 && (
|
{trainingFlops === 0 && totalFlops === 0 && (
|
||||||
<p className="text-xs text-warning mt-1">Build a data center and order racks first</p>
|
<p className="text-xs text-warning mt-1">Build a data center and order racks first</p>
|
||||||
@@ -495,63 +653,77 @@ export function ModelsPage() {
|
|||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
<h3 className="font-semibold">Model Families</h3>
|
<h3 className="font-semibold">Model Families</h3>
|
||||||
{families.map(family => {
|
{families.map(family => {
|
||||||
const base = baseModels.find(m => m.familyId === family.id);
|
const familyModels = baseModels.filter(m => m.familyId === family.id);
|
||||||
const variants = family.variants;
|
const variants = family.variants;
|
||||||
const isExpanded = expandedModel === family.id;
|
const isExpanded = expandedModel === family.id;
|
||||||
|
|
||||||
if (!base) return null;
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div key={family.id} className="bg-surface-900 border border-surface-700 rounded-xl p-4">
|
<div key={family.id} className="bg-surface-900 border border-surface-700 rounded-xl p-4">
|
||||||
<div className="flex items-center justify-between">
|
{/* Family header */}
|
||||||
|
<div className="flex items-center justify-between mb-2">
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<button
|
<button onClick={() => setExpandedModel(isExpanded ? null : family.id)} className="text-surface-400 hover:text-surface-200">
|
||||||
onClick={() => setExpandedModel(isExpanded ? null : family.id)}
|
|
||||||
className="text-surface-400 hover:text-surface-200"
|
|
||||||
>
|
|
||||||
{isExpanded ? <ChevronUp size={14} /> : <ChevronDown size={14} />}
|
{isExpanded ? <ChevronUp size={14} /> : <ChevronDown size={14} />}
|
||||||
</button>
|
</button>
|
||||||
<div>
|
|
||||||
<h4 className="font-medium">{family.name} <span className="text-xs text-surface-500">Gen {family.generation}</span></h4>
|
<h4 className="font-medium">{family.name} <span className="text-xs text-surface-500">Gen {family.generation}</span></h4>
|
||||||
<div className="text-xs text-surface-400">
|
|
||||||
{base.architecture.totalParameters}B {base.architecture.type.toUpperCase()} · Cap: {base.rawCapability.toFixed(1)} · Safety: {base.safetyProfile.overallSafety.toFixed(0)}
|
|
||||||
{variants.length > 0 && <span className="ml-1 text-surface-500">· {variants.length} variant{variants.length > 1 ? 's' : ''}</span>}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<ModelActions model={base} isOpenSourced={openSourcedModels.includes(base.id)} onDeploy={() => deployModel(base.id)} onOpenSource={() => openSourceModel(base.id)} />
|
<button onClick={() => { setModelsTab('train'); setFamilyMode('existing'); setSelectedFamilyId(family.id); }} className="text-xs text-accent hover:text-accent-light">
|
||||||
|
+ Train New Size
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{isExpanded && (
|
{/* Model rows */}
|
||||||
|
{familyModels.map(model => (
|
||||||
|
<div key={model.id} className="flex items-center justify-between py-2 border-t border-surface-800 text-sm">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<span className="font-medium">{model.name}</span>
|
||||||
|
<span className="text-xs text-surface-500">{model.architecture.totalParameters}B</span>
|
||||||
|
<span className="text-xs text-surface-500">Cap: {model.rawCapability.toFixed(1)}</span>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
{model.isDeployed ? (
|
||||||
|
<span className="text-xs px-2 py-0.5 rounded-full bg-success/20 text-success">Deployed</span>
|
||||||
|
) : (
|
||||||
|
<button onClick={() => deployModel(model.id)} className="text-xs bg-accent hover:bg-accent-dark text-white rounded px-2 py-1">Deploy</button>
|
||||||
|
)}
|
||||||
|
<button onClick={() => {
|
||||||
|
setModelsTab('train');
|
||||||
|
setFamilyMode('existing');
|
||||||
|
setSelectedFamilyId(family.id);
|
||||||
|
setIsPointRelease(true);
|
||||||
|
setSourceModelId(model.id);
|
||||||
|
setParameterCount(model.architecture.totalParameters);
|
||||||
|
setArchType(model.architecture.type);
|
||||||
|
setContextWindow(model.architecture.contextWindow);
|
||||||
|
setDataMix(model.dataMix);
|
||||||
|
setSftSpecs(model.sftSpecializations);
|
||||||
|
setAlignMethod(model.alignmentMethod ?? 'rlhf');
|
||||||
|
}} className="text-xs text-surface-400 hover:text-accent">Iterate</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{familyModels.length === 0 && (
|
||||||
|
<p className="text-xs text-surface-500 py-2">Training in progress...</p>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Expanded: details, quantize, eval for each model */}
|
||||||
|
{isExpanded && familyModels.length > 0 && (
|
||||||
<div className="mt-4 pt-4 border-t border-surface-700 space-y-4">
|
<div className="mt-4 pt-4 border-t border-surface-700 space-y-4">
|
||||||
{/* Base model details */}
|
{familyModels.map(model => (
|
||||||
<ModelDetails model={base} benchmarkResults={benchmarkResults} />
|
<div key={model.id} className="space-y-3">
|
||||||
|
<h5 className="text-sm font-medium text-surface-300">{model.name}</h5>
|
||||||
|
<ModelDetails model={model} benchmarkResults={benchmarkResults} />
|
||||||
|
<QuantizationCreator model={model} completedResearch={completedResearch} onQuantize={createQuantization} />
|
||||||
|
<BenchmarkEvaluator modelId={model.id} modelName={model.name} availableBenchmarks={availableBenchmarks} benchmarkResults={benchmarkResults} evalJobs={evalJobs} onStartEval={startEvaluation} />
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
|
||||||
{/* Variant creation */}
|
|
||||||
<VariantCreator
|
|
||||||
model={base}
|
|
||||||
completedResearch={completedResearch}
|
|
||||||
onDistill={createDistillation}
|
|
||||||
onFineTune={createFineTune}
|
|
||||||
onQuantize={createQuantization}
|
|
||||||
/>
|
|
||||||
|
|
||||||
{/* Benchmark evaluation */}
|
|
||||||
<BenchmarkEvaluator
|
|
||||||
modelId={base.id}
|
|
||||||
modelName={base.name}
|
|
||||||
availableBenchmarks={availableBenchmarks}
|
|
||||||
benchmarkResults={benchmarkResults}
|
|
||||||
evalJobs={evalJobs}
|
|
||||||
onStartEval={startEvaluation}
|
|
||||||
/>
|
|
||||||
|
|
||||||
{/* Variants tree */}
|
|
||||||
{variants.length > 0 && (
|
{variants.length > 0 && (
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<span className="text-xs font-medium text-surface-300">Variants</span>
|
<span className="text-xs font-medium text-surface-300">Quantized Variants</span>
|
||||||
{variants.map(variant => (
|
{variants.map(variant => (
|
||||||
<VariantCard
|
<VariantCard
|
||||||
key={variant.id}
|
key={variant.id}
|
||||||
@@ -754,23 +926,16 @@ function ModelDetails({ model, benchmarkResults }: { model: BaseModel; benchmark
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function VariantCreator({ model, completedResearch, onDistill, onFineTune, onQuantize }: {
|
function QuantizationCreator({ model, completedResearch, onQuantize }: {
|
||||||
model: BaseModel;
|
model: BaseModel;
|
||||||
completedResearch: string[];
|
completedResearch: string[];
|
||||||
onDistill: (baseModelId: string, targetParams: number, name: string) => void;
|
|
||||||
onFineTune: (baseModelId: string, spec: SFTSpecialization, name: string) => void;
|
|
||||||
onQuantize: (baseModelId: string, level: QuantizationLevel, name: string) => void;
|
onQuantize: (baseModelId: string, level: QuantizationLevel, name: string) => void;
|
||||||
}) {
|
}) {
|
||||||
const [showCreator, setShowCreator] = useState(false);
|
const [showCreator, setShowCreator] = useState(false);
|
||||||
const [creatorTab, setCreatorTab] = useState<'distill' | 'finetune' | 'quantize'>('quantize');
|
|
||||||
const [distillParams, setDistillParams] = useState(7);
|
|
||||||
const [ftSpec, setFtSpec] = useState<SFTSpecialization>('code');
|
|
||||||
const [quantLevel, setQuantLevel] = useState<QuantizationLevel>('int8');
|
const [quantLevel, setQuantLevel] = useState<QuantizationLevel>('int8');
|
||||||
|
|
||||||
const hasDistillation = completedResearch.includes('distillation');
|
|
||||||
const hasQuantization = completedResearch.includes('quantization') || completedResearch.includes('model-compression');
|
const hasQuantization = completedResearch.includes('quantization') || completedResearch.includes('model-compression');
|
||||||
|
|
||||||
const smallerParams = PARAMETER_OPTIONS.filter(p => p < model.architecture.totalParameters);
|
if (!hasQuantization) return null;
|
||||||
|
|
||||||
if (!showCreator) {
|
if (!showCreator) {
|
||||||
return (
|
return (
|
||||||
@@ -778,7 +943,7 @@ function VariantCreator({ model, completedResearch, onDistill, onFineTune, onQua
|
|||||||
onClick={() => setShowCreator(true)}
|
onClick={() => setShowCreator(true)}
|
||||||
className="flex items-center gap-1 text-xs text-accent hover:text-accent-light"
|
className="flex items-center gap-1 text-xs text-accent hover:text-accent-light"
|
||||||
>
|
>
|
||||||
<Wrench size={12} /> Create Variant
|
<Zap size={12} /> Create Quantized Variant
|
||||||
</button>
|
</button>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -786,69 +951,10 @@ function VariantCreator({ model, completedResearch, onDistill, onFineTune, onQua
|
|||||||
return (
|
return (
|
||||||
<div className="bg-surface-800/50 rounded-lg p-3 space-y-3">
|
<div className="bg-surface-800/50 rounded-lg p-3 space-y-3">
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<span className="text-xs font-medium text-surface-300">Create Variant</span>
|
<span className="text-xs font-medium text-surface-300">Quantize {model.name}</span>
|
||||||
<button onClick={() => setShowCreator(false)} className="text-xs text-surface-500 hover:text-surface-300">Close</button>
|
<button onClick={() => setShowCreator(false)} className="text-xs text-surface-500 hover:text-surface-300">Close</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex gap-1">
|
|
||||||
{hasDistillation && (
|
|
||||||
<button onClick={() => setCreatorTab('distill')}
|
|
||||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] border ${creatorTab === 'distill' ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-400'}`}>
|
|
||||||
<Scissors size={10} /> Distill
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
<button onClick={() => setCreatorTab('finetune')}
|
|
||||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] border ${creatorTab === 'finetune' ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-400'}`}>
|
|
||||||
<Wrench size={10} /> Fine-tune
|
|
||||||
</button>
|
|
||||||
{hasQuantization && (
|
|
||||||
<button onClick={() => setCreatorTab('quantize')}
|
|
||||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] border ${creatorTab === 'quantize' ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-400'}`}>
|
|
||||||
<Zap size={10} /> Quantize
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{creatorTab === 'distill' && hasDistillation && (
|
|
||||||
<div className="space-y-2">
|
|
||||||
<div>
|
|
||||||
<label className="text-[10px] text-surface-400">Target Size</label>
|
|
||||||
<select value={distillParams} onChange={e => setDistillParams(Number(e.target.value))}
|
|
||||||
className="w-full bg-surface-800 border border-surface-600 rounded px-2 py-1 text-xs mt-0.5">
|
|
||||||
{smallerParams.map(p => <option key={p} value={p}>{p}B</option>)}
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
<div className="text-[10px] text-surface-500">
|
|
||||||
Retention: ~{((0.70 + (distillParams / model.architecture.totalParameters) * 0.25) * 100).toFixed(0)}% quality
|
|
||||||
</div>
|
|
||||||
<button onClick={() => { onDistill(model.id, distillParams, `${model.name}-${distillParams}B`); setShowCreator(false); }}
|
|
||||||
className="bg-accent hover:bg-accent-dark text-white rounded px-3 py-1 text-xs">
|
|
||||||
Start Distillation
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{creatorTab === 'finetune' && (
|
|
||||||
<div className="space-y-2">
|
|
||||||
<div>
|
|
||||||
<label className="text-[10px] text-surface-400">Specialization</label>
|
|
||||||
<div className="flex flex-wrap gap-1 mt-0.5">
|
|
||||||
{SFT_OPTIONS.map(opt => (
|
|
||||||
<button key={opt.value} onClick={() => setFtSpec(opt.value)}
|
|
||||||
className={`px-2 py-0.5 rounded text-[10px] border ${ftSpec === opt.value ? 'bg-accent/20 border-accent text-accent-light' : 'bg-surface-800 border-surface-600 text-surface-400'}`}>
|
|
||||||
{opt.label}
|
|
||||||
</button>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<button onClick={() => { onFineTune(model.id, ftSpec, `${model.name}-${ftSpec.charAt(0).toUpperCase() + ftSpec.slice(1)}`); setShowCreator(false); }}
|
|
||||||
className="bg-accent hover:bg-accent-dark text-white rounded px-3 py-1 text-xs">
|
|
||||||
Start Fine-Tuning
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{creatorTab === 'quantize' && hasQuantization && (
|
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<div>
|
<div>
|
||||||
<label className="text-[10px] text-surface-400">Quantization Level</label>
|
<label className="text-[10px] text-surface-400">Quantization Level</label>
|
||||||
@@ -870,7 +976,6 @@ function VariantCreator({ model, completedResearch, onDistill, onFineTune, onQua
|
|||||||
Start Quantization
|
Start Quantization
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -961,11 +1066,6 @@ function VariantCard({ variant, familyId, benchmarkResults, availableBenchmarks,
|
|||||||
const [isExpanded, setIsExpanded] = useState(false);
|
const [isExpanded, setIsExpanded] = useState(false);
|
||||||
const variantResults = benchmarkResults.filter(r => r.modelId === variant.id);
|
const variantResults = benchmarkResults.filter(r => r.modelId === variant.id);
|
||||||
|
|
||||||
const typeLabel = variant.variantType === 'distilled' ? 'Distilled'
|
|
||||||
: variant.variantType === 'fine-tuned' ? 'Fine-tuned' : 'Quantized';
|
|
||||||
const typeColor = variant.variantType === 'distilled' ? 'text-purple-400'
|
|
||||||
: variant.variantType === 'fine-tuned' ? 'text-yellow-400' : 'text-green-400';
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="bg-surface-800/50 rounded-lg p-3 ml-4 border-l-2 border-surface-600">
|
<div className="bg-surface-800/50 rounded-lg p-3 ml-4 border-l-2 border-surface-600">
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
@@ -975,9 +1075,8 @@ function VariantCard({ variant, familyId, benchmarkResults, availableBenchmarks,
|
|||||||
</button>
|
</button>
|
||||||
<div>
|
<div>
|
||||||
<span className="text-sm font-medium">{variant.name}</span>
|
<span className="text-sm font-medium">{variant.name}</span>
|
||||||
<span className={`text-[10px] ml-2 ${typeColor}`}>{typeLabel}</span>
|
<span className="text-[10px] ml-2 text-green-400">Quantized</span>
|
||||||
{variant.quantization && <span className="text-[10px] text-surface-500 ml-1">{variant.quantization.toUpperCase()}</span>}
|
{variant.quantization && <span className="text-[10px] text-surface-500 ml-1">{variant.quantization.toUpperCase()}</span>}
|
||||||
{variant.finetuneSpecialization && <span className="text-[10px] text-surface-500 ml-1">{variant.finetuneSpecialization}</span>}
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
@@ -1108,16 +1207,15 @@ function BenchmarkLeaderboard({ benchmarkResults, baseModels, families, availabl
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function StageBar({ label, active, complete, progress, configured = true }: {
|
function StageBar({ label, active, complete, progress }: {
|
||||||
label: string; active: boolean; complete: boolean; progress: number; configured?: boolean;
|
label: string; active: boolean; complete: boolean; progress: number;
|
||||||
}) {
|
}) {
|
||||||
return (
|
return (
|
||||||
<div className="flex-1">
|
<div className="flex-1">
|
||||||
<div className={`text-[9px] text-center mb-0.5 ${!configured ? 'text-warning' : 'text-surface-500'}`}>
|
<div className="text-[9px] text-center mb-0.5 text-surface-500">
|
||||||
{label}{!configured && ' (skip)'}
|
{label}
|
||||||
</div>
|
</div>
|
||||||
<div className={`h-1 rounded-full ${
|
<div className={`h-1 rounded-full ${
|
||||||
!configured ? 'bg-surface-800 border border-dashed border-warning/30' :
|
|
||||||
complete ? 'bg-success' :
|
complete ? 'bg-success' :
|
||||||
active ? 'bg-accent' :
|
active ? 'bg-accent' :
|
||||||
'bg-surface-700'
|
'bg-surface-700'
|
||||||
@@ -1129,110 +1227,3 @@ function StageBar({ label, active, complete, progress, configured = true }: {
|
|||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function PostTrainingConfig({ pipelineId, hasAlignmentResearch, completedResearch, configureSFT, configureAlignment, sftConfigured, alignmentConfigured }: {
|
|
||||||
pipelineId: string;
|
|
||||||
hasAlignmentResearch: boolean;
|
|
||||||
completedResearch: string[];
|
|
||||||
configureSFT: (pipelineId: string, specializations: SFTSpecialization[]) => void;
|
|
||||||
configureAlignment: (pipelineId: string, method: AlignmentMethod, safetyWeight: number) => void;
|
|
||||||
sftConfigured: boolean;
|
|
||||||
alignmentConfigured: boolean;
|
|
||||||
}) {
|
|
||||||
const [selectedSpecs, setSelectedSpecs] = useState<SFTSpecialization[]>(['general']);
|
|
||||||
const [alignMethod, setAlignMethod] = useState<AlignmentMethod>('rlhf');
|
|
||||||
const [safetyWeight, setSafetyWeight] = useState(0.5);
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="space-y-3 bg-surface-800/50 rounded-lg p-3">
|
|
||||||
<div className="text-xs font-medium text-surface-300">Configure Post-Training (optional)</div>
|
|
||||||
|
|
||||||
{!sftConfigured ? (
|
|
||||||
<div className="space-y-1">
|
|
||||||
<div className="flex items-center gap-1 text-xs text-surface-400">
|
|
||||||
<Beaker size={10} /> Supervised Fine-Tuning
|
|
||||||
</div>
|
|
||||||
<div className="flex flex-wrap gap-1">
|
|
||||||
{SFT_OPTIONS.map(opt => (
|
|
||||||
<button
|
|
||||||
key={opt.value}
|
|
||||||
onClick={() => setSelectedSpecs(prev =>
|
|
||||||
prev.includes(opt.value)
|
|
||||||
? prev.filter(s => s !== opt.value)
|
|
||||||
: [...prev, opt.value]
|
|
||||||
)}
|
|
||||||
className={`px-2 py-0.5 rounded text-[10px] border transition-colors ${
|
|
||||||
selectedSpecs.includes(opt.value)
|
|
||||||
? 'bg-accent/20 border-accent text-accent-light'
|
|
||||||
: 'bg-surface-800 border-surface-600 text-surface-400'
|
|
||||||
}`}
|
|
||||||
>
|
|
||||||
{opt.label}
|
|
||||||
</button>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
<button
|
|
||||||
onClick={() => configureSFT(pipelineId, selectedSpecs)}
|
|
||||||
className="text-[10px] text-accent hover:text-accent-light"
|
|
||||||
>
|
|
||||||
Enable SFT
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="flex items-center gap-1 text-xs text-success">
|
|
||||||
<Beaker size={10} /> SFT configured
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{!alignmentConfigured ? (
|
|
||||||
hasAlignmentResearch ? (
|
|
||||||
<div className="space-y-1">
|
|
||||||
<div className="flex items-center gap-1 text-xs text-surface-400">
|
|
||||||
<Shield size={10} /> Alignment
|
|
||||||
</div>
|
|
||||||
<div className="flex gap-1">
|
|
||||||
{(Object.keys(ALIGNMENT_METHODS) as AlignmentMethod[]).map(method => {
|
|
||||||
const isAvailable = completedResearch.includes(ALIGNMENT_METHODS[method].requiredResearch);
|
|
||||||
return (
|
|
||||||
<button
|
|
||||||
key={method}
|
|
||||||
disabled={!isAvailable}
|
|
||||||
onClick={() => setAlignMethod(method)}
|
|
||||||
className={`px-2 py-0.5 rounded text-[10px] border transition-colors ${
|
|
||||||
alignMethod === method ? 'bg-accent/20 border-accent text-accent-light' :
|
|
||||||
!isAvailable ? 'bg-surface-800 border-surface-700 text-surface-600 cursor-not-allowed' :
|
|
||||||
'bg-surface-800 border-surface-600 text-surface-400'
|
|
||||||
}`}
|
|
||||||
>
|
|
||||||
{method.toUpperCase()}
|
|
||||||
</button>
|
|
||||||
);
|
|
||||||
})}
|
|
||||||
</div>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<span className="text-[10px] text-surface-400">Safety</span>
|
|
||||||
<input type="range" min={0} max={100} value={safetyWeight * 100}
|
|
||||||
onChange={(e) => setSafetyWeight(Number(e.target.value) / 100)}
|
|
||||||
className="flex-1 accent-accent h-1" />
|
|
||||||
<span className="text-[10px] text-surface-400">Helpful</span>
|
|
||||||
</div>
|
|
||||||
<button
|
|
||||||
onClick={() => configureAlignment(pipelineId, alignMethod, safetyWeight)}
|
|
||||||
className="text-[10px] text-accent hover:text-accent-light"
|
|
||||||
>
|
|
||||||
Enable Alignment
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="flex items-center gap-1 text-xs text-surface-500">
|
|
||||||
<Shield size={10} /> Alignment requires research
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
) : (
|
|
||||||
<div className="flex items-center gap-1 text-xs text-success">
|
|
||||||
<Shield size={10} /> Alignment configured
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|||||||
+86
-112
@@ -13,7 +13,7 @@ import type {
|
|||||||
CoolingType, NetworkFabric,
|
CoolingType, NetworkFabric,
|
||||||
FundingRoundType, OverloadPolicy,
|
FundingRoundType, OverloadPolicy,
|
||||||
TrainingPipeline, ModelFamily, DataMixAllocation,
|
TrainingPipeline, ModelFamily, DataMixAllocation,
|
||||||
ModelArchitecture,
|
ModelArchitecture, AlignmentMethod, SizeTier,
|
||||||
SFTSpecialization, QuantizationLevel, VariantCreationJob,
|
SFTSpecialization, QuantizationLevel, VariantCreationJob,
|
||||||
EvalJob,
|
EvalJob,
|
||||||
ConsumerTierId, ApiTierId,
|
ConsumerTierId, ApiTierId,
|
||||||
@@ -36,9 +36,10 @@ import {
|
|||||||
COOLING_TYPE_CONFIGS, COOLING_ORDER, NETWORK_FABRIC_CONFIGS, FABRIC_ORDER,
|
COOLING_TYPE_CONFIGS, COOLING_ORDER, NETWORK_FABRIC_CONFIGS, FABRIC_ORDER,
|
||||||
DEFAULT_DATA_MIX,
|
DEFAULT_DATA_MIX,
|
||||||
MAX_CONCURRENT_TRAINING,
|
MAX_CONCURRENT_TRAINING,
|
||||||
DISTILLATION_TIME_FRACTION, DISTILLATION_COMPUTE_FRACTION,
|
|
||||||
FINETUNE_TIME_FRACTION, FINETUNE_COMPUTE_FRACTION,
|
|
||||||
QUANTIZATION_TICKS,
|
QUANTIZATION_TICKS,
|
||||||
|
SFT_TIME_FRACTION, ALIGNMENT_TIME_FRACTION,
|
||||||
|
SIZE_TIER_MAP, SIZE_TIER_LABELS,
|
||||||
|
POINT_RELEASE_TIME_FRACTION, POINT_RELEASE_MAX_VERSION,
|
||||||
} from '@ai-tycoon/shared';
|
} from '@ai-tycoon/shared';
|
||||||
import {
|
import {
|
||||||
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
|
emptyDCNetworkSummary, emptyCampusNetworkSummary, emptyClusterNetworkSummary,
|
||||||
@@ -115,11 +116,21 @@ interface Actions {
|
|||||||
upgradeDataCenter: (dataCenterId: string, upgrade: 'cooling' | 'redundancy') => void;
|
upgradeDataCenter: (dataCenterId: string, upgrade: 'cooling' | 'redundancy') => void;
|
||||||
upgradeCoolingType: (dataCenterId: string, targetCooling: CoolingType) => void;
|
upgradeCoolingType: (dataCenterId: string, targetCooling: CoolingType) => void;
|
||||||
upgradeNetworkFabric: (dataCenterId: string, targetFabric: NetworkFabric) => void;
|
upgradeNetworkFabric: (dataCenterId: string, targetFabric: NetworkFabric) => void;
|
||||||
startTrainingPipeline: (config: { modelName: string; architecture: ModelArchitecture; dataMix: DataMixAllocation; allocatedComputeFraction: number; targetTokens: number; totalTicks: number }) => void;
|
startTrainingPipeline: (config: {
|
||||||
configureSFT: (pipelineId: string, specializations: import('@ai-tycoon/shared').SFTSpecialization[]) => void;
|
familyId?: string;
|
||||||
configureAlignment: (pipelineId: string, method: import('@ai-tycoon/shared').AlignmentMethod, safetyWeight: number) => void;
|
familyName?: string;
|
||||||
createDistillation: (baseModelId: string, targetParameters: number, variantName: string) => void;
|
architecture: ModelArchitecture;
|
||||||
createFineTune: (baseModelId: string, specialization: SFTSpecialization, variantName: string) => void;
|
dataMix: DataMixAllocation;
|
||||||
|
allocatedComputeFraction: number;
|
||||||
|
targetTokens: number;
|
||||||
|
totalTicks: number;
|
||||||
|
sftSpecializations: SFTSpecialization[];
|
||||||
|
alignmentMethod: AlignmentMethod;
|
||||||
|
alignmentSafetyWeight: number;
|
||||||
|
isPointRelease?: boolean;
|
||||||
|
sourceModelId?: string;
|
||||||
|
}) => void;
|
||||||
|
startPointRelease: (baseModelId: string) => void;
|
||||||
createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void;
|
createQuantization: (baseModelId: string, level: QuantizationLevel, variantName: string) => void;
|
||||||
startEvaluation: (modelId: string, benchmarkIds: string[]) => void;
|
startEvaluation: (modelId: string, benchmarkIds: string[]) => void;
|
||||||
deployModel: (modelId: string) => void;
|
deployModel: (modelId: string) => void;
|
||||||
@@ -917,29 +928,52 @@ export const useGameStore = create<Store>()(
|
|||||||
|
|
||||||
startTrainingPipeline: (config) => {
|
startTrainingPipeline: (config) => {
|
||||||
let created = false;
|
let created = false;
|
||||||
|
let toastName = '';
|
||||||
set((s) => {
|
set((s) => {
|
||||||
const activeCount = s.models.activeTrainingPipelines.filter(p => p.status === 'active' || p.status === 'stalled').length;
|
const activeCount = s.models.activeTrainingPipelines.filter(p => p.status === 'active' || p.status === 'stalled').length;
|
||||||
const maxSlots = MAX_CONCURRENT_TRAINING[s.meta.currentEra] ?? 1;
|
const maxSlots = MAX_CONCURRENT_TRAINING[s.meta.currentEra] ?? 1;
|
||||||
if (activeCount >= maxSlots) return s;
|
if (activeCount >= maxSlots) return s;
|
||||||
|
|
||||||
created = true;
|
created = true;
|
||||||
const familyId = uuid();
|
|
||||||
const pipelineId = uuid();
|
|
||||||
const generation = s.models.families.length + 1;
|
|
||||||
|
|
||||||
|
let familyId: string;
|
||||||
|
let updatedFamilies = [...s.models.families];
|
||||||
|
|
||||||
|
if (config.familyId) {
|
||||||
|
familyId = config.familyId;
|
||||||
|
} else {
|
||||||
|
familyId = uuid();
|
||||||
|
const generation = s.models.families.length + 1;
|
||||||
const family: ModelFamily = {
|
const family: ModelFamily = {
|
||||||
id: familyId,
|
id: familyId,
|
||||||
name: config.modelName,
|
name: config.familyName ?? 'Model',
|
||||||
generation,
|
generation,
|
||||||
baseModelId: null,
|
baseModelIds: [],
|
||||||
variants: [],
|
variants: [],
|
||||||
createdAtTick: s.meta.tickCount,
|
createdAtTick: s.meta.tickCount,
|
||||||
};
|
};
|
||||||
|
updatedFamilies = [...updatedFamilies, family];
|
||||||
|
}
|
||||||
|
|
||||||
|
const sizeTier: SizeTier = SIZE_TIER_MAP[config.architecture.totalParameters] ?? 'small';
|
||||||
|
const familyName = config.familyName ?? updatedFamilies.find(f => f.id === familyId)?.name ?? 'Model';
|
||||||
|
const version = config.isPointRelease && config.sourceModelId
|
||||||
|
? (() => {
|
||||||
|
const src = s.models.baseModels.find(m => m.id === config.sourceModelId);
|
||||||
|
return src ? Math.round((src.version + 0.1) * 10) / 10 : 1.0;
|
||||||
|
})()
|
||||||
|
: 1.0;
|
||||||
|
const modelName = `${familyName} ${SIZE_TIER_LABELS[sizeTier]} v${version.toFixed(1)}`;
|
||||||
|
toastName = modelName;
|
||||||
|
|
||||||
|
const baseTotalTicks = config.isPointRelease
|
||||||
|
? Math.ceil(config.totalTicks * POINT_RELEASE_TIME_FRACTION)
|
||||||
|
: config.totalTicks;
|
||||||
|
|
||||||
const pipeline: TrainingPipeline = {
|
const pipeline: TrainingPipeline = {
|
||||||
id: pipelineId,
|
id: uuid(),
|
||||||
familyId,
|
familyId,
|
||||||
modelName: config.modelName,
|
modelName,
|
||||||
architecture: config.architecture,
|
architecture: config.architecture,
|
||||||
dataMix: config.dataMix,
|
dataMix: config.dataMix,
|
||||||
currentStage: 'pretraining',
|
currentStage: 'pretraining',
|
||||||
@@ -949,130 +983,70 @@ export const useGameStore = create<Store>()(
|
|||||||
processedTokens: 0,
|
processedTokens: 0,
|
||||||
computeAllocated: 0,
|
computeAllocated: 0,
|
||||||
progressTicks: 0,
|
progressTicks: 0,
|
||||||
totalTicks: config.totalTicks,
|
totalTicks: baseTotalTicks,
|
||||||
lossValue: 10,
|
lossValue: 10,
|
||||||
chinchillaRatio: config.targetTokens / (config.architecture.totalParameters * 1e9),
|
chinchillaRatio: config.targetTokens / (config.architecture.totalParameters * 1e9),
|
||||||
isComplete: false,
|
isComplete: false,
|
||||||
},
|
},
|
||||||
sft: null,
|
sft: {
|
||||||
alignment: null,
|
specializations: config.sftSpecializations,
|
||||||
|
progressTicks: 0,
|
||||||
|
totalTicks: Math.ceil(baseTotalTicks * SFT_TIME_FRACTION),
|
||||||
|
isComplete: false,
|
||||||
|
},
|
||||||
|
alignment: {
|
||||||
|
method: config.alignmentMethod,
|
||||||
|
safetyWeight: config.alignmentSafetyWeight,
|
||||||
|
helpfulnessWeight: 1 - config.alignmentSafetyWeight,
|
||||||
|
progressTicks: 0,
|
||||||
|
totalTicks: Math.ceil(baseTotalTicks * ALIGNMENT_TIME_FRACTION),
|
||||||
|
isComplete: false,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
status: 'active',
|
status: 'active',
|
||||||
allocatedComputeFraction: config.allocatedComputeFraction,
|
allocatedComputeFraction: config.allocatedComputeFraction,
|
||||||
events: [],
|
events: [],
|
||||||
startedAtTick: s.meta.tickCount,
|
startedAtTick: s.meta.tickCount,
|
||||||
|
sizeTier,
|
||||||
|
isPointRelease: config.isPointRelease ?? false,
|
||||||
|
sourceModelId: config.sourceModelId ?? null,
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
models: {
|
models: {
|
||||||
...s.models,
|
...s.models,
|
||||||
families: [...s.models.families, family],
|
families: updatedFamilies,
|
||||||
activeTrainingPipelines: [...s.models.activeTrainingPipelines, pipeline],
|
activeTrainingPipelines: [...s.models.activeTrainingPipelines, pipeline],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
if (created) {
|
if (created) {
|
||||||
get().addNotification({ title: 'Training Started', message: `${config.modelName} pre-training has begun.`, type: 'info', tick: get().meta.tickCount });
|
get().addNotification({ title: 'Training Started', message: `${toastName} training has begun.`, type: 'info', tick: get().meta.tickCount });
|
||||||
set({ modelsTab: 'overview' as ModelsTab });
|
set({ modelsTab: 'overview' as ModelsTab });
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
configureSFT: (pipelineId, specializations) => {
|
startPointRelease: (baseModelId) => {
|
||||||
set((s) => ({
|
const s = get();
|
||||||
models: {
|
|
||||||
...s.models,
|
|
||||||
activeTrainingPipelines: s.models.activeTrainingPipelines.map(p =>
|
|
||||||
p.id === pipelineId ? {
|
|
||||||
...p,
|
|
||||||
stages: {
|
|
||||||
...p.stages,
|
|
||||||
sft: {
|
|
||||||
specializations,
|
|
||||||
progressTicks: 0,
|
|
||||||
totalTicks: Math.ceil(p.stages.pretraining.totalTicks * 0.10),
|
|
||||||
isComplete: false,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
} : p,
|
|
||||||
),
|
|
||||||
},
|
|
||||||
}));
|
|
||||||
get().addNotification({ title: 'SFT Configured', message: `${specializations.join(', ')} specializations enabled.`, type: 'success', tick: get().meta.tickCount });
|
|
||||||
},
|
|
||||||
|
|
||||||
configureAlignment: (pipelineId, method, safetyWeight) => {
|
|
||||||
set((s) => ({
|
|
||||||
models: {
|
|
||||||
...s.models,
|
|
||||||
activeTrainingPipelines: s.models.activeTrainingPipelines.map(p =>
|
|
||||||
p.id === pipelineId ? {
|
|
||||||
...p,
|
|
||||||
stages: {
|
|
||||||
...p.stages,
|
|
||||||
alignment: {
|
|
||||||
method,
|
|
||||||
safetyWeight,
|
|
||||||
helpfulnessWeight: 1 - safetyWeight,
|
|
||||||
progressTicks: 0,
|
|
||||||
totalTicks: Math.ceil(p.stages.pretraining.totalTicks * 0.08),
|
|
||||||
isComplete: false,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
} : p,
|
|
||||||
),
|
|
||||||
},
|
|
||||||
}));
|
|
||||||
get().addNotification({ title: 'Alignment Configured', message: `${method.toUpperCase()} alignment enabled.`, type: 'success', tick: get().meta.tickCount });
|
|
||||||
},
|
|
||||||
|
|
||||||
createDistillation: (baseModelId, targetParameters, variantName) => {
|
|
||||||
let created = false;
|
|
||||||
set((s) => {
|
|
||||||
const base = s.models.baseModels.find(m => m.id === baseModelId);
|
const base = s.models.baseModels.find(m => m.id === baseModelId);
|
||||||
if (!base) return s;
|
if (!base) return;
|
||||||
created = true;
|
if (base.version >= POINT_RELEASE_MAX_VERSION) return;
|
||||||
const job: VariantCreationJob = {
|
const family = s.models.families.find(f => f.id === base.familyId);
|
||||||
id: uuid(),
|
if (!family) return;
|
||||||
familyId: base.familyId,
|
|
||||||
baseModelId,
|
|
||||||
jobType: 'distillation',
|
|
||||||
config: { targetParameters, targetArchitecture: base.architecture.type, variantName },
|
|
||||||
progressTicks: 0,
|
|
||||||
totalTicks: Math.ceil(base.trainingCostTotal > 0 ? DISTILLATION_TIME_FRACTION * 120 : 30),
|
|
||||||
allocatedComputeFraction: DISTILLATION_COMPUTE_FRACTION,
|
|
||||||
status: 'active',
|
|
||||||
};
|
|
||||||
return { models: { ...s.models, variantJobs: [...s.models.variantJobs, job] } };
|
|
||||||
});
|
|
||||||
if (created) {
|
|
||||||
get().addNotification({ title: 'Distillation Started', message: `${variantName} distillation in progress.`, type: 'info', tick: get().meta.tickCount });
|
|
||||||
set({ modelsTab: 'overview' as ModelsTab });
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
createFineTune: (baseModelId, specialization, variantName) => {
|
get().startTrainingPipeline({
|
||||||
let created = false;
|
|
||||||
set((s) => {
|
|
||||||
const base = s.models.baseModels.find(m => m.id === baseModelId);
|
|
||||||
if (!base) return s;
|
|
||||||
created = true;
|
|
||||||
const job: VariantCreationJob = {
|
|
||||||
id: uuid(),
|
|
||||||
familyId: base.familyId,
|
familyId: base.familyId,
|
||||||
baseModelId,
|
architecture: base.architecture,
|
||||||
jobType: 'fine-tuning',
|
dataMix: base.dataMix,
|
||||||
config: { specialization, datasetIds: [], variantName },
|
allocatedComputeFraction: 1.0,
|
||||||
progressTicks: 0,
|
targetTokens: base.architecture.totalParameters * 20e9,
|
||||||
totalTicks: Math.ceil(FINETUNE_TIME_FRACTION * 120),
|
totalTicks: Math.ceil(base.architecture.totalParameters * 2 + 60),
|
||||||
allocatedComputeFraction: FINETUNE_COMPUTE_FRACTION,
|
sftSpecializations: base.sftSpecializations,
|
||||||
status: 'active',
|
alignmentMethod: base.alignmentMethod ?? 'rlhf',
|
||||||
};
|
alignmentSafetyWeight: 0.5,
|
||||||
return { models: { ...s.models, variantJobs: [...s.models.variantJobs, job] } };
|
isPointRelease: true,
|
||||||
|
sourceModelId: baseModelId,
|
||||||
});
|
});
|
||||||
if (created) {
|
|
||||||
get().addNotification({ title: 'Fine-Tuning Started', message: `${variantName} fine-tuning in progress.`, type: 'info', tick: get().meta.tickCount });
|
|
||||||
set({ modelsTab: 'overview' as ModelsTab });
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
|
||||||
createQuantization: (baseModelId, level, variantName) => {
|
createQuantization: (baseModelId, level, variantName) => {
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ import type {
|
|||||||
import { BENCHMARKS } from '../data/benchmarks';
|
import { BENCHMARKS } from '../data/benchmarks';
|
||||||
import {
|
import {
|
||||||
uuid, VRAM_REQUIREMENTS_BY_GENERATION,
|
uuid, VRAM_REQUIREMENTS_BY_GENERATION,
|
||||||
SFT_TIME_FRACTION, SFT_COMPUTE_FRACTION,
|
|
||||||
ALIGNMENT_TIME_FRACTION, ALIGNMENT_COMPUTE_FRACTION,
|
|
||||||
MOE_CAPABILITY_MULTIPLIER, MOE_SPEED_MULTIPLIER,
|
MOE_CAPABILITY_MULTIPLIER, MOE_SPEED_MULTIPLIER,
|
||||||
EVENT_BASE_PROBABILITY,
|
EVENT_BASE_PROBABILITY,
|
||||||
LOSS_SPIKE_DELAY_MIN, LOSS_SPIKE_DELAY_MAX,
|
LOSS_SPIKE_DELAY_MIN, LOSS_SPIKE_DELAY_MAX,
|
||||||
@@ -18,8 +16,8 @@ import {
|
|||||||
ALIGNMENT_METHODS,
|
ALIGNMENT_METHODS,
|
||||||
SFT_SPECIALIZATION_BONUSES,
|
SFT_SPECIALIZATION_BONUSES,
|
||||||
QUANTIZATION_CONFIGS,
|
QUANTIZATION_CONFIGS,
|
||||||
DISTILLATION_BASE_RETENTION,
|
POINT_RELEASE_CAPABILITY_GAIN,
|
||||||
QUANTIZATION_TICKS,
|
SIZE_TIER_LABELS,
|
||||||
} from '@ai-tycoon/shared';
|
} from '@ai-tycoon/shared';
|
||||||
import type { ResearchBonuses } from './researchBonuses';
|
import type { ResearchBonuses } from './researchBonuses';
|
||||||
|
|
||||||
@@ -101,60 +99,25 @@ export function processModels(state: GameState, researchBonuses?: ResearchBonuse
|
|||||||
stage.computeAllocated = effectiveFlops;
|
stage.computeAllocated = effectiveFlops;
|
||||||
stage.lossValue = Math.max(0.01, 10 * Math.exp(-stage.progressTicks / stage.totalTicks * 3));
|
stage.lossValue = Math.max(0.01, 10 * Math.exp(-stage.progressTicks / stage.totalTicks * 3));
|
||||||
|
|
||||||
const progressRatio = stage.progressTicks / stage.totalTicks;
|
|
||||||
if (progressRatio >= 0.75 && progressRatio < 0.78 && !pipeline.stages.sft && !pipeline.stages.alignment) {
|
|
||||||
notifications.push({
|
|
||||||
title: 'Post-Training Reminder',
|
|
||||||
message: `${pipeline.modelName} is 75% pre-trained. Configure SFT/Alignment now or they'll be skipped!`,
|
|
||||||
type: 'warning',
|
|
||||||
action: { label: 'Configure Now', page: 'models', modelsTab: 'overview' },
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stage.progressTicks >= stage.totalTicks) {
|
if (stage.progressTicks >= stage.totalTicks) {
|
||||||
stage.isComplete = true;
|
stage.isComplete = true;
|
||||||
stage.progressTicks = stage.totalTicks;
|
stage.progressTicks = stage.totalTicks;
|
||||||
|
|
||||||
if (updated.stages.sft) {
|
|
||||||
updated.currentStage = 'sft';
|
updated.currentStage = 'sft';
|
||||||
notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to supervised fine-tuning.`, type: 'info' });
|
notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to supervised fine-tuning.`, type: 'info' });
|
||||||
} else if (updated.stages.alignment) {
|
|
||||||
updated.currentStage = 'alignment';
|
|
||||||
notifications.push({ title: 'Pre-training Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
|
|
||||||
} else {
|
|
||||||
const model = createBaseModel(updated, state, researchBonuses);
|
|
||||||
baseModels = [...baseModels, model];
|
|
||||||
families = families.map(f =>
|
|
||||||
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
|
||||||
);
|
|
||||||
completedModels.push(model);
|
|
||||||
updated.status = 'completed';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
updated = { ...updated, stages: { ...updated.stages, pretraining: stage } };
|
updated = { ...updated, stages: { ...updated.stages, pretraining: stage } };
|
||||||
} else if (pipeline.currentStage === 'sft' && pipeline.stages.sft) {
|
} else if (pipeline.currentStage === 'sft') {
|
||||||
const stage = { ...pipeline.stages.sft };
|
const stage = { ...pipeline.stages.sft };
|
||||||
stage.progressTicks += speedMultiplier;
|
stage.progressTicks += speedMultiplier;
|
||||||
|
|
||||||
if (stage.progressTicks >= stage.totalTicks) {
|
if (stage.progressTicks >= stage.totalTicks) {
|
||||||
stage.isComplete = true;
|
stage.isComplete = true;
|
||||||
stage.progressTicks = stage.totalTicks;
|
stage.progressTicks = stage.totalTicks;
|
||||||
|
|
||||||
if (updated.stages.alignment) {
|
|
||||||
updated.currentStage = 'alignment';
|
updated.currentStage = 'alignment';
|
||||||
notifications.push({ title: 'SFT Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
|
notifications.push({ title: 'SFT Complete', message: `${pipeline.modelName}: Moving to alignment.`, type: 'info' });
|
||||||
} else {
|
|
||||||
const model = createBaseModel(updated, state, researchBonuses);
|
|
||||||
baseModels = [...baseModels, model];
|
|
||||||
families = families.map(f =>
|
|
||||||
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
|
||||||
);
|
|
||||||
completedModels.push(model);
|
|
||||||
updated.status = 'completed';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
updated = { ...updated, stages: { ...updated.stages, sft: stage } };
|
updated = { ...updated, stages: { ...updated.stages, sft: stage } };
|
||||||
} else if (pipeline.currentStage === 'alignment' && pipeline.stages.alignment) {
|
} else if (pipeline.currentStage === 'alignment') {
|
||||||
const stage = { ...pipeline.stages.alignment };
|
const stage = { ...pipeline.stages.alignment };
|
||||||
stage.progressTicks += speedMultiplier;
|
stage.progressTicks += speedMultiplier;
|
||||||
|
|
||||||
@@ -165,7 +128,7 @@ export function processModels(state: GameState, researchBonuses?: ResearchBonuse
|
|||||||
const model = createBaseModel(updated, state, researchBonuses);
|
const model = createBaseModel(updated, state, researchBonuses);
|
||||||
baseModels = [...baseModels, model];
|
baseModels = [...baseModels, model];
|
||||||
families = families.map(f =>
|
families = families.map(f =>
|
||||||
f.id === pipeline.familyId ? { ...f, baseModelId: model.id } : f,
|
f.id === pipeline.familyId ? { ...f, baseModelIds: [...f.baseModelIds, model.id] } : f,
|
||||||
);
|
);
|
||||||
completedModels.push(model);
|
completedModels.push(model);
|
||||||
updated.status = 'completed';
|
updated.status = 'completed';
|
||||||
@@ -320,35 +283,46 @@ function createBaseModel(
|
|||||||
const dataTokens = pipeline.stages.pretraining.targetTokens;
|
const dataTokens = pipeline.stages.pretraining.targetTokens;
|
||||||
const params = architecture.totalParameters;
|
const params = architecture.totalParameters;
|
||||||
|
|
||||||
// Pillar 1: Parameters (0-30) — larger models have higher ceiling
|
const sourceModel = pipeline.isPointRelease && pipeline.sourceModelId
|
||||||
|
? state.models.baseModels.find(m => m.id === pipeline.sourceModelId)
|
||||||
|
: null;
|
||||||
|
|
||||||
|
let rawCapability: number;
|
||||||
|
let capabilities: ModelCapabilities;
|
||||||
|
|
||||||
|
if (sourceModel) {
|
||||||
|
rawCapability = Math.min(98, sourceModel.rawCapability * (1 + POINT_RELEASE_CAPABILITY_GAIN));
|
||||||
|
|
||||||
|
capabilities = { ...sourceModel.capabilities };
|
||||||
|
const boost = POINT_RELEASE_CAPABILITY_GAIN;
|
||||||
|
for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) {
|
||||||
|
if (key !== 'speed' && key !== 'contextUtilization') {
|
||||||
|
capabilities[key] = clamp(capabilities[key] * (1 + boost));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
const paramFactor = Math.min(30, Math.log2(1 + params) * 4.5);
|
const paramFactor = Math.min(30, Math.log2(1 + params) * 4.5);
|
||||||
|
|
||||||
// Pillar 2: Compute (0-25) — compute relative to parameter count (Chinchilla scaling)
|
|
||||||
const computePerParam = compute / Math.max(1, params);
|
const computePerParam = compute / Math.max(1, params);
|
||||||
const computeFactor = Math.min(25, Math.sqrt(computePerParam) * 8);
|
const computeFactor = Math.min(25, Math.sqrt(computePerParam) * 8);
|
||||||
|
|
||||||
// Pillar 3: Data (0-20) — token count with quality multiplier
|
|
||||||
const dataQualityMultiplier = 1 + (researchBonuses?.dataQualityBonus ?? 0);
|
const dataQualityMultiplier = 1 + (researchBonuses?.dataQualityBonus ?? 0);
|
||||||
const dataFactor = Math.min(20, Math.log10(1 + dataTokens / 1e8) * 8 * dataQualityMultiplier);
|
const dataFactor = Math.min(20, Math.log10(1 + dataTokens / 1e8) * 8 * dataQualityMultiplier);
|
||||||
|
|
||||||
// Pillar 4: Research (0-20) — accumulated research knowledge
|
|
||||||
const capabilityResearchBonus = researchBonuses?.globalCapabilityBonus ?? 0;
|
const capabilityResearchBonus = researchBonuses?.globalCapabilityBonus ?? 0;
|
||||||
const researchFactor = Math.min(20, capabilityResearchBonus + state.research.completedResearch.length * 0.5);
|
const researchFactor = Math.min(20, capabilityResearchBonus + state.research.completedResearch.length * 0.5);
|
||||||
|
|
||||||
let rawCapability = Math.min(95, paramFactor + computeFactor + dataFactor + researchFactor);
|
rawCapability = Math.min(95, paramFactor + computeFactor + dataFactor + researchFactor);
|
||||||
|
|
||||||
if (architecture.type === 'moe') {
|
if (architecture.type === 'moe') {
|
||||||
rawCapability = Math.min(98, rawCapability * MOE_CAPABILITY_MULTIPLIER);
|
rawCapability = Math.min(98, rawCapability * MOE_CAPABILITY_MULTIPLIER);
|
||||||
}
|
}
|
||||||
|
|
||||||
// MoE tradeoff: total params need full VRAM even though only active params run
|
|
||||||
// This is enforced in the UI/store when checking VRAM requirements
|
|
||||||
|
|
||||||
const researcherQuality = state.talent.departments.research.effectiveness;
|
const researcherQuality = state.talent.departments.research.effectiveness;
|
||||||
const contextBonus = Math.log2(Math.max(1, architecture.contextWindow / 4)) * 3;
|
const contextBonus = Math.log2(Math.max(1, architecture.contextWindow / 4)) * 3;
|
||||||
const contextPenalty = Math.max(0, Math.log2(architecture.contextWindow / 8)) * 2;
|
const contextPenalty = Math.max(0, Math.log2(architecture.contextWindow / 8)) * 2;
|
||||||
|
|
||||||
const capabilities: ModelCapabilities = {
|
capabilities = {
|
||||||
reasoning: clamp(rawCapability * (0.6 + dataMix.scientific * 0.5 + dataMix.code * 0.3) * (1 + researcherQuality * 0.2)),
|
reasoning: clamp(rawCapability * (0.6 + dataMix.scientific * 0.5 + dataMix.code * 0.3) * (1 + researcherQuality * 0.2)),
|
||||||
coding: clamp(rawCapability * (0.5 + dataMix.code * 1.0)),
|
coding: clamp(rawCapability * (0.5 + dataMix.code * 1.0)),
|
||||||
creative: clamp(rawCapability * (0.4 + dataMix.books * 0.6 + dataMix.conversation * 0.3)),
|
creative: clamp(rawCapability * (0.4 + dataMix.books * 0.6 + dataMix.conversation * 0.3)),
|
||||||
@@ -379,12 +353,14 @@ function createBaseModel(
|
|||||||
const key = domain as keyof ModelCapabilities;
|
const key = domain as keyof ModelCapabilities;
|
||||||
capabilities[key] = clamp(capabilities[key] + bonus);
|
capabilities[key] = clamp(capabilities[key] + bonus);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const completedStages: ('pretraining' | 'sft' | 'alignment')[] = ['pretraining'];
|
const completedStages: ('pretraining' | 'sft' | 'alignment')[] = ['pretraining'];
|
||||||
|
|
||||||
if (pipeline.stages.sft?.isComplete) {
|
if (pipeline.stages.sft.isComplete) {
|
||||||
completedStages.push('sft');
|
completedStages.push('sft');
|
||||||
const sft = pipeline.stages.sft;
|
const sft = pipeline.stages.sft;
|
||||||
|
if (!sourceModel) {
|
||||||
for (let i = 0; i < sft.specializations.length; i++) {
|
for (let i = 0; i < sft.specializations.length; i++) {
|
||||||
const spec = sft.specializations[i];
|
const spec = sft.specializations[i];
|
||||||
const bonuses = SFT_SPECIALIZATION_BONUSES[spec];
|
const bonuses = SFT_SPECIALIZATION_BONUSES[spec];
|
||||||
@@ -396,12 +372,13 @@ function createBaseModel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const safetyResearchBonus = researchBonuses?.safetyBonus ?? 0;
|
const safetyResearchBonus = researchBonuses?.safetyBonus ?? 0;
|
||||||
let overallSafety = Math.min(100, 30 + safetyResearchBonus + Math.random() * 10);
|
let overallSafety = Math.min(100, 30 + safetyResearchBonus + Math.random() * 10);
|
||||||
let refusalRate = overallSafety > 60 ? 0.1 : 0.03;
|
let refusalRate = overallSafety > 60 ? 0.1 : 0.03;
|
||||||
|
|
||||||
if (pipeline.stages.alignment?.isComplete) {
|
if (pipeline.stages.alignment.isComplete) {
|
||||||
completedStages.push('alignment');
|
completedStages.push('alignment');
|
||||||
const alignment = pipeline.stages.alignment;
|
const alignment = pipeline.stages.alignment;
|
||||||
const methodConfig = ALIGNMENT_METHODS[alignment.method];
|
const methodConfig = ALIGNMENT_METHODS[alignment.method];
|
||||||
@@ -409,6 +386,7 @@ function createBaseModel(
|
|||||||
const safetyGain = methodConfig.safetyGain * alignment.safetyWeight;
|
const safetyGain = methodConfig.safetyGain * alignment.safetyWeight;
|
||||||
overallSafety = Math.min(100, overallSafety + safetyGain);
|
overallSafety = Math.min(100, overallSafety + safetyGain);
|
||||||
refusalRate = methodConfig.baseRefusal * Math.pow(alignment.safetyWeight, 1.5);
|
refusalRate = methodConfig.baseRefusal * Math.pow(alignment.safetyWeight, 1.5);
|
||||||
|
if (!sourceModel) {
|
||||||
const capLoss = methodConfig.capabilityLoss * alignment.safetyWeight * 0.5;
|
const capLoss = methodConfig.capabilityLoss * alignment.safetyWeight * 0.5;
|
||||||
for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) {
|
for (const key of Object.keys(capabilities) as (keyof ModelCapabilities)[]) {
|
||||||
if (key !== 'speed' && key !== 'contextUtilization') {
|
if (key !== 'speed' && key !== 'contextUtilization') {
|
||||||
@@ -417,6 +395,7 @@ function createBaseModel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const safetyProfile: SafetyProfile = {
|
const safetyProfile: SafetyProfile = {
|
||||||
overallSafety,
|
overallSafety,
|
||||||
@@ -426,10 +405,15 @@ function createBaseModel(
|
|||||||
honesty: overallSafety * 0.9,
|
honesty: overallSafety * 0.9,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const family = state.models.families.find(f => f.id === pipeline.familyId);
|
||||||
|
const version = sourceModel ? Math.round((sourceModel.version + 0.1) * 10) / 10 : 1.0;
|
||||||
|
const familyName = family?.name ?? pipeline.modelName;
|
||||||
|
const autoName = `${familyName} ${SIZE_TIER_LABELS[pipeline.sizeTier]} v${version.toFixed(1)}`;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: uuid(),
|
id: uuid(),
|
||||||
familyId: pipeline.familyId,
|
familyId: pipeline.familyId,
|
||||||
name: pipeline.modelName,
|
name: autoName,
|
||||||
architecture,
|
architecture,
|
||||||
dataMix,
|
dataMix,
|
||||||
capabilities,
|
capabilities,
|
||||||
@@ -439,6 +423,10 @@ function createBaseModel(
|
|||||||
trainedAtTick: state.meta.tickCount,
|
trainedAtTick: state.meta.tickCount,
|
||||||
trainingCostTotal: compute,
|
trainingCostTotal: compute,
|
||||||
trainingStagesCompleted: completedStages,
|
trainingStagesCompleted: completedStages,
|
||||||
|
sizeTier: pipeline.sizeTier,
|
||||||
|
version,
|
||||||
|
sftSpecializations: pipeline.stages.sft.specializations,
|
||||||
|
alignmentMethod: pipeline.stages.alignment.method,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -467,30 +455,7 @@ function createVariant(job: VariantCreationJob, base: BaseModel): ModelVariant {
|
|||||||
const caps = { ...base.capabilities };
|
const caps = { ...base.capabilities };
|
||||||
let costMultiplier = 1.0;
|
let costMultiplier = 1.0;
|
||||||
let speedMultiplier = 1.0;
|
let speedMultiplier = 1.0;
|
||||||
let variantName = base.name;
|
|
||||||
let arch = { ...base.architecture };
|
|
||||||
|
|
||||||
if (job.jobType === 'distillation' && 'targetParameters' in job.config) {
|
|
||||||
const config = job.config;
|
|
||||||
const sizeRatio = config.targetParameters / base.architecture.totalParameters;
|
|
||||||
const retention = DISTILLATION_BASE_RETENTION + sizeRatio * 0.25;
|
|
||||||
for (const key of Object.keys(caps) as (keyof ModelCapabilities)[]) {
|
|
||||||
caps[key] = clamp(caps[key] * retention);
|
|
||||||
}
|
|
||||||
costMultiplier = sizeRatio * 0.8;
|
|
||||||
speedMultiplier = (1 / sizeRatio) * 0.7;
|
|
||||||
arch = { ...arch, totalParameters: config.targetParameters, activeParameters: config.targetParameters };
|
|
||||||
variantName = config.variantName;
|
|
||||||
} else if (job.jobType === 'fine-tuning' && 'specialization' in job.config) {
|
|
||||||
const config = job.config;
|
|
||||||
const bonuses = SFT_SPECIALIZATION_BONUSES[config.specialization];
|
|
||||||
if (bonuses) {
|
|
||||||
for (const [cap, value] of Object.entries(bonuses)) {
|
|
||||||
caps[cap as keyof ModelCapabilities] = clamp(caps[cap as keyof ModelCapabilities] + value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
variantName = config.variantName;
|
|
||||||
} else if (job.jobType === 'quantization' && 'level' in job.config) {
|
|
||||||
const config = job.config;
|
const config = job.config;
|
||||||
const qConfig = QUANTIZATION_CONFIGS[config.level];
|
const qConfig = QUANTIZATION_CONFIGS[config.level];
|
||||||
if (qConfig) {
|
if (qConfig) {
|
||||||
@@ -501,25 +466,19 @@ function createVariant(job: VariantCreationJob, base: BaseModel): ModelVariant {
|
|||||||
costMultiplier = qConfig.costMultiplier;
|
costMultiplier = qConfig.costMultiplier;
|
||||||
speedMultiplier = qConfig.speedMultiplier;
|
speedMultiplier = qConfig.speedMultiplier;
|
||||||
}
|
}
|
||||||
variantName = config.variantName;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: uuid(),
|
id: uuid(),
|
||||||
familyId: base.familyId,
|
familyId: base.familyId,
|
||||||
baseModelId: base.id,
|
baseModelId: base.id,
|
||||||
name: variantName,
|
name: config.variantName,
|
||||||
variantType: job.jobType === 'distillation' ? 'distilled' : job.jobType === 'fine-tuning' ? 'fine-tuned' : 'quantized',
|
variantType: 'quantized',
|
||||||
architecture: arch,
|
architecture: { ...base.architecture },
|
||||||
capabilities: caps,
|
capabilities: caps,
|
||||||
safetyProfile: { ...base.safetyProfile },
|
safetyProfile: { ...base.safetyProfile },
|
||||||
isDeployed: false,
|
isDeployed: false,
|
||||||
createdAtTick: 0,
|
createdAtTick: 0,
|
||||||
quantization: job.jobType === 'quantization' && 'level' in job.config ? job.config.level : undefined,
|
quantization: config.level,
|
||||||
distillationRetention: job.jobType === 'distillation' && 'targetParameters' in job.config
|
|
||||||
? DISTILLATION_BASE_RETENTION + (job.config.targetParameters / base.architecture.totalParameters) * 0.25
|
|
||||||
: undefined,
|
|
||||||
finetuneSpecialization: job.jobType === 'fine-tuning' && 'specialization' in job.config ? job.config.specialization : undefined,
|
|
||||||
costMultiplier,
|
costMultiplier,
|
||||||
speedMultiplier,
|
speedMultiplier,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig, CoolingType, CoolingTypeConfig, NetworkFabric, NetworkFabricConfig } from '../types/infrastructure';
|
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig, CoolingType, CoolingTypeConfig, NetworkFabric, NetworkFabricConfig } from '../types/infrastructure';
|
||||||
import type { Era } from '../types/gameState';
|
import type { Era } from '../types/gameState';
|
||||||
import type { ConsumerTierId, ApiTierId, SeasonalPhase, EnterprisePipelineStage, EnterpriseSegment, TAMSegmentId } from '../types/market';
|
import type { ConsumerTierId, ApiTierId, SeasonalPhase, EnterprisePipelineStage, EnterpriseSegment, TAMSegmentId } from '../types/market';
|
||||||
|
import type { SizeTier } from '../types/models';
|
||||||
|
|
||||||
export const TICK_INTERVAL_MS = 1000;
|
export const TICK_INTERVAL_MS = 1000;
|
||||||
export const MAX_OFFLINE_TICKS = 86_400;
|
export const MAX_OFFLINE_TICKS = 86_400;
|
||||||
@@ -34,13 +35,24 @@ export const MAX_CONCURRENT_TRAINING: Record<string, number> = {
|
|||||||
startup: 1, scaleup: 2, bigtech: 4, agi: 8,
|
startup: 1, scaleup: 2, bigtech: 4, agi: 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const DISTILLATION_COMPUTE_FRACTION = 0.15;
|
|
||||||
export const DISTILLATION_TIME_FRACTION = 0.20;
|
|
||||||
export const DISTILLATION_BASE_RETENTION = 0.70;
|
|
||||||
export const FINETUNE_COMPUTE_FRACTION = 0.03;
|
|
||||||
export const FINETUNE_TIME_FRACTION = 0.08;
|
|
||||||
export const QUANTIZATION_TICKS = 8;
|
export const QUANTIZATION_TICKS = 8;
|
||||||
|
|
||||||
|
export const SIZE_TIER_MAP: Record<number, SizeTier> = {
|
||||||
|
1: 'nano', 3: 'nano',
|
||||||
|
7: 'small', 13: 'small',
|
||||||
|
30: 'medium', 70: 'medium',
|
||||||
|
130: 'large', 300: 'large',
|
||||||
|
700: 'flagship', 1400: 'flagship',
|
||||||
|
};
|
||||||
|
|
||||||
|
export const SIZE_TIER_LABELS: Record<SizeTier, string> = {
|
||||||
|
nano: 'Nano', small: 'Small', medium: 'Medium', large: 'Large', flagship: 'Flagship',
|
||||||
|
};
|
||||||
|
|
||||||
|
export const POINT_RELEASE_TIME_FRACTION = 0.40;
|
||||||
|
export const POINT_RELEASE_CAPABILITY_GAIN = 0.08;
|
||||||
|
export const POINT_RELEASE_MAX_VERSION = 9;
|
||||||
|
|
||||||
export const MOE_CAPABILITY_MULTIPLIER = 1.15;
|
export const MOE_CAPABILITY_MULTIPLIER = 1.15;
|
||||||
export const MOE_SPEED_MULTIPLIER = 1.3;
|
export const MOE_SPEED_MULTIPLIER = 1.3;
|
||||||
export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];
|
export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];
|
||||||
|
|||||||
@@ -52,4 +52,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
|||||||
musicVolume: 0.5,
|
musicVolume: 0.5,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const SAVE_VERSION = 7;
|
export const SAVE_VERSION = 8;
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import type { Era } from './gameState';
|
|||||||
import type { DataDomain } from './data';
|
import type { DataDomain } from './data';
|
||||||
|
|
||||||
export type ArchitectureType = 'dense' | 'moe';
|
export type ArchitectureType = 'dense' | 'moe';
|
||||||
|
export type SizeTier = 'nano' | 'small' | 'medium' | 'large' | 'flagship';
|
||||||
|
|
||||||
export interface ModelArchitecture {
|
export interface ModelArchitecture {
|
||||||
type: ArchitectureType;
|
type: ArchitectureType;
|
||||||
@@ -27,13 +28,16 @@ export interface TrainingPipeline {
|
|||||||
currentStage: TrainingStage;
|
currentStage: TrainingStage;
|
||||||
stages: {
|
stages: {
|
||||||
pretraining: PreTrainingConfig;
|
pretraining: PreTrainingConfig;
|
||||||
sft: SFTConfig | null;
|
sft: SFTConfig;
|
||||||
alignment: AlignmentConfig | null;
|
alignment: AlignmentConfig;
|
||||||
};
|
};
|
||||||
status: TrainingJobStatus;
|
status: TrainingJobStatus;
|
||||||
allocatedComputeFraction: number;
|
allocatedComputeFraction: number;
|
||||||
events: TrainingEvent[];
|
events: TrainingEvent[];
|
||||||
startedAtTick: number;
|
startedAtTick: number;
|
||||||
|
sizeTier: SizeTier;
|
||||||
|
isPointRelease: boolean;
|
||||||
|
sourceModelId: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PreTrainingConfig {
|
export interface PreTrainingConfig {
|
||||||
@@ -125,9 +129,13 @@ export interface BaseModel {
|
|||||||
trainedAtTick: number;
|
trainedAtTick: number;
|
||||||
trainingCostTotal: number;
|
trainingCostTotal: number;
|
||||||
trainingStagesCompleted: TrainingStage[];
|
trainingStagesCompleted: TrainingStage[];
|
||||||
|
sizeTier: SizeTier;
|
||||||
|
version: number;
|
||||||
|
sftSpecializations: SFTSpecialization[];
|
||||||
|
alignmentMethod: AlignmentMethod | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type VariantType = 'distilled' | 'fine-tuned' | 'quantized';
|
export type VariantType = 'quantized';
|
||||||
export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2';
|
export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2';
|
||||||
|
|
||||||
export interface ModelVariant {
|
export interface ModelVariant {
|
||||||
@@ -142,8 +150,6 @@ export interface ModelVariant {
|
|||||||
isDeployed: boolean;
|
isDeployed: boolean;
|
||||||
createdAtTick: number;
|
createdAtTick: number;
|
||||||
quantization?: QuantizationLevel;
|
quantization?: QuantizationLevel;
|
||||||
distillationRetention?: number;
|
|
||||||
finetuneSpecialization?: SFTSpecialization;
|
|
||||||
costMultiplier: number;
|
costMultiplier: number;
|
||||||
speedMultiplier: number;
|
speedMultiplier: number;
|
||||||
}
|
}
|
||||||
@@ -152,37 +158,25 @@ export interface ModelFamily {
|
|||||||
id: string;
|
id: string;
|
||||||
name: string;
|
name: string;
|
||||||
generation: number;
|
generation: number;
|
||||||
baseModelId: string | null;
|
baseModelIds: string[];
|
||||||
variants: ModelVariant[];
|
variants: ModelVariant[];
|
||||||
createdAtTick: number;
|
createdAtTick: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization';
|
export type VariantJobType = 'quantization';
|
||||||
|
|
||||||
export interface VariantCreationJob {
|
export interface VariantCreationJob {
|
||||||
id: string;
|
id: string;
|
||||||
familyId: string;
|
familyId: string;
|
||||||
baseModelId: string;
|
baseModelId: string;
|
||||||
jobType: VariantJobType;
|
jobType: VariantJobType;
|
||||||
config: DistillationConfig | FineTuneConfig | QuantizationConfig;
|
config: QuantizationConfig;
|
||||||
progressTicks: number;
|
progressTicks: number;
|
||||||
totalTicks: number;
|
totalTicks: number;
|
||||||
allocatedComputeFraction: number;
|
allocatedComputeFraction: number;
|
||||||
status: 'active' | 'completed';
|
status: 'active' | 'completed';
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DistillationConfig {
|
|
||||||
targetParameters: number;
|
|
||||||
targetArchitecture: ArchitectureType;
|
|
||||||
variantName: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface FineTuneConfig {
|
|
||||||
specialization: SFTSpecialization;
|
|
||||||
datasetIds: string[];
|
|
||||||
variantName: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface QuantizationConfig {
|
export interface QuantizationConfig {
|
||||||
level: QuantizationLevel;
|
level: QuantizationLevel;
|
||||||
variantName: string;
|
variantName: string;
|
||||||
|
|||||||
Reference in New Issue
Block a user