Redesign model lifecycle: upfront SFT/alignment, multi-size families, point releases, quantization-only variants
CI / build-and-push (push) Successful in 45s
CI / build-and-push (push) Successful in 45s
The training pipeline now requires SFT specializations and an alignment method to be configured at the start; there is no longer a mid-training configuration step. Model families support multiple size tiers (Nano/Small/Medium/Large/Flagship) that are trained independently, mimicking the model families of real AI companies. Point releases iterate on deployed models, using 40% of the training time and yielding an 8% capability gain. Distillation and fine-tuning variants have been removed; players train smaller size tiers or configure SFT during initial training instead. Quantization is the only remaining variant type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import type { DCTier, DCTierConfig, RackSkuId, RackSkuConfig, SwitchTier, SwitchTierConfig, CampusTierCost, ClusterCostConfig, CoolingType, CoolingTypeConfig, NetworkFabric, NetworkFabricConfig } from '../types/infrastructure';
|
||||
import type { Era } from '../types/gameState';
|
||||
import type { ConsumerTierId, ApiTierId, SeasonalPhase, EnterprisePipelineStage, EnterpriseSegment, TAMSegmentId } from '../types/market';
|
||||
import type { SizeTier } from '../types/models';
|
||||
|
||||
/** Length of one tick in milliseconds (1000 ms, i.e. one tick per second). */
export const TICK_INTERVAL_MS = 1000;

/**
 * Maximum number of offline ticks.
 *
 * 86,400 ticks at 1000 ms per tick corresponds to 24 hours.
 * NOTE(review): presumably caps how many ticks are replayed after time away;
 * confirm against the code that consumes it.
 */
export const MAX_OFFLINE_TICKS = 86_400;
|
||||
@@ -34,13 +35,24 @@ export const MAX_CONCURRENT_TRAINING: Record<string, number> = {
|
||||
startup: 1, scaleup: 2, bigtech: 4, agi: 8,
|
||||
};
|
||||
|
||||
// NOTE(review): this text is a rendered diff, so removed and added lines are
// shown together without +/- markers. The commit description says distillation
// and fine-tuning variants were removed, so the DISTILLATION_* and FINETUNE_*
// constants below are presumably the deleted side of the change. Confirm
// before relying on them.

// Distillation tuning fractions (compute, time, base retention). What each is a
// fraction of is not visible in this view.
export const DISTILLATION_COMPUTE_FRACTION = 0.15;
|
||||
export const DISTILLATION_TIME_FRACTION = 0.20;
|
||||
export const DISTILLATION_BASE_RETENTION = 0.70;
|
||||
// Fine-tuning tuning fractions (compute, time); same caveat as above.
export const FINETUNE_COMPUTE_FRACTION = 0.03;
|
||||
export const FINETUNE_TIME_FRACTION = 0.08;
|
||||
// Tick count associated with quantization, going by the name. Presumably the
// duration of a quantization job; confirm at the call site.
export const QUANTIZATION_TICKS = 8;
|
||||
|
||||
/**
 * Maps a parameter-count option to the size tier it belongs to.
 *
 * The keys are the ten values listed in PARAMETER_OPTIONS, two per tier, in
 * ascending order from 'nano' to 'flagship'. Any other numeric key is absent
 * at runtime (the lookup yields `undefined`) even though the `Record` type
 * does not express that, so guard lookups that may receive an unlisted value.
 */
export const SIZE_TIER_MAP: Record<number, SizeTier> = {
  1: 'nano',
  3: 'nano',
  7: 'small',
  13: 'small',
  30: 'medium',
  70: 'medium',
  130: 'large',
  300: 'large',
  700: 'flagship',
  1400: 'flagship',
};
|
||||
|
||||
/**
 * Human-readable label for each size tier; every label is the tier id with
 * its first letter capitalised.
 */
export const SIZE_TIER_LABELS: Record<SizeTier, string> = {
  nano: 'Nano',
  small: 'Small',
  medium: 'Medium',
  large: 'Large',
  flagship: 'Flagship',
};
|
||||
|
||||
/**
 * Training-time fraction for a point release (40%).
 * NOTE(review): the base duration this fraction is applied to is not visible
 * here; confirm at the call site.
 */
export const POINT_RELEASE_TIME_FRACTION = 0.40;

/** Capability gain of a point release (8%). */
export const POINT_RELEASE_CAPABILITY_GAIN = 0.08;

/**
 * Upper limit on the point-release version number.
 * NOTE(review): whether the bound is inclusive depends on the caller; confirm.
 */
export const POINT_RELEASE_MAX_VERSION = 9;
|
||||
|
||||
/**
 * Capability multiplier associated with the 'moe' architecture type.
 * Where it is applied is not visible in this view.
 */
export const MOE_CAPABILITY_MULTIPLIER = 1.15;

/**
 * Speed multiplier associated with the 'moe' architecture type.
 * Where it is applied is not visible in this view.
 */
export const MOE_SPEED_MULTIPLIER = 1.3;

/**
 * Selectable parameter counts, in ascending order. Each value is also a key
 * of SIZE_TIER_MAP.
 * NOTE(review): the unit is not stated here (presumably billions of
 * parameters); confirm.
 */
export const PARAMETER_OPTIONS = [1, 3, 7, 13, 30, 70, 130, 300, 700, 1400];
|
||||
|
||||
@@ -52,4 +52,4 @@ export const INITIAL_SETTINGS: GameSettings = {
|
||||
musicVolume: 0.5,
|
||||
};
|
||||
|
||||
// Save-format version number.
// NOTE(review): this is a rendered diff, so both sides of the change are shown.
// By unified-diff ordering the first declaration (7) is presumably the removed
// value and the second (8) the added one; only one can exist in the real file.
export const SAVE_VERSION = 7;
|
||||
export const SAVE_VERSION = 8;
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { Era } from './gameState';
|
||||
import type { DataDomain } from './data';
|
||||
|
||||
/** Architecture kinds a model can declare. */
export type ArchitectureType = 'dense' | 'moe';

/** Size tiers of a model family, listed from smallest to largest. */
export type SizeTier = 'nano' | 'small' | 'medium' | 'large' | 'flagship';
|
||||
|
||||
export interface ModelArchitecture {
|
||||
type: ArchitectureType;
|
||||
@@ -27,13 +28,16 @@ export interface TrainingPipeline {
|
||||
currentStage: TrainingStage;
|
||||
stages: {
|
||||
pretraining: PreTrainingConfig;
|
||||
sft: SFTConfig | null;
|
||||
alignment: AlignmentConfig | null;
|
||||
sft: SFTConfig;
|
||||
alignment: AlignmentConfig;
|
||||
};
|
||||
status: TrainingJobStatus;
|
||||
allocatedComputeFraction: number;
|
||||
events: TrainingEvent[];
|
||||
startedAtTick: number;
|
||||
sizeTier: SizeTier;
|
||||
isPointRelease: boolean;
|
||||
sourceModelId: string | null;
|
||||
}
|
||||
|
||||
export interface PreTrainingConfig {
|
||||
@@ -125,9 +129,13 @@ export interface BaseModel {
|
||||
trainedAtTick: number;
|
||||
trainingCostTotal: number;
|
||||
trainingStagesCompleted: TrainingStage[];
|
||||
sizeTier: SizeTier;
|
||||
version: number;
|
||||
sftSpecializations: SFTSpecialization[];
|
||||
alignmentMethod: AlignmentMethod | null;
|
||||
}
|
||||
|
||||
// Kinds of model variant.
// NOTE(review): this is a rendered diff, so both sides of the change are shown.
// The commit description leaves quantization as the only variant type, so the
// three-member union is presumably the removed line and the single-member
// alias the live one.
export type VariantType = 'distilled' | 'fine-tuned' | 'quantized';
|
||||
export type VariantType = 'quantized';
|
||||
// Quantization levels; used by ModelVariant.quantization and
// QuantizationConfig.level.
export type QuantizationLevel = 'fp16' | 'int8' | 'int4' | 'int2';
|
||||
|
||||
export interface ModelVariant {
|
||||
@@ -142,8 +150,6 @@ export interface ModelVariant {
|
||||
isDeployed: boolean;
|
||||
createdAtTick: number;
|
||||
quantization?: QuantizationLevel;
|
||||
distillationRetention?: number;
|
||||
finetuneSpecialization?: SFTSpecialization;
|
||||
costMultiplier: number;
|
||||
speedMultiplier: number;
|
||||
}
|
||||
@@ -152,37 +158,25 @@ export interface ModelFamily {
|
||||
id: string;
|
||||
name: string;
|
||||
generation: number;
|
||||
baseModelId: string | null;
|
||||
baseModelIds: string[];
|
||||
variants: ModelVariant[];
|
||||
createdAtTick: number;
|
||||
}
|
||||
|
||||
// Kinds of variant-creation job; used as VariantCreationJob.jobType.
// NOTE(review): this is a rendered diff, so both sides of the change are shown.
// The commit description removes distillation and fine-tuning, so the
// three-member union is presumably the removed line and the single-member
// alias the live one.
export type VariantJobType = 'distillation' | 'fine-tuning' | 'quantization';
|
||||
export type VariantJobType = 'quantization';
|
||||
|
||||
/**
 * A job that creates a variant from a base model within a family.
 *
 * Carries the ids of the family and base model it belongs to, the job type
 * and its configuration, tick-based progress fields, the allocated compute
 * fraction, and a status of 'active' or 'completed'.
 */
export interface VariantCreationJob {
|
||||
id: string;
|
||||
familyId: string;
|
||||
baseModelId: string;
|
||||
jobType: VariantJobType;
|
||||
// NOTE(review): the two `config` members are the before/after sides of the
// rendered diff. The commit description leaves quantization as the only
// variant type, so the QuantizationConfig-only line is presumably the live one.
config: DistillationConfig | FineTuneConfig | QuantizationConfig;
|
||||
config: QuantizationConfig;
|
||||
// Presumably progressTicks advances toward totalTicks; confirm in the job runner.
progressTicks: number;
|
||||
totalTicks: number;
|
||||
allocatedComputeFraction: number;
|
||||
status: 'active' | 'completed';
|
||||
}
|
||||
|
||||
/**
 * Configuration for a distillation job: target parameter count, target
 * architecture, and the name of the resulting variant.
 *
 * NOTE(review): the commit description says distillation variants were
 * removed, so this interface is presumably on the deleted side of the
 * rendered diff; confirm before using it.
 */
export interface DistillationConfig {
|
||||
targetParameters: number;
|
||||
targetArchitecture: ArchitectureType;
|
||||
variantName: string;
|
||||
}
|
||||
|
||||
/**
 * Configuration for a fine-tuning job: the SFT specialization, the ids of the
 * datasets involved, and the name of the resulting variant.
 *
 * NOTE(review): the commit description says fine-tuning variants were
 * removed, so this interface is presumably on the deleted side of the
 * rendered diff; confirm before using it.
 */
export interface FineTuneConfig {
|
||||
specialization: SFTSpecialization;
|
||||
datasetIds: string[];
|
||||
variantName: string;
|
||||
}
|
||||
|
||||
export interface QuantizationConfig {
|
||||
level: QuantizationLevel;
|
||||
variantName: string;
|
||||
|
||||
Reference in New Issue
Block a user