""" 配置文件,集中管理模型训练参数和配置 """ # 模型配置 MODEL_CONFIGS = { "gemma-3-270m-it": { "model_name": "unsloth/gemma-3-270m-it", "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\unsloth\gemma-3-270m-it", "chat_template": "gemma-3", "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] }, "gemma-3-270m": { "model_name": "unsloth/gemma-3-270m-it", "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\unsloth\gemma-3-270m", "chat_template": "gemma-3", "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] }, "qwen3-600m": { "model_name": "Qwen/Qwen3-600m", "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\Qwen\Qwen3-600m", "chat_template": "qwen3", "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] } } # 训练配置 TRAINING_CONFIG = { "per_device_train_batch_size": 2, "gradient_accumulation_steps": 4, "warmup_steps": 5, "max_steps": 300, "learning_rate": 2e-4, "logging_steps": 10, "optim": "adamw_8bit", "lr_scheduler_type": "linear", "seed": 3407, "weight_decay": 0.01, "report_to": "none" } # LoRA配置 LORA_CONFIG = { "r": 128, "lora_alpha": 128, "lora_dropout": 0, "bias": "none", "random_state": 3407, "use_rslor": False, "loftq_config": None } # 生成配置 GENERATION_CONFIG = { "max_new_tokens": 1024*16, "temperature": 1.0, "top_p": 0.95, "top_k": 3 }