"""
|
|
配置文件,集中管理模型训练参数和配置
|
|
"""
|
|
|
|
# Model configurations
MODEL_CONFIGS = {
    "gemma-3-270m-it": {
        "model_name": "unsloth/gemma-3-270m-it",
        "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\unsloth\gemma-3-270m-it",
        "chat_template": "gemma-3",
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    },
    "gemma-3-270m": {
        "model_name": "unsloth/gemma-3-270m",
        "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\unsloth\gemma-3-270m",
        "chat_template": "gemma-3",
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    },
    "qwen3-600m": {
        "model_name": "Qwen/Qwen3-600m",
        "uri": r"C:\Users\123ee\.cache\modelscope\hub\models\Qwen\Qwen3-600m",
        "chat_template": "qwen3",
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    }
}
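
# Illustrative usage sketch (an assumption, not part of this module): a
# MODEL_CONFIGS entry is typically consumed by a loader such as Unsloth's
# FastLanguageModel, roughly like this:
#
#   from unsloth import FastLanguageModel
#   from unsloth.chat_templates import get_chat_template
#
#   cfg = MODEL_CONFIGS["gemma-3-270m-it"]
#   model, tokenizer = FastLanguageModel.from_pretrained(model_name=cfg["uri"])
#   tokenizer = get_chat_template(tokenizer, chat_template=cfg["chat_template"])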

# Training configuration
TRAINING_CONFIG = {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "max_steps": 300,
    "learning_rate": 2e-4,
    "logging_steps": 10,
    "optim": "adamw_8bit",
    "lr_scheduler_type": "linear",
    "seed": 3407,
    "weight_decay": 0.01,
    "report_to": "none"
}
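
# Illustrative usage sketch (assumes TRL's SFTTrainer/SFTConfig; `dataset` and
# the "outputs" directory are placeholders, not defined in this module):
#
#   from trl import SFTConfig, SFTTrainer
#
#   trainer = SFTTrainer(
#       model=model,
#       train_dataset=dataset,
#       args=SFTConfig(output_dir="outputs", **TRAINING_CONFIG),
#   )
#   trainer.train()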

# LoRA configuration
LORA_CONFIG = {
    "r": 128,
    "lora_alpha": 128,
    "lora_dropout": 0,
    "bias": "none",
    "random_state": 3407,
    "use_rslora": False,
    "loftq_config": None
}
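
# Illustrative usage sketch (assumes Unsloth's FastLanguageModel.get_peft_model;
# `cfg` is the selected MODEL_CONFIGS entry from the sketch above):
#
#   model = FastLanguageModel.get_peft_model(
#       model,
#       target_modules=cfg["target_modules"],
#       **LORA_CONFIG,
#   )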

# Generation configuration
GENERATION_CONFIG = {
    "max_new_tokens": 1024 * 16,
    "temperature": 1.0,
    "top_p": 0.95,
    "top_k": 3
}
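
# Illustrative usage sketch (assumes a standard Hugging Face generate() call on the
# fine-tuned model; `messages` is a placeholder chat history):
#
#   input_ids = tokenizer.apply_chat_template(
#       messages, add_generation_prompt=True, return_tensors="pt"
#   ).to(model.device)
#   outputs = model.generate(input_ids, **GENERATION_CONFIG)
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))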