gemma3-finetuning/test.py

# Load environment variables (e.g. an HF token) before unsloth initializes.
from dotenv import load_dotenv
load_dotenv()

from unsloth import FastLanguageModel, FastModel
from transformers import TextStreamer

# Path to the fine-tuned Gemma 3 270M checkpoint.
model_name = r"E:\code\python\fine-tuning\gemma-3-270m-finetuned"

model, tokenizer = FastModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype=None,          # let Unsloth pick the dtype automatically
    load_in_4bit=False,
    full_finetuning=False,
)

# Attach LoRA adapters (mirroring the training configuration).
model = FastLanguageModel.get_peft_model(
    model,
    r=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    use_gradient_checkpointing="unsloth",
    lora_alpha=128,
    lora_dropout=0,
    bias="none",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Switch Unsloth into its fast inference mode.
FastLanguageModel.for_inference(model)

def chat(model, message):
    # Stream tokens to stdout as they are generated; skip_prompt hides the input.
    model.generate(
        **tokenizer([message], return_tensors="pt").to("cuda"),
        max_new_tokens=1024,
        streamer=TextStreamer(tokenizer, skip_prompt=True),
        do_sample=True,  # the sampling params below are ignored without this
        temperature=1.0, top_p=0.95, top_k=64,
    )
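
# A minimal alternative sketch (assuming the saved tokenizer carries Gemma's
# chat template): build the prompt with tokenizer.apply_chat_template instead
# of hand-formatting the <start_of_turn> markers as in __main__ below.
# `chat_with_template` is a hypothetical helper, not part of the original script.
def chat_with_template(model, message):
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        add_generation_prompt=True,   # ends the prompt with "<start_of_turn>model\n"
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    model.generate(
        **inputs,
        max_new_tokens=1024,
        streamer=TextStreamer(tokenizer, skip_prompt=True),
        do_sample=True, temperature=1.0, top_p=0.95, top_k=64,
    )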

if __name__ == '__main__':
    while True:
        message = input("user: ")
        if message == "exit":
            break
        # Wrap the message in Gemma's single-turn chat format and cue the model's turn.
        chat(model, f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n")