Files
gemma3-finetuning/data/data_transform.py
2025-09-03 00:20:29 +08:00

24 lines
808 B
Python

import json
import pandas as pd
from datasets import Dataset
def get_chat_data(path: str = "致远速联日常销售问答话术.xlsx") -> "Dataset":
    """Build a chat-style dataset from an Excel workbook.

    The workbook is read with ``pandas.read_excel`` and every missing cell
    is replaced by an empty string. Each remaining row becomes one
    two-turn conversation: the second column (index 1) is the ``user``
    message and the third column (index 2) is the ``assistant`` reply.
    Rows whose second column is empty are skipped.

    Args:
        path: Location of the Excel workbook. Defaults to the workbook
            name that was previously hard-coded, so existing zero-argument
            callers keep their behavior.

    Returns:
        A ``Dataset`` with a single ``conversations`` column; each entry is
        a list of two ``{"role": ..., "content": ...}`` dicts.
    """
    frame = pd.read_excel(path).fillna("")

    # Columns are addressed by position, not by header name; column 0 is
    # never read. NOTE(review): rows with an empty third column (no reply)
    # are still kept -- confirm that is intended.
    records = [
        {
            "conversations": [
                {"role": "user", "content": row[1]},
                {"role": "assistant", "content": row[2]},
            ]
        }
        for row in frame.values
        if row[1] != ""
    ]
    return Dataset.from_pandas(pd.DataFrame(records))
def get_chat_data2(path: str = "train_data.json") -> "Dataset":
    """Build a chat-style dataset from a JSON training file.

    The file is parsed with ``json.load`` and iterated; each item must
    provide the keys ``"instruction"`` and ``"output"``. Every item becomes
    one two-turn conversation in which ``instruction`` is the ``user``
    message and ``output`` is the ``assistant`` reply.

    Args:
        path: Location of the UTF-8 encoded JSON file. Defaults to the
            file name that was previously hard-coded, so existing
            zero-argument callers keep their behavior.

    Returns:
        A ``Dataset`` with a single ``conversations`` column; each entry is
        a list of two ``{"role": ..., "content": ...}`` dicts.

    Raises:
        KeyError: If an item lacks ``"instruction"`` or ``"output"``.
    """
    with open(path, "r", encoding="utf-8") as f:
        train_data = json.load(f)

    records = [
        {
            "conversations": [
                {"role": "user", "content": item["instruction"]},
                {"role": "assistant", "content": item["output"]},
            ]
        }
        for item in train_data
    ]
    return Dataset.from_pandas(pd.DataFrame(records))
# Script entry point: when executed directly, build the dataset from the
# JSON training file. The returned Dataset is neither stored nor printed.
if __name__ == "__main__":
    get_chat_data2()