24 lines
808 B
Python
24 lines
808 B
Python
import json
|
|
|
|
import pandas as pd
|
|
from datasets import Dataset
|
|
|
|
def get_chat_data(path="致远速联日常销售问答话术.xlsx"):
    """Build a chat-style dataset from a question/answer Excel workbook.

    The workbook is loaded with ``pandas.read_excel`` and every missing cell
    is replaced by an empty string. Each row is then turned into a two-turn
    conversation: the value in the second column (index 1) is the user
    message and the value in the third column (index 2) is the assistant
    reply. Rows whose second column is empty are skipped.

    Args:
        path: Excel workbook to read. Defaults to the file name that used to
            be hard-coded, so calling the function without arguments behaves
            exactly as before.

    Returns:
        The ``Dataset`` produced by ``Dataset.from_pandas`` from a DataFrame
        holding one ``conversations`` entry per kept row.
    """
    # Blank cells come back as NaN; normalise them to "" so the emptiness
    # check below is a plain string comparison.
    frame = pd.read_excel(path).fillna("")

    data = [
        {
            "conversations": [
                {"role": "user", "content": row[1]},
                {"role": "assistant", "content": row[2]},
            ]
        }
        for row in frame.values
        if row[1] != ""  # no question in this row -> nothing to learn from
    ]
    return Dataset.from_pandas(pd.DataFrame(data))
|
|
|
|
def get_chat_data2(path="train_data.json"):
    """Build a chat-style dataset from a JSON file of training records.

    The file is opened as UTF-8 and parsed with ``json.load``. The parsed
    value is iterated, and each item's ``"instruction"`` value becomes the
    user message while its ``"output"`` value becomes the assistant reply.

    Args:
        path: JSON file to read. Defaults to the file name that used to be
            hard-coded, so calling the function without arguments behaves
            exactly as before.

    Returns:
        The ``Dataset`` produced by ``Dataset.from_pandas`` from a DataFrame
        holding one ``conversations`` entry per record.

    Raises:
        KeyError: If a record (assumed to be a dict) lacks ``"instruction"``
            or ``"output"``.
    """
    with open(path, "r", encoding="utf-8") as f:
        train_data = json.load(f)

    data = [
        {
            "conversations": [
                {"role": "user", "content": record["instruction"]},
                {"role": "assistant", "content": record["output"]},
            ]
        }
        for record in train_data
    ]
    return Dataset.from_pandas(pd.DataFrame(data))
|
|
|
|
if __name__ == "__main__":
    # Running the module directly builds the JSON-backed dataset once; the
    # returned Dataset is not used here.
    get_chat_data2()