使用 pypi 安装 xiaothink：
pip install xiaothink==1.0.2
下载模型：万语-50M
开始评估（修改模型路径后即可直接开始运行，结果保存在 output 文件夹里）：
import os
import json
import pandas as pd
import re
from tqdm import tqdm
使用 pypi 安装 xiaothink：
pip install xiaothink==1.0.2
下载模型：万语-50M
开始评估（修改模型路径后即可直接开始运行，结果保存在 output 文件夹里）：
import os
import json
import pandas as pd
import re
from tqdm import tqdm
import random
import time
import requests
from xiaothink.llm.inference.test_formal import *
from collections import Counter  # was commented out in the original, but Counter is used below

# Candidate option letters for the multiple-choice benchmark.
# NOTE(review): `choices` was referenced but never defined in the pasted source;
# C-Eval-style scripts define it exactly like this — confirm against the original repo.
choices = ['A', 'B', 'C', 'D']

# Load the 万语-50M checkpoint once at module level; every chat_x() call reuses it.
model = QianyanModel(
    MT=40.231,
    ckpt_dir=r'path\to\wanyv\model\ckpt_test_40_2_3_1_formal_open',
)


def chat_x(inp, temp=0.3):
    """Single-turn chat with the loaded model.

    Args:
        inp: Prompt text.
        temp: Sampling temperature.

    Returns:
        The model's reply string (generation stops at '。').
    """
    return model.chat_SingleTurn(inp, temp=temp, loop=True, stop='。')


def pre(question: str, options_str: str) -> str:
    """Ask the model a multiple-choice question and return the chosen letter.

    Builds five prompt variants, queries the model with the first (CoT) variant,
    extracts the first option letter found in the reply (defaulting to 'A'),
    and appends one JSONL training record per prompt variant to
    ``ceval_text_sklm.txt``.

    Args:
        question: Question text (may already embed the options).
        options_str: Pre-formatted options string, e.g. "A: ... B: ...".

    Returns:
        The selected option letter, one of 'A'/'B'/'C'/'D'.
    """
    # NOTE(review): the scrape stripped punctuation from all Chinese string
    # literals below; '：'/'，' were reconstructed — confirm against the original.
    question = question.replace('答案：', '')
    options_str = options_str.replace('答案：', '')

    if 'A' not in question:
        # Options are not embedded in the question: include options_str.
        prompt_template = ('题目：{question}\n{options_str}\n让我们首先一步步思考，'
                           '最后在回答末尾给出一个字母作为你的答案(A或B或C或D)')
        prompt_template2 = '题目：{question}\n选项：{options_str}\n给出答案：'
        prompt_template3 = '{question}\n{options_str}\n'
        prompt_template4 = '{question}\n{options_str}\n给出你的选择：'
        prompt_template5 = '题目：{question}\n{options_str}\n答案：'
    else:
        # Question already contains the options; omit options_str.
        prompt_template = ('题目：{question}\n让我们首先一步步思考，'
                           '最后在回答末尾给出一个字母作为你的答案(A或B或C或D)')
        prompt_template2 = '题目：{question}\n给出答案：'
        prompt_template3 = '{question}\n'
        prompt_template4 = '{question}\n给出你的选择：'
        prompt_template5 = '题目：{question}\n答案：'

    ansd = {}      # letter -> raw model response that produced it
    answers = []   # collected letters (majority vote; currently a single sample)

    # Run the model once (range(1)); raise the count for majority voting.
    for _ in range(1):
        response = chat_x(prompt_template.format(question=question,
                                                 options_str=options_str))
        # Take the first option letter that appears anywhere in the reply.
        for option in 'ABCD':
            if option in response:
                answers.append(option)
                ansd[option] = response
                break
        else:
            # No letter found: log the raw reply and default to 'A'.
            print('AI选项检查', repr(response))
            answers.append('A')
            ansd['A'] = response

    # Majority vote; ties broken alphabetically via min().
    answer_counts = Counter(answers)
    most_common_answers = answer_counts.most_common()
    highest_frequency = most_common_answers[0][1]
    most_frequent_answers = [letter for letter, count in most_common_answers
                             if count == highest_frequency]
    final_answer = min(most_frequent_answers)

    # Persist one instruction-tuning record per prompt variant.
    # json.dumps replaces the original fragile string-.replace() templating,
    # which produced invalid JSON whenever the response contained quotes/newlines.
    templates = (prompt_template, prompt_template2, prompt_template3,
                 prompt_template4, prompt_template5)
    with open('ceval_text_sklm.txt', 'a', encoding='utf-8') as f:
        for tpl in templates:
            record = {
                'instruction': tpl.format(question=question,
                                          options_str=options_str),
                'input': '',
                'output': ansd[final_answer],
            }
            f.write(json.dumps(record, ensure_ascii=False) + '\n')

    return final_answer


class Llama_Evaluator:
    """Runs a C-Eval-style multiple-choice evaluation over pandas DataFrames."""

    def __init__(self, choices, k):
        # choices: option letters (e.g. ['A','B','C','D']);
        # k: number of few-shot examples (-1 = use the whole dev set).
        self.choices = choices
        self.k = k

    def eval_subject(self, subject_name,
                     test_df,
                     dev_df=None,
                     few_shot=False,
                     cot=False,
                     save_result_dir=None,
                     with_prompt=False,
                     constrained_decoding=False,
                     do_test=False):
        """Evaluate one subject.

        Args:
            subject_name: Subject identifier (used in printouts / CSV name).
            test_df: DataFrame with 'question', option columns, and 'answer'.
            dev_df: Dev DataFrame for few-shot examples (required if few_shot).
            few_shot: Prepend few-shot examples to each prompt.
            cot: Use chain-of-thought phrasing in prompts.
            save_result_dir: If truthy, write per-question results CSV here.
            with_prompt: Alternate "答案是什么" phrasing in format_example.
            constrained_decoding: Accepted for interface compatibility; unused.
            do_test: Test split has no labels -> ground truth becomes 'NA'
                     (accuracy is then meaningless by construction).

        Returns:
            (correct_ratio_percent, {row_index_str: predicted_letter}).
        """
        all_answers = {}
        correct_num = 0
        if save_result_dir:
            result = []
            score = []
        if few_shot:
            history = self.generate_few_shot_prompt(subject_name, dev_df, cot=cot)
        else:
            history = ''
        answers = ['NA'] * len(test_df) if do_test is True else list(test_df['answer'])

        for row_index, row in tqdm(test_df.iterrows(), total=len(test_df)):
            question = self.format_example(row, include_answer=False,
                                           cot=cot, with_prompt=with_prompt)
            options_str = self.format_options(row)
            instruction = history + question + '\n选项：' + options_str
            ans = pre(instruction, options_str)
            if ans == answers[row_index]:
                correct_num += 1
                correct = 1
            else:
                correct = 0
            print(f'\nbegin {str(row_index)}')
            print('question: ', question)
            print('options: ', options_str)
            print('ans: ', ans)
            print('ground truth: ', answers[row_index], '\n')
            if save_result_dir:
                result.append(ans)
                score.append(correct)
            print(f'end {str(row_index)}')
            all_answers[str(row_index)] = ans

        correct_ratio = 100 * correct_num / len(answers)

        if save_result_dir:
            test_df['model_output'] = result
            test_df['correctness'] = score
            test_df.to_csv(os.path.join(save_result_dir, f'{subject_name}_test.csv'))

        return correct_ratio, all_answers

    def format_example(self, line, include_answer=True, cot=False, with_prompt=False):
        """Render one row as question text + lettered options (+ optional answer)."""
        example = line['question']
        for choice in self.choices:
            example += f'\n{choice}. {line[choice]}'
        if include_answer:
            if cot:
                example += ('\n答案：让我们一步一步思考，\n'
                            + line['explanation']
                            + f'\n所以答案是{line["answer"]}。\n\n')
            else:
                example += '\n答案：' + line['answer'] + '\n\n'
        else:
            if with_prompt is False:
                if cot:
                    example += '\n答案：让我们一步一步思考，\n1.'
                else:
                    example += '\n答案：'
            else:
                if cot:
                    example += '\n答案是什么？让我们一步一步思考，\n1.'
                else:
                    example += '\n答案是什么？ '
        return example

    def generate_few_shot_prompt(self, subject, dev_df, cot=False):
        """Build the few-shot preamble from the first k rows of dev_df."""
        prompt = f'以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。\n\n'
        k = self.k
        if self.k == -1:
            k = dev_df.shape[0]
        for i in range(k):
            prompt += self.format_example(dev_df.iloc[i, :],
                                          include_answer=True,
                                          cot=cot)
        return prompt

    def format_options(self, line):
        """Render the option columns as a single 'A: ... B: ...' string."""
        options_str = ''
        for choice in self.choices:
            options_str += f'{choice}: {line[choice]} '
        return options_str


def main(model_path, output_dir, take, few_shot=False, cot=False,
         with_prompt=False, constrained_decoding=False, do_test=False,
         n_times=1, do_save_csv=False):
    """Evaluate every subject under data/val (or data/test) and write
    submission.json + summary.json into ``output_dir/take{take}``.

    Expects ``subject_mapping.json`` in the CWD mapping each subject to
    metadata whose third element (index 2) is its group
    (STEM / Social Science / Humanities / Other).
    """
    assert os.path.exists('subject_mapping.json'), 'subject_mapping.json not found!'
    with open('subject_mapping.json') as f:
        subject_mapping = json.load(f)

    filenames = os.listdir('data/val')
    subject_list = [val_file.replace('_val.csv', '') for val_file in filenames]
    accuracy, summary = {}, {}

    run_date = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime(time.time()))
    save_result_dir = os.path.join(output_dir, f'take{take}')
    if not os.path.exists(save_result_dir):
        os.makedirs(save_result_dir, exist_ok=True)

    evaluator = Llama_Evaluator(choices=choices, k=n_times)

    all_answers = {}
    for index, subject_name in tqdm(list(enumerate(subject_list)), desc='主进度'):
        print(f'{index / len(subject_list)} Inference starts at {run_date} '
              f'on {model_path} with subject of {subject_name}!')
        val_file_path = os.path.join('data/val', f'{subject_name}_val.csv')
        dev_file_path = os.path.join('data/dev', f'{subject_name}_dev.csv')
        test_file_path = os.path.join('data/test', f'{subject_name}_test.csv')

        val_df = pd.read_csv(val_file_path) if not do_test else pd.read_csv(test_file_path)
        dev_df = pd.read_csv(dev_file_path) if few_shot else None

        correct_ratio, answers = evaluator.eval_subject(
            subject_name, val_df, dev_df,
            save_result_dir=save_result_dir if do_save_csv else None,
            few_shot=few_shot,
            cot=cot,
            with_prompt=with_prompt,
            constrained_decoding=constrained_decoding,
            do_test=do_test)

        print(f'Subject: {subject_name}')
        print(f'Acc: {correct_ratio}')
        accuracy[subject_name] = correct_ratio
        summary[subject_name] = {
            'score': correct_ratio,
            'num': len(val_df),
            'correct': correct_ratio * len(val_df) / 100,
        }
        all_answers[subject_name] = answers

    # `with` replaces the original json.dump(..., open(...)) file-handle leak.
    with open(save_result_dir + '/submission.json', 'w') as f:
        json.dump(all_answers, f, ensure_ascii=False, indent=4)

    print('Accuracy:')
    for k, v in accuracy.items():
        print(k, ': ', v)

    # Aggregate per-group and overall scores.
    total_num = 0
    total_correct = 0
    summary['grouped'] = {
        'STEM': {'correct': 0.0, 'num': 0},
        'Social Science': {'correct': 0.0, 'num': 0},
        'Humanities': {'correct': 0.0, 'num': 0},
        'Other': {'correct': 0.0, 'num': 0},
    }
    for subj, info in subject_mapping.items():
        group = info[2]
        summary['grouped'][group]['num'] += summary[subj]['num']
        summary['grouped'][group]['correct'] += summary[subj]['correct']
    for group, info in summary['grouped'].items():
        info['score'] = info['correct'] / info['num']
        total_num += info['num']
        total_correct += info['correct']
    summary['All'] = {'score': total_correct / total_num,
                      'num': total_num,
                      'correct': total_correct}

    with open(save_result_dir + '/summary.json', 'w') as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)


# Example usage
if __name__ == '__main__':
    model_path = 'path/to/model'
    output_dir = 'output'
    take = 0
    few_shot = False
    cot = False
    with_prompt = False
    constrained_decoding = False
    do_test = True  # False
    n_times = 1
    do_save_csv = False
    main(model_path, output_dir, take, few_shot, cot, with_prompt,
         constrained_decoding, do_test, n_times, do_save_csv)