# opencompass/.github/scripts/eval_regression_base_models.py
# msj19's picture
# Add files using upload-large-folder tool
# 9b40ad5 verified
from mmengine.config import read_base
from opencompass.models import VLLM, HuggingFaceBaseModel, TurboMindModel
with read_base():
# choose a list of datasets
from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import \
gpqa_datasets # noqa: F401, E501
from opencompass.configs.datasets.gsm8k.gsm8k_gen_17d0dc import \
gsm8k_datasets # noqa: F401, E501
from opencompass.configs.datasets.race.race_ppl import \
race_datasets # noqa: F401, E501
from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
winogrande_datasets # noqa: F401, E501
from ...rjob import eval, infer # noqa: F401, E501
# Keep only the entry at index 1 of the imported RACE list, so the
# ``*_datasets`` scan further down picks up a single RACE dataset.
# NOTE(review): presumably index 1 is the race-high subset, since 'race-high'
# is the only RACE row listed in the summarizer -- confirm against race_ppl.
race_datasets = [race_datasets[1]]
# Model configs under regression test: the same checkpoint path served through
# three backends (TurboMind, vLLM and the plain HuggingFace base-model wrapper).
# NOTE(review): every abbr says "qwen3-8b" while every path points at
# Qwen3-0.6B-Base -- presumably the abbrs are kept stable on purpose (e.g. as
# baseline keys); confirm before renaming either side.
models = [
    {
        'type': TurboMindModel,
        'abbr': 'qwen3-8b-base-turbomind',
        'path': 'Qwen/Qwen3-0.6B-Base',
        'engine_config': {'max_batch_size': 1, 'tp': 1},
        # top_k=1 with a tiny temperature; presumably meant to make decoding
        # effectively greedy -- confirm against the backend's sampling rules.
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 2048,
        },
        'max_seq_len': 8192,
        'max_out_len': 2048,
        'batch_size': 1,
        'run_cfg': {'num_gpus': 1},
    },
    {
        'type': VLLM,
        'abbr': 'qwen3-8b-base-vllm',
        'path': 'Qwen/Qwen3-0.6B-Base',
        'model_kwargs': {
            'tensor_parallel_size': 1,
            'gpu_memory_utilization': 0.6,
        },
        'max_seq_len': 8192,
        'max_out_len': 2048,
        'batch_size': 16,
        'generation_kwargs': {'temperature': 0},
        'run_cfg': {'num_gpus': 1},
    },
    {
        'type': HuggingFaceBaseModel,
        'abbr': 'qwen3-8b-base-hf',
        'path': 'Qwen/Qwen3-0.6B-Base',
        'max_out_len': 1024,
        'batch_size': 4,
        'run_cfg': {'num_gpus': 1},
    },
]
# Concatenate every list bound to a module-level name ending in ``_datasets``
# into one flat list. The scan runs over the current namespace, so it sees
# whatever ``*_datasets`` names exist at this point in the file.
datasets = sum(
    (group for name, group in locals().items() if name.endswith('_datasets')),
    [],
)
# Set the same '[0:32]' test range on every dataset's reader config;
# presumably this limits each dataset to a short slice so the regression run
# stays quick -- confirm against the reader's handling of ``test_range``.
for d in datasets:
    d['reader_cfg']['test_range'] = '[0:32]'
# Pin both batch-size knobs to 1 for every model whose abbr mentions the
# TurboMind / LMDeploy backend.
for m in models:
    if any(tag in m['abbr'] for tag in ('turbomind', 'lmdeploy')):
        m['engine_config']['max_batch_size'] = 1
        m['batch_size'] = 1
# Reorder the models by requested GPU count, ascending; ``sorted`` is stable,
# so models requesting the same number keep their listed order.
models = sorted(models, key=lambda cfg: cfg['run_cfg']['num_gpus'])
# Summary layout: one [dataset abbr, 'accuracy'] row per benchmark, in the
# order listed, plus every ``*_summary_groups`` list found in the module
# namespace (concatenated into a single list).
summarizer = {
    'dataset_abbrs': [
        [abbr, 'accuracy']
        for abbr in ('gsm8k', 'GPQA_diamond', 'race-high', 'winogrande')
    ],
    'summary_groups': sum(
        (groups for name, groups in locals().items()
         if name.endswith('_summary_groups')),
        [],
    ),
}