From 4cbe11326e2d78bc5f2c0ca4887c9fc93d3efbac Mon Sep 17 00:00:00 2001
From: yuiseki
Date: Fri, 29 Mar 2024 13:19:29 +0900
Subject: [PATCH] Add recipes/A5000_24GB_x8/coder-math-ja-wikipedia.yaml

---
Notes:
    Adds a new recipe, coder-math-ja-wikipedia.yaml, whose base_model_id is
    yuiseki/tinyllama-ja-wikipedia-1.5T-v0.1, and changes base_model_id in
    the existing coder-math-ja.yaml from that model to
    TinyLlama/TinyLlama-1.1B-intermediate-step-715k-1.5T. Both recipes keep
    dataset_id kunishou/OpenMathInstruct-1-1.8m-ja.

    NOTE(review): the new recipe sets both train_num_train_epochs (4) and
    train_max_steps (1000); which limit takes effect is decided by the
    training script, which is not part of this patch -- confirm.

    NOTE(review): the evaluation prompts are English-to-Japanese phrase
    pairs while dataset_id names a math instruction dataset; presumably
    carried over from the translation task template -- verify intent.

    NOTE(review): train_fp16 is spelled `True`; lowercase `true` is the
    canonical YAML boolean. Left unchanged here so the added file still
    matches the recorded blob id.

 .../coder-math-ja-wikipedia.yaml              | 32 +++++++++++++++++++
 recipes/A5000_24GB_x8/coder-math-ja.yaml      |  2 +-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 recipes/A5000_24GB_x8/coder-math-ja-wikipedia.yaml

diff --git a/recipes/A5000_24GB_x8/coder-math-ja-wikipedia.yaml b/recipes/A5000_24GB_x8/coder-math-ja-wikipedia.yaml
new file mode 100644
index 0000000..5fbb01d
--- /dev/null
+++ b/recipes/A5000_24GB_x8/coder-math-ja-wikipedia.yaml
@@ -0,0 +1,32 @@
+target_task: tasks/nlp/translation.md
+base_model_id: yuiseki/tinyllama-ja-wikipedia-1.5T-v0.1
+model_name: tinyllama-coder-math-ja-wikipedia-v0.1
+output_base_dir: /data/output
+dataset_id: kunishou/OpenMathInstruct-1-1.8m-ja
+dataset_input_field_name: question_ja
+dataset_output_field_name: generated_solution_ja
+dataset_train_split_seed: 42
+dataset_train_split_test_size: 0.2
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+train_claim_gpu_num: 3
+train_per_device_train_batch_size: 8
+train_gradient_accumulation_steps: 2
+train_num_train_epochs: 4
+train_max_steps: 1000
+train_fp16: True
+inference_max_new_tokens: 32
+evaluations:
+  -
+    prompt: "thank you"
+    expected_output: "ありがとう"
+  -
+    prompt: "Hello"
+    expected_output: "こんにちは"
+  -
+    prompt: "How are you?"
+    expected_output: "お元気ですか?"
+  -
+    prompt: "I am hungry"
+    expected_output: "お腹が空いています"
diff --git a/recipes/A5000_24GB_x8/coder-math-ja.yaml b/recipes/A5000_24GB_x8/coder-math-ja.yaml
index dc493eb..e4c6a80 100644
--- a/recipes/A5000_24GB_x8/coder-math-ja.yaml
+++ b/recipes/A5000_24GB_x8/coder-math-ja.yaml
@@ -1,5 +1,5 @@
 target_task: tasks/nlp/translation.md
-base_model_id: yuiseki/tinyllama-ja-wikipedia-1.5T-v0.1
+base_model_id: TinyLlama/TinyLlama-1.1B-intermediate-step-715k-1.5T
 model_name: tinyllama-coder-math-ja-v0.1
 output_base_dir: /data/output
 dataset_id: kunishou/OpenMathInstruct-1-1.8m-ja