diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb index 7bd2747b..9a9cf515 100644 --- a/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-1B5-mem-finetune.ipynb @@ -176,7 +176,7 @@ "tags": [] }, "source": [ - "## Finetune 1 (0 -> 4k) : Dataset preperation\n", + "## Finetune 1 (0 -> 2x2k) : Dataset preparation\n", "\n", "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." ] }, @@ -328,7 +328,7 @@ "tags": [] }, "source": [ - "## Finetune 1 (0 -> 4k) : The actual tune!" + "## Finetune 1 (0 -> 2x2k) : The actual tune!" ] }, { @@ -429,7 +429,7 @@ "id": "8259d9b5", "metadata": {}, "source": [ - "## Finetune 1 (0 -> 2*2k) : The actual tune!" + "## Finetune 1 (0 -> 2x2k) : The actual tune!" ] }, { @@ -485,7 +485,7 @@ "id": "6fe5d71b", "metadata": {}, "source": [ - "## Finetune 2 (0 -> 2*4k) : Dataset preperation\n", + "## Finetune 2 (2x2k -> 2x4k) : Dataset preparation\n", "\n", "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." ] }, @@ -2272,7 +2272,7 @@ "#\n", "# Ramping up the 3000+ - 400 words dataset\n", "# \n", - "for i in {3000..6000..25} \n", + "for i in {3025..6000..25} \n", "do\n", " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", "done\n", @@ -2325,7 +2325,7 @@ "id": "60244472", "metadata": {}, "source": [ - "## Finetune 1 (0 -> 2*2k) : The actual tune!" + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" ] }, { diff --git a/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb new file mode 100644 index 00000000..7e316ffa --- /dev/null +++ b/notebook/rwkv-x-exp/v5-exp/memory-test/World-3B-mem-finetune.ipynb @@ -0,0 +1,2590 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "093065b5", + "metadata": { + "papermill": { + "duration": 0.005853, + "end_time": "2024-01-23T11:31:12.266593", + "exception": false, + "start_time": "2024-01-23T11:31:12.260740", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# RWKV World Memory Finetune (Memory Finetune)\n", + "\n", + "This takes an existing RWKV world model, and finetunes it specifically for the memory repeat task of various sizes.\n", + "This test is used as an approximation of testing the model token memory size in the \"worst case scenario\"\n", + "\n", + "- Using randomized data, so prior learning does not help, nor is it possible to compress the data\n", + "- Using a variety of token lengths, to avoid overfitting to a single length\n", + "- Based on the pretrained model (rwkv world)\n", + "- This process does \"destroy the model\" but it helps quantify the model limits\n", + "\n", + "In practice however, the model may show \"attention range\" longer than what is benchmarked, as natural text is highly compressible. 
This is unlike the pure randomized data that was being tested here.\n", + "\n", + "This runner has been optimized to run on 8 x 80GB VRAM nodes; you should allocate at least 1TB of disk space.\n", + "\n", + "> This project assumes you have the rwkv-infctx conda env setup, and you are executing in that environment - see the main README.md for the conda env setup steps" + ] + }, + { + "cell_type": "markdown", + "id": "0a0d2888", + "metadata": { + "papermill": { + "duration": 0.004809, + "end_time": "2024-01-23T11:31:12.276770", + "exception": false, + "start_time": "2024-01-23T11:31:12.271961", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configure your environment settings\n", + "(!Important: you will need to rerun the below cell if you restart your kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e8b16f4a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.286314Z", + "iopub.status.busy": "2024-01-23T11:31:12.285325Z", + "iopub.status.idle": "2024-01-23T11:31:12.303712Z", + "shell.execute_reply": "2024-01-23T11:31:12.302559Z" + }, + "papermill": { + "duration": 0.024597, + "end_time": "2024-01-23T11:31:12.306334", + "exception": false, + "start_time": "2024-01-23T11:31:12.281737", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "ENABLE_WANDB: True\n", + "GPU_DEVICES: auto\n", + "NOTEBOOK_DIR: /workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test\n", + "TRAINER_DIR: /workspace/RWKV-infctx-trainer/RWKV-v5\n", + "PROJECT_DIR: /workspace/RWKV-infctx-trainer\n" + ] + } + ], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "WANDB_PREFIX=\"[8xA100] RWKV-v5-3B-World\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# The model sizing\n", + "MODEL_NAME=\"RWKV-v5-3B-world.pth\"\n", + "MODEL_URL=\"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/RWKV-5-World-3B-v2-20231118-ctx16k.pth?download=true\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "MEMORY_SCRIPT_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./notebook/util-scripts/memory_script\"))\n", + "\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "dc0dcc53", + "metadata": { + "papermill": { + "duration": 0.003834, + "end_time": "2024-01-23T11:31:12.316887", + "exception": false, + "start_time": "2024-01-23T11:31:12.313053", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Download the pretrained model\n", + "(if you want to skip the basemodel train + instruct tune)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db947b68", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.326806Z", + "iopub.status.busy": "2024-01-23T11:31:12.325636Z", + "iopub.status.idle": 
"2024-01-23T11:31:12.823116Z", + "shell.execute_reply": "2024-01-23T11:31:12.821743Z" + }, + "papermill": { + "duration": 0.505449, + "end_time": "2024-01-23T11:31:12.826085", + "exception": false, + "start_time": "2024-01-23T11:31:12.320636", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's wget the model files\n", + "!cd \"{PROJECT_DIR}\" && mkdir -p \"{PROJECT_DIR}/model\"\n", + "!cd \"{PROJECT_DIR}/model\" && \\\n", + " wget -O \"{MODEL_NAME}\" -nc \"{MODEL_URL}\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e8c15352", + "metadata": { + "papermill": { + "duration": 0.005467, + "end_time": "2024-01-23T11:31:12.837257", + "exception": false, + "start_time": "2024-01-23T11:31:12.831790", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 4k) : Dataset preparation\n", + "\n", + "Stage 1, handles total context size of 2048. Meaning it will be tuned for memory task of 1 to approximately 1024 tokens of size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bfca27", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:12.843769Z", + "iopub.status.busy": "2024-01-23T11:31:12.843411Z", + "iopub.status.idle": "2024-01-23T11:31:16.588281Z", + "shell.execute_reply": "2024-01-23T11:31:16.587130Z" + }, + "papermill": { + "duration": 3.751098, + "end_time": "2024-01-23T11:31:16.591186", + "exception": false, + "start_time": "2024-01-23T11:31:12.840088", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Folder and eval pip setup\n", + "!cp -r \"{MEMORY_SCRIPT_DIR}/\" \"{NOTEBOOK_DIR}/\"\n", + "!python3 -m pip install rwkv asyncio aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c38e51c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:16.604643Z", + "iopub.status.busy": "2024-01-23T11:31:16.604064Z", + "iopub.status.idle": "2024-01-23T11:31:19.825147Z", + "shell.execute_reply": "2024-01-23T11:31:19.823826Z" + }, + "papermill": { + "duration": 3.231491, + "end_time": "2024-01-23T11:31:19.828814", + "exception": false, + "start_time": "2024-01-23T11:31:16.597323", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word reptition dataset ##\"\n", + "\n", + "#\n", + "# Training set for < 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 150 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 50+ - 400 words dataset\n", + "# \n", + "for i in {110..200..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 125 & \n", + " python 
./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 200+ - 4000 words dataset\n", + "# \n", + "for i in {210..4000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0577a12c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:31:20.066235Z", + "iopub.status.busy": "2024-01-23T11:31:20.065803Z", + "iopub.status.idle": "2024-01-23T11:34:10.695875Z", + "shell.execute_reply": "2024-01-23T11:34:10.694270Z" + }, + "papermill": { + "duration": 170.715947, + "end_time": "2024-01-23T11:34:10.699529", + "exception": false, + "start_time": "2024-01-23T11:31:19.983582", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's pre-tokenize the required dataset\n", + "# and pack the data into 8k of length\n", + "#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# than to do 8k chunks, batch size 8, with 16k datapacks. Why? I don't know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-1-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-1-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "82e837d9", + "metadata": { + "papermill": { + "duration": 0.113925, + "end_time": "2024-01-23T11:34:10.936645", + "exception": false, + "start_time": "2024-01-23T11:34:10.822720", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Finetune 1 (0 -> 4k) : The actual tune!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127af572", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T11:34:11.167985Z", + "iopub.status.busy": "2024-01-23T11:34:11.167540Z", + "iopub.status.idle": "2024-01-23T13:03:10.590814Z", + "shell.execute_reply": "2024-01-23T13:03:10.588629Z" + }, + "papermill": { + "duration": 5339.542389, + "end_time": "2024-01-23T13:03:10.594019", + "exception": false, + "start_time": "2024-01-23T11:34:11.051630", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-1-tune.yaml\" \\\n", + " --model.load_model=\"../model/{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-1 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81395227", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:03:10.911324Z", + "iopub.status.busy": "2024-01-23T13:03:10.910769Z", + "iopub.status.idle": "2024-01-23T13:03:39.572566Z", + "shell.execute_reply": "2024-01-23T13:03:39.571025Z" + }, + "papermill": { + "duration": 28.823213, + "end_time": "2024-01-23T13:03:39.575536", + "exception": false, + "start_time": "2024-01-23T13:03:10.752323", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-1-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-1-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595a1b18", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-23T13:05:37.852892Z", + "iopub.status.busy": "2024-01-23T13:05:37.852266Z", + "iopub.status.idle": "2024-01-23T13:12:24.206962Z", + "shell.execute_reply": "2024-01-23T13:12:24.205778Z" + }, + "papermill": { + "duration": 406.525339, + "end_time": "2024-01-23T13:12:24.209784", + "exception": false, + "start_time": "2024-01-23T13:05:37.684445", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-1-{MODEL_NAME}\" \"none\" 1000 3000" + ] + }, + { + "cell_type": "markdown", + "id": "bca9abd4", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : Dataset preparation\n", + "\n", + "Stage 2, handles total context size of 8k. Meaning it will be tuned for memory task of approximately 4k tokens of size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8a4da53a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n", + "Generated JSONL file with - 5 max words, 100 samples - at ./dataset/gen-word-5-count.jsonl\n", + "Generated JSONL file with - 2 max words, 100 samples - at ./dataset/word-2-count.jsonl\n", + "Generated JSONL file with - 25 max words, 100 samples - at ./dataset/gen-word-25-count.jsonl\n", + "Generated JSONL file with - 45 max words, 100 samples - at ./dataset/gen-word-45-count.jsonl\n", + "Generated JSONL file with - 4 max words, 100 samples - at ./dataset/word-4-count.jsonl\n", + "Generated JSONL file with - 10 max words, 100 samples - at ./dataset/gen-word-10-count.jsonl\n", + "Generated JSONL file with - 15 max words, 100 samples - at ./dataset/gen-word-15-count.jsonl\n", + "Generated JSONL file with - 20 max words, 100 samples - at ./dataset/gen-word-20-count.jsonl\n", + "Generated JSONL file with - 40 max words, 100 samples - at ./dataset/gen-word-40-count.jsonl\n", + "Generated JSONL file with - 30 max words, 100 samples - at ./dataset/gen-word-30-count.jsonl\n", + "Generated JSONL file with - 70 max words, 100 samples - at ./dataset/gen-word-70-count.jsonl\n", + "Generated JSONL file with - 35 max words, 100 samples - at ./dataset/gen-word-35-count.jsonl\n", + "Generated JSONL file with - 50 max words, 100 samples - at ./dataset/gen-word-50-count.jsonl\n", + "Generated JSONL file with - 60 max words, 100 samples - at ./dataset/gen-word-60-count.jsonl\n", + "Generated JSONL file with - 90 max words, 100 samples - at ./dataset/gen-word-90-count.jsonl\n", + "Generated JSONL file with - 80 max words, 100 samples - at ./dataset/gen-word-80-count.jsonl\n", + "Generated JSONL file with - 85 max words, 100 samples - at ./dataset/gen-word-85-count.jsonl\n", + "Generated JSONL file with - 120 max words, 75 samples - at ./dataset/gen-word-120-count.jsonl\n", + "Generated JSONL file with - 65 max words, 100 samples - at ./dataset/gen-word-65-count.jsonl\n", + "Generated JSONL file with - 75 max words, 100 samples - at ./dataset/gen-word-75-count.jsonl\n", + "Generated JSONL file with - 55 max words, 100 samples - at ./dataset/gen-word-55-count.jsonl\n", + "Generated JSONL file with - 230 max words, 75 samples - at ./dataset/gen-word-230-count.jsonl\n", + "Generated JSONL file with - 360 max words, 75 samples - at ./dataset/gen-word-360-count.jsonl\n", + "Generated JSONL file with - 200 max words, 75 samples - at ./dataset/gen-word-200-count.jsonl\n", + "Generated JSONL file with - 260 max words, 75 samples - at ./dataset/gen-word-260-count.jsonl\n", + "Generated JSONL file with - 140 max words, 75 samples - at ./dataset/gen-word-140-count.jsonl\n", + "Generated JSONL file with - 110 max words, 75 samples - at ./dataset/gen-word-110-count.jsonl\n", + "Generated JSONL file with - 270 max words, 75 samples - at ./dataset/gen-word-270-count.jsonl\n", + "Generated JSONL file with - 460 max words, 75 samples - at ./dataset/gen-word-460-count.jsonl\n", + "Generated JSONL file with - 400 max words, 75 samples - at ./dataset/gen-word-400-count.jsonl\n", + "Generated JSONL file with - 300 max words, 75 samples - at ./dataset/gen-word-300-count.jsonl\n", + "Generated JSONL file with - 95 max words, 100 samples - at ./dataset/gen-word-95-count.jsonl\n", + "Generated JSONL file with - 280 max words, 75 samples - at ./dataset/gen-word-280-count.jsonl\n", + "Generated JSONL file with - 
150 max words, 75 samples - at ./dataset/gen-word-150-count.jsonl\n", + "Generated JSONL file with - 160 max words, 75 samples - at ./dataset/gen-word-160-count.jsonl\n", + "Generated JSONL file with - 240 max words, 75 samples - at ./dataset/gen-word-240-count.jsonlGenerated JSONL file with - 190 max words, 75 samples - at ./dataset/gen-word-190-count.jsonl\n", + "\n", + "Generated JSONL file with - 580 max words, 75 samples - at ./dataset/gen-word-580-count.jsonl\n", + "Generated JSONL file with - 290 max words, 75 samples - at ./dataset/gen-word-290-count.jsonl\n", + "Generated JSONL file with - 210 max words, 75 samples - at ./dataset/gen-word-210-count.jsonl\n", + "Generated JSONL file with - 250 max words, 75 samples - at ./dataset/gen-word-250-count.jsonl\n", + "Generated JSONL file with - 130 max words, 75 samples - at ./dataset/gen-word-130-count.jsonl\n", + "Generated JSONL file with - 410 max words, 75 samples - at ./dataset/gen-word-410-count.jsonl\n", + "Generated JSONL file with - 180 max words, 75 samples - at ./dataset/gen-word-180-count.jsonl\n", + "Generated JSONL file with - 320 max words, 75 samples - at ./dataset/gen-word-320-count.jsonl\n", + "Generated JSONL file with - 170 max words, 75 samples - at ./dataset/gen-word-170-count.jsonl\n", + "Generated JSONL file with - 220 max words, 75 samples - at ./dataset/gen-word-220-count.jsonl\n", + "Generated JSONL file with - 370 max words, 75 samples - at ./dataset/gen-word-370-count.jsonl\n", + "Generated JSONL file with - 100 max words, 100 samples - at ./dataset/gen-word-100-count.jsonl\n", + "Generated JSONL file with - 450 max words, 75 samples - at ./dataset/gen-word-450-count.jsonl\n", + "Generated JSONL file with - 500 max words, 75 samples - at ./dataset/gen-word-500-count.jsonl\n", + "Generated JSONL file with - 310 max words, 75 samples - at ./dataset/gen-word-310-count.jsonl\n", + "Generated JSONL file with - 430 max words, 75 samples - at ./dataset/gen-word-430-count.jsonl\n", + "Generated JSONL file with - 420 max words, 75 samples - at ./dataset/gen-word-420-count.jsonl\n", + "Generated JSONL file with - 380 max words, 75 samples - at ./dataset/gen-word-380-count.jsonl\n", + "Generated JSONL file with - 530 max words, 75 samples - at ./dataset/gen-word-530-count.jsonl\n", + "Generated JSONL file with - 350 max words, 75 samples - at ./dataset/gen-word-350-count.jsonl\n", + "Generated JSONL file with - 510 max words, 75 samples - at ./dataset/gen-word-510-count.jsonl\n", + "Generated JSONL file with - 340 max words, 75 samples - at ./dataset/gen-word-340-count.jsonl\n", + "Generated JSONL file with - 560 max words, 75 samples - at ./dataset/gen-word-560-count.jsonl\n", + "Generated JSONL file with - 550 max words, 75 samples - at ./dataset/gen-word-550-count.jsonl\n", + "Generated JSONL file with - 330 max words, 75 samples - at ./dataset/gen-word-330-count.jsonl\n", + "Generated JSONL file with - 490 max words, 75 samples - at ./dataset/gen-word-490-count.jsonl\n", + "Generated JSONL file with - 470 max words, 75 samples - at ./dataset/gen-word-470-count.jsonl\n", + "Generated JSONL file with - 520 max words, 75 samples - at ./dataset/gen-word-520-count.jsonl\n", + "Generated JSONL file with - 440 max words, 75 samples - at ./dataset/gen-word-440-count.jsonl\n", + "Generated JSONL file with - 390 max words, 75 samples - at ./dataset/gen-word-390-count.jsonl\n", + "Generated JSONL file with - 480 max words, 75 samples - at ./dataset/gen-word-480-count.jsonl\n", + "Generated JSONL file with - 570 max words, 75 
samples - at ./dataset/gen-word-570-count.jsonl\n", + "Generated JSONL file with - 680 max words, 75 samples - at ./dataset/gen-word-680-count.jsonl\n", + "Generated a single JSONL file with 680 samples (75 token repeat) - 290 max words - at ./dataset/shuffle-word-290-count.jsonl\n", + "Generated a single JSONL file with 694 samples (75 token repeat) - 260 max words - at ./dataset/shuffle-word-260-count.jsonl\n", + "Generated a single JSONL file with 527 samples (75 token repeat) - 360 max words - at ./dataset/shuffle-word-360-count.jsonl\n", + "Generated JSONL file with - 590 max words, 75 samples - at ./dataset/gen-word-590-count.jsonl\n", + "Generated a single JSONL file with 685 samples (75 token repeat) - 270 max words - at ./dataset/shuffle-word-270-count.jsonl\n", + "Generated JSONL file with - 610 max words, 75 samples - at ./dataset/gen-word-610-count.jsonl\n", + "Generated a single JSONL file with 532 samples (75 token repeat) - 320 max words - at ./dataset/shuffle-word-320-count.jsonl\n", + "Generated a single JSONL file with 1005 samples (75 token repeat) - 200 max words - at ./dataset/shuffle-word-200-count.jsonl\n", + "Generated a single JSONL file with 1034 samples (75 token repeat) - 180 max words - at ./dataset/shuffle-word-180-count.jsonl\n", + "Generated a single JSONL file with 681 samples (75 token repeat) - 300 max words - at ./dataset/shuffle-word-300-count.jsonl\n", + "Generated a single JSONL file with 523 samples (75 token repeat) - 370 max words - at ./dataset/shuffle-word-370-count.jsonl\n", + "Generated a single JSONL file with 436 samples (75 token repeat) - 480 max words - at ./dataset/shuffle-word-480-count.jsonl\n", + "Generated JSONL file with - 640 max words, 75 samples - at ./dataset/gen-word-640-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 430 max words - at ./dataset/shuffle-word-430-count.jsonlGenerated a single JSONL file with 789 samples (75 token repeat) - 210 max words - at ./dataset/shuffle-word-210-count.jsonl\n", + "\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 650 max words - at ./dataset/shuffle-word-650-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 530 max words - at ./dataset/shuffle-word-530-count.jsonlGenerated a single JSONL file with 373 samples (75 token repeat) - 550 max words - at ./dataset/shuffle-word-550-count.jsonl\n", + "Generated a single JSONL file with 686 samples (75 token repeat) - 280 max words - at ./dataset/shuffle-word-280-count.jsonl\n", + "\n", + "Generated a single JSONL file with 699 samples (75 token repeat) - 250 max words - at ./dataset/shuffle-word-250-count.jsonl\n", + "Generated JSONL file with - 690 max words, 75 samples - at ./dataset/gen-word-690-count.jsonl\n", + "Generated a single JSONL file with 1153 samples (75 token repeat) - 150 max words - at ./dataset/shuffle-word-150-count.jsonl\n", + "Generated a single JSONL file with 534 samples (75 token repeat) - 310 max words - at ./dataset/shuffle-word-310-count.jsonl\n", + "Generated a single JSONL file with 443 samples (75 token repeat) - 450 max words - at ./dataset/shuffle-word-450-count.jsonl\n", + "Generated a single JSONL file with 4083 samples (100 token repeat) - 65 max words - at ./dataset/shuffle-word-65-count.jsonlGenerated a single JSONL file with 375 samples (75 token repeat) - 510 max words - at ./dataset/shuffle-word-510-count.jsonl\n", + "\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 390 max words - at 
./dataset/shuffle-word-390-count.jsonl\n", + "Generated a single JSONL file with 432 samples (75 token repeat) - 500 max words - at ./dataset/shuffle-word-500-count.jsonl\n", + "Generated a single JSONL file with 1491 samples (75 token repeat) - 110 max words - at ./dataset/shuffle-word-110-count.jsonl\n", + "Generated a single JSONL file with 447 samples (75 token repeat) - 410 max words - at ./dataset/shuffle-word-410-count.jsonl\n", + "Generated a single JSONL file with 1018 samples (75 token repeat) - 190 max words - at ./dataset/shuffle-word-190-count.jsonl\n", + "Generated a single JSONL file with 724 samples (75 token repeat) - 240 max words - at ./dataset/shuffle-word-240-count.jsonl\n", + "Generated a single JSONL file with 1287 samples (75 token repeat) - 130 max words - at ./dataset/shuffle-word-130-count.jsonl\n", + "Generated a single JSONL file with 1098 samples (75 token repeat) - 160 max words - at ./dataset/shuffle-word-160-count.jsonl\n", + "Generated a single JSONL file with 2650 samples (100 token repeat) - 100 max words - at ./dataset/shuffle-word-100-count.jsonl\n", + "Generated JSONL file with - 1670 max words, 75 samples - at ./dataset/gen-word-1670-count.jsonl\n", + "Generated a single JSONL file with 1227 samples (75 token repeat) - 140 max words - at ./dataset/shuffle-word-140-count.jsonl\n", + "Generated a single JSONL file with 3543 samples (100 token repeat) - 75 max words - at ./dataset/shuffle-word-75-count.jsonl\n", + "Generated a single JSONL file with 2916 samples (100 token repeat) - 90 max words - at ./dataset/shuffle-word-90-count.jsonl\n", + "Generated a single JSONL file with 439 samples (75 token repeat) - 460 max words - at ./dataset/shuffle-word-460-count.jsonl\n", + "Generated JSONL file with - 1470 max words, 75 samples - at ./dataset/gen-word-1470-count.jsonl\n", + "Generated a single JSONL file with 4794 samples (100 token repeat) - 55 max words - at ./dataset/shuffle-word-55-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 620 max words - at ./dataset/shuffle-word-620-count.jsonl\n", + "Generated a single JSONL file with 1056 samples (75 token repeat) - 170 max words - at ./dataset/shuffle-word-170-count.jsonl\n", + "Generated JSONL file with - 800 max words, 75 samples - at ./dataset/gen-word-800-count.jsonl\n", + "Generated JSONL file with - 630 max words, 75 samples - at ./dataset/gen-word-630-count.jsonl\n", + "Generated JSONL file with - 600 max words, 75 samples - at ./dataset/gen-word-600-count.jsonl\n", + "Generated a single JSONL file with 522 samples (75 token repeat) - 400 max words - at ./dataset/shuffle-word-400-count.jsonl\n", + "Generated a single JSONL file with 3773 samples (100 token repeat) - 70 max words - at ./dataset/shuffle-word-70-count.jsonl\n", + "Generated a single JSONL file with 3298 samples (100 token repeat) - 80 max words - at ./dataset/shuffle-word-80-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 350 max words - at ./dataset/shuffle-word-350-count.jsonl\n", + "Generated a single JSONL file with 3145 samples (100 token repeat) - 85 max words - at ./dataset/shuffle-word-85-count.jsonl\n", + "Generated a single JSONL file with 526 samples (75 token repeat) - 340 max words - at ./dataset/shuffle-word-340-count.jsonl\n", + "Generated a single JSONL file with 525 samples (75 token repeat) - 330 max words - at ./dataset/shuffle-word-330-count.jsonl\n", + "Generated JSONL file with - 620 max words, 75 samples - at 
./dataset/gen-word-620-count.jsonl\n", + "Generated JSONL file with - 650 max words, 75 samples - at ./dataset/gen-word-650-count.jsonl\n", + "Generated a single JSONL file with 5860 samples (100 token repeat) - 45 max words - at ./dataset/shuffle-word-45-count.jsonl\n", + "Generated a single JSONL file with 4379 samples (100 token repeat) - 60 max words - at ./dataset/shuffle-word-60-count.jsonl\n", + "Generated a single JSONL file with 1382 samples (75 token repeat) - 120 max words - at ./dataset/shuffle-word-120-count.jsonl\n", + "Generated a single JSONL file with 755 samples (75 token repeat) - 220 max words - at ./dataset/shuffle-word-220-count.jsonl\n", + "Generated a single JSONL file with 747 samples (75 token repeat) - 230 max words - at ./dataset/shuffle-word-230-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 520 max words - at ./dataset/shuffle-word-520-count.jsonl\n", + "Generated JSONL file with - 540 max words, 75 samples - at ./dataset/gen-word-540-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 570 max words - at ./dataset/shuffle-word-570-count.jsonl\n", + "Generated a single JSONL file with 5255 samples (100 token repeat) - 50 max words - at ./dataset/shuffle-word-50-count.jsonl\n", + "Generated a single JSONL file with 442 samples (75 token repeat) - 470 max words - at ./dataset/shuffle-word-470-count.jsonl\n", + "Generated JSONL file with - 960 max words, 75 samples - at ./dataset/gen-word-960-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 850 max words - at ./dataset/shuffle-word-850-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 490 max words - at ./dataset/shuffle-word-490-count.jsonl\n", + "Generated JSONL file with - 1420 max words, 75 samples - at ./dataset/gen-word-1420-count.jsonl\n", + "Generated JSONL file with - 910 max words, 75 samples - at ./dataset/gen-word-910-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 560 max words - at ./dataset/shuffle-word-560-count.jsonl\n", + "Generated a single JSONL file with 6581 samples (100 token repeat) - 40 max words - at ./dataset/shuffle-word-40-count.jsonl\n", + "Generated JSONL file with - 670 max words, 75 samples - at ./dataset/gen-word-670-count.jsonl\n", + "Generated a single JSONL file with 528 samples (75 token repeat) - 380 max words - at ./dataset/shuffle-word-380-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 630 max words - at ./dataset/shuffle-word-630-count.jsonl\n", + "Generated JSONL file with - 660 max words, 75 samples - at ./dataset/gen-word-660-count.jsonl\n", + "Generated a single JSONL file with 2798 samples (100 token repeat) - 95 max words - at ./dataset/shuffle-word-95-count.jsonl\n", + "Generated JSONL file with - 740 max words, 75 samples - at ./dataset/gen-word-740-count.jsonl\n", + "Generated JSONL file with - 990 max words, 75 samples - at ./dataset/gen-word-990-count.jsonl\n", + "Generated a single JSONL file with 303 samples (75 token repeat) - 660 max words - at ./dataset/shuffle-word-660-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 680 max words - at ./dataset/shuffle-word-680-count.jsonl\n", + "Generated a single JSONL file with 8727 samples (100 token repeat) - 30 max words - at ./dataset/shuffle-word-30-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 600 max words - at 
./dataset/shuffle-word-600-count.jsonl\n", + "Generated JSONL file with - 710 max words, 75 samples - at ./dataset/gen-word-710-count.jsonl\n", + "Generated JSONL file with - 950 max words, 75 samples - at ./dataset/gen-word-950-count.jsonl\n", + "Generated a single JSONL file with 374 samples (75 token repeat) - 590 max words - at ./dataset/shuffle-word-590-count.jsonl\n", + "Generated JSONL file with - 1060 max words, 75 samples - at ./dataset/gen-word-1060-count.jsonl\n", + "Generated JSONL file with - 970 max words, 75 samples - at ./dataset/gen-word-970-count.jsonl\n", + "Generated a single JSONL file with 7513 samples (100 token repeat) - 35 max words - at ./dataset/shuffle-word-35-count.jsonl\n", + "Generated JSONL file with - 980 max words, 75 samples - at ./dataset/gen-word-980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1610 max words - at ./dataset/shuffle-word-1610-count.jsonl\n", + "Generated JSONL file with - 1030 max words, 75 samples - at ./dataset/gen-word-1030-count.jsonl\n", + "Generated JSONL file with - 1000 max words, 75 samples - at ./dataset/gen-word-1000-count.jsonl\n", + "Generated JSONL file with - 1020 max words, 75 samples - at ./dataset/gen-word-1020-count.jsonl\n", + "Generated JSONL file with - 700 max words, 75 samples - at ./dataset/gen-word-700-count.jsonl\n", + "Generated JSONL file with - 720 max words, 75 samples - at ./dataset/gen-word-720-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 950 max words - at ./dataset/shuffle-word-950-count.jsonl\n", + "Generated JSONL file with - 1010 max words, 75 samples - at ./dataset/gen-word-1010-count.jsonl\n", + "Generated JSONL file with - 730 max words, 75 samples - at ./dataset/gen-word-730-count.jsonl\n", + "Generated a single JSONL file with 444 samples (75 token repeat) - 420 max words - at ./dataset/shuffle-word-420-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 940 max words - at ./dataset/shuffle-word-940-count.jsonl\n", + "Generated a single JSONL file with 435 samples (75 token repeat) - 440 max words - at ./dataset/shuffle-word-440-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 640 max words - at ./dataset/shuffle-word-640-count.jsonl\n", + "Generated a single JSONL file with 10623 samples (100 token repeat) - 25 max words - at ./dataset/shuffle-word-25-count.jsonl\n", + "Generated a single JSONL file with 301 samples (75 token repeat) - 610 max words - at ./dataset/shuffle-word-610-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 910 max words - at ./dataset/shuffle-word-910-count.jsonl\n", + "Generated a single JSONL file with 13076 samples (100 token repeat) - 20 max words - at ./dataset/shuffle-word-20-count.jsonl\n", + "Generated a single JSONL file with 373 samples (75 token repeat) - 580 max words - at ./dataset/shuffle-word-580-count.jsonl\n", + "Generated JSONL file with - 760 max words, 75 samples - at ./dataset/gen-word-760-count.jsonl\n", + "Generated a single JSONL file with 375 samples (75 token repeat) - 540 max words - at ./dataset/shuffle-word-540-count.jsonl\n", + "Generated JSONL file with - 860 max words, 75 samples - at ./dataset/gen-word-860-count.jsonl\n", + "Generated JSONL file with - 940 max words, 75 samples - at ./dataset/gen-word-940-count.jsonl\n", + "Generated JSONL file with - 1880 max words, 75 samples - at ./dataset/gen-word-1880-count.jsonl\n", + "Generated a single JSONL 
file with 225 samples (75 token repeat) - 960 max words - at ./dataset/shuffle-word-960-count.jsonl\n", + "Generated JSONL file with - 770 max words, 75 samples - at ./dataset/gen-word-770-count.jsonl\n", + "Generated a single JSONL file with 239 samples (75 token repeat) - 870 max words - at ./dataset/shuffle-word-870-count.jsonl\n", + "Generated JSONL file with - 810 max words, 75 samples - at ./dataset/gen-word-810-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 900 max words - at ./dataset/shuffle-word-900-count.jsonl\n", + "Generated JSONL file with - 750 max words, 75 samples - at ./dataset/gen-word-750-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 760 max words - at ./dataset/shuffle-word-760-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 980 max words - at ./dataset/shuffle-word-980-count.jsonl\n", + "Generated JSONL file with - 820 max words, 75 samples - at ./dataset/gen-word-820-count.jsonl\n", + "Generated JSONL file with - 1040 max words, 75 samples - at ./dataset/gen-word-1040-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 770 max words - at ./dataset/shuffle-word-770-count.jsonl\n", + "Generated JSONL file with - 840 max words, 75 samples - at ./dataset/gen-word-840-count.jsonl\n", + "Generated JSONL file with - 790 max words, 75 samples - at ./dataset/gen-word-790-count.jsonl\n", + "Generated a single JSONL file with 305 samples (75 token repeat) - 670 max words - at ./dataset/shuffle-word-670-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 780 max words - at ./dataset/shuffle-word-780-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1000 max words - at ./dataset/shuffle-word-1000-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 990 max words - at ./dataset/shuffle-word-990-count.jsonl\n", + "Generated JSONL file with - 850 max words, 75 samples - at ./dataset/gen-word-850-count.jsonl\n", + "Generated JSONL file with - 1110 max words, 75 samples - at ./dataset/gen-word-1110-count.jsonl\n", + "Generated JSONL file with - 890 max words, 75 samples - at ./dataset/gen-word-890-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1010 max words - at ./dataset/shuffle-word-1010-count.jsonl\n", + "Generated JSONL file with - 930 max words, 75 samples - at ./dataset/gen-word-930-count.jsonl\n", + "Generated JSONL file with - 1120 max words, 75 samples - at ./dataset/gen-word-1120-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 970 max words - at ./dataset/shuffle-word-970-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 710 max words - at ./dataset/shuffle-word-710-count.jsonl\n", + "Generated JSONL file with - 870 max words, 75 samples - at ./dataset/gen-word-870-count.jsonl\n", + "Generated a single JSONL file with 299 samples (75 token repeat) - 750 max words - at ./dataset/shuffle-word-750-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1050 max words - at ./dataset/shuffle-word-1050-count.jsonl\n", + "Generated JSONL file with - 1180 max words, 75 samples - at ./dataset/gen-word-1180-count.jsonl\n", + "Generated JSONL file with - 920 max words, 75 samples - at ./dataset/gen-word-920-count.jsonl\n", + "Generated JSONL file with - 780 max words, 75 samples - at 
./dataset/gen-word-780-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 890 max words - at ./dataset/shuffle-word-890-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 740 max words - at ./dataset/shuffle-word-740-count.jsonl\n", + "Generated JSONL file with - 1640 max words, 75 samples - at ./dataset/gen-word-1640-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1020 max words - at ./dataset/shuffle-word-1020-count.jsonl\n", + "Generated a single JSONL file with 302 samples (75 token repeat) - 690 max words - at ./dataset/shuffle-word-690-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 700 max words - at ./dataset/shuffle-word-700-count.jsonl\n", + "Generated a single JSONL file with 241 samples (75 token repeat) - 880 max words - at ./dataset/shuffle-word-880-count.jsonl\n", + "Generated JSONL file with - 830 max words, 75 samples - at ./dataset/gen-word-830-count.jsonl\n", + "Generated a single JSONL file with 17734 samples (100 token repeat) - 15 max words - at ./dataset/shuffle-word-15-count.jsonl\n", + "Generated JSONL file with - 1140 max words, 75 samples - at ./dataset/gen-word-1140-count.jsonl\n", + "Generated JSONL file with - 1150 max words, 75 samples - at ./dataset/gen-word-1150-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 730 max words - at ./dataset/shuffle-word-730-count.jsonl\n", + "Generated JSONL file with - 1070 max words, 75 samples - at ./dataset/gen-word-1070-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1250 max words - at ./dataset/shuffle-word-1250-count.jsonl\n", + "Generated JSONL file with - 1700 max words, 75 samples - at ./dataset/gen-word-1700-count.jsonl\n", + "Generated JSONL file with - 2430 max words, 75 samples - at ./dataset/gen-word-2430-count.jsonl\n", + "Generated JSONL file with - 1200 max words, 75 samples - at ./dataset/gen-word-1200-count.jsonl\n", + "Generated a single JSONL file with 300 samples (75 token repeat) - 790 max words - at ./dataset/shuffle-word-790-count.jsonl\n", + "Generated JSONL file with - 1360 max words, 75 samples - at ./dataset/gen-word-1360-count.jsonl\n", + "Generated JSONL file with - 1130 max words, 75 samples - at ./dataset/gen-word-1130-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1060 max words - at ./dataset/shuffle-word-1060-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 820 max words - at ./dataset/shuffle-word-820-count.jsonl\n", + "Generated a single JSONL file with 238 samples (75 token repeat) - 810 max words - at ./dataset/shuffle-word-810-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 830 max words - at ./dataset/shuffle-word-830-count.jsonl\n", + "Generated JSONL file with - 2750 max words, 75 samples - at ./dataset/gen-word-2750-count.jsonl\n", + "Generated JSONL file with - 1230 max words, 75 samples - at ./dataset/gen-word-1230-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1540 max words - at ./dataset/shuffle-word-1540-count.jsonl\n", + "Generated a single JSONL file with 296 samples (75 token repeat) - 800 max words - at ./dataset/shuffle-word-800-count.jsonl\n", + "Generated a single JSONL file with 222 samples (75 token repeat) - 1130 max words - at ./dataset/shuffle-word-1130-count.jsonl\n", + "Generated JSONL file 
with - 1860 max words, 75 samples - at ./dataset/gen-word-1860-count.jsonl\n", + "Generated JSONL file with - 1160 max words, 75 samples - at ./dataset/gen-word-1160-count.jsonl\n", + "Generated JSONL file with - 1320 max words, 75 samples - at ./dataset/gen-word-1320-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1140 max words - at ./dataset/shuffle-word-1140-count.jsonl\n", + "Generated a single JSONL file with 298 samples (75 token repeat) - 720 max words - at ./dataset/shuffle-word-720-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3575 max words - at ./dataset/shuffle-word-3575-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 930 max words - at ./dataset/shuffle-word-930-count.jsonl\n", + "Generated JSONL file with - 1930 max words, 75 samples - at ./dataset/gen-word-1930-count.jsonl\n", + "Generated JSONL file with - 880 max words, 75 samples - at ./dataset/gen-word-880-count.jsonl\n", + "Generated JSONL file with - 2360 max words, 75 samples - at ./dataset/gen-word-2360-count.jsonl\n", + "Generated a single JSONL file with 26117 samples (100 token repeat) - 10 max words - at ./dataset/shuffle-word-10-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3300 max words - at ./dataset/shuffle-word-3300-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1120 max words - at ./dataset/shuffle-word-1120-count.jsonl\n", + "Generated JSONL file with - 1080 max words, 75 samples - at ./dataset/gen-word-1080-count.jsonl\n", + "Generated a single JSONL file with 94 samples (75 token repeat) - 2700 max words - at ./dataset/shuffle-word-2700-count.jsonl\n", + "Generated JSONL file with - 3450 max words, 100 samples - at ./dataset/gen-word-3450-count.jsonl\n", + "Generated JSONL file with - 1210 max words, 75 samples - at ./dataset/gen-word-1210-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1630 max words - at ./dataset/shuffle-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2230 max words - at ./dataset/shuffle-word-2230-count.jsonl\n", + "Generated JSONL file with - 2300 max words, 75 samples - at ./dataset/gen-word-2300-count.jsonl\n", + "Generated JSONL file with - 1100 max words, 75 samples - at ./dataset/gen-word-1100-count.jsonl\n", + "Generated a single JSONL file with 221 samples (75 token repeat) - 1150 max words - at ./dataset/shuffle-word-1150-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2310 max words - at ./dataset/shuffle-word-2310-count.jsonl\n", + "Generated JSONL file with - 1220 max words, 75 samples - at ./dataset/gen-word-1220-count.jsonl\n", + "Generated a single JSONL file with 243 samples (75 token repeat) - 860 max words - at ./dataset/shuffle-word-860-count.jsonl\n", + "Generated JSONL file with - 1170 max words, 75 samples - at ./dataset/gen-word-1170-count.jsonl\n", + "Generated JSONL file with - 1190 max words, 75 samples - at ./dataset/gen-word-1190-count.jsonl\n", + "Generated JSONL file with - 3900 max words, 100 samples - at ./dataset/gen-word-3900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1800 max words - at ./dataset/shuffle-word-1800-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2190 max words - at ./dataset/shuffle-word-2190-count.jsonl\n", + "Generated JSONL file with 
- 2530 max words, 75 samples - at ./dataset/gen-word-2530-count.jsonl\n", + "Generated a single JSONL file with 235 samples (75 token repeat) - 840 max words - at ./dataset/shuffle-word-840-count.jsonl\n", + "Generated JSONL file with - 2970 max words, 75 samples - at ./dataset/gen-word-2970-count.jsonl\n", + "Generated JSONL file with - 900 max words, 75 samples - at ./dataset/gen-word-900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5350 max words - at ./dataset/shuffle-word-5350-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1280 max words - at ./dataset/shuffle-word-1280-count.jsonl\n", + "Generated JSONL file with - 1050 max words, 75 samples - at ./dataset/gen-word-1050-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1170 max words - at ./dataset/shuffle-word-1170-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 920 max words - at ./dataset/shuffle-word-920-count.jsonl\n", + "Generated a single JSONL file with 223 samples (75 token repeat) - 1160 max words - at ./dataset/shuffle-word-1160-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1070 max words - at ./dataset/shuffle-word-1070-count.jsonl\n", + "Generated a single JSONL file with 224 samples (75 token repeat) - 1200 max words - at ./dataset/shuffle-word-1200-count.jsonl\n", + "Generated JSONL file with - 1260 max words, 75 samples - at ./dataset/gen-word-1260-count.jsonl\n", + "Generated JSONL file with - 1280 max words, 75 samples - at ./dataset/gen-word-1280-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1100 max words - at ./dataset/shuffle-word-1100-count.jsonl\n", + "Generated a single JSONL file with 196 samples (75 token repeat) - 1210 max words - at ./dataset/shuffle-word-1210-count.jsonl\n", + "Generated JSONL file with - 1810 max words, 75 samples - at ./dataset/gen-word-1810-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1310 max words - at ./dataset/shuffle-word-1310-count.jsonl\n", + "Generated JSONL file with - 1290 max words, 75 samples - at ./dataset/gen-word-1290-count.jsonl\n", + "Generated JSONL file with - 1250 max words, 75 samples - at ./dataset/gen-word-1250-count.jsonl\n", + "Generated JSONL file with - 1090 max words, 75 samples - at ./dataset/gen-word-1090-count.jsonl\n", + "Generated JSONL file with - 1310 max words, 75 samples - at ./dataset/gen-word-1310-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1190 max words - at ./dataset/shuffle-word-1190-count.jsonl\n", + "Generated JSONL file with - 1270 max words, 75 samples - at ./dataset/gen-word-1270-count.jsonl\n", + "Generated JSONL file with - 1240 max words, 75 samples - at ./dataset/gen-word-1240-count.jsonl\n", + "Generated JSONL file with - 1300 max words, 75 samples - at ./dataset/gen-word-1300-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1090 max words - at ./dataset/shuffle-word-1090-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1340 max words - at ./dataset/shuffle-word-1340-count.jsonl\n", + "Generated JSONL file with - 1350 max words, 75 samples - at ./dataset/gen-word-1350-count.jsonl\n", + "Generated a single JSONL file with 186 samples (75 token repeat) - 1230 max words - at ./dataset/shuffle-word-1230-count.jsonl\n", + "Generated a single JSONL file 
with 222 samples (75 token repeat) - 1110 max words - at ./dataset/shuffle-word-1110-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1040 max words - at ./dataset/shuffle-word-1040-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1180 max words - at ./dataset/shuffle-word-1180-count.jsonl\n", + "Generated a single JSONL file with 193 samples (75 token repeat) - 1260 max words - at ./dataset/shuffle-word-1260-count.jsonl\n", + "Generated JSONL file with - 1480 max words, 75 samples - at ./dataset/gen-word-1480-count.jsonl\n", + "Generated JSONL file with - 1340 max words, 75 samples - at ./dataset/gen-word-1340-count.jsonl\n", + "Generated a single JSONL file with 191 samples (75 token repeat) - 1220 max words - at ./dataset/shuffle-word-1220-count.jsonl\n", + "Generated JSONL file with - 1370 max words, 75 samples - at ./dataset/gen-word-1370-count.jsonl\n", + "Generated JSONL file with - 1330 max words, 75 samples - at ./dataset/gen-word-1330-count.jsonl\n", + "Generated a single JSONL file with 178 samples (75 token repeat) - 1270 max words - at ./dataset/shuffle-word-1270-count.jsonl\n", + "Generated JSONL file with - 1440 max words, 75 samples - at ./dataset/gen-word-1440-count.jsonl\n", + "Generated JSONL file with - 1380 max words, 75 samples - at ./dataset/gen-word-1380-count.jsonl\n", + "Generated a single JSONL file with 185 samples (75 token repeat) - 1240 max words - at ./dataset/shuffle-word-1240-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1320 max words - at ./dataset/shuffle-word-1320-count.jsonl\n", + "Generated JSONL file with - 1390 max words, 75 samples - at ./dataset/gen-word-1390-count.jsonl\n", + "Generated JSONL file with - 1410 max words, 75 samples - at ./dataset/gen-word-1410-count.jsonl\n", + "Generated a single JSONL file with 187 samples (75 token repeat) - 1290 max words - at ./dataset/shuffle-word-1290-count.jsonl\n", + "Generated JSONL file with - 1450 max words, 75 samples - at ./dataset/gen-word-1450-count.jsonl\n", + "Generated JSONL file with - 1460 max words, 75 samples - at ./dataset/gen-word-1460-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1400 max words - at ./dataset/shuffle-word-1400-count.jsonl\n", + "Generated JSONL file with - 1510 max words, 75 samples - at ./dataset/gen-word-1510-count.jsonl\n", + "Generated JSONL file with - 4875 max words, 100 samples - at ./dataset/gen-word-4875-count.jsonl\n", + "Generated a single JSONL file with 152 samples (75 token repeat) - 1350 max words - at ./dataset/shuffle-word-1350-count.jsonl\n", + "Generated JSONL file with - 3175 max words, 100 samples - at ./dataset/gen-word-3175-count.jsonl\n", + "Generated JSONL file with - 1530 max words, 75 samples - at ./dataset/gen-word-1530-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1030 max words - at ./dataset/shuffle-word-1030-count.jsonl\n", + "Generated JSONL file with - 1540 max words, 75 samples - at ./dataset/gen-word-1540-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1330 max words - at ./dataset/shuffle-word-1330-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1440 max words - at ./dataset/shuffle-word-1440-count.jsonl\n", + "Generated a single JSONL file with 225 samples (75 token repeat) - 1080 max words - at ./dataset/shuffle-word-1080-count.jsonl\n", + "Generated a single JSONL 
file with 154 samples (75 token repeat) - 1360 max words - at ./dataset/shuffle-word-1360-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1420 max words - at ./dataset/shuffle-word-1420-count.jsonl\n", + "Generated a single JSONL file with 192 samples (75 token repeat) - 1300 max words - at ./dataset/shuffle-word-1300-count.jsonl\n", + "Generated JSONL file with - 1520 max words, 75 samples - at ./dataset/gen-word-1520-count.jsonl\n", + "Generated a single JSONL file with 153 samples (75 token repeat) - 1370 max words - at ./dataset/shuffle-word-1370-count.jsonl\n", + "Generated JSONL file with - 1560 max words, 75 samples - at ./dataset/gen-word-1560-count.jsonl\n", + "Generated JSONL file with - 1550 max words, 75 samples - at ./dataset/gen-word-1550-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1450 max words - at ./dataset/shuffle-word-1450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1510 max words - at ./dataset/shuffle-word-1510-count.jsonl\n", + "Generated a single JSONL file with 88 samples (75 token repeat) - 2680 max words - at ./dataset/shuffle-word-2680-count.jsonl\n", + "Generated JSONL file with - 1400 max words, 75 samples - at ./dataset/gen-word-1400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1480 max words - at ./dataset/shuffle-word-1480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1460 max words - at ./dataset/shuffle-word-1460-count.jsonl\n", + "Generated JSONL file with - 1430 max words, 75 samples - at ./dataset/gen-word-1430-count.jsonl\n", + "Generated JSONL file with - 1600 max words, 75 samples - at ./dataset/gen-word-1600-count.jsonl\n", + "Generated JSONL file with - 1730 max words, 75 samples - at ./dataset/gen-word-1730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1500 max words - at ./dataset/shuffle-word-1500-count.jsonl\n", + "Generated JSONL file with - 1590 max words, 75 samples - at ./dataset/gen-word-1590-count.jsonl\n", + "Generated JSONL file with - 1500 max words, 75 samples - at ./dataset/gen-word-1500-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1590 max words - at ./dataset/shuffle-word-1590-count.jsonl\n", + "Generated a single JSONL file with 155 samples (75 token repeat) - 1390 max words - at ./dataset/shuffle-word-1390-count.jsonl\n", + "Generated JSONL file with - 1490 max words, 75 samples - at ./dataset/gen-word-1490-count.jsonl\n", + "Generated JSONL file with - 1570 max words, 75 samples - at ./dataset/gen-word-1570-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1550 max words - at ./dataset/shuffle-word-1550-count.jsonl\n", + "Generated a single JSONL file with 151 samples (75 token repeat) - 1380 max words - at ./dataset/shuffle-word-1380-count.jsonl\n", + "Generated JSONL file with - 1630 max words, 75 samples - at ./dataset/gen-word-1630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1530 max words - at ./dataset/shuffle-word-1530-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1910 max words - at ./dataset/shuffle-word-1910-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1410 max words - at ./dataset/shuffle-word-1410-count.jsonl\n", + "Generated JSONL file with - 1950 max words, 75 samples - at 
./dataset/gen-word-1950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1560 max words - at ./dataset/shuffle-word-1560-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1430 max words - at ./dataset/shuffle-word-1430-count.jsonl\n", + "Generated JSONL file with - 1610 max words, 75 samples - at ./dataset/gen-word-1610-count.jsonl\n", + "Generated JSONL file with - 1680 max words, 75 samples - at ./dataset/gen-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1600 max words - at ./dataset/shuffle-word-1600-count.jsonl\n", + "Generated JSONL file with - 2000 max words, 75 samples - at ./dataset/gen-word-2000-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1620 max words - at ./dataset/shuffle-word-1620-count.jsonlGenerated a single JSONL file with 150 samples (75 token repeat) - 1580 max words - at ./dataset/shuffle-word-1580-count.jsonl\n", + "\n", + "Generated JSONL file with - 1650 max words, 75 samples - at ./dataset/gen-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1470 max words - at ./dataset/shuffle-word-1470-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1770 max words - at ./dataset/shuffle-word-1770-count.jsonl\n", + "Generated JSONL file with - 2100 max words, 75 samples - at ./dataset/gen-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1720 max words - at ./dataset/shuffle-word-1720-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1490 max words - at ./dataset/shuffle-word-1490-count.jsonl\n", + "Generated JSONL file with - 1580 max words, 75 samples - at ./dataset/gen-word-1580-count.jsonl\n", + "Generated JSONL file with - 1740 max words, 75 samples - at ./dataset/gen-word-1740-count.jsonl\n", + "Generated JSONL file with - 1690 max words, 75 samples - at ./dataset/gen-word-1690-count.jsonl\n", + "Generated JSONL file with - 2270 max words, 75 samples - at ./dataset/gen-word-2270-count.jsonl\n", + "Generated JSONL file with - 1660 max words, 75 samples - at ./dataset/gen-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1970 max words - at ./dataset/shuffle-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1750 max words - at ./dataset/shuffle-word-1750-count.jsonl\n", + "Generated JSONL file with - 1750 max words, 75 samples - at ./dataset/gen-word-1750-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1570 max words - at ./dataset/shuffle-word-1570-count.jsonl\n", + "Generated JSONL file with - 1720 max words, 75 samples - at ./dataset/gen-word-1720-count.jsonl\n", + "Generated JSONL file with - 1620 max words, 75 samples - at ./dataset/gen-word-1620-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1520 max words - at ./dataset/shuffle-word-1520-count.jsonl\n", + "Generated JSONL file with - 1770 max words, 75 samples - at ./dataset/gen-word-1770-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2010 max words - at ./dataset/shuffle-word-2010-count.jsonl\n", + "Generated JSONL file with - 1780 max words, 75 samples - at ./dataset/gen-word-1780-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1650 max words - at 
./dataset/shuffle-word-1650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1680 max words - at ./dataset/shuffle-word-1680-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1640 max words - at ./dataset/shuffle-word-1640-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2520 max words - at ./dataset/shuffle-word-2520-count.jsonl\n", + "Generated JSONL file with - 2650 max words, 75 samples - at ./dataset/gen-word-2650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3950 max words - at ./dataset/shuffle-word-3950-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1690 max words - at ./dataset/shuffle-word-1690-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1670 max words - at ./dataset/shuffle-word-1670-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1660 max words - at ./dataset/shuffle-word-1660-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1710 max words - at ./dataset/shuffle-word-1710-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3450 max words - at ./dataset/shuffle-word-3450-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1930 max words - at ./dataset/shuffle-word-1930-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1700 max words - at ./dataset/shuffle-word-1700-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2860 max words - at ./dataset/shuffle-word-2860-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1740 max words - at ./dataset/shuffle-word-1740-count.jsonl\n", + "Generated JSONL file with - 2810 max words, 75 samples - at ./dataset/gen-word-2810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1760 max words - at ./dataset/shuffle-word-1760-count.jsonl\n", + "Generated a single JSONL file with 55780 samples (100 token repeat) - 5 max words - at ./dataset/shuffle-word-5-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2250 max words - at ./dataset/shuffle-word-2250-count.jsonl\n", + "Generated JSONL file with - 1710 max words, 75 samples - at ./dataset/gen-word-1710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2110 max words - at ./dataset/shuffle-word-2110-count.jsonl\n", + "Generated JSONL file with - 2800 max words, 75 samples - at ./dataset/gen-word-2800-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2330 max words - at ./dataset/shuffle-word-2330-count.jsonl\n", + "Generated JSONL file with - 2990 max words, 75 samples - at ./dataset/gen-word-2990-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4175 max words - at ./dataset/shuffle-word-4175-count.jsonl\n", + "Generated JSONL file with - 1960 max words, 75 samples - at ./dataset/gen-word-1960-count.jsonl\n", + "Generated JSONL file with - 2740 max words, 75 samples - at ./dataset/gen-word-2740-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2340 max words - at ./dataset/shuffle-word-2340-count.jsonl\n", + "Generated JSONL file with - 2290 max words, 75 samples - at ./dataset/gen-word-2290-count.jsonl\n", + 
"Generated JSONL file with - 1800 max words, 75 samples - at ./dataset/gen-word-1800-count.jsonl\n", + "Generated JSONL file with - 1840 max words, 75 samples - at ./dataset/gen-word-1840-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1730 max words - at ./dataset/shuffle-word-1730-count.jsonl\n", + "Generated JSONL file with - 1760 max words, 75 samples - at ./dataset/gen-word-1760-count.jsonl\n", + "Generated a single JSONL file with 92 samples (75 token repeat) - 2690 max words - at ./dataset/shuffle-word-2690-count.jsonl\n", + "Generated JSONL file with - 1820 max words, 75 samples - at ./dataset/gen-word-1820-count.jsonl\n", + "Generated JSONL file with - 1790 max words, 75 samples - at ./dataset/gen-word-1790-count.jsonl\n", + "Generated JSONL file with - 1830 max words, 75 samples - at ./dataset/gen-word-1830-count.jsonl\n", + "Generated JSONL file with - 1850 max words, 75 samples - at ./dataset/gen-word-1850-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1830 max words - at ./dataset/shuffle-word-1830-count.jsonl\n", + "Generated JSONL file with - 1890 max words, 75 samples - at ./dataset/gen-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1780 max words - at ./dataset/shuffle-word-1780-count.jsonl\n", + "Generated JSONL file with - 2940 max words, 75 samples - at ./dataset/gen-word-2940-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2420 max words - at ./dataset/shuffle-word-2420-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2150 max words - at ./dataset/shuffle-word-2150-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1840 max words - at ./dataset/shuffle-word-1840-count.jsonl\n", + "Generated JSONL file with - 1900 max words, 75 samples - at ./dataset/gen-word-1900-count.jsonl\n", + "Generated a single JSONL file with 81 samples (75 token repeat) - 2730 max words - at ./dataset/shuffle-word-2730-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1790 max words - at ./dataset/shuffle-word-1790-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1860 max words - at ./dataset/shuffle-word-1860-count.jsonl\n", + "Generated JSONL file with - 1910 max words, 75 samples - at ./dataset/gen-word-1910-count.jsonl\n", + "Generated JSONL file with - 3150 max words, 100 samples - at ./dataset/gen-word-3150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4275 max words - at ./dataset/shuffle-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4800 max words - at ./dataset/shuffle-word-4800-count.jsonl\n", + "Generated JSONL file with - 1920 max words, 75 samples - at ./dataset/gen-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1810 max words - at ./dataset/shuffle-word-1810-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1820 max words - at ./dataset/shuffle-word-1820-count.jsonl\n", + "Generated JSONL file with - 2030 max words, 75 samples - at ./dataset/gen-word-2030-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2240 max words - at ./dataset/shuffle-word-2240-count.jsonl\n", + "Generated JSONL file with - 2470 max words, 75 samples - at ./dataset/gen-word-2470-count.jsonl\n", 
+ "Generated JSONL file with - 2020 max words, 75 samples - at ./dataset/gen-word-2020-count.jsonl\n", + "Generated JSONL file with - 2180 max words, 75 samples - at ./dataset/gen-word-2180-count.jsonl\n", + "Generated JSONL file with - 1870 max words, 75 samples - at ./dataset/gen-word-1870-count.jsonl\n", + "Generated JSONL file with - 1970 max words, 75 samples - at ./dataset/gen-word-1970-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2050 max words - at ./dataset/shuffle-word-2050-count.jsonl\n", + "Generated JSONL file with - 2350 max words, 75 samples - at ./dataset/gen-word-2350-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1880 max words - at ./dataset/shuffle-word-1880-count.jsonl\n", + "Generated JSONL file with - 2010 max words, 75 samples - at ./dataset/gen-word-2010-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1980 max words - at ./dataset/shuffle-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1920 max words - at ./dataset/shuffle-word-1920-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1850 max words - at ./dataset/shuffle-word-1850-count.jsonl\n", + "Generated JSONL file with - 1990 max words, 75 samples - at ./dataset/gen-word-1990-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2470 max words - at ./dataset/shuffle-word-2470-count.jsonl\n", + "Generated JSONL file with - 1940 max words, 75 samples - at ./dataset/gen-word-1940-count.jsonl\n", + "Generated JSONL file with - 4650 max words, 100 samples - at ./dataset/gen-word-4650-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1990 max words - at ./dataset/shuffle-word-1990-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1890 max words - at ./dataset/shuffle-word-1890-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2020 max words - at ./dataset/shuffle-word-2020-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1960 max words - at ./dataset/shuffle-word-1960-count.jsonl\n", + "Generated JSONL file with - 5100 max words, 100 samples - at ./dataset/gen-word-5100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2000 max words - at ./dataset/shuffle-word-2000-count.jsonl\n", + "Generated JSONL file with - 2120 max words, 75 samples - at ./dataset/gen-word-2120-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2060 max words - at ./dataset/shuffle-word-2060-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1950 max words - at ./dataset/shuffle-word-1950-count.jsonl\n", + "Generated JSONL file with - 4300 max words, 100 samples - at ./dataset/gen-word-4300-count.jsonl\n", + "Generated JSONL file with - 2070 max words, 75 samples - at ./dataset/gen-word-2070-count.jsonl\n", + "Generated JSONL file with - 2080 max words, 75 samples - at ./dataset/gen-word-2080-count.jsonl\n", + "Generated JSONL file with - 2190 max words, 75 samples - at ./dataset/gen-word-2190-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1900 max words - at ./dataset/shuffle-word-1900-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1870 max words - at 
./dataset/shuffle-word-1870-count.jsonl\n", + "Generated JSONL file with - 2110 max words, 75 samples - at ./dataset/gen-word-2110-count.jsonl\n", + "Generated JSONL file with - 1980 max words, 75 samples - at ./dataset/gen-word-1980-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2040 max words - at ./dataset/shuffle-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2100 max words - at ./dataset/shuffle-word-2100-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2070 max words - at ./dataset/shuffle-word-2070-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2090 max words - at ./dataset/shuffle-word-2090-count.jsonl\n", + "Generated JSONL file with - 2060 max words, 75 samples - at ./dataset/gen-word-2060-count.jsonl\n", + "Generated JSONL file with - 2160 max words, 75 samples - at ./dataset/gen-word-2160-count.jsonl\n", + "Generated JSONL file with - 2040 max words, 75 samples - at ./dataset/gen-word-2040-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2180 max words - at ./dataset/shuffle-word-2180-count.jsonl\n", + "Generated JSONL file with - 2170 max words, 75 samples - at ./dataset/gen-word-2170-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2170 max words - at ./dataset/shuffle-word-2170-count.jsonl\n", + "Generated JSONL file with - 4475 max words, 100 samples - at ./dataset/gen-word-4475-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2200 max words - at ./dataset/shuffle-word-2200-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2140 max words - at ./dataset/shuffle-word-2140-count.jsonl\n", + "Generated JSONL file with - 2250 max words, 75 samples - at ./dataset/gen-word-2250-count.jsonl\n", + "Generated JSONL file with - 2710 max words, 75 samples - at ./dataset/gen-word-2710-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2160 max words - at ./dataset/shuffle-word-2160-count.jsonl\n", + "Generated JSONL file with - 2590 max words, 75 samples - at ./dataset/gen-word-2590-count.jsonl\n", + "Generated JSONL file with - 2280 max words, 75 samples - at ./dataset/gen-word-2280-count.jsonl\n", + "Generated JSONL file with - 2200 max words, 75 samples - at ./dataset/gen-word-2200-count.jsonl\n", + "Generated JSONL file with - 2130 max words, 75 samples - at ./dataset/gen-word-2130-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2220 max words - at ./dataset/shuffle-word-2220-count.jsonl\n", + "Generated JSONL file with - 2260 max words, 75 samples - at ./dataset/gen-word-2260-count.jsonl\n", + "Generated JSONL file with - 2340 max words, 75 samples - at ./dataset/gen-word-2340-count.jsonl\n", + "Generated JSONL file with - 2230 max words, 75 samples - at ./dataset/gen-word-2230-count.jsonl\n", + "Generated JSONL file with - 5200 max words, 100 samples - at ./dataset/gen-word-5200-count.jsonl\n", + "Generated JSONL file with - 2400 max words, 75 samples - at ./dataset/gen-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2320 max words - at ./dataset/shuffle-word-2320-count.jsonl\n", + "Generated JSONL file with - 2140 max words, 75 samples - at ./dataset/gen-word-2140-count.jsonl\n", + "Generated JSONL file with - 2580 max words, 75 samples - at 
./dataset/gen-word-2580-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2280 max words - at ./dataset/shuffle-word-2280-count.jsonl\n", + "Generated JSONL file with - 2450 max words, 75 samples - at ./dataset/gen-word-2450-count.jsonl\n", + "Generated a single JSONL file with 120 samples (75 token repeat) - 2530 max words - at ./dataset/shuffle-word-2530-count.jsonl\n", + "Generated JSONL file with - 2370 max words, 75 samples - at ./dataset/gen-word-2370-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2260 max words - at ./dataset/shuffle-word-2260-count.jsonl\n", + "Generated JSONL file with - 2380 max words, 75 samples - at ./dataset/gen-word-2380-count.jsonl\n", + "Generated JSONL file with - 2220 max words, 75 samples - at ./dataset/gen-word-2220-count.jsonl\n", + "Generated JSONL file with - 2330 max words, 75 samples - at ./dataset/gen-word-2330-count.jsonl\n", + "Generated JSONL file with - 2240 max words, 75 samples - at ./dataset/gen-word-2240-count.jsonl\n", + "Generated a single JSONL file with 136 samples (75 token repeat) - 2460 max words - at ./dataset/shuffle-word-2460-count.jsonl\n", + "Generated JSONL file with - 2210 max words, 75 samples - at ./dataset/gen-word-2210-count.jsonl\n", + "Generated JSONL file with - 2310 max words, 75 samples - at ./dataset/gen-word-2310-count.jsonl\n", + "Generated JSONL file with - 2410 max words, 75 samples - at ./dataset/gen-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2290 max words - at ./dataset/shuffle-word-2290-count.jsonl\n", + "Generated JSONL file with - 2150 max words, 75 samples - at ./dataset/gen-word-2150-count.jsonl\n", + "Generated JSONL file with - 2560 max words, 75 samples - at ./dataset/gen-word-2560-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2350 max words - at ./dataset/shuffle-word-2350-count.jsonl\n", + "Generated JSONL file with - 2320 max words, 75 samples - at ./dataset/gen-word-2320-count.jsonl\n", + "Generated a single JSONL file with 135 samples (75 token repeat) - 2410 max words - at ./dataset/shuffle-word-2410-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2030 max words - at ./dataset/shuffle-word-2030-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2210 max words - at ./dataset/shuffle-word-2210-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 3000 max words - at ./dataset/shuffle-word-3000-count.jsonl\n", + "Generated JSONL file with - 2980 max words, 75 samples - at ./dataset/gen-word-2980-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2400 max words - at ./dataset/shuffle-word-2400-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 1940 max words - at ./dataset/shuffle-word-1940-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2380 max words - at ./dataset/shuffle-word-2380-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2300 max words - at ./dataset/shuffle-word-2300-count.jsonl\n", + "Generated JSONL file with - 2390 max words, 75 samples - at ./dataset/gen-word-2390-count.jsonl\n", + "Generated JSONL file with - 2090 max words, 75 samples - at ./dataset/gen-word-2090-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2450 max words - at 
./dataset/shuffle-word-2450-count.jsonl\n", + "Generated JSONL file with - 2050 max words, 75 samples - at ./dataset/gen-word-2050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5825 max words - at ./dataset/shuffle-word-5825-count.jsonl\n", + "Generated a single JSONL file with 138 samples (75 token repeat) - 2440 max words - at ./dataset/shuffle-word-2440-count.jsonl\n", + "Generated JSONL file with - 3475 max words, 100 samples - at ./dataset/gen-word-3475-count.jsonl\n", + "Generated a single JSONL file with 147 samples (75 token repeat) - 2390 max words - at ./dataset/shuffle-word-2390-count.jsonl\n", + "Generated JSONL file with - 5775 max words, 100 samples - at ./dataset/gen-word-5775-count.jsonl\n", + "Generated a single JSONL file with 149 samples (75 token repeat) - 2370 max words - at ./dataset/shuffle-word-2370-count.jsonl\n", + "Generated JSONL file with - 2930 max words, 75 samples - at ./dataset/gen-word-2930-count.jsonl\n", + "Generated JSONL file with - 2420 max words, 75 samples - at ./dataset/gen-word-2420-count.jsonl\n", + "Generated JSONL file with - 2790 max words, 75 samples - at ./dataset/gen-word-2790-count.jsonl\n", + "Generated JSONL file with - 5625 max words, 100 samples - at ./dataset/gen-word-5625-count.jsonl\n", + "Generated a single JSONL file with 148 samples (75 token repeat) - 2360 max words - at ./dataset/shuffle-word-2360-count.jsonl\n", + "Generated JSONL file with - 2440 max words, 75 samples - at ./dataset/gen-word-2440-count.jsonl\n", + "Generated JSONL file with - 2460 max words, 75 samples - at ./dataset/gen-word-2460-count.jsonl\n", + "Generated a single JSONL file with 114 samples (75 token repeat) - 2590 max words - at ./dataset/shuffle-word-2590-count.jsonl\n", + "Generated a single JSONL file with 86 samples (75 token repeat) - 2630 max words - at ./dataset/shuffle-word-2630-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2270 max words - at ./dataset/shuffle-word-2270-count.jsonl\n", + "Generated a single JSONL file with 115 samples (75 token repeat) - 2570 max words - at ./dataset/shuffle-word-2570-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3175 max words - at ./dataset/shuffle-word-3175-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2120 max words - at ./dataset/shuffle-word-2120-count.jsonl\n", + "Generated a single JSONL file with 117 samples (75 token repeat) - 2580 max words - at ./dataset/shuffle-word-2580-count.jsonl\n", + "Generated JSONL file with - 2570 max words, 75 samples - at ./dataset/gen-word-2570-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2560 max words - at ./dataset/shuffle-word-2560-count.jsonl\n", + "Generated JSONL file with - 2490 max words, 75 samples - at ./dataset/gen-word-2490-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2130 max words - at ./dataset/shuffle-word-2130-count.jsonl\n", + "Generated a single JSONL file with 139 samples (75 token repeat) - 2480 max words - at ./dataset/shuffle-word-2480-count.jsonl\n", + "Generated a single JSONL file with 150 samples (75 token repeat) - 2080 max words - at ./dataset/shuffle-word-2080-count.jsonl\n", + "Generated a single JSONL file with 144 samples (75 token repeat) - 2430 max words - at ./dataset/shuffle-word-2430-count.jsonl\n", + "Generated a single JSONL file with 118 samples (75 token repeat) - 2600 max words - at 
./dataset/shuffle-word-2600-count.jsonl\n", + "Generated JSONL file with - 2630 max words, 75 samples - at ./dataset/gen-word-2630-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2610 max words - at ./dataset/shuffle-word-2610-count.jsonl\n", + "Generated JSONL file with - 2540 max words, 75 samples - at ./dataset/gen-word-2540-count.jsonl\n", + "Generated JSONL file with - 2480 max words, 75 samples - at ./dataset/gen-word-2480-count.jsonl\n", + "Generated JSONL file with - 2600 max words, 75 samples - at ./dataset/gen-word-2600-count.jsonl\n", + "Generated JSONL file with - 2610 max words, 75 samples - at ./dataset/gen-word-2610-count.jsonl\n", + "Generated JSONL file with - 2510 max words, 75 samples - at ./dataset/gen-word-2510-count.jsonl\n", + "Generated a single JSONL file with 89 samples (75 token repeat) - 2620 max words - at ./dataset/shuffle-word-2620-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2990 max words - at ./dataset/shuffle-word-2990-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2900 max words - at ./dataset/shuffle-word-2900-count.jsonl\n", + "Generated a single JSONL file with 142 samples (75 token repeat) - 2490 max words - at ./dataset/shuffle-word-2490-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2640 max words - at ./dataset/shuffle-word-2640-count.jsonl\n", + "Generated a single JSONL file with 90 samples (75 token repeat) - 2670 max words - at ./dataset/shuffle-word-2670-count.jsonl\n", + "Generated JSONL file with - 2620 max words, 75 samples - at ./dataset/gen-word-2620-count.jsonl\n", + "Generated a single JSONL file with 108 samples (75 token repeat) - 2510 max words - at ./dataset/shuffle-word-2510-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2650 max words - at ./dataset/shuffle-word-2650-count.jsonl\n", + "Generated JSONL file with - 2500 max words, 75 samples - at ./dataset/gen-word-2500-count.jsonl\n", + "Generated a single JSONL file with 113 samples (75 token repeat) - 2550 max words - at ./dataset/shuffle-word-2550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3125 max words - at ./dataset/shuffle-word-3125-count.jsonl\n", + "Generated JSONL file with - 2820 max words, 75 samples - at ./dataset/gen-word-2820-count.jsonl\n", + "Generated JSONL file with - 2680 max words, 75 samples - at ./dataset/gen-word-2680-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2740 max words - at ./dataset/shuffle-word-2740-count.jsonl\n", + "Generated JSONL file with - 2760 max words, 75 samples - at ./dataset/gen-word-2760-count.jsonl\n", + "Generated JSONL file with - 2550 max words, 75 samples - at ./dataset/gen-word-2550-count.jsonl\n", + "Generated JSONL file with - 2640 max words, 75 samples - at ./dataset/gen-word-2640-count.jsonl\n", + "Generated a single JSONL file with 137 samples (75 token repeat) - 2500 max words - at ./dataset/shuffle-word-2500-count.jsonl\n", + "Generated JSONL file with - 2660 max words, 75 samples - at ./dataset/gen-word-2660-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2810 max words - at ./dataset/shuffle-word-2810-count.jsonl\n", + "Generated JSONL file with - 2520 max words, 75 samples - at ./dataset/gen-word-2520-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2890 max words - at 
./dataset/shuffle-word-2890-count.jsonl\n", + "Generated JSONL file with - 2720 max words, 75 samples - at ./dataset/gen-word-2720-count.jsonl\n", + "Generated JSONL file with - 2830 max words, 75 samples - at ./dataset/gen-word-2830-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2820 max words - at ./dataset/shuffle-word-2820-count.jsonl\n", + "Generated JSONL file with - 2770 max words, 75 samples - at ./dataset/gen-word-2770-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4775 max words - at ./dataset/shuffle-word-4775-count.jsonl\n", + "Generated JSONL file with - 2690 max words, 75 samples - at ./dataset/gen-word-2690-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2760 max words - at ./dataset/shuffle-word-2760-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2960 max words - at ./dataset/shuffle-word-2960-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2830 max words - at ./dataset/shuffle-word-2830-count.jsonl\n", + "Generated JSONL file with - 3000 max words, 75 samples - at ./dataset/gen-word-3000-count.jsonl\n", + "Generated JSONL file with - 2950 max words, 75 samples - at ./dataset/gen-word-2950-count.jsonl\n", + "Generated JSONL file with - 2910 max words, 75 samples - at ./dataset/gen-word-2910-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2920 max words - at ./dataset/shuffle-word-2920-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2950 max words - at ./dataset/shuffle-word-2950-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2800 max words - at ./dataset/shuffle-word-2800-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2910 max words - at ./dataset/shuffle-word-2910-count.jsonl\n", + "Generated JSONL file with - 2860 max words, 75 samples - at ./dataset/gen-word-2860-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3225 max words - at ./dataset/shuffle-word-3225-count.jsonl\n", + "Generated a single JSONL file with 76 samples (75 token repeat) - 2750 max words - at ./dataset/shuffle-word-2750-count.jsonl\n", + "Generated a single JSONL file with 112 samples (75 token repeat) - 2540 max words - at ./dataset/shuffle-word-2540-count.jsonl\n", + "Generated JSONL file with - 2850 max words, 75 samples - at ./dataset/gen-word-2850-count.jsonl\n", + "Generated JSONL file with - 2890 max words, 75 samples - at ./dataset/gen-word-2890-count.jsonl\n", + "Generated JSONL file with - 2960 max words, 75 samples - at ./dataset/gen-word-2960-count.jsonl\n", + "Generated JSONL file with - 2900 max words, 75 samples - at ./dataset/gen-word-2900-count.jsonl\n", + "Generated JSONL file with - 2730 max words, 75 samples - at ./dataset/gen-word-2730-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4100 max words - at ./dataset/shuffle-word-4100-count.jsonl\n", + "Generated JSONL file with - 4050 max words, 100 samples - at ./dataset/gen-word-4050-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2870 max words - at ./dataset/shuffle-word-2870-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5025 max words - at ./dataset/shuffle-word-5025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token 
repeat) - 3025 max words - at ./dataset/shuffle-word-3025-count.jsonl\n", + "Generated JSONL file with - 2780 max words, 75 samples - at ./dataset/gen-word-2780-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3050 max words - at ./dataset/shuffle-word-3050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3475 max words - at ./dataset/shuffle-word-3475-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2980 max words - at ./dataset/shuffle-word-2980-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2930 max words - at ./dataset/shuffle-word-2930-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2770 max words - at ./dataset/shuffle-word-2770-count.jsonl\n", + "Generated JSONL file with - 2670 max words, 75 samples - at ./dataset/gen-word-2670-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2850 max words - at ./dataset/shuffle-word-2850-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2970 max words - at ./dataset/shuffle-word-2970-count.jsonl\n", + "Generated JSONL file with - 2840 max words, 75 samples - at ./dataset/gen-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5150 max words - at ./dataset/shuffle-word-5150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3075 max words - at ./dataset/shuffle-word-3075-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2940 max words - at ./dataset/shuffle-word-2940-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3200 max words - at ./dataset/shuffle-word-3200-count.jsonl\n", + "Generated JSONL file with - 3100 max words, 100 samples - at ./dataset/gen-word-3100-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2880 max words - at ./dataset/shuffle-word-2880-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2790 max words - at ./dataset/shuffle-word-2790-count.jsonl\n", + "Generated a single JSONL file with 87 samples (75 token repeat) - 2660 max words - at ./dataset/shuffle-word-2660-count.jsonl\n", + "Generated JSONL file with - 2880 max words, 75 samples - at ./dataset/gen-word-2880-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3150 max words - at ./dataset/shuffle-word-3150-count.jsonl\n", + "Generated a single JSONL file with 80 samples (75 token repeat) - 2710 max words - at ./dataset/shuffle-word-2710-count.jsonl\n", + "Generated JSONL file with - 2920 max words, 75 samples - at ./dataset/gen-word-2920-count.jsonl\n", + "Generated a single JSONL file with 77 samples (75 token repeat) - 2780 max words - at ./dataset/shuffle-word-2780-count.jsonl\n", + "Generated a single JSONL file with 75 samples (75 token repeat) - 2840 max words - at ./dataset/shuffle-word-2840-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3275 max words - at ./dataset/shuffle-word-3275-count.jsonl\n", + "Generated JSONL file with - 2870 max words, 75 samples - at ./dataset/gen-word-2870-count.jsonl\n", + "Generated a single JSONL file with 78 samples (75 token repeat) - 2720 max words - at ./dataset/shuffle-word-2720-count.jsonl\n", + "Generated JSONL file with - 3200 max words, 100 samples - at 
./dataset/gen-word-3200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3400 max words - at ./dataset/shuffle-word-3400-count.jsonl\n", + "Generated JSONL file with - 3050 max words, 100 samples - at ./dataset/gen-word-3050-count.jsonl\n", + "Generated JSONL file with - 2700 max words, 75 samples - at ./dataset/gen-word-2700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3100 max words - at ./dataset/shuffle-word-3100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3750 max words - at ./dataset/shuffle-word-3750-count.jsonl\n", + "Generated JSONL file with - 3025 max words, 100 samples - at ./dataset/gen-word-3025-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4675 max words - at ./dataset/shuffle-word-4675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3250 max words - at ./dataset/shuffle-word-3250-count.jsonl\n", + "Generated JSONL file with - 3075 max words, 100 samples - at ./dataset/gen-word-3075-count.jsonl\n", + "Generated JSONL file with - 3275 max words, 100 samples - at ./dataset/gen-word-3275-count.jsonl\n", + "Generated JSONL file with - 4225 max words, 100 samples - at ./dataset/gen-word-4225-count.jsonl\n", + "Generated JSONL file with - 3250 max words, 100 samples - at ./dataset/gen-word-3250-count.jsonl\n", + "Generated JSONL file with - 3225 max words, 100 samples - at ./dataset/gen-word-3225-count.jsonl\n", + "Generated JSONL file with - 3700 max words, 100 samples - at ./dataset/gen-word-3700-count.jsonl\n", + "Generated JSONL file with - 3425 max words, 100 samples - at ./dataset/gen-word-3425-count.jsonl\n", + "Generated JSONL file with - 3525 max words, 100 samples - at ./dataset/gen-word-3525-count.jsonl\n", + "Generated JSONL file with - 3675 max words, 100 samples - at ./dataset/gen-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3325 max words - at ./dataset/shuffle-word-3325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3525 max words - at ./dataset/shuffle-word-3525-count.jsonl\n", + "Generated JSONL file with - 5025 max words, 100 samples - at ./dataset/gen-word-5025-count.jsonl\n", + "Generated JSONL file with - 3375 max words, 100 samples - at ./dataset/gen-word-3375-count.jsonl\n", + "Generated JSONL file with - 3125 max words, 100 samples - at ./dataset/gen-word-3125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3925 max words - at ./dataset/shuffle-word-3925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4975 max words - at ./dataset/shuffle-word-4975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4525 max words - at ./dataset/shuffle-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3425 max words - at ./dataset/shuffle-word-3425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3500 max words - at ./dataset/shuffle-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4025 max words - at ./dataset/shuffle-word-4025-count.jsonl\n", + "Generated JSONL file with - 3500 max words, 100 samples - at ./dataset/gen-word-3500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3675 max words - at 
./dataset/shuffle-word-3675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3375 max words - at ./dataset/shuffle-word-3375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3350 max words - at ./dataset/shuffle-word-3350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3550 max words - at ./dataset/shuffle-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3625 max words - at ./dataset/shuffle-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3650 max words - at ./dataset/shuffle-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5850 max words - at ./dataset/shuffle-word-5850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4425 max words - at ./dataset/shuffle-word-4425-count.jsonl\n", + "Generated JSONL file with - 3650 max words, 100 samples - at ./dataset/gen-word-3650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4350 max words - at ./dataset/shuffle-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3825 max words - at ./dataset/shuffle-word-3825-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4000 max words - at ./dataset/shuffle-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4050 max words - at ./dataset/shuffle-word-4050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4200 max words - at ./dataset/shuffle-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4075 max words - at ./dataset/shuffle-word-4075-count.jsonl\n", + "Generated JSONL file with - 3400 max words, 100 samples - at ./dataset/gen-word-3400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 6000 max words - at ./dataset/shuffle-word-6000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3700 max words - at ./dataset/shuffle-word-3700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3900 max words - at ./dataset/shuffle-word-3900-count.jsonl\n", + "Generated JSONL file with - 3575 max words, 100 samples - at ./dataset/gen-word-3575-count.jsonl\n", + "Generated JSONL file with - 3300 max words, 100 samples - at ./dataset/gen-word-3300-count.jsonl\n", + "Generated JSONL file with - 3350 max words, 100 samples - at ./dataset/gen-word-3350-count.jsonl\n", + "Generated JSONL file with - 3725 max words, 100 samples - at ./dataset/gen-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3725 max words - at ./dataset/shuffle-word-3725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4500 max words - at ./dataset/shuffle-word-4500-count.jsonl\n", + "Generated JSONL file with - 3550 max words, 100 samples - at ./dataset/gen-word-3550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3875 max words - at ./dataset/shuffle-word-3875-count.jsonl\n", + "Generated JSONL file with - 3625 max words, 100 samples - at ./dataset/gen-word-3625-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3600 max words - at 
./dataset/shuffle-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4125 max words - at ./dataset/shuffle-word-4125-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3975 max words - at ./dataset/shuffle-word-3975-count.jsonl\n", + "Generated JSONL file with - 3325 max words, 100 samples - at ./dataset/gen-word-3325-count.jsonl\n", + "Generated JSONL file with - 3600 max words, 100 samples - at ./dataset/gen-word-3600-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4950 max words - at ./dataset/shuffle-word-4950-count.jsonl\n", + "Generated JSONL file with - 4150 max words, 100 samples - at ./dataset/gen-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5600 max words - at ./dataset/shuffle-word-5600-count.jsonl\n", + "Generated JSONL file with - 3775 max words, 100 samples - at ./dataset/gen-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3775 max words - at ./dataset/shuffle-word-3775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4150 max words - at ./dataset/shuffle-word-4150-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5750 max words - at ./dataset/shuffle-word-5750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3800 max words - at ./dataset/shuffle-word-3800-count.jsonl\n", + "Generated JSONL file with - 3975 max words, 100 samples - at ./dataset/gen-word-3975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4600 max words - at ./dataset/shuffle-word-4600-count.jsonl\n", + "Generated JSONL file with - 4350 max words, 100 samples - at ./dataset/gen-word-4350-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 3850 max words - at ./dataset/shuffle-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4850 max words - at ./dataset/shuffle-word-4850-count.jsonl\n", + "Generated JSONL file with - 4250 max words, 100 samples - at ./dataset/gen-word-4250-count.jsonl\n", + "Generated JSONL file with - 4975 max words, 100 samples - at ./dataset/gen-word-4975-count.jsonl\n", + "Generated JSONL file with - 5450 max words, 100 samples - at ./dataset/gen-word-5450-count.jsonl\n", + "Generated JSONL file with - 4900 max words, 100 samples - at ./dataset/gen-word-4900-count.jsonl\n", + "Generated JSONL file with - 4200 max words, 100 samples - at ./dataset/gen-word-4200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4300 max words - at ./dataset/shuffle-word-4300-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4375 max words - at ./dataset/shuffle-word-4375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4650 max words - at ./dataset/shuffle-word-4650-count.jsonl\n", + "Generated JSONL file with - 4375 max words, 100 samples - at ./dataset/gen-word-4375-count.jsonl\n", + "Generated JSONL file with - 3925 max words, 100 samples - at ./dataset/gen-word-3925-count.jsonl\n", + "Generated JSONL file with - 4025 max words, 100 samples - at ./dataset/gen-word-4025-count.jsonl\n", + "Generated JSONL file with - 3825 max words, 100 samples - at ./dataset/gen-word-3825-count.jsonl\n", + "Generated JSONL file with - 3875 max words, 100 samples - 
at ./dataset/gen-word-3875-count.jsonl\n", + "Generated JSONL file with - 3850 max words, 100 samples - at ./dataset/gen-word-3850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4625 max words - at ./dataset/shuffle-word-4625-count.jsonl\n", + "Generated JSONL file with - 4100 max words, 100 samples - at ./dataset/gen-word-4100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4725 max words - at ./dataset/shuffle-word-4725-count.jsonl\n", + "Generated JSONL file with - 4400 max words, 100 samples - at ./dataset/gen-word-4400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4550 max words - at ./dataset/shuffle-word-4550-count.jsonl\n", + "Generated JSONL file with - 3750 max words, 100 samples - at ./dataset/gen-word-3750-count.jsonl\n", + "Generated JSONL file with - 3800 max words, 100 samples - at ./dataset/gen-word-3800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4475 max words - at ./dataset/shuffle-word-4475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4325 max words - at ./dataset/shuffle-word-4325-count.jsonl\n", + "Generated JSONL file with - 4075 max words, 100 samples - at ./dataset/gen-word-4075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4575 max words - at ./dataset/shuffle-word-4575-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5125 max words - at ./dataset/shuffle-word-5125-count.jsonl\n", + "Generated JSONL file with - 4175 max words, 100 samples - at ./dataset/gen-word-4175-count.jsonl\n", + "Generated JSONL file with - 4825 max words, 100 samples - at ./dataset/gen-word-4825-count.jsonl\n", + "Generated JSONL file with - 5425 max words, 100 samples - at ./dataset/gen-word-5425-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4400 max words - at ./dataset/shuffle-word-4400-count.jsonl\n", + "Generated JSONL file with - 4450 max words, 100 samples - at ./dataset/gen-word-4450-count.jsonl\n", + "Generated JSONL file with - 4125 max words, 100 samples - at ./dataset/gen-word-4125-count.jsonl\n", + "Generated JSONL file with - 4600 max words, 100 samples - at ./dataset/gen-word-4600-count.jsonl\n", + "Generated JSONL file with - 4625 max words, 100 samples - at ./dataset/gen-word-4625-count.jsonl\n", + "Generated JSONL file with - 4550 max words, 100 samples - at ./dataset/gen-word-4550-count.jsonl\n", + "Generated JSONL file with - 4325 max words, 100 samples - at ./dataset/gen-word-4325-count.jsonl\n", + "Generated JSONL file with - 4425 max words, 100 samples - at ./dataset/gen-word-4425-count.jsonl\n", + "Generated JSONL file with - 4850 max words, 100 samples - at ./dataset/gen-word-4850-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5550 max words - at ./dataset/shuffle-word-5550-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4925 max words - at ./dataset/shuffle-word-4925-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4825 max words - at ./dataset/shuffle-word-4825-count.jsonl\n", + "Generated JSONL file with - 4500 max words, 100 samples - at ./dataset/gen-word-4500-count.jsonl\n", + "Generated JSONL file with - 4000 max words, 100 samples - at ./dataset/gen-word-4000-count.jsonl\n", + "Generated a single JSONL file with 100 samples 
(100 token repeat) - 5300 max words - at ./dataset/shuffle-word-5300-count.jsonl\n", + "Generated JSONL file with - 3950 max words, 100 samples - at ./dataset/gen-word-3950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4450 max words - at ./dataset/shuffle-word-4450-count.jsonl\n", + "Generated JSONL file with - 4675 max words, 100 samples - at ./dataset/gen-word-4675-count.jsonl\n", + "Generated JSONL file with - 4800 max words, 100 samples - at ./dataset/gen-word-4800-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5525 max words - at ./dataset/shuffle-word-5525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4750 max words - at ./dataset/shuffle-word-4750-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5000 max words - at ./dataset/shuffle-word-5000-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4250 max words - at ./dataset/shuffle-word-4250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5375 max words - at ./dataset/shuffle-word-5375-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5225 max words - at ./dataset/shuffle-word-5225-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5050 max words - at ./dataset/shuffle-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5675 max words - at ./dataset/shuffle-word-5675-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4875 max words - at ./dataset/shuffle-word-4875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5250 max words - at ./dataset/shuffle-word-5250-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5700 max words - at ./dataset/shuffle-word-5700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5325 max words - at ./dataset/shuffle-word-5325-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5100 max words - at ./dataset/shuffle-word-5100-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5075 max words - at ./dataset/shuffle-word-5075-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4900 max words - at ./dataset/shuffle-word-4900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4700 max words - at ./dataset/shuffle-word-4700-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5175 max words - at ./dataset/shuffle-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5400 max words - at ./dataset/shuffle-word-5400-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5275 max words - at ./dataset/shuffle-word-5275-count.jsonl\n", + "Generated JSONL file with - 5050 max words, 100 samples - at ./dataset/gen-word-5050-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5725 max words - at ./dataset/shuffle-word-5725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 4225 max words - at ./dataset/shuffle-word-4225-count.jsonl\n", + "Generated JSONL file with - 4750 max words, 100 samples - at 
./dataset/gen-word-4750-count.jsonl\n", + "Generated JSONL file with - 4525 max words, 100 samples - at ./dataset/gen-word-4525-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5575 max words - at ./dataset/shuffle-word-5575-count.jsonl\n", + "Generated JSONL file with - 4725 max words, 100 samples - at ./dataset/gen-word-4725-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5450 max words - at ./dataset/shuffle-word-5450-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5475 max words - at ./dataset/shuffle-word-5475-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5900 max words - at ./dataset/shuffle-word-5900-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5625 max words - at ./dataset/shuffle-word-5625-count.jsonl\n", + "Generated JSONL file with - 4275 max words, 100 samples - at ./dataset/gen-word-4275-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5925 max words - at ./dataset/shuffle-word-5925-count.jsonl\n", + "Generated JSONL file with - 5375 max words, 100 samples - at ./dataset/gen-word-5375-count.jsonl\n", + "Generated JSONL file with - 5175 max words, 100 samples - at ./dataset/gen-word-5175-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5950 max words - at ./dataset/shuffle-word-5950-count.jsonl\n", + "Generated JSONL file with - 4700 max words, 100 samples - at ./dataset/gen-word-4700-count.jsonl\n", + "Generated JSONL file with - 4775 max words, 100 samples - at ./dataset/gen-word-4775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5500 max words - at ./dataset/shuffle-word-5500-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5975 max words - at ./dataset/shuffle-word-5975-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5775 max words - at ./dataset/shuffle-word-5775-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5200 max words - at ./dataset/shuffle-word-5200-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5875 max words - at ./dataset/shuffle-word-5875-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5650 max words - at ./dataset/shuffle-word-5650-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5800 max words - at ./dataset/shuffle-word-5800-count.jsonl\n", + "Generated JSONL file with - 4950 max words, 100 samples - at ./dataset/gen-word-4950-count.jsonl\n", + "Generated a single JSONL file with 100 samples (100 token repeat) - 5425 max words - at ./dataset/shuffle-word-5425-count.jsonl\n", + "Generated JSONL file with - 5300 max words, 100 samples - at ./dataset/gen-word-5300-count.jsonl\n", + "Generated JSONL file with - 5000 max words, 100 samples - at ./dataset/gen-word-5000-count.jsonl\n", + "Generated JSONL file with - 5075 max words, 100 samples - at ./dataset/gen-word-5075-count.jsonl\n", + "Generated JSONL file with - 5325 max words, 100 samples - at ./dataset/gen-word-5325-count.jsonl\n", + "Generated JSONL file with - 5250 max words, 100 samples - at ./dataset/gen-word-5250-count.jsonl\n", + "Generated JSONL file with - 4575 max words, 100 samples - at ./dataset/gen-word-4575-count.jsonl\n", + "Generated JSONL file with - 
5400 max words, 100 samples - at ./dataset/gen-word-5400-count.jsonl\n", + "Generated JSONL file with - 5150 max words, 100 samples - at ./dataset/gen-word-5150-count.jsonl\n", + "Generated JSONL file with - 5875 max words, 100 samples - at ./dataset/gen-word-5875-count.jsonl\n", + "Generated JSONL file with - 5125 max words, 100 samples - at ./dataset/gen-word-5125-count.jsonl\n", + "Generated JSONL file with - 5725 max words, 100 samples - at ./dataset/gen-word-5725-count.jsonl\n", + "Generated JSONL file with - 5350 max words, 100 samples - at ./dataset/gen-word-5350-count.jsonl\n", + "Generated JSONL file with - 5225 max words, 100 samples - at ./dataset/gen-word-5225-count.jsonl\n", + "Generated JSONL file with - 5525 max words, 100 samples - at ./dataset/gen-word-5525-count.jsonl\n", + "Generated JSONL file with - 5500 max words, 100 samples - at ./dataset/gen-word-5500-count.jsonl\n", + "Generated JSONL file with - 5275 max words, 100 samples - at ./dataset/gen-word-5275-count.jsonl\n", + "Generated JSONL file with - 5600 max words, 100 samples - at ./dataset/gen-word-5600-count.jsonl\n", + "Generated JSONL file with - 5650 max words, 100 samples - at ./dataset/gen-word-5650-count.jsonl\n", + "Generated JSONL file with - 5550 max words, 100 samples - at ./dataset/gen-word-5550-count.jsonl\n", + "Generated JSONL file with - 4925 max words, 100 samples - at ./dataset/gen-word-4925-count.jsonl\n", + "Generated JSONL file with - 5575 max words, 100 samples - at ./dataset/gen-word-5575-count.jsonl\n", + "Generated JSONL file with - 5950 max words, 100 samples - at ./dataset/gen-word-5950-count.jsonl\n", + "Generated JSONL file with - 5825 max words, 100 samples - at ./dataset/gen-word-5825-count.jsonl\n", + "Generated JSONL file with - 5925 max words, 100 samples - at ./dataset/gen-word-5925-count.jsonl\n", + "Generated JSONL file with - 5475 max words, 100 samples - at ./dataset/gen-word-5475-count.jsonl\n", + "Generated JSONL file with - 5975 max words, 100 samples - at ./dataset/gen-word-5975-count.jsonl\n", + "Generated JSONL file with - 5900 max words, 100 samples - at ./dataset/gen-word-5900-count.jsonl\n", + "Generated JSONL file with - 5850 max words, 100 samples - at ./dataset/gen-word-5850-count.jsonl\n", + "Generated JSONL file with - 5700 max words, 100 samples - at ./dataset/gen-word-5700-count.jsonl\n", + "Generated JSONL file with - 6000 max words, 100 samples - at ./dataset/gen-word-6000-count.jsonl\n", + "Generated JSONL file with - 5750 max words, 100 samples - at ./dataset/gen-word-5750-count.jsonl\n", + "Generated JSONL file with - 5800 max words, 100 samples - at ./dataset/gen-word-5800-count.jsonl\n", + "Generated JSONL file with - 5675 max words, 100 samples - at ./dataset/gen-word-5675-count.jsonl\n", + "## Done ##\n", + "total 1.8G\n", + "drwxr-xr-x 2 root root 40K Jan 23 20:09 .\n", + "drwxr-xr-x 5 root root 4.0K Jan 23 20:09 ..\n", + "-rw-r--r-- 1 root root 20K Jan 23 20:09 gen-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 106K Jan 23 20:09 gen-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 746K Jan 23 20:09 gen-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 749K Jan 23 20:09 gen-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 757K Jan 23 20:09 gen-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 762K Jan 23 20:09 gen-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 785K Jan 23 20:09 gen-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 774K Jan 23 20:09 
gen-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 787K Jan 23 20:09 gen-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 794K Jan 23 20:09 gen-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 809K Jan 23 20:09 gen-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 88K Jan 23 20:09 gen-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 805K Jan 23 20:09 gen-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 818K Jan 23 20:09 gen-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 817K Jan 23 20:09 gen-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 832K Jan 23 20:09 gen-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 844K Jan 23 20:09 gen-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 855K Jan 23 20:09 gen-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 852K Jan 23 20:09 gen-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 868K Jan 23 20:09 gen-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 871K Jan 23 20:09 gen-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 875K Jan 23 20:09 gen-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 883K Jan 23 20:09 gen-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 881K Jan 23 20:09 gen-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 898K Jan 23 20:09 gen-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 902K Jan 23 20:09 gen-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 924K Jan 23 20:09 gen-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 916K Jan 23 20:09 gen-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 928K Jan 23 20:09 gen-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 939K Jan 23 20:09 gen-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 940K Jan 23 20:09 gen-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 956K Jan 23 20:09 gen-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 105K Jan 23 20:09 gen-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 945K Jan 23 20:09 gen-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 968K Jan 23 20:09 gen-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 981K Jan 23 20:09 gen-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 993K Jan 23 20:09 gen-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 1001K Jan 23 20:09 gen-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 1015K Jan 23 20:09 gen-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1014K Jan 23 20:09 gen-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1019K Jan 23 20:09 gen-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 109K Jan 23 20:09 gen-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 1023K Jan 23 20:09 gen-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1490-count.jsonl\n", + "-rw-r--r-- 1 root root 25K Jan 23 20:09 
gen-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 116K Jan 23 20:09 gen-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.1M Jan 23 20:09 gen-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 127K Jan 23 20:09 gen-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.2M Jan 23 20:09 gen-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 133K Jan 23 20:09 gen-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 140K Jan 23 20:09 gen-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.3M Jan 23 20:09 gen-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 147K Jan 23 20:09 gen-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 
gen-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.4M Jan 23 20:09 gen-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 30K Jan 23 20:09 gen-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 154K Jan 23 20:09 gen-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 162K Jan 23 20:09 gen-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.5M Jan 23 20:09 gen-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 169K Jan 23 20:09 gen-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 1.6M Jan 23 20:09 gen-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 176K Jan 23 20:09 gen-word-230-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 
gen-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.7M Jan 23 20:09 gen-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 186K Jan 23 20:09 gen-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 34K Jan 23 20:09 gen-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 193K Jan 23 20:09 gen-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.8M Jan 23 20:09 gen-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 200K Jan 23 20:09 gen-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 gen-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 204K Jan 23 20:09 gen-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
gen-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 213K Jan 23 20:09 gen-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 gen-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 219K Jan 23 20:09 gen-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 gen-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 40K Jan 23 20:09 gen-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 228K Jan 23 20:09 gen-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.2M Jan 23 20:09 gen-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 gen-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 239K Jan 23 20:09 gen-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 gen-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 244K Jan 23 20:09 gen-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3225-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 gen-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 247K Jan 23 20:09 gen-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 gen-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 261K Jan 23 20:09 gen-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 gen-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 3.3M Jan 23 20:09 
gen-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 45K Jan 23 20:09 gen-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 265K Jan 23 20:09 gen-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 gen-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 273K Jan 23 20:09 gen-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 3.5M Jan 23 20:09 gen-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 283K Jan 23 20:09 gen-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 gen-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 287K Jan 23 20:09 gen-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 294K Jan 23 20:09 gen-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 3.7M Jan 23 20:09 gen-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 49K Jan 23 20:09 gen-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 301K Jan 23 20:09 gen-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 gen-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 3.9M Jan 23 20:09 gen-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 304K Jan 23 20:09 gen-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 4.0M Jan 23 20:09 gen-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 314K Jan 23 20:09 gen-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 319K Jan 23 20:09 gen-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 4.1M Jan 23 20:09 gen-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 
gen-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 333K Jan 23 20:09 gen-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 4.2M Jan 23 20:09 gen-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 54K Jan 23 20:09 gen-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 339K Jan 23 20:09 gen-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 4.3M Jan 23 20:09 gen-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 348K Jan 23 20:09 gen-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 4.4M Jan 23 20:09 gen-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 355K Jan 23 20:09 gen-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 4.5M Jan 23 20:09 gen-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 359K Jan 23 20:09 gen-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 4.6M Jan 23 20:09 gen-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 358K Jan 23 20:09 gen-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 4.7M Jan 23 20:09 gen-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 gen-word-5-count.jsonl\n", + "-rw-r--r-- 1 root root 60K Jan 23 20:09 gen-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 372K Jan 23 20:09 gen-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 4.8M Jan 23 20:09 gen-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 385K Jan 23 20:09 gen-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 4.9M Jan 23 20:09 gen-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 389K Jan 23 20:09 gen-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 gen-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 
gen-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 391K Jan 23 20:09 gen-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 5.1M Jan 23 20:09 gen-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 398K Jan 23 20:09 gen-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 5.2M Jan 23 20:09 gen-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 63K Jan 23 20:09 gen-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 411K Jan 23 20:09 gen-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 5.3M Jan 23 20:09 gen-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 5.4M Jan 23 20:09 gen-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 423K Jan 23 20:09 gen-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 433K Jan 23 20:09 gen-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 5.5M Jan 23 20:09 gen-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5850-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 435K Jan 23 20:09 gen-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 5.6M Jan 23 20:09 gen-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 5.7M Jan 23 20:09 gen-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 70K Jan 23 20:09 gen-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 445K Jan 23 20:09 gen-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 5.8M Jan 23 20:09 gen-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 456K Jan 23 20:09 gen-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 455K Jan 23 20:09 gen-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 468K Jan 23 20:09 gen-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 474K Jan 23 20:09 gen-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 71K Jan 23 20:09 gen-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 486K Jan 23 20:09 gen-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 492K Jan 23 20:09 gen-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 509K Jan 23 20:09 
gen-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 507K Jan 23 20:09 gen-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 78K Jan 23 20:09 gen-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 518K Jan 23 20:09 gen-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 524K Jan 23 20:09 gen-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 530K Jan 23 20:09 gen-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 534K Jan 23 20:09 gen-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 552K Jan 23 20:09 gen-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 82K Jan 23 20:09 gen-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 555K Jan 23 20:09 gen-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 566K Jan 23 20:09 gen-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 575K Jan 23 20:09 gen-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 571K Jan 23 20:09 gen-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 581K Jan 23 20:09 gen-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 87K Jan 23 20:09 gen-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 593K Jan 23 20:09 gen-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 595K Jan 23 20:09 gen-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 601K Jan 23 20:09 gen-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 614K Jan 23 20:09 gen-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 623K Jan 23 20:09 gen-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 93K Jan 23 20:09 gen-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 631K Jan 23 20:09 gen-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 628K Jan 23 20:09 gen-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 645K Jan 23 20:09 gen-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 649K Jan 23 20:09 gen-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 656K Jan 23 20:09 gen-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 98K Jan 23 20:09 gen-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 667K Jan 23 20:09 gen-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 671K Jan 23 20:09 gen-word-910-count.jsonl\n", + "-rw-r--r-- 1 root root 680K Jan 23 20:09 gen-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 690K Jan 23 20:09 gen-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 692K Jan 23 20:09 gen-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 103K Jan 23 20:09 gen-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 694K Jan 23 20:09 gen-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 713K Jan 23 20:09 gen-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 714K Jan 23 20:09 gen-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 725K Jan 23 20:09 gen-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 732K Jan 23 20:09 gen-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 5.0M Jan 23 20:09 shuffle-word-10-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
shuffle-word-1080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.1M Jan 23 20:09 shuffle-word-130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1370-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1380-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1490-count.jsonl\n", + "-rw-r--r-- 
1 root root 4.2M Jan 23 20:09 shuffle-word-15-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-1880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 
shuffle-word-190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1910-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-1990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.8M Jan 23 20:09 shuffle-word-20-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2010-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2020-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2030-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2040-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2060-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2070-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2080-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2090-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2110-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2120-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2130-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2140-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2160-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2170-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2180-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2190-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2210-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2220-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2230-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2260-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2270-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2280-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2290-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-230-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.0M Jan 23 20:09 shuffle-word-2300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2350-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-240-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2400-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2410-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2430-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2440-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-2460-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2470-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2480-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2490-count.jsonl\n", + "-rw-r--r-- 1 root root 3.6M Jan 23 20:09 shuffle-word-25-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-250-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2500-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2510-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2520-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2530-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2540-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2550-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2560-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2570-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2580-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-260-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2600-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2610-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2620-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2630-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2640-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2650-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2660-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2670-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2680-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-270-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 
shuffle-word-2700-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2710-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2720-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2730-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2740-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2750-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2760-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2770-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2780-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-280-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2800-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2810-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2820-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2830-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2840-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2850-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2860-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2870-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2880-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-290-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2900-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2910-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2920-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2930-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2940-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2950-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2960-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2970-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2980-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-2990-count.jsonl\n", + "-rw-r--r-- 1 root root 3.4M Jan 23 20:09 shuffle-word-30-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-300-count.jsonl\n", + "-rw-r--r-- 1 root root 1.9M Jan 23 20:09 shuffle-word-3000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-310-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-320-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3225-count.jsonl\n", + "-rw-r--r-- 1 
root root 2.5M Jan 23 20:09 shuffle-word-3250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-330-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-340-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-35-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-360-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-370-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-380-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-390-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-3975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.2M Jan 23 20:09 shuffle-word-40-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-410-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 
shuffle-word-4100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-420-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-430-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-440-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.1M Jan 23 20:09 shuffle-word-45-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-460-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-470-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-480-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-490-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-4975-count.jsonl\n", + "-rw-r--r-- 1 root root 8.0M Jan 23 20:09 shuffle-word-5-count.jsonl\n", + "-rw-r--r-- 1 root 
root 3.1M Jan 23 20:09 shuffle-word-50-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5025-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5050-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5075-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-510-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5100-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5125-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5150-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5175-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-520-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5200-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5225-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5250-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5275-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-530-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5300-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5325-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5350-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5375-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-540-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5400-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5425-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5450-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5475-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-55-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5500-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5525-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5550-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5575-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-560-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5625-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5675-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-570-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5725-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5775-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-580-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5825-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5850-count.jsonl\n", 
+ "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5875-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-590-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5925-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-5975-count.jsonl\n", + "-rw-r--r-- 1 root root 3.0M Jan 23 20:09 shuffle-word-60-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-600-count.jsonl\n", + "-rw-r--r-- 1 root root 2.5M Jan 23 20:09 shuffle-word-6000-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-610-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-620-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-630-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-640-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-65-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-650-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-660-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-670-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-680-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-690-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-70-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-700-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-710-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-720-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-730-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-740-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-75-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-750-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-760-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-770-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-780-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-790-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-80-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-800-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-810-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-820-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-830-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-840-count.jsonl\n", + "-rw-r--r-- 1 root root 2.9M Jan 23 20:09 shuffle-word-85-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-850-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-860-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-870-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-880-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-890-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-90-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-900-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-910-count.jsonl\n", + "-rw-r--r-- 
1 root root 2.0M Jan 23 20:09 shuffle-word-920-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-930-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-940-count.jsonl\n", + "-rw-r--r-- 1 root root 2.8M Jan 23 20:09 shuffle-word-95-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-950-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-960-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-970-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-980-count.jsonl\n", + "-rw-r--r-- 1 root root 2.0M Jan 23 20:09 shuffle-word-990-count.jsonl\n", + "-rw-r--r-- 1 root root 12K Jan 23 20:09 word-2-count.jsonl\n", + "-rw-r--r-- 1 root root 15K Jan 23 20:09 word-4-count.jsonl\n" + ] + } + ], + "source": [ + "%%script bash\n", + "\n", + "########################################\n", + "# Generate the required jsonl dataset\n", + "########################################\n", + "\n", + "# Reset the dataset dir\n", + "mkdir -p ./dataset\n", + "rm -rf ./dataset/*.jsonl\n", + "\n", + "# Generate the various datasets\n", + "echo \"## Generating word repetition dataset ##\"\n", + "\n", + "#\n", + "# Training set for <= 100 words\n", + "# This is used to fill up as many blanks as possible\n", + "#\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-2-count.jsonl 2 100 &\n", + "python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/word-4-count.jsonl 4 100 &\n", + "for i in {5..100..5} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 100+ - 3000 words dataset\n", + "# \n", + "for i in {110..3000..10} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 75 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 75 & \n", + "done\n", + "\n", + "#\n", + "# Ramping up the 3000+ - 6000 words dataset\n", + "# \n", + "for i in {3025..6000..25} \n", + "do\n", + " python ./memory_script/gen_limited_prompt_completion_jsonl.py ./dataset/gen-word-$i-count.jsonl $i 100 & \n", + " python ./memory_script/shuffle_limited_prompt_completion_jsonl.py ./dataset/shuffle-word-$i-count.jsonl $i 100 & \n", + "done\n", + "\n", + "wait\n", + "echo \"## Done ##\"\n", + "\n", + "ls -alh ./dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e66e145e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolving data files: 100%|███████████████| 862/862 [00:00<00:00, 148632.68it/s]\n", + "Filter (num_proc=96): 16%|▊ | 48773/312109 [00:25<02:19, 1882.75 examples/s]\n", + "Map (num_proc=96): 100%|██████| 307741/307741 [00:05<00:00, 54820.73 examples/s]\n", + "Map (num_proc=96): 100%|███████| 307741/307741 [00:42<00:00, 7168.62 examples/s]\n", + "Map (num_proc=96): 100%|█████████| 36846/36846 [00:13<00:00, 2761.34 examples/s]\n", + "Saving the dataset (4/4 shards): 100%|█| 36846/36846 [00:01<00:00, 23444.71 exam\n", + "Saving the dataset (1/1 shards): 100%|█| 1547/1547 [00:00<00:00, 34203.75 exampl\n" + ] + } + ], + "source": [ + "# Lets pre-tokenize the required dataset\n", + "# and pack the data into 8k of length\n", + 
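"# (this cell runs preload_datapath.py with stage-2-tune.yaml, which tokenizes and packs the generated jsonl files ahead of the training run below - see this cell's output above)\n", + 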
"#\n", + "# For the initial training, it seems to be better to do 4k chunks, batch size 16, with 8k datapacks\n", + "# Then to do 8k chunks, batchsize 8, with 16k datapacks. Why? I dun know.\n", + "#\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{NOTEBOOK_DIR}/stage-2-tune.yaml\"\n", + "\n", + "# Ensure the checkpoint directory exists\n", + "!cd \"{TRAINER_DIR}\" && mkdir -p \"../checkpoint/stage-2-memory-finetune/\"" + ] + }, + { + "cell_type": "markdown", + "id": "6413a747", + "metadata": {}, + "source": [ + "## Finetune 2 (2x2k -> 2x4k) : The actual tune!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0bdba654", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-01-23 20:13:18,992] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'], args=['fit', '-c', '/workspace/RWKV-infctx-trainer/notebook/rwkv-x-exp/v5-exp/memory-test/stage-2-tune.yaml', '--model.load_model=../model/Memory-Tune-Stage-1-RWKV-v5-3B-world.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/stage-2-memory-finetune/RWKV-v5-3B-world.pth/', '--trainer.logger.init_args.name=[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.microbatch_size=8', '--model.ctx_len=8192'].\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:40: No seed found, seed set to 3745190225\n", + "Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:558: `precision=bf16` is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + "\n", + "[RWKV.Trainer] Applying 'target_batch_size' with the following:\n", + " - target_batch_size: 256\n", + " - num_nodes: 1\n", + " - num_devices: 8\n", + " - microbatch_size: 8\n", + " - accumulate_grad_batches: 4\n", + " - effective_batch_size: 256\n", + "\n", + "[rank: 0] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n", + "[2024-01-23 20:13:51,892] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:51,961] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,008] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,062] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,079] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,080] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-01-23 20:13:52,114] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'\n", + "[rank: 1] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 3] Seed set to 3745190225\n", + "[rank: 2] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. 
It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 7] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 6] Seed set to 3745190225\n", + "[rank: 4] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "[rank: 5] Seed set to 3745190225\n", + "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n", + " return self.fget.__get__(instance, owner)()\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "---\n", + "[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/wkv5/build.ninja...\n", + "Building extension module wkv5...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module wkv5...\n", + "[RWKV.TimeMix] CUDA kernel compiled & loaded globally\n", + "---\n", + "[rank: 6] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n", + "[rank: 4] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n", + "[rank: 7] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n", + "[rank: 3] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n", + "[rank: 1] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n", + "[rank: 2] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n", + "[rank: 5] Seed set to 3745190225\n", + "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n", + "Enabling DeepSpeed BF16. Model parameters and inputs will be cast to `bfloat16`.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.16.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20240123_201446-ldvjsc3w\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[8xA100] RWKV-v5-3B-World - Mem-Finetune-2 (bs=256, train-ctx=8192, deepspeed_stage_1)\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-Memory-Experiment/runs/ldvjsc3w\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "#\n", + "# RWKV lighting_trainer.py important notes \n", + "# https://github.com/RWKV/RWKV-infctx-trainer \n", + "#\n", + "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\n", + "#LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "\n", + "[RWKV.model] Configuring optimizer with\n", + " - lr_init: 2.000e-04 (0.0002)\n", + " - lr_final: 1.000e-04 (0.0001)\n", + "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", + "Detected CUDA files, patching ldflags\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n", + "Building extension module fused_adam...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "ninja: no work to do.\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.014776945114135742 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...Loading extension module fused_adam...\n", + "\n", + "Loading extension module fused_adam...\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.10196971893310547 seconds\n", + "Time to load fused_adam op: 0.1021115779876709 seconds\n", + "Time to load fused_adam op: 0.10188078880310059 seconds\n", + "Time to load fused_adam op: 0.10222482681274414 secondsTime to load fused_adam op: 0.10220718383789062 seconds\n", + "\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Time to load fused_adam op: 0.10234403610229492 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading `train_dataloader` to estimate number of stepping batches.\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "Loading extension module fused_adam...\n", + "Time to load fused_adam op: 0.1023564338684082 seconds\n", + "/usr/local/lib/python3.10/dist-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:83.)\n", + " self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------------\n", + "0 | emb | Embedding | 167 M \n", + "1 | blocks | ModuleList | 2.7 B \n", + "2 | ln_out | LayerNorm | 5.1 K \n", + "3 | head | Linear | 167 M \n", + "--------------------------------------\n", + "3.1 B Trainable params\n", + "0 Non-trainable params\n", + "3.1 B Total params\n", + "12,251.996Total estimated model params size (MB)\n", + "Epoch 0: 3%| | 18/576 [03:31<1:49:28, 0.08it/s, v_num=sc3w, train/loss=0.0732" + ] + } + ], + "source": [ + "# Start the finetune model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{NOTEBOOK_DIR}/stage-2-tune.yaml\" \\\n", + " --model.load_model=\"../model/Memory-Tune-Stage-1-{MODEL_NAME}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Finetune-2 (bs=256, train-ctx=8192, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.microbatch_size=8 \\\n", + " --model.ctx_len=8192" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b479789d", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python export_checkpoint.py \\\n", + " \"../checkpoint/stage-2-memory-finetune/{MODEL_NAME}/last.ckpt\" \\\n", + " \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/Memory-Tune-Stage-2-{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e248cba", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets do a memory eval!\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\"\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 1000 4000\n", + "!python3 ./memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/Memory-Tune-Stage-2-{MODEL_NAME}\" \"none\" 4000 8000" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 6073.787154, + "end_time": "2024-01-23T13:12:24.967410", + "environment_variables": {}, + "exception": null, + "input_path": "./World-3B-mem-finetune.ipynb", + "output_path": "./World-3B-mem-finetune.output.ipynb", + "parameters": {}, + "start_time": "2024-01-23T11:31:11.180256", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}