main.py
import os
import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
from transformers.modeling_tf_utils import get_initializer
# Restrict TensorFlow to 2 threads for intra- and inter-op parallelism
tf.config.threading.set_intra_op_parallelism_threads(2)
tf.config.threading.set_inter_op_parallelism_threads(2)
# Use the pretrained GPT-2 model
# Other GPT-2 variants (e.g. "gpt2-medium") can be substituted here
print("Downloading model")
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Load and preprocess text data
with open("data_input.txt", "r") as f:
lines = f.read().split("\n")
# Encoding the data using the tokenizer
# The tokenizer truncates each sequence to a max_length of 1024 tokens
input_ids = []
for line in lines:
    encoding = tokenizer.encode(line, add_special_tokens=True, max_length=1024, truncation=True)
    input_ids.append(encoding)
# Parameters
batch_size = 2
num_epochs = 10
learning_rate = 5e-5
# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# Loss function
lossfunction = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Fine-tune the model using low-rank adaptation:
# attach a small trainable Dense projection to each transformer block
for layer in model.transformer.h:
    layer.attention_output_dense = tf.keras.layers.Dense(
        units=256, kernel_initializer=get_initializer(0.02), name="attention_output_dense"
    )
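# Note: an explicit low-rank adapter is usually expressed as a frozen base weight
# plus a trainable product of two small matrices. The class below is a minimal,
# illustrative sketch of that idea (the wrapper itself and the default rank of 8
# are assumptions; it is not wired into the model in this script).
class LoRADense(tf.keras.layers.Layer):
    """Wraps a Dense layer and adds a trainable low-rank update x @ A @ B."""

    def __init__(self, base_layer, rank=8, **kwargs):
        super().__init__(**kwargs)
        self.base_layer = base_layer
        self.rank = rank

    def build(self, input_shape):
        d_in = int(input_shape[-1])
        d_out = self.base_layer.units
        # A projects down to the low rank, B projects back up; B starts at zero
        # so the wrapped layer initially behaves exactly like the base layer.
        self.lora_a = self.add_weight(name="lora_a", shape=(d_in, self.rank),
                                      initializer=get_initializer(0.02))
        self.lora_b = self.add_weight(name="lora_b", shape=(self.rank, d_out),
                                      initializer="zeros")

    def call(self, inputs):
        low_rank_update = tf.matmul(tf.matmul(inputs, self.lora_a), self.lora_b)
        return self.base_layer(inputs) + low_rank_update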
model.summary()
# Train the model
for epoch in range(num_epochs):
print(f"Epoch {epoch + 1}/{num_epochs}")
for i in range(0, len(input_ids), batch_size):
batch = input_ids[i:i+batch_size]
# Padding the batch to maintain the same length
batch = tf.keras.preprocessing.sequence.pad_sequences(batch, padding="post")
inputs = batch[:, :-1]
targets = batch[:, 1:]
with tf.GradientTape() as tape:
logits = model(inputs)[0]
loss = lossfunction(targets, logits)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
if i % (20 * batch_size) == 0:
print(f"Batch {i}/{len(input_ids)} - loss: {loss:.4f}")
# Save the fine-tuned model
model.save_pretrained("tf_gpt2_keras_lora")
# Generate text
prompt_ids = tokenizer.encode("The generated sentence for the model is", return_tensors="tf")
output = model.generate(prompt_ids, max_length=100, do_sample=True, top_k=50, top_p=0.95, temperature=0.9, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output[0], skip_special_tokens=True))
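# To reuse the fine-tuned weights later, the saved directory can be reloaded with
# from_pretrained (a short sketch; the variable names and prompt string are arbitrary):
reloaded_model = TFGPT2LMHeadModel.from_pretrained("tf_gpt2_keras_lora")
reloaded_output = reloaded_model.generate(
    tokenizer.encode("Once upon a time", return_tensors="tf"),
    max_length=50, do_sample=True, top_k=50, top_p=0.95,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(reloaded_output[0], skip_special_tokens=True))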