diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5ecb04b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 konas122
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..502d5e4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,42 @@
+# Voiceprint Recognition
+
+## Python Dependencies
+
+```
+python=3.8
+tensorboardX=2.6
+tensorboard=2.11.2
+scipy=1.4.1
+numpy=1.23.5
+librosa=0.9.2
+torch=1.8.1
+torchaudio=0.8.1
+torchvision=0.9.1
+```
+
+## Training
+
+Run `train.py` to start training.
+
+The network adds an LSTM and a linear layer on top of `resnet18` or `vgg19` to perform voiceprint recognition.
+The project also keeps a pure-CNN variant (`net_cnn.py`), which performs reasonably well too.
+
+## Training Data
+
+Here is the dataset I used: https://pan.baidu.com/s/1_KrjPB27AHPrBa_1AeMQSQ?pwd=0mag (extraction code: 0mag)
+
+You can also use your own dataset: create a `data` folder in the same directory as `train.py`, create a `train` subfolder under `data`, and put your training audio there. At present the code only supports `.wav` training audio.
+
+### Acknowledgements
+
+We studied several useful projects while writing this code, including:
+
+[clovaai/voxceleb_trainer](https://github.com/clovaai/voxceleb_trainer)
+
+[lawlict/ECAPA-TDNN](https://github.com/lawlict/ECAPA-TDNN/blob/master/ecapa_tdnn.py)
+
+[TaoRuijie/ECAPA-TDNN](https://github.com/TaoRuijie/ECAPA-TDNN)
+
+Thanks to these authors for open-sourcing their code!
+
+To be continued...
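For reference, here is a minimal sketch of how the `data/train` layout described in the README can be sanity-checked with the `loader` module added in this diff; the split ratio `k` and the folder counts are illustrative values, not requirements:

```python
# Sketch: verify the data/train/<speaker_id>/*.wav layout described above.
import loader

# folder_num speakers, file_num clips per speaker; k controls the
# per-speaker train/held-out split ratio inside load_files().
train_dict, test_dict, n_speakers = loader.load_files(mode="train", folder_num=40,
                                                      file_num=20, k=1.5)
print(n_speakers, len(train_dict), len(test_dict))
```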
diff --git a/audio.py b/audio.py
new file mode 100644
index 0000000..30b0233
--- /dev/null
+++ b/audio.py
@@ -0,0 +1,156 @@
+import torch
+import random
+import librosa
+import numpy as np
+import librosa.display
+from scipy.signal import medfilt
+import matplotlib.pyplot as plt
+# import torchaudio.transforms as T
+
+
+path = '.\\voices'
+name = 'a001.wav'
+audio_filename = ".\\data\\test\\G2231\\T0055G2231S0076.wav"
+
+
+def noise_augmentation(samples, min_db=40, max_db=80):
+    samples = samples.copy()  # frombuffer() makes the array read-only, so work on a copy
+    data_type = samples[0].dtype
+    db = np.random.randint(low=min_db, high=max_db)
+    db *= 1e-6
+    noise = db * np.random.normal(0, 1, len(samples))  # Gaussian noise
+    # print(db)
+    samples = samples + noise
+    samples = samples.astype(data_type)
+    return samples
+
+
+def add_noise(x, snr, method='vectorized', axis=0):
+    # Signal power
+    if method == 'vectorized':
+        N = x.size
+        Ps = np.sum(x ** 2 / N)
+    elif method == 'max_en':
+        N = x.shape[axis]
+        Ps = np.max(np.sum(x ** 2 / N, axis=axis))
+    elif method == 'axial':
+        N = x.shape[axis]
+        Ps = np.sum(x ** 2 / N, axis=axis)
+    else:
+        raise ValueError('method \"' + str(method) + '\" not recognized.')
+
+    Psdb = 10 * np.log10(Ps)  # Signal power, in dB
+    Pn = Psdb - snr  # Noise level necessary
+    n = np.sqrt(10 ** (Pn / 10)) * np.random.normal(0, 1, x.shape)  # Noise vector (or matrix)
+    return x + n
+
+
+def load_spectrogram(filename):
+    wav, fs = librosa.load(filename, sr=16000)
+    mag = librosa.feature.melspectrogram(y=wav, sr=16000, n_fft=512, n_mels=80,
+                                         win_length=400, hop_length=160)
+    mag = librosa.power_to_db(mag, ref=1.0, amin=1e-10, top_db=None)
+    librosa.display.specshow(mag, sr=16000, x_axis='time', y_axis='mel')  # plot the mel spectrogram
+    plt.show()
+
+    return mag
+
+
+def audio_to_wav(filename, sr=16000, noise=False):
+    wav, fs = librosa.load(filename, sr=sr)
+
+    # wav1 = load_spectrogram(wav)
+    # t = T.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160,
+    #                      f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80)
+    # wav2 = torch.from_numpy(wav)
+    # wav2 = t(wav2)
+
+    extended_wav = np.append(wav, wav)
+    if len(extended_wav) < 41000:
+        extended_wav = np.append(extended_wav, wav)
+    if noise:
+        # Note: fs (16000) is passed as the SNR in dB here, so the added noise is effectively negligible.
+        extended_wav = add_noise(extended_wav, fs)
+    return extended_wav, fs
+
+
+def loadWAV(filename, noise=False):
+    y, sr = audio_to_wav(filename=filename, noise=noise)
+    assert len(y) >= 41000, f'Error: file {filename}\n'
+    num = random.randint(0, len(y) - 41000)
+    y = y[num:num + 41000]
+    y = torch.from_numpy(y).float()
+    return y
+
+
+def load_pure_wav(filename, frame_threshold=10, noise=False):
+    y, sr = audio_to_wav(filename=filename, noise=noise)
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=24, win_length=1024, hop_length=512, n_fft=1024)
+    Mfcc1 = medfilt(mfcc[0, :], 9)  # median-filter the first MFCC coefficient
+    pic = Mfcc1
+    start = 0
+    end = 0
+    points = []
+    min_data = min(pic) * 0.9
+    for i in range((pic.shape[0])):
+        if pic[i] < min_data and start == 0:
+            start = i
+        if pic[i] < min_data and start != 0:
+            end = i
+        elif pic[i] > min_data and start != 0:
+            hh = [start, end]
+            points.append(hh)
+            start = 0
+    if pic[-1] < min_data and start != 0:  # handle a file that ends in silence
+        hh = [start, end]
+        points.append(hh)
+    distances = []
+    for i in range(len(points)):
+        two_ends = points[i]
+        distance = two_ends[1] - two_ends[0]
+        if distance > frame_threshold:
+            distances.append(points[i])
+
+    # out, _ = soundfile.read(filename)
+    # out = out.astype(np.float32)
+    if len(distances) == 0:  # no silent segments
+        return y
+    else:
+        silence_data = []
+        for i in range(len(distances)):
+            if i == 0:
+                start, end = distances[i]
+                if start == 1:
+                    internal_clean = y[0:0]
+                else:
+                    start = (start - 1) * 512  # start sample of the first silent frame
+                    # end = (end - 1) * 512 + 1024
+                    internal_clean = y[0:start - 1]
+            else:
+                _, end = distances[i - 1]
+                start, _ = distances[i]
+                start = (start - 1) * 512
+                end = (end - 1) * 512 + 1024  # end sample of the previous silent frame
+                internal_clean = y[end + 1:start]
+            # hhh = np.array(internal_clean)
+            silence_data.extend(internal_clean)
+        ll = len(distances)  # handle the trailing audio
+        _, end = distances[ll - 1]
+        end = (end - 1) * 512 + 1024
+        end_part_clean = y[end:len(y)]
+        silence_data.extend(end_part_clean)
+        y = silence_data
+        y = torch.from_numpy(np.array(y)).float()
+    return y
+
+
+if __name__ == '__main__':
+    a = load_pure_wav(audio_filename, noise=True)
+    print(a.shape, a.dtype)
+    _ = load_spectrogram(audio_filename)
+    # a = np.array([[[-11, -10, -9, -8],
+    #                [-7, -6, -5, -4],
+    #                [-3, -2, -1, 0]],
+    #               [[1, 2, 3, 4],
+    #                [5, 6, 7, 8],
+    #                [9, 10, 11, 12]]])
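A quick usage sketch for the two loading paths above; the `.wav` path is a placeholder, and any 16 kHz mono file works:

```python
# Sketch: loadWAV() takes a random fixed-length crop; load_pure_wav()
# additionally trims silence detected via the first MFCC coefficient.
import audio

clip = audio.loadWAV("example.wav")                 # placeholder path
print(clip.shape, clip.dtype)                       # torch.Size([41000]) torch.float32

trimmed = audio.load_pure_wav("example.wav", frame_threshold=10)
batch = clip.unsqueeze(0)                           # the models expect (batch, samples)
```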
diff --git a/eval.py b/eval.py
new file mode 100644
index 0000000..bd222a4
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,72 @@
+import torch
+import time
+from torch import nn
+# from d2l import torch as d2l
+
+
+class Timer:
+    def __init__(self):
+        self.times = []
+        self.tik = None
+        self.start()
+
+    def start(self):
+        self.tik = time.time()
+
+    def stop(self):
+        self.times.append(time.time() - self.tik)
+        return self.times[-1]
+
+    def avg(self):
+        return sum(self.times) / len(self.times)
+
+    def sum(self):
+        return sum(self.times)
+
+
+class Accumulator:
+    def __init__(self, n):
+        self.data = [0.0] * n
+
+    def add(self, *args):
+        self.data = [a + float(b) for a, b in zip(self.data, args)]
+
+    def reset(self):
+        self.data = [0.0] * len(self.data)
+
+    def __getitem__(self, idx):
+        return self.data[idx]
+
+
+def try_gpu(i=0):
+    if torch.cuda.device_count() >= i + 1:
+        return torch.device(f'cuda:{i}')
+    return torch.device('cpu')
+
+
+def accuracy(y_hat, y):
+    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
+        y_hat = y_hat.argmax(axis=1)
+    astype = lambda x, *args, **kwargs: x.type(*args, **kwargs)
+    cmp = astype(y_hat, y.dtype) == y
+    reduce_sum = lambda x, *args, **kwargs: x.sum(*args, **kwargs)
+    return float(reduce_sum(astype(cmp, y.dtype)))
+
+
+def evaluate_accuracy_gpu(net, data_iter, device=None):
+    if isinstance(net, nn.Module):
+        net.eval()
+        if not device:
+            device = next(iter(net.parameters())).device
+    metric = Accumulator(2)
+
+    with torch.no_grad():
+        for X, y in data_iter:
+            if isinstance(X, list):
+                X = [x.to(device) for x in X]
+            else:
+                X = X.to(device)
+            y = y.to(device)
+            size = lambda x, *args, **kwargs: x.numel(*args, **kwargs)
+            metric.add(accuracy(net(X), y), size(y))
+    return metric[0] / metric[1]
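A small worked example of the bookkeeping utilities in `eval.py`:

```python
# Sketch: Accumulator/accuracy on a toy batch.
import torch
from eval import Accumulator, accuracy, try_gpu

y_hat = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])  # 3 samples, 2 classes
y = torch.tensor([0, 1, 1])                                 # third prediction is wrong
metric = Accumulator(2)                                     # (correct, total)
metric.add(accuracy(y_hat, y), y.numel())
print(metric[0] / metric[1])                                # 0.666...
print(try_gpu())                                            # cuda:0 if available, else cpu
```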
diff --git a/fine_tuning.py b/fine_tuning.py
new file mode 100644
index 0000000..d10cf73
--- /dev/null
+++ b/fine_tuning.py
@@ -0,0 +1,69 @@
+import torch
+import loader
+import train as t
+import eval as d2l
+# import torch_directml
+from loss import AAMSoftmax
+# from d2l import torch as d2l
+from tensorboardX import SummaryWriter
+from torch.utils.data import DataLoader
+from models.tdnn_pretrain import Pretrain_TDNN
+
+
+def load_model(path, output_num, device, not_grad=False):
+    load_net = torch.load(path, map_location=device)
+    model = Pretrain_TDNN(output_num, 1024, output_embedding=False, not_grad=not_grad)
+    model.speaker_encoder = load_net.speaker_encoder
+    # Re-apply the freeze flag: the loaded encoder replaces the one that
+    # Pretrain_TDNN.__init__ configured, so its requires_grad must be set again.
+    for param in model.speaker_encoder.parameters():
+        param.requires_grad = not not_grad
+    del load_net
+    return model
+
+
+if __name__ == "__main__":
+    people_num, data_per_people = 420, 10
+    noise, mel, reverse = False, True, False
+    margin, scale, easy_margin = 0.2, 20, False
+    num_epochs, learn_rate, weight_decay = 40, 0.1, 1e-3
+    learn_rate_period, learn_rate_decay = 10, 0.95
+    mode, model_name = "train", "resnet18"
+    hidden_size, num_layers = 64, 2
+
+    # Device = torch_directml.device()
+    # prefetch_factor, batch_size, num_works, persistent = 2, 32, 8, False
+
+    Device = d2l.try_gpu()
+    if Device.type == 'cpu':
+        prefetch_factor, batch_size, num_works, persistent = 2, 8, 8, False
+    elif torch.cuda.is_available():
+        prefetch_factor, batch_size, num_works, persistent = 8, 256, 32, True
+    else:
+        prefetch_factor, batch_size, num_works, persistent = 2, 32, 8, False
+
+    t.init_logs()
+    train_dict, test_dict, people_num = loader.load_files(mode=mode, folder_num=people_num,
+                                                          file_num=data_per_people, k=1)
+    train_dataset = loader.MyDataset(data_dict=train_dict, people_num=people_num, train=True,
+                                     mel=mel, noise=noise)
+    test_dataset = loader.MyDataset(data_dict=test_dict, people_num=people_num, train=False,
+                                    mel=mel, noise=noise)
+    print(len(train_dataset), len(test_dataset))
+    train_ = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True,
+                        drop_last=True, num_workers=num_works, pin_memory=True,
+                        persistent_workers=persistent, prefetch_factor=prefetch_factor)
+    test_ = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True,
+                       drop_last=True, num_workers=num_works, pin_memory=True,
+                       persistent_workers=persistent, prefetch_factor=prefetch_factor)
+
+    # pth_path = 'test.pth'
+    # model2 = load_model(pth_path, people_num, Device, not_grad=True)
+
+    model2 = Pretrain_TDNN(people_num, 1024, output_embedding=False, not_grad=False)
+    model2.load_parameters('param.model', Device)
+
+    loss = AAMSoftmax(192, people_num, margin, scale, easy_margin)
+    writer = SummaryWriter('./logs')
+    t.train(train_, test_, model2, loss, Device, writer, num_epochs, learn_rate, weight_decay)
+    model2.save_parameters('param2.model')
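A sketch of the frozen-encoder path above; `test.pth` is the placeholder name from the commented-out code, standing for a whole `Pretrain_TDNN` saved via `torch.save(model)`:

```python
# Sketch: resume from a whole-model checkpoint with a frozen encoder.
import eval as d2l
from fine_tuning import load_model

device = d2l.try_gpu()
model = load_model('test.pth', output_num=420, device=device, not_grad=True)
# With not_grad=True only the AAM-Softmax projection (model.weight) keeps
# gradients, so the optimizer in train.py updates just that layer.
```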
diff --git a/img/PR.jpg b/img/PR.jpg
new file mode 100644
index 0000000..22ad336
Binary files /dev/null and b/img/PR.jpg differ
diff --git a/img/ROC.jpg b/img/ROC.jpg
new file mode 100644
index 0000000..4c8278b
Binary files /dev/null and b/img/ROC.jpg differ
diff --git a/img/confusion_matrix.jpg b/img/confusion_matrix.jpg
new file mode 100644
index 0000000..d6d3a08
Binary files /dev/null and b/img/confusion_matrix.jpg differ
diff --git a/loader.py b/loader.py
new file mode 100644
index 0000000..5e44675
--- /dev/null
+++ b/loader.py
@@ -0,0 +1,160 @@
+import os
+import torch
+import audio
+import numpy as np
+import multiprocessing
+from joblib import Parallel, delayed
+from torch.utils.data import Dataset, DataLoader
+
+transcript_filename = ".\\data\\transcript.txt"
+test_path = ".\\data\\test"
+train_path = ".\\data\\train"
+dev_path = ".\\data\\dev"
+
+
+class MyDataset(Dataset):
+    def __init__(self, data_dict=None, people_num=None, train=True, mel=True, noise=False):
+        super(MyDataset, self).__init__()
+        self.noise = noise
+        self.mel = mel
+        self.train = train
+        self.data_dict = data_dict
+        self.spect = []
+        self.labels = []
+        if data_dict is None or people_num is None:
+            raise Exception(f'Error: data_dict {data_dict} is empty\n')
+        else:
+            self.people_num = people_num
+            self._preprocess()
+
+    def _preprocess(self):
+        out = Parallel(n_jobs=multiprocessing.cpu_count())(delayed(self._audio)(key) for key in self.data_dict)
+        self.spect = [value for value, _ in out]
+        self.labels = [value for _, value in out]
+        self.labels = torch.from_numpy(np.array(self.labels)).long()
+
+    def _audio(self, key):
+        spec = audio.loadWAV(filename=key)
+        return spec, self.data_dict[key]
+
+    def __getitem__(self, item):
+        label = self.labels[item]
+        spec = self.spect[item]
+        return spec, label
+
+    def __len__(self):
+        return len(self.labels)
+
+
+def load_files(mode="train", folder_num=-1, file_num=-1, k=1.5):
+    path = ".\\data"
+    train, test = {}, {}
+    if mode == "train":
+        path = path + '\\train'
+    elif mode == "test":
+        path = path + '\\test'
+    elif mode == "dev":
+        path = path + "\\dev"
+    else:
+        raise Exception(f'Error: mode {mode} does not exist')
+    dirs = os.listdir(path)
+
+    if 0 < folder_num < len(dirs):
+        if mode == "train":
+            num = np.arange(folder_num)
+        else:
+            num = np.random.choice(len(dirs), folder_num, replace=False)
+    else:
+        num = np.arange(len(dirs))
+        folder_num = len(dirs)
+    if k <= 0 or k >= 9:
+        k = 1.5
+
+    count = 0
+    folder_path = []
+    for i in num:
+        file_path = dirs[i]
+        folder_path.append(file_path)
+        file_path = os.path.join(path, file_path)
+        tmp_files = os.listdir(file_path)
+        sub_files = [tmp_files[file] for file in range(len(tmp_files))
+                     if tmp_files[file][-4:] == ".wav"]
+
+        if file_num > len(sub_files):
+            file_num = len(sub_files)
+        elif file_num < 10:
+            file_num = 10
+        np.random.shuffle(sub_files)
+        train_num = int(file_num // (k + 1) * k + 1)
+        # test_num = file_num - train_num
+
+        for j in range(train_num):
+            wav_file = os.path.join(file_path, sub_files[j])
+            train[wav_file] = count
+        for j in range(train_num, file_num):
+            wav_file = os.path.join(file_path, sub_files[j])
+            test[wav_file] = count
+        count += 1
+    return train, test, folder_num
+
+
+class Vocabulary:
+    def __init__(self, word_to_id, id_to_word, vocab, token, sentence_max):
+        self.word_to_id = word_to_id
+        self.id_to_word = id_to_word
+        self.vocab = vocab
+        self.token = token
+        self.sentence_max = sentence_max
+
+
+def transcript_process(filename, token="word"):
+    id_to_word = {}
+    word_to_id = {}
+    vocab = {}
+    sentence_max = 0
+    f = open(filename, "r", encoding="utf-8")
+    for line in f.readlines():
+        text = line[16:]
+        index = line[:16]
+        vec = np.array([], dtype='int16')
+        text = text.replace("\n", "")
+        if token == "word":
+            words = text.split(' ')
+        elif token == "char":
+            words = text.replace(" ", "")
+        else:
+            raise Exception(f'Error: token {token} does not exist')
+        # print(words)
+        for word in words:
+            if sentence_max < len(words):
+                sentence_max = len(words)
+            if word not in word_to_id:
+                new_id = len(word_to_id)
+                word_to_id[word] = new_id
+                id_to_word[new_id] = word
+            vec = np.append(vec, word_to_id[word])
+        vocab[index] = vec
+    f.close()
+    vocabulary = Vocabulary(word_to_id, id_to_word, vocab, token, sentence_max)
+    return vocabulary
+
+
+if __name__ == '__main__':
+    # vocabulary = transcript_process(transcript_filename, token="word")
+    # print(vocabulary.vocab)
+    # print(vocabulary.sentence_max)
+    Reverse = False
+    train_dict, test_dict, number = load_files("train", 40, 20, 1.5)
+    # for i in train_dict.values():
+    #     print(i)
+    train_dataset = MyDataset(train_dict, number, True, True, False)
+    test_dataset = MyDataset(test_dict, number, False, True, False)
+    print(len(train_dataset), len(test_dataset))
+    train_iter = DataLoader(dataset=train_dataset, batch_size=6, shuffle=True, drop_last=True, num_workers=4)
+    print(len(train_iter))
+    a = None
+    for b, (x, y) in enumerate(train_iter):
+        if b == 0:
+            a = x
+            print(x.shape, y)
+    print(a[0].shape)
diff --git a/logs/acc/test_acc/events.out.tfevents.1680200803.Konas b/logs/acc/test_acc/events.out.tfevents.1680200803.Konas
new file mode 100644
index 0000000..f06cfb8 Binary files /dev/null and b/logs/acc/test_acc/events.out.tfevents.1680200803.Konas differ diff --git a/logs/acc/test_acc/events.out.tfevents.1681549622.Konas b/logs/acc/test_acc/events.out.tfevents.1681549622.Konas new file mode 100644 index 0000000..1f2fd05 Binary files /dev/null and b/logs/acc/test_acc/events.out.tfevents.1681549622.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1678964030.Konas b/logs/acc/train_acc/events.out.tfevents.1678964030.Konas new file mode 100644 index 0000000..caa0452 Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1678964030.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1679050809.Konas b/logs/acc/train_acc/events.out.tfevents.1679050809.Konas new file mode 100644 index 0000000..b08adf6 Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1679050809.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1679832770.Konas b/logs/acc/train_acc/events.out.tfevents.1679832770.Konas new file mode 100644 index 0000000..4612c74 Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1679832770.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1679986062.Konas b/logs/acc/train_acc/events.out.tfevents.1679986062.Konas new file mode 100644 index 0000000..7533348 Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1679986062.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1680200803.Konas b/logs/acc/train_acc/events.out.tfevents.1680200803.Konas new file mode 100644 index 0000000..a8a239d Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1680200803.Konas differ diff --git a/logs/acc/train_acc/events.out.tfevents.1681549622.Konas b/logs/acc/train_acc/events.out.tfevents.1681549622.Konas new file mode 100644 index 0000000..da98fc9 Binary files /dev/null and b/logs/acc/train_acc/events.out.tfevents.1681549622.Konas differ diff --git a/logs/events.out.tfevents.1680192878.Konas b/logs/events.out.tfevents.1680192878.Konas new file mode 100644 index 0000000..792bc4c Binary files /dev/null and b/logs/events.out.tfevents.1680192878.Konas differ diff --git a/logs/events.out.tfevents.1680251006.Konas b/logs/events.out.tfevents.1680251006.Konas new file mode 100644 index 0000000..6524766 Binary files /dev/null and b/logs/events.out.tfevents.1680251006.Konas differ diff --git a/logs/events.out.tfevents.1681549581.Konas b/logs/events.out.tfevents.1681549581.Konas new file mode 100644 index 0000000..2354615 Binary files /dev/null and b/logs/events.out.tfevents.1681549581.Konas differ diff --git a/loss.py b/loss.py new file mode 100644 index 0000000..078216c --- /dev/null +++ b/loss.py @@ -0,0 +1,90 @@ +import math +import torch +import eval as d2l +import torch.nn as nn +# import torch.nn.functional as F + + +def prec_accuracy(output, target, topk=(1,)): + mask = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(mask, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(1 / batch_size)) + return res + + +def evaluate_accuracy_gpu(net, data_iter, device=None): + if isinstance(net, nn.Module): + net.eval() + if not device: + device = d2l.try_gpu() + metric = d2l.Accumulator(2) + + with torch.no_grad(): + for _, (X, y) in enumerate(data_iter): + if isinstance(X, list): + X = [x.to(device) for x in X] + else: + X = X.to(device) + y = 
y.to(device) + size = lambda x, *args, **kwargs: x.numel(*args, **kwargs) + phi = net(X) + + one_hot = torch.zeros(phi.size(), device='cuda' if torch.cuda.is_available() else 'cpu') + one_hot.scatter_(1, y.view(-1, 1), 1) + output = (one_hot * phi) + ((1.0 - one_hot) * phi) + prec = prec_accuracy(output.detach(), y.detach(), topk=(1,))[0] + metric.add(prec * size(y), size(y)) + + return metric[0] / metric[1] + + +class AAMSoftmax(nn.Module): + def __init__(self, nOut, nClasses, margin=0.2, scale=20, easy_margin=False): # or margin=0.2, scale=30 + super(AAMSoftmax, self).__init__() + self.test_normalize = True + self.m = margin + self.s = scale + self.in_feats = nOut + self.output_num = nClasses + # self.weight = torch.nn.Parameter(torch.FloatTensor(nClasses, nOut), requires_grad=True) + # nn.init.xavier_normal_(self.weight, gain=1) + self.ce = nn.CrossEntropyLoss() + self.easy_margin = easy_margin + self.cos_m = math.cos(self.m) + self.sin_m = math.sin(self.m) + # make the function cos(theta+m) monotonic decreasing while theta in [0°,180°] + self.th = math.cos(math.pi - self.m) + self.mm = math.sin(math.pi - self.m) * self.m + print('Initialised AAMSoftmax margin:%.3f scale:%.3f' % (self.m, self.s)) + + def forward(self, cosine, label): + assert cosine.size()[0] == label.size()[0] + assert cosine.size()[1] == self.output_num + # cos(theta) + # cosine = F.linear(F.normalize(cosine), F.normalize(self.weight)) + # cos(theta + m) + sine = torch.sqrt((1.0 - torch.mul(cosine, cosine)).clamp(0, 1)) + phi = cosine * self.cos_m - sine * self.sin_m + + if self.easy_margin: + phi = torch.where(cosine > 0, phi, cosine) + else: + phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm) + + # one_hot = torch.zeros_like(cosine) + one_hot = torch.zeros(cosine.size(), device='cuda' if torch.cuda.is_available() else 'cpu') + one_hot.scatter_(1, label.view(-1, 1), 1) + output = (one_hot * phi) + ((1.0 - one_hot) * cosine) + output = output * self.s + + loss = self.ce(output, label) + prec = prec_accuracy(output.detach(), label.detach(), topk=(1,))[0] + return loss, prec diff --git a/models/tdnn.py b/models/tdnn.py new file mode 100644 index 0000000..ab94c69 --- /dev/null +++ b/models/tdnn.py @@ -0,0 +1,177 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchaudio.transforms as T +from models.tdnn_module import PreEmphasis, FbankAug + + +class Res2Conv1dReluBn(nn.Module): + def __init__(self, channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True, scale=4): + super().__init__() + assert channels % scale == 0, "{} % {} != 0".format(channels, scale) + self.scale = scale + self.width = channels // scale + self.nums = scale if scale == 1 else scale - 1 + + self.convs = [] + self.bns = [] + for i in range(self.nums): + self.convs.append(nn.Conv1d(self.width, self.width, kernel_size, stride, padding, dilation, bias=bias)) + self.bns.append(nn.BatchNorm1d(self.width)) + self.convs = nn.ModuleList(self.convs) + self.bns = nn.ModuleList(self.bns) + + def forward(self, x): + out = [] + spx = torch.split(x, self.width, 1) + sp = None + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = sp + spx[i] + # Order: conv -> relu -> bn + sp = self.convs[i](sp) + sp = self.bns[i](F.relu(sp)) + out.append(sp) + if self.scale != 1: + out.append(spx[self.nums]) + out = torch.cat(out, dim=1) + return out + + +class Conv1dReluBn(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True): + 
super().__init__() + self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias) + self.bn = nn.BatchNorm1d(out_channels) + + def forward(self, x): + return self.bn(F.relu(self.conv(x))) + + +class SE_Connect(nn.Module): + def __init__(self, channels, s=2): + super().__init__() + assert channels % s == 0, "{} % {} != 0".format(channels, s) + self.linear1 = nn.Linear(channels, channels // s) + self.linear2 = nn.Linear(channels // s, channels) + + def forward(self, x): + out = x.mean(dim=2) + out = F.relu(self.linear1(out)) + out = torch.sigmoid(self.linear2(out)) + out = x * out.unsqueeze(2) + return out + + +def SE_Res2Block(channels, kernel_size, stride, padding, dilation, scale): + return nn.Sequential( + Conv1dReluBn(channels, channels, kernel_size=1, stride=1, padding=0), + Res2Conv1dReluBn(channels, kernel_size, stride, padding, dilation, scale=scale), + Conv1dReluBn(channels, channels, kernel_size=1, stride=1, padding=0), + SE_Connect(channels) + ) + + +class AttentiveStatsPool(nn.Module): + def __init__(self, in_dim, bottleneck_dim, context): + super().__init__() + self.context = context + if self.context: + in_dims = in_dim * 3 + else: + in_dims = in_dim + self.linear = nn.Sequential( + nn.Conv1d(in_dims, bottleneck_dim, kernel_size=1), + nn.Tanh(), + nn.BatchNorm1d(bottleneck_dim), + nn.Conv1d(bottleneck_dim, in_dim, kernel_size=1), + nn.Softmax(dim=2), + ) + + def forward(self, x): + t = x.size()[-1] + if self.context: + global_x = torch.cat( + ( + x, + torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t), + torch.sqrt(torch.var(x, dim=2, keepdim=True).clamp(min=1e-4, max=1e4)).repeat(1, 1, t), + ), + dim=1, + ) + else: + global_x = x + alpha = self.linear(global_x) + mean = torch.sum(alpha * x, dim=2) + residuals = torch.sum(alpha * x ** 2, dim=2) - mean ** 2 + std = torch.sqrt(residuals.clamp(min=1e-9)) + return torch.cat([mean, std], dim=1) + + +class ECAPA_TDNN(nn.Module): + def __init__(self, in_channels=80, channels=512, embd_dim=192, output_num=10, + context=True, aug=True, embedding=True): + super().__init__() + self.context = context + self.aug = aug + self.embedding = embedding + self.layer1 = Conv1dReluBn(in_channels, channels, kernel_size=5, padding=2) + self.layer2 = SE_Res2Block(channels, kernel_size=3, stride=1, padding=2, dilation=2, scale=8) + self.layer3 = SE_Res2Block(channels, kernel_size=3, stride=1, padding=3, dilation=3, scale=8) + self.layer4 = SE_Res2Block(channels, kernel_size=3, stride=1, padding=4, dilation=4, scale=8) + + self.fbank = torch.nn.Sequential( + PreEmphasis(), + T.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160, + f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80) + ) + self.specaug = FbankAug() # Spec augmentation + + cat_channels = channels * 3 + self.conv = nn.Conv1d(cat_channels, 1536, kernel_size=1) + self.pooling = AttentiveStatsPool(1536, 128, self.context) + self.bn1 = nn.BatchNorm1d(3072) + self.linear = nn.Linear(3072, embd_dim) + self.bn2 = nn.BatchNorm1d(embd_dim) + + self.weight = torch.nn.Parameter(torch.FloatTensor(output_num, embd_dim), requires_grad=True) + nn.init.xavier_normal_(self.weight, gain=1) + + def forward(self, x): + with torch.no_grad(): + x = self.fbank(x) + 1e-6 + x = x.log() + x = x - torch.mean(x, dim=-1, keepdim=True) + if self.aug: + x = self.specaug(x) + out1 = self.layer1(x) + out2 = self.layer2(out1) + out3 = self.layer3(out1 + out2) + out4 = self.layer4(out1 + out2 + out3) + + # out1 = self.layer1(x) + # out2 = 
self.layer2(out1) + out1 + # out3 = self.layer3(out1 + out2) + out1 + out2 + # out4 = self.layer4(out1 + out2 + out3) + out1 + out2 + out3 + + out = torch.cat([out2, out3, out4], dim=1) + out = F.relu(self.conv(out)) + if out.shape[0] == 1: + out = self.linear(self.pooling(out)) + else: + out = self.bn1(self.pooling(out)) + out = self.bn2(self.linear(out)) + + if not self.embedding: + return F.linear(F.normalize(out), F.normalize(self.weight)) + return out + + +if __name__ == '__main__': + X = torch.zeros(2, 90000) + model = ECAPA_TDNN(in_channels=80, channels=512, embd_dim=192, output_num=10, context=True, embedding=False) + output = model(X) + # print(model) + print(output.shape) # [2, 192] or [2, output_num] diff --git a/models/tdnn_l.py b/models/tdnn_l.py new file mode 100644 index 0000000..54fbf54 --- /dev/null +++ b/models/tdnn_l.py @@ -0,0 +1,57 @@ +import torch +import eval as d2l +import torch.nn as nn +# import torch.nn.functional as F +from models.tdnn_module import ECAPA_TDNN + + +class ECAPAModel(nn.Module): + def __init__(self, n_class, C=1024, output_embedding=True, not_grad=False): + super(ECAPAModel, self).__init__() + self.in_features = 192 + self.output_num = n_class + self.output_embedding = output_embedding + self.speaker_encoder = ECAPA_TDNN(C=C) + self.fc = nn.Linear(192, self.output_num) + if not not_grad: + for param in self.speaker_encoder.parameters(): + param.requires_grad = True + else: + for param in self.speaker_encoder.parameters(): + param.requires_grad = False + + def forward(self, x, aug=True): + out = self.speaker_encoder(x, aug=aug) + if not self.output_embedding: + return self.fc(out) + else: + return out + + def save_parameters(self, path): + torch.save(self.state_dict(), path) + + def load_parameters(self, path, device): + self_state = self.state_dict() + loaded_state = torch.load(path, map_location=device) + for name, param in loaded_state.items(): + origname = name + if name not in self_state: + name = name.replace("module.", "") + if name not in self_state: + print("%s is not in the model." 
% origname) + continue + if self_state[name].size() != loaded_state[origname].size(): + print("Wrong parameter length: %s, model: %s, loaded: %s" % ( + origname, self_state[name].size(), loaded_state[origname].size())) + continue + self_state[name].copy_(param) + + +if __name__ == '__main__': + net = ECAPAModel(100, 1024, False) + net.load_parameters("../pretrain.model", d2l.try_gpu()) + X = torch.zeros(2, 90000) + output = net(X) + print(output.shape) + # parameters = torch.load("../pretrain.model", map_location=d2l.try_gpu()) + # print(parameters) diff --git a/models/tdnn_module.py b/models/tdnn_module.py new file mode 100644 index 0000000..a60fded --- /dev/null +++ b/models/tdnn_module.py @@ -0,0 +1,195 @@ +import math +import torch +import torchaudio +import torch.nn as nn +import torch.nn.functional as F + + +class SEModule(nn.Module): + def __init__(self, channels, bottleneck=128): + super(SEModule, self).__init__() + self.se = nn.Sequential( + nn.AdaptiveAvgPool1d(1), + nn.Conv1d(channels, bottleneck, kernel_size=1, padding=0), + nn.ReLU(), + # nn.BatchNorm1d(bottleneck), # I remove this layer + nn.Conv1d(bottleneck, channels, kernel_size=1, padding=0), + nn.Sigmoid(), + ) + + def forward(self, input): + x = self.se(input) + return input * x + + +class Bottle2neck(nn.Module): + def __init__(self, inplanes, planes, kernel_size=None, dilation=None, scale=8): + super(Bottle2neck, self).__init__() + width = int(math.floor(planes / scale)) + self.conv1 = nn.Conv1d(inplanes, width * scale, kernel_size=1) + self.bn1 = nn.BatchNorm1d(width * scale) + self.nums = scale - 1 + convs = [] + bns = [] + num_pad = math.floor(kernel_size / 2) * dilation + for i in range(self.nums): + convs.append(nn.Conv1d(width, width, kernel_size=kernel_size, dilation=dilation, padding=num_pad)) + bns.append(nn.BatchNorm1d(width)) + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + self.conv3 = nn.Conv1d(width * scale, planes, kernel_size=1) + self.bn3 = nn.BatchNorm1d(planes) + self.relu = nn.ReLU() + self.width = width + self.se = SEModule(planes) + + def forward(self, x): + sp = None + residual = x + out = self.conv1(x) + out = self.relu(out) + out = self.bn1(out) + + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = sp + spx[i] + sp = self.convs[i](sp) + sp = self.relu(sp) + sp = self.bns[i](sp) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + out = torch.cat((out, spx[self.nums]), 1) + + out = self.conv3(out) + out = self.relu(out) + out = self.bn3(out) + + out = self.se(out) + out += residual + return out + + +class PreEmphasis(nn.Module): + def __init__(self, coef: float = 0.97): + super().__init__() + self.coef = coef + self.register_buffer( + 'flipped_filter', torch.FloatTensor([-self.coef, 1.]).unsqueeze(0).unsqueeze(0) + ) + + def forward(self, input: torch.tensor) -> torch.tensor: + input = input.unsqueeze(1) + input = F.pad(input, (1, 0), 'reflect') + return F.conv1d(input, self.flipped_filter).squeeze(1) + + +class FbankAug(nn.Module): + def __init__(self, freq_mask_width=(0, 8), time_mask_width=(0, 10)): + self.time_mask_width = time_mask_width + self.freq_mask_width = freq_mask_width + super().__init__() + + def mask_along_axis(self, x, dim): + original_size = x.shape + batch, fea, time = x.shape + if dim == 1: + D = fea + width_range = self.freq_mask_width + else: + D = time + width_range = self.time_mask_width + + mask_len = torch.randint(width_range[0], width_range[1], (batch, 1), 
device=x.device).unsqueeze(2) + mask_pos = torch.randint(0, max(1, D - mask_len.max()), (batch, 1), device=x.device).unsqueeze(2) + arange = torch.arange(D, device=x.device).view(1, 1, -1) + mask = (mask_pos <= arange) * (arange < (mask_pos + mask_len)) + mask = mask.any(dim=1) + + if dim == 1: + mask = mask.unsqueeze(2) + else: + mask = mask.unsqueeze(1) + + x = x.masked_fill_(mask, 0.0) + return x.view(*original_size) + + def forward(self, x): + x = self.mask_along_axis(x, dim=2) + x = self.mask_along_axis(x, dim=1) + return x + + +class ECAPA_TDNN(nn.Module): + def __init__(self, C): + super(ECAPA_TDNN, self).__init__() + + self.torchfbank = torch.nn.Sequential( + PreEmphasis(), + torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160, + f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80), + ) + + self.specaug = FbankAug() # Spec augmentation + + self.conv1 = nn.Conv1d(80, C, kernel_size=5, stride=1, padding=2) + self.relu = nn.ReLU() + self.bn1 = nn.BatchNorm1d(C) + self.layer1 = Bottle2neck(C, C, kernel_size=3, dilation=2, scale=8) + self.layer2 = Bottle2neck(C, C, kernel_size=3, dilation=3, scale=8) + self.layer3 = Bottle2neck(C, C, kernel_size=3, dilation=4, scale=8) + # I fixed the shape of the output from MFA layer, that is close to the setting from ECAPA paper. + self.layer4 = nn.Conv1d(3 * C, 1536, kernel_size=1) + self.attention = nn.Sequential( + nn.Conv1d(4608, 256, kernel_size=1), + nn.ReLU(), + nn.BatchNorm1d(256), + nn.Tanh(), # I add this layer + nn.Conv1d(256, 1536, kernel_size=1), + nn.Softmax(dim=2), + ) + self.bn5 = nn.BatchNorm1d(3072) + self.fc6 = nn.Linear(3072, 192) + self.bn6 = nn.BatchNorm1d(192) + + def forward(self, x, aug=True): + with torch.no_grad(): + x = self.torchfbank(x) + 1e-6 + x = x.log() + x = x - torch.mean(x, dim=-1, keepdim=True) + if aug: + x = self.specaug(x) + + x = self.conv1(x) + x = self.relu(x) + x = self.bn1(x) + + x1 = self.layer1(x) + x2 = self.layer2(x + x1) + x3 = self.layer3(x + x1 + x2) + + x = self.layer4(torch.cat((x1, x2, x3), dim=1)) + x = self.relu(x) + + t = x.size()[-1] + + global_x = torch.cat((x, torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t), + torch.sqrt(torch.var(x, dim=2, keepdim=True).clamp(min=1e-4)).repeat(1, 1, t)), dim=1) + + w = self.attention(global_x) + + mu = torch.sum(x * w, dim=2) + sg = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-4)) + + x = torch.cat((mu, sg), 1) + if x.shape[0] > 1: + x = self.bn5(x) + x = self.fc6(x) + if x.shape[0] > 1: + x = self.bn6(x) + + return x diff --git a/models/tdnn_pretrain.py b/models/tdnn_pretrain.py new file mode 100644 index 0000000..1865966 --- /dev/null +++ b/models/tdnn_pretrain.py @@ -0,0 +1,55 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from models.tdnn_module import ECAPA_TDNN as TDNN + + +class Pretrain_TDNN(nn.Module): + def __init__(self, n_class, C=1024, output_embedding=True, not_grad=False, aug=True): + super(Pretrain_TDNN, self).__init__() + self.aug = aug + self.in_features = 192 + self.output_num = n_class + self.output_embedding = output_embedding + self.speaker_encoder = TDNN(C=C) + self.weight = torch.nn.Parameter(torch.FloatTensor(n_class, 192), requires_grad=True) + nn.init.xavier_normal_(self.weight, gain=1) + if not not_grad: + for param in self.speaker_encoder.parameters(): + param.requires_grad = True + else: + for param in self.speaker_encoder.parameters(): + param.requires_grad = False + + def forward(self, x): + out = 
self.speaker_encoder(x, aug=self.aug)
+        if not self.output_embedding:
+            return F.linear(F.normalize(out), F.normalize(self.weight))
+        else:
+            return out
+
+    def save_parameters(self, path):
+        torch.save(self.state_dict(), path)
+
+    def load_parameters(self, path, device):
+        self_state = self.state_dict()
+        loaded_state = torch.load(path, map_location=device)
+        for name, param in loaded_state.items():
+            origname = name
+            if name not in self_state:
+                name = name.replace("module.", "")
+                if name not in self_state:
+                    print("%s is not in the model." % origname)
+                    continue
+            if self_state[name].size() != loaded_state[origname].size():
+                print("Wrong parameter length: %s, model: %s, loaded: %s" % (
+                    origname, self_state[name].size(), loaded_state[origname].size()))
+                continue
+            self_state[name].copy_(param)
+
+
+if __name__ == '__main__':
+    net = Pretrain_TDNN(100, 1024, True)
+    X = torch.zeros(2, 41500)
+    output = net(X)
+    print(output.shape)
diff --git a/param.model b/param.model
new file mode 100644
index 0000000..769a13a
Binary files /dev/null and b/param.model differ
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..679a36a
--- /dev/null
+++ b/test.py
@@ -0,0 +1,17 @@
+import torch
+
+import eval as d2l
+from tools import eval_net
+# from d2l import torch as d2l
+from models.tdnn_pretrain import Pretrain_TDNN
+
+if __name__ == "__main__":
+    model_path = './param.model'
+    Device = d2l.try_gpu()
+
+    model2 = Pretrain_TDNN(420, 1024, False, not_grad=False)
+    model2.load_parameters(model_path, Device)
+    # model2 = torch.load('net.pth')
+
+    EER, minDCF = eval_net(model2, Device, 10, 10)
+    print(f'EER:{EER:.4f} minDCF:{minDCF:.4f}')
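A sketch of a single verification trial, scored the way `tools.eval_net` (added below) scores it: cosine similarity between L2-normalised embeddings. The two `.wav` paths are placeholders:

```python
# Sketch: one enroll/test verification trial with the pretrained encoder.
import torch
import eval as d2l
from tools import get_embedding
from models.tdnn_pretrain import Pretrain_TDNN

device = d2l.try_gpu()
net = Pretrain_TDNN(420, 1024, output_embedding=True)
net.load_parameters('param.model', device)

enroll_emb = get_embedding(net, 'enroll.wav', device)  # (1, 192), unit norm
test_emb = get_embedding(net, 'test.wav', device)
score = torch.matmul(enroll_emb, test_emb.mT).item()   # cosine similarity in [-1, 1]
print(score)
```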
diff --git a/tools.py b/tools.py
new file mode 100644
index 0000000..848e83c
--- /dev/null
+++ b/tools.py
@@ -0,0 +1,157 @@
+import numpy as np
+import torch
+import audio
+import loader
+import random
+from sklearn import metrics
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+
+
+def tuneThresholdfromScore(scores, labels, target_fa):
+    # compute the ROC curve with scikit-learn
+    fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1)
+    # compute the AUC
+    auc = metrics.auc(fpr, tpr)
+
+    plt.plot(fpr, tpr, 'k--', label='ROC (area = {0:.2f})'.format(auc), lw=2)
+    plt.xlim([-0.05, 1.05])
+    plt.ylim([-0.05, 1.05])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('ROC Curve')
+    plt.savefig('./img/ROC.jpg', dpi=400)
+
+    prec, recall, _ = metrics.precision_recall_curve(labels, scores, pos_label=1)
+    metrics.PrecisionRecallDisplay(precision=prec, recall=recall).plot()
+    plt.savefig('./img/PR.jpg', dpi=400)
+
+    fnr = 1 - tpr
+    tunedThreshold = []
+
+    for tfa in target_fa:
+        idx = np.nanargmin(np.absolute(tfa - fpr))  # np.where(fpr<=tfa)[0][-1]
+        tunedThreshold.append([thresholds[idx], fpr[idx], fnr[idx]])
+    # the EER point: the index where |fnr - fpr| is smallest (NaNs excluded)
+    idxE = np.nanargmin(np.absolute(fnr - fpr))
+    eer = max(fpr[idxE], fnr[idxE])
+
+    return tunedThreshold[1][0], eer, auc, fpr, fnr
+
+
+def ComputeErrorRates(scores, labels, threshold=0.96695, p=0.01):
+    assert len(scores) == len(labels), f'Error: {scores} {labels}\n'
+    predict = []
+    threshold = threshold if 0.9693 <= threshold < 0.99 else 0.9693
+    for i in range(len(scores)):
+        if scores[i] > threshold:
+            predict.append(1)
+        else:
+            predict.append(0)
+    matrix = metrics.confusion_matrix(labels, predict)
+    [TN, FP], [FN, TP] = matrix
+    matrix = np.array([[TP, FN], [FP, TN]])
+
+    metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
+                                   display_labels=['Positive', 'Negative']).plot()
+    plt.savefig('./img/confusion_matrix.jpg', dpi=400)
+
+    FAR = FP / (FP + TN)
+    FRR = FN / (TP + FN)
+    minDCF = FAR * (1 - p) + FRR * p
+    return matrix, minDCF
+
+
+def ComputeMinDcf(fnrs, fprs, thresholds, p_target, c_miss, c_fa):
+    min_c_det = float("inf")
+    min_c_det_threshold = thresholds[0]
+    for i in range(0, len(fnrs)):
+        c_det = c_miss * fnrs[i] * p_target + c_fa * fprs[i] * (1 - p_target)
+        if c_det < min_c_det:
+            min_c_det = c_det
+            min_c_det_threshold = thresholds[i]
+    c_def = min(c_miss * p_target, c_fa * (1 - p_target))
+    min_dcf = min_c_det / c_def
+    return min_dcf, min_c_det_threshold
+
+
+def get_embedding(net, name, device):
+    net.aug = False
+    net.output_embedding = True
+    net.to(device)
+    wav = audio.loadWAV(filename=name)
+    wav = wav.unsqueeze(0).to(device)
+    with torch.no_grad():
+        embedding = net(wav)
+        embedding = F.normalize(embedding, p=2, dim=1)
+    return embedding
+
+
+def dic_process(dic):
+    result = {}
+    value = list(dic.values())[0]
+    embedding_list = []
+    for item in dic.items():
+        if item[1] != value:
+            value = item[1]
+            embedding_list = []
+        embedding_list.append(item[0])
+        result[value] = embedding_list
+    return result
+
+
+def eval_net(net, device, folder_num=-1, file_num=-1):
+    labels = []
+    embed_dict = {}
+    score_list = []
+    enroll, test, folder_num = loader.load_files("test", folder_num, file_num, 9)
+    enroll = dic_process(enroll)
+    test = dic_process(test)
+
+    for key in enroll:
+        count = 0
+        embed = None
+        for name in enroll[key]:
+            if count >= len(enroll[key]):
+                break
+            count += 1
+            embedding = get_embedding(net, name, device)
+
+            if count == 1:
+                embed = embedding
+            else:
+                embed = torch.cat([embed, embedding])
+        embed = torch.mean(embed, dim=0).unsqueeze(0)
+        embed_dict[key] = embed
+
+    for item in enroll:
+        dict_key_ls = list(enroll.keys())
+        random.shuffle(dict_key_ls)
+        for label in dict_key_ls:
+            if label == item:
+                y_true = 1
+            else:
+                y_true = 0
+            num = random.randint(0, len(test[label]) - 1)
+            embed1 = get_embedding(net, test[label][num], device)
+            embedding = embed_dict[item]
+
+            score = torch.matmul(embed1, embedding.mT).cpu().numpy().reshape(-1)
+            score_list.append(score)
+            labels.append(y_true)
+
+    threshold, EER, AUC, _, _ = tuneThresholdfromScore(score_list, labels, [1, 0.1])
+    _, minDCF = ComputeErrorRates(score_list, labels, threshold)
+    return EER, minDCF
+
+
+if __name__ == '__main__':
+    train_dict, test_dict, number = loader.load_files("train", 40, 20, 1.5)
+    dic_process(train_dict)
+    # print(train_dict)
+
+    # embed = torch.FloatTensor([[0.1, 0.2, 0.3, 0.4],
+    #                            [0.5, 0.6, 0.7, 0.8]])
+    # sum = torch.matmul(embed, embed.T)
+    # sum = torch.sum(sum, dim=[0, 1], keepdim=False)
+    # print(sum)
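A toy example of `tuneThresholdfromScore`: one high-scoring negative trial yields a nonzero EER. It assumes an `img/` directory exists (the repo ships one), since the function saves ROC/PR plots there:

```python
# Toy EER computation on hand-made trial scores.
from tools import tuneThresholdfromScore

scores = [0.99, 0.97, 0.95, 0.98, 0.40, 0.30]
labels = [1, 1, 1, 0, 0, 0]          # the 0.98 trial is a false accept candidate
threshold, eer, auc, _, _ = tuneThresholdfromScore(scores, labels, [1, 0.1])
print(f'EER={eer:.3f} AUC={auc:.3f} threshold={threshold:.3f}')  # EER=0.333 here
```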
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..8d1842a
--- /dev/null
+++ b/train.py
@@ -0,0 +1,106 @@
+import os
+import torch
+import loader
+import eval as d2l
+# from d2l import torch as d2l
+from tensorboardX import SummaryWriter
+from torch.utils.data import DataLoader
+from models.tdnn import ECAPA_TDNN
+from loss import AAMSoftmax, evaluate_accuracy_gpu
+
+
+def init_logs(path=".\\logs"):
+    for root, dirs, files in os.walk(path, topdown=False):
+        for name in files:
+            os.remove(os.path.join(root, name))
+        for name in dirs:
+            os.rmdir(os.path.join(root, name))
+
+
+def train(train_iter, test_iter, net, loss_func, device, write, num_epoch=10, lr=0.1, wd=2e-4):
+    net.to(device)
+    trainer = torch.optim.Adam(params=(param for param in net.parameters()
+                                       if param.requires_grad), lr=lr, weight_decay=wd)
+    scheduler = torch.optim.lr_scheduler.CyclicLR(trainer, base_lr=1e-3, max_lr=0.1, step_size_up=6250,
+                                                  mode="triangular2", cycle_momentum=False)
+    timer = d2l.Timer()
+    sample_count, img = 0, None  # renamed from `sum` to avoid shadowing the built-in
+    for epoch in range(num_epoch):
+        print(f'\nepoch {epoch + 1}:')
+        train_acc = train_l = 0
+        metric = d2l.Accumulator(3)
+        net.train()
+        for i, (x, y) in enumerate(train_iter):
+            # if i == 0 and epoch == num_epoch - 1:
+            #     img = x.to(device)
+            timer.start()
+            x, y = x.to(device), y.to(device)
+            trainer.zero_grad()
+            y_hat = net(x)
+            l, prec = loss_func(y_hat, y)
+            l.backward()
+            trainer.step()
+            with torch.no_grad():
+                metric.add(l * x.shape[0], prec * x.shape[0], x.shape[0])
+            timer.stop()
+            train_l = metric[0] / metric[2]
+            train_acc = metric[1] / metric[2]
+        scheduler.step()
+        sample_count += metric[2]
+        # test_acc = 0
+        test_acc = evaluate_accuracy_gpu(net, test_iter)
+        print(f'\tloss {train_l:.3f}, train acc {train_acc:.3f}, '
+              f'test acc {test_acc:.3f}')
+        write.add_scalar('loss', train_l, epoch)
+        write.add_scalars('acc', {'test_acc': test_acc, 'train_acc': train_acc}, epoch)
+    print(f'\n{sample_count / timer.sum():.1f} examples/sec '
+          f'on {str(device)}')
+    # write.add_graph(net, img)
+
+
+if __name__ == "__main__":
+    people_num, data_per_people = 420, 150
+    noise, mel = False, True
+    margin, scale, easy_margin = 0.2, 20, False
+    not_grad, bidirectional, reverse = False, True, False
+    num_epochs, learn_rate, weight_decay = 150, 0.125, 1e-3
+    mode, model_name = "train", "dense169"
+    hidden_size, num_layers = 64, 2
+    model_path = './pretrain.model'
+
+    # Device = torch_directml.device()
+    # print(Device)
+    # prefetch_factor, batch_size, num_works, persistent = 2, 32, 8, False
+
+    Device = d2l.try_gpu()
+    if Device.type == 'cpu':
+        prefetch_factor, batch_size, num_works, persistent = 2, 8, 8, False
+    elif torch.cuda.is_available():
+        prefetch_factor, batch_size, num_works, persistent = 8, 256, 32, True
+    else:
+        prefetch_factor, batch_size, num_works, persistent = 2, 32, 8, False
+
+    init_logs()
+    train_dict, test_dict, people_num = loader.load_files(mode=mode, folder_num=people_num,
+                                                          file_num=data_per_people, k=7.5)
+    train_dataset = loader.MyDataset(data_dict=train_dict, people_num=people_num, train=True, mel=mel,
+                                     noise=noise)
+    test_dataset = loader.MyDataset(data_dict=test_dict, people_num=people_num, train=False, mel=mel,
+                                    noise=False)
+    print(len(train_dataset), len(test_dataset))
+    train_ = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True,
+                        drop_last=True, num_workers=num_works, pin_memory=True,
+                        persistent_workers=persistent, prefetch_factor=prefetch_factor)
+    test_ = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True,
+                       drop_last=True, num_workers=num_works, pin_memory=True,
+                       persistent_workers=persistent, prefetch_factor=prefetch_factor)
+    writer = SummaryWriter('./logs')
+
+    # model1 = cnn.get_net(people_num, model_name, not_grad)
+    # model2 = F.CNN_LSTM(model_name, people_num, hidden_size, num_layers, bidirectional, not_grad)
+    model2 = ECAPA_TDNN(in_channels=80, channels=512, embd_dim=192,
+                        output_num=people_num, context=True, embedding=False)
+
+    loss = AAMSoftmax(192, people_num, margin, scale, easy_margin)
+    train(train_, test_, model2, loss, Device, writer, num_epochs, learn_rate, weight_decay)
+    torch.save(model2.state_dict(), "net.model")
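Finally, a sketch of reloading the state dict that `train.py` saves as `net.model` and extracting a speaker embedding; the random tensor stands in for a real 16 kHz clip:

```python
# Sketch: embedding extraction with the trained ECAPA_TDNN from models/tdnn.py.
import torch
import eval as d2l
from models.tdnn import ECAPA_TDNN

device = d2l.try_gpu()
net = ECAPA_TDNN(in_channels=80, channels=512, embd_dim=192,
                 output_num=420, context=True, aug=False, embedding=True)
net.load_state_dict(torch.load('net.model', map_location=device))
net.eval().to(device)

wav = torch.randn(1, 41000, device=device)  # stand-in for a real waveform
with torch.no_grad():
    emb = net(wav)                          # (1, 192) speaker embedding
print(emb.shape)
```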