forked from myLoveOneTwo/Machine_Learning-2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
=
committed
Mar 6, 2020
0 parents
commit 8adb469
Showing
4,072 changed files
with
6,313 additions
and
0 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"留出法直接将数据集$D$划分为两个互斥的部分,其中一部分作为训练集$S$,另一部分用作测试集$T$。\n", | ||
"\n", | ||
"通常训练集和测试集的比例为70%:30%。\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([0, 1, 0, 0, 1, 0, 1, 1, 0, 1])" | ||
] | ||
}, | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# 随机产生我们的数据集\n", | ||
"x = np.random.randint(-10, 10, 10)\n", | ||
"y = (x > 0).astype(int)\n", | ||
"y" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# 先把下标的顺序打乱\n", | ||
"shuffle_indexs = np.random.permutation(len(x))\n", | ||
"# 看我们要留几成当作测试数据集\n", | ||
"test_radio = 0.3\n", | ||
"# 求出test的个数\n", | ||
"test_size = int(len(x)*test_radio)\n", | ||
"\n", | ||
"# 打乱后的前test_size个下标作为测试集,其余下标作为训练集\n", | ||
"test_indexs = shuffle_indexs[:test_size]\n", | ||
"train_indexs = shuffle_indexs[test_size:]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"3" | ||
] | ||
}, | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"len(test_indexs) # 试验一下我们的测试数据集的个数" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# 使用fancy indexing 进行取值\n", | ||
"train_x = x[train_indexs]\n", | ||
"train_y = y[train_indexs]\n", | ||
"\n", | ||
"test_x = x[test_indexs]\n", | ||
"test_y = y[test_indexs]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 17, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(3,)" | ||
] | ||
}, | ||
"execution_count": 17, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"test_x.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 到此,我们的留出法结束" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"k折交叉验证就是将数据集A随机分为k个包,每次将其中一个包作为测试集,剩下k-1个包作为训练集进行训练" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"# 随机产生我们的数据集\n", | ||
"x = np.random.randint(-10, 10, 10)\n", | ||
"y = (x > 0).astype(int)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"processing fold # 0\n", | ||
"[-8]\n", | ||
"[ -4 9 7 -3 -10 4 8 -3 -9]\n", | ||
"processing fold # 1\n", | ||
"[-4]\n", | ||
"[ -8 9 7 -3 -10 4 8 -3 -9]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"k = 2 # 2折验证法\n", | ||
"num_val_samples = 1 # 每一折验证集的样本个数(此处为演示固定为1;一般取 len(x) // k)\n", | ||
"for i in range(k): \n", | ||
" print('processing fold #', i) \n", | ||
" test_x = x[i * num_val_samples: (i + 1) * num_val_samples] \n", | ||
" test_y = y[i * num_val_samples: (i + 1) * num_val_samples] \n", | ||
" # 训练集由验证折之前和之后的两段拼接而成;例如i为1时,取第0个样本以及第2个及之后的样本\n", | ||
" train_x = np.concatenate( \n", | ||
" [x[:i * num_val_samples], \n", | ||
" x[(i + 1) * num_val_samples:]], \n", | ||
" axis=0) \n", | ||
" train_y = np.concatenate( \n", | ||
" [y[:i * num_val_samples], \n", | ||
" y[(i + 1) * num_val_samples:]], \n", | ||
" axis=0) \n", | ||
" \n", | ||
" print(test_x)\n", | ||
" print(train_x)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"K折验证法结束" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"自助法:给定包含$m$个样本的数据集$D$,我们对它进行采样产生数据集$D'$:每次随机从$D$中挑选一个样本,将其复制到$D'$中,然后再将该样本放回原始数据集$D$中,使得该样本在下次采样的时候也可能被采到;这个过程重复执行$m$次,我们就得到了包含$m$个样本的数据集$D'$。\n", | ||
"\n", | ||
"简而言之,就是从数据集$D$中有放回地随机采样$m$次,组成一个新样本集$D'$。" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"# 随机产生我们的数据集\n", | ||
"x = np.random.randint(-10, 10, 10)\n", | ||
"y = (x > 0).astype(int)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([ 6, -3, 7, 9, -7, 8, -6, 1, 6, -3])" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"x" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#通过产生的随机数获得抽取样本的序号\n", | ||
"bootstrapping = []\n", | ||
"for i in range(len(x)):\n", | ||
" bootstrapping.append(np.floor(np.random.random()*len(x)))\n", | ||
"#通过序号获得原始数据集中的数据\n", | ||
"x_1 = []\n", | ||
"y_1 = []\n", | ||
"for i in range(len(x)):\n", | ||
" x_1.append(x[int(bootstrapping[i])])\n", | ||
" y_1.append(y[int(bootstrapping[i])])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[1, -7, -3, -6, 7, 9, -3, 8, 7, 6]" | ||
] | ||
}, | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"x_1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[1, 0, 0, 0, 1, 1, 0, 1, 1, 1]" | ||
] | ||
}, | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"y_1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"我们的自助法结束" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.