-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload.py
37 lines (23 loc) · 856 Bytes
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Author Andrew Chupin
# Coding in UTF-8
import os
import pandas
import matplotlib
import matplotlib.pyplot as plt
# Default directory (relative to the working directory) holding the housing data.
HOUSING_PATH = os.path.join("datasets", "housing")


def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from ``housing_path`` into a pandas DataFrame.

    Args:
        housing_path: Directory that contains ``housing.csv``. Defaults to
            ``HOUSING_PATH``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` for that file.
    """
    csv_path = os.path.join(housing_path, "housing.csv")
    return pandas.read_csv(csv_path)
# Load the housing data once at import time; test_slit() reads this module-level frame.
frame = load_housing_data()
import numpy as np
def split_train_test(data, test_ratio, seed=42):
    """Shuffle the rows of ``data`` reproducibly and split them in two.

    Args:
        data: Object supporting ``len()`` and positional ``.iloc`` indexing
            (for example a pandas DataFrame).
        test_ratio: Fraction of rows, between 0 and 1 inclusive, to place in
            the test set. The test-set size is ``int(len(data) * test_ratio)``,
            i.e. truncated toward zero.
        seed: Seed for the shuffle. The default of 42 reproduces the split
            obtained when the seed was hard-coded.

    Returns:
        A ``(train, test)`` tuple of ``data.iloc[...]`` selections.

    Raises:
        ValueError: If ``test_ratio`` is not within ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(
            f"test_ratio must be between 0 and 1, got {test_ratio!r}"
        )
    # A private RandomState yields the same permutation as calling
    # np.random.seed(seed) first, but leaves NumPy's global generator alone.
    rng = np.random.RandomState(seed)
    shuffled_indices = rng.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
def test_slit():
    """Split the module-level ``frame`` with a test ratio of 0.2 and print the result.

    Prints the sizes of both partitions, then the test partition itself.
    """
    partitions = split_train_test(frame, 0.2)
    training_rows, held_out_rows = partitions
    print(f"{len(training_rows)} train + {len(held_out_rows)} test")
    print(held_out_rows)