-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathID3Algo_entropy.py
50 lines (42 loc) · 2.01 KB
/
ID3Algo_entropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Reference Credits
# 1) https://www.python-course.eu/Decision_Trees.php
# 2) https://dhirajkumarblog.medium.com/decision-tree-from-scratch-in-python-629631ec3e3a
import math
# Calculates the entropy and frequency of the given data set for the target attribute.
def cal_entropy(data, target_attr):
    """Return the Shannon entropy (base 2) of `data` w.r.t. `target_attr`.

    Args:
        data: iterable of dict-like records, each indexable by target_attr.
        target_attr: key whose value distribution is measured.

    Returns:
        float entropy in bits; 0.0 for an empty dataset or a pure one
        (all records sharing the same target value).
    """
    # Frequency of each distinct value of the target attribute.
    value_freq = {}
    for record in data:
        value = record[target_attr]
        value_freq[value] = value_freq.get(value, 0) + 1
    # H = -sum(p * log2(p)) over the observed value distribution.
    total = len(data)
    data_entropy = 0.0
    for freq in value_freq.values():
        prob = freq / total
        data_entropy -= prob * math.log2(prob)
    return data_entropy
# Calculates the information gain (reduction in entropy) that would
# result by splitting the data on the chosen attribute (attr).
def info_gain(data, attr, target_attr):
    """Return the information gain of splitting `data` on `attr`.

    Gain = H(data, target_attr) - sum_v P(attr=v) * H(subset_v, target_attr),
    i.e. the reduction in entropy of the target attribute achieved by
    partitioning the records on the values of `attr`.

    Args:
        data: iterable of dict-like records.
        attr: candidate split attribute.
        target_attr: classification target attribute.

    Returns:
        float information gain in bits. Also prints diagnostic output
        (value frequencies and the resulting gain) as a side effect.
    """
    # Frequency of each distinct value of the split attribute.
    # Counts are floats (1.0) so the printed dict matches prior output.
    value_freq = {}
    for record in data:
        value = record[attr]
        value_freq[value] = value_freq.get(value, 0) + 1.0
    print("For attribute '%s' \nEach value frequency is :%s " % (attr, value_freq))
    # Weighted sum of subset entropies; the total count is loop-invariant
    # (it always equals len(data)), so compute it once outside the loop.
    total = sum(value_freq.values())
    sub_entropy = 0.0
    for value, freq in value_freq.items():
        value_prob = freq / total
        data_subset = [record for record in data if record[attr] == value]
        sub_entropy += value_prob * cal_entropy(data_subset, target_attr)
    # Gain is the whole-set entropy minus the split's weighted entropy.
    entropy_gain = cal_entropy(data, target_attr) - sub_entropy
    print("For Attribute : %s" % attr)
    print("Entropy Gain: %f" % entropy_gain)
    return entropy_gain