-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistribution.v1.0.py
54 lines (46 loc) · 2.23 KB
/
distribution.v1.0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
# ============================================================================== #
# Distribution
# Powered by [email protected] 202307
# ============================================================================== #
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import shapiro
from pandas import read_csv
# Column names applied to the employee CSV, in file order.
EMPLOYEE_COLUMNS = [
    'Eid', 'Ename', 'Experience', 'Total_projects', 'Rating', 'Area_of_Interest_1',
    'Area_of_Interest_2', 'Area_of_Interest_3', 'Language1', 'Language2', 'Language3',
    'AI_project_count', 'ML_project_count', 'JS_project_count', 'Java_project_count',
    'DotNet_project_count', 'Mobile_project_count',
]


def load_ratings(csv_path='./data/Final_Employees_Data.csv'):
    """Return the 'Rating' column of the employee CSV as a float Series.

    The first line of the file is discarded and replaced by EMPLOYEE_COLUMNS
    (presumably it is a header row -- the script has always dropped it).

    Args:
        csv_path: Path of the comma-separated employee file.

    Returns:
        pandas Series of ratings with float dtype.

    Raises:
        ValueError: If the file holds no data rows, or a rating is not numeric.
    """
    # header=0 together with names= swaps the first line for our names, so the
    # header text never lands in the data and never forces a string dtype.
    employee = read_csv(csv_path, sep=',', header=0, names=EMPLOYEE_COLUMNS)
    ratings = employee['Rating'].astype(float)
    if ratings.empty:
        raise ValueError(f"No employee rows found in {csv_path!r}")
    return ratings


def summarize_ratings(ratings):
    """Compute the descriptive statistics that the script reports.

    Args:
        ratings: pandas Series of float ratings.

    Returns:
        dict with keys 'skewness', 'mean', 'median', 'percentile_25' and
        'percentile_75'.
    """
    values = ratings.to_numpy()
    return {
        # Skewness is taken from the float Series, not from raw CSV strings.
        'skewness': ratings.skew(),
        'mean': np.mean(values),
        'median': np.median(values),
        'percentile_25': np.percentile(values, 25),
        'percentile_75': np.percentile(values, 75),
    }


def plot_rating_distribution(values, median, output_path='rating_distribution.png'):
    """Plot the rating histogram, a fitted normal curve and the median marker.

    The figure is saved to ``output_path`` and then shown.

    Args:
        values: 1-D numpy array of float ratings.
        median: Position of the dashed vertical 'Median' line.
        output_path: File the figure is written to.
    """
    plt.figure(figsize=(8, 6))
    plt.hist(values, bins=50, density=True, color='skyblue', edgecolor='black', alpha=0.7)
    # Draw the marker at the computed median; it used to be pinned at 2.5
    # regardless of the data while still being labelled 'Median'.
    plt.axvline(median, color='skyblue', linestyle='dashed', linewidth=2, label='Median')
    mu, sigma = np.mean(values), np.std(values)
    if sigma > 0:
        # Normal density with the sample mean and (population) standard deviation.
        x = np.linspace(np.min(values), np.max(values), 100)
        y = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
        plt.plot(x, y, color='red', linewidth=2)
    # else: every rating is identical, so the density is undefined (division
    # by zero) and the curve is skipped.
    plt.title('Employee Rating Distribution')
    plt.xlabel('Rating')
    plt.ylabel('Density')
    plt.grid(True)
    plt.legend()  # without this the 'Median' label is never rendered
    # Save before show(): the figure may be gone once the window is closed.
    plt.savefig(output_path, dpi=1000)
    plt.show()


def report_normality(values, alpha=0.05):
    """Run the Shapiro-Wilk test on ``values`` and print the outcome.

    Args:
        values: 1-D numpy array of float ratings.
        alpha: Significance level the p-value is compared against.

    Returns:
        True when the p-value exceeds ``alpha`` (reported as
        "Normal Distribution"), False otherwise.
    """
    statistic, p_value = shapiro(values)
    print("Shapiro-Wilk statistic:", statistic)
    print("p value:", p_value)
    is_normal = p_value > alpha
    print("Normal Distribution" if is_normal else "Not Normal Distribution")
    return is_normal


def main():
    """Load the ratings, print their summary, plot them and test normality."""
    ratings = load_ratings()
    summary = summarize_ratings(ratings)
    print(f"Skewness: {summary['skewness']}")
    print(f"Mean Rating: {summary['mean']}")
    print(f"Median Rating: {summary['median']}")
    print(f"25th Percentile Rating: {summary['percentile_25']}")
    print(f"75th Percentile Rating: {summary['percentile_75']}")
    values = ratings.to_numpy()
    plot_rating_distribution(values, summary['median'])
    report_normality(values, alpha=0.05)


if __name__ == "__main__":
    main()
# ============================================================================== #