Skip to content

Commit

Permalink
Improve RF and fix requirements.txt
Browse files (browse the repository at this point in the history)
  • Loading branch information
shalinis602 committed Jul 12, 2024
1 parent b4539dc commit 72c7c39
Show file tree
Hide file tree
Showing 8 changed files with 573 additions and 412 deletions.
508 changes: 508 additions & 0 deletions .ipynb_checkpoints/rna-seq-ml-modeling-checkpoint.ipynb

Large diffs are not rendered by default.

Binary file not shown.
376 changes: 4 additions & 372 deletions requirements.txt

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions results/pre_processing/pca.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Explained variance ratios for each component:
Principal Component 1: 0.1394
Principal Component 2: 0.1006
Principal Component 3: 0.0764
Principal Component 4: 0.0596
Principal Component 5: 0.0391
Principal Component 6: 0.0321
Principal Component 7: 0.0222

Cumulative explained variance ratios:
Principal Component 1: 0.1394
Principal Component 2: 0.2400
Principal Component 3: 0.3164
Principal Component 4: 0.3760
Principal Component 5: 0.4151
Principal Component 6: 0.4472
Principal Component 7: 0.4694
56 changes: 28 additions & 28 deletions results/random_forest.txt
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
Validation Accuracy: 0.22
Validation Accuracy: 0.96
Validation Classification Report:
precision recall f1-score support

BRCA 0.17 0.04 0.06 28
COAD 0.00 0.00 0.00 9
KIRC 0.00 0.00 0.00 10
LUAD 0.00 0.00 0.00 13
PRAD 0.89 0.81 0.85 21
BRCA 0.93 1.00 0.97 28
COAD 1.00 1.00 1.00 9
KIRC 1.00 0.90 0.95 10
LUAD 0.92 0.92 0.92 13
PRAD 1.00 0.95 0.98 21

accuracy 0.22 81
macro avg 0.21 0.17 0.18 81
weighted avg 0.29 0.22 0.24 81
accuracy 0.96 81
macro avg 0.97 0.96 0.96 81
weighted avg 0.96 0.96 0.96 81

Validation Confusion Matrix:
[[ 1 19 3 3 2]
[ 0 0 9 0 0]
[ 0 5 0 5 0]
[ 1 5 7 0 0]
[ 4 0 0 0 17]]
[[28 0 0 0 0]
[ 0 9 0 0 0]
[ 0 0 9 1 0]
[ 1 0 0 12 0]
[ 1 0 0 0 20]]

Test Accuracy: 0.12
Test Accuracy: 0.99
Test Classification Report:
precision recall f1-score support

BRCA 0.00 0.00 0.00 27
COAD 0.00 0.00 0.00 8
KIRC 0.00 0.00 0.00 15
LUAD 0.00 0.00 0.00 19
PRAD 0.62 0.91 0.74 11
BRCA 1.00 0.96 0.98 27
COAD 1.00 1.00 1.00 8
KIRC 1.00 1.00 1.00 15
LUAD 0.95 1.00 0.97 19
PRAD 1.00 1.00 1.00 11

accuracy 0.12 80
macro avg 0.12 0.18 0.15 80
weighted avg 0.09 0.12 0.10 80
accuracy 0.99 80
macro avg 0.99 0.99 0.99 80
weighted avg 0.99 0.99 0.99 80

Test Confusion Matrix:
[[ 0 17 3 1 6]
[ 0 0 8 0 0]
[ 2 2 0 11 0]
[ 3 4 12 0 0]
[ 1 0 0 0 10]]
[[26 0 0 1 0]
[ 0 8 0 0 0]
[ 0 0 15 0 0]
[ 0 0 0 19 0]
[ 0 0 0 0 11]]
8 changes: 4 additions & 4 deletions rna-seq-ml-modeling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "6797f2bc",
"metadata": {},
"outputs": [
Expand All @@ -254,7 +254,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "8f59d610",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -348,7 +348,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"id": "4c88dfb4",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -514,7 +514,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "d1e3b956",
"id": "b26ed2f6",
"metadata": {},
"outputs": [],
"source": []
Expand Down
20 changes: 12 additions & 8 deletions src/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,22 @@ def main():
plt.legend(loc='best')
plt.grid(True)

plot_filename = os.path.join(output_dir, 'explained_variance_plot.png')
output_dir2 = 'results/pre_processing'
os.makedirs(output_dir2, exist_ok=True)
plot_filename = os.path.join(output_dir2, 'explained_variance_plot.png')
plt.savefig(plot_filename)
plt.show()

# Print explained variance values for the selected components
print("Explained variance ratios for each component:")
for i, var in enumerate(explained_variance, start=1):
print(f"Principal Component {i}: {var:.4f}")
# Save explained variance values for the selected components
pca_file_path = os.path.join(output_dir2, 'pca.txt')
with open(pca_file_path, "w") as f:
f.write("Explained variance ratios for each component:\n")
for i, var in enumerate(explained_variance, start=1):
f.write(f"Principal Component {i}: {var:.4f}\n")

print("\nCumulative explained variance ratios:")
for i, cum_var in enumerate(cumulative_explained_variance, start=1):
print(f"Principal Component {i}: {cum_var:.4f}")
f.write("\nCumulative explained variance ratios:\n")
for i, cum_var in enumerate(cumulative_explained_variance, start=1):
f.write(f"Principal Component {i}: {cum_var:.4f}\n")

if __name__ == "__main__":
main()

0 comments on commit 72c7c39

Please sign in to comment.