-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmapping_stats.py
37 lines (24 loc) · 1.11 KB
/
mapping_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import re, string, pandas as pd, numpy as np, yaml
from librarySize import *
# Collect per-sample mapping statistics into one table and write it to CSV.
#
# The helper functions used below (getLibSize, getStarMapped,
# getFeatureCountsMapped) are not defined in this file; they are expected to
# arrive through the `from librarySize import *` line above.
# NOTE(review): what each helper parses and returns is not visible from here
# -- confirm against librarySize.py.

# ---- Locations -------------------------------------------------------------
# Every input/output path is derived from this single project root.
project_folder = "/SAN/vyplab/alb_projects/data/4su_tdp_f210i/"

sampleCSVpath = project_folder + "fastq_locations_4su.csv"
star_output_folder = project_folder + "STAR_aligned_redone/"
feature_counts_output_folder = project_folder + "feature_counts/"
mappingStats = project_folder + "mappingStats.csv"

# ---- Sample sheet ----------------------------------------------------------
# Read the sample sheet and turn missing cells (NaN) into empty strings.
SAMPLES = pd.read_csv(sampleCSVpath, sep=",").replace(np.nan, '', regex=True)
SAMPLE_NAMES = SAMPLES['sample_name'].tolist()

# ---- Per-sample log paths --------------------------------------------------
# One "<sample>.Log.final.out" path per sample inside star_output_folder.
allStarLogs = [
    star_output_folder + sample + ".Log.final.out"
    for sample in SAMPLE_NAMES
]
# One "<sample>_featureCounts_results.txt.summary" path per sample inside
# feature_counts_output_folder.
allFeatureLogs = [
    feature_counts_output_folder + sample + "_featureCounts_results.txt.summary"
    for sample in SAMPLE_NAMES
]

# ---- Extract the statistics ------------------------------------------------
# Each helper is applied to every log, one full pass per helper, so the three
# result lists are index-aligned with SAMPLE_NAMES.
libs = [getLibSize(star_log) for star_log in allStarLogs]
starMapped = [getStarMapped(star_log) for star_log in allStarLogs]
featureMapped = [
    getFeatureCountsMapped(feature_log) for feature_log in allFeatureLogs
]

# ---- Assemble and save -----------------------------------------------------
# Column order of the resulting table follows the key order of this dict.
stats = {
    'sample': SAMPLE_NAMES,
    'librarySize': libs,
    'percentMapped': starMapped,
    'featureCountsMappedToGene': featureMapped,
}
df = pd.DataFrame(stats)

# Echo the table to stdout, then persist it as comma-separated text. No
# `index` argument is given, so pandas' default index handling applies.
print(df)
df.to_csv(mappingStats, sep=',')