Skip to content

Commit

Permalink
Bug fixes and adding more type hints.
Browse files Browse the repository at this point in the history
  • Loading branch information
mauraisa committed Apr 5, 2019
1 parent ae42a36 commit bdfa50e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 18 deletions.
31 changes: 17 additions & 14 deletions protein_loc_scraper/dataframe/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from typing import Dict
from typing import Dict, List
import csv

class DataFrame(object):
Expand All @@ -10,16 +10,19 @@ def __init__(self, data: Dict = None):
self.nrow = 0
self.ncol = 0
self.data = dict()
self.columns = list()
self._columns = list()

if not data is None:
self._fromDict(data)

def __getitem__(self, item):
def __getitem__(self, item: str) -> List:
if item not in self._columns:
raise KeyError('{} is not a column!'.format(item))
return self.data[item]

def __setitem__(self, key, value):
def __setitem__(self, key: str, value: List):
self.data[key] = value
self._columns.append(key)

def _fromDict(self, data: Dict):
for i, k, v in enumerate(data.items()):
Expand All @@ -29,8 +32,8 @@ def _fromDict(self, data: Dict):
if self.nrow != len(v):
raise ValueError('Columns must all be same length!')
self.data[k] = v
self.columns.append(k)
self.ncol = len(self.columns)
self._columns.append(k)
self.ncol = len(self._columns)
self.data[DataFrame._ROW_NAME] = [x for x in range(self.nrow)]


Expand All @@ -46,17 +49,17 @@ def to_csv(self, ofname: str, sep: str = '\t'):
outF = open(ofname, 'w')

#print headers
for i, s in enumerate(self.columns):
for i, s in enumerate(self._columns):
if i == 0:
outF.write('{}'.format(s))
else: outF.write('\t{}'.format(s))
else: outF.write('{}{}'.format(sep, s))
outF.write('\n')

for i, row in self.iterrows():
for col in self.columns:
if i == 0:
for _, row in self.iterrows():
for j, col in enumerate(self._columns):
if j == 0:
outF.write('{}'.format(row[col]))
else: outF.write('\t{}'.format(row[col]))
else: outF.write('{}{}'.format(sep, row[col]))
outF.write('\n')


Expand All @@ -76,7 +79,7 @@ def read_tsv(fname: str, hasHeader: bool = True):
_start = 0
ret.data = {x: list() for x in _keys}
ret.data[DataFrame._ROW_NAME] = list()
ret.columns = _keys
ret._columns = _keys
ret.ncol = len(_keys)

#iterate through lines
Expand All @@ -85,7 +88,7 @@ def read_tsv(fname: str, hasHeader: bool = True):
if len(elems) != ret.ncol:
raise RuntimeError('Incorect number elements in row: {}'.format(i))
for j, elem in enumerate(elems):
ret.data[ret.columns[j]].append(elem)
ret.data[ret._columns[j]].append(elem)
ret.data[DataFrame._ROW_NAME].append(i)
ret.nrow = len(ret.data[DataFrame._ROW_NAME])

Expand Down
14 changes: 11 additions & 3 deletions protein_loc_scraper/locScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,21 @@ def main():
sys.stdout.write('Working on {}...\n'.format(ifname))
df = dataframe.read_tsv(ifname)

ids = df[args.idCol]
#get list of uniprot IDs
sys.stdout.write('Using \'{}\' as the uniprot ID column.\n'.format(args.idCol))
try:
ids = df[args.idCol]
except KeyError as e:
sys.stderr.write('Error in {}: {}\nSkipping...\n'.format(ifname, e))
continue

#get locations
locations = scraper.getLocList(ids, nThread = args.nThread)

df[args.locCol] = locations

#write results
df.to_csv(ofnames[i], sep = '\t')
sys.stdout.write('Results written to {}\n'.format(ofnames[i]))
sys.stdout.write('Results written to {}\n\n'.format(ofnames[i]))

if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion protein_loc_scraper/scraper/parallelization.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def getLocList(uniProtIDs: List, nThread: int = None) -> List:
#lookup locs using thread pool
sys.stdout.write('Searching for locations with {} threads...\n'.format(_nThread))
with Pool(processes=_nThread) as pool:
ret = list(tqdm.tqdm(pool.imap(getLocs, uniProtIDs),
ret = list(tqdm(pool.imap(getLocs, uniProtIDs),
total = listLen,
miniters=1,
file = sys.stdout))
Expand Down

0 comments on commit bdfa50e

Please sign in to comment.