-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_data_dictionary_bold.py
84 lines (70 loc) · 2.53 KB
/
create_data_dictionary_bold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#! /Users/CN/Documents/Projects/Joystick_Cereb_MRS/bin/mrsi_code/env python
# ------------------------------------------------------------------------------
# Script name: create_data_dictionary_bold.sh
#
# Description: Get info for data dictionary from behavioural variables.
#
# Author: Caroline Nettekoven, 2020
#
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
import os
import os.path as op
import pandas as pd
from pandas.api.types import CategoricalDtype
import sys
sys.path.insert(0, '/Users/CN/Documents/Projects/BOLD_study_local/bin')
# Get df
df_file = op.join(
'/Users/CN/Documents/Projects/BOLD_study_local/cleaning/behav/data_for_sharing_Sep2020.csv')
df = pd.read_csv(df_file)
df.head()
df.tail()
# [248 rows x 77 columns]
# ------------------------------ Exclude subjects ------------------------------
# (apart from 507 & 671, as they did not complete behavioural testing, so aren't in the dataframe anyways)
# Note: 530 also does not appear in behavioural data
excluded_subjects = ['553', '598', '629', '652', '649', '562', '620', '507', '671'
'530', '658', '559', '577', '591']
for x in excluded_subjects:
# if not any(df['code'].str[:3] == x):
# print(x)
df = df.drop(df[df['code'].str[:3] == x].index)
# [236 rows x 77 columns]
# ------------------------------ Correct levels ------------------------------
df.head()
# code
df.code = df.code.astype('category')
# group
df.final_group = df.final_group.astype('category')
# label_map = {
# 1: 'typ',
# 2: 'dld',
# 3: 'hist'
# }
# df['final_group'] = df['final_group'].map(label_map)
# df.final_group = df.final_group.astype(CategoricalDtype(label_map.values()))
# sex
df.sex = df.sex.astype('category')
# label_map = {
# 1: 'male',
# 2: 'female',
# }
# df['sex'] = df['sex'].map(label_map)
# df.sex = df.sex.astype(CategoricalDtype(label_map.values()))
# domhand
df.domhand = df.domhand.astype('category')
# label_map = {
# 1: 'right',
# 2: 'left',
# }
# df['domhand'] = df['domhand'].map(label_map)
# df.domhand = df.domhand.astype(CategoricalDtype(label_map.values()))
# Age
df = df.rename(columns={'age': 'age_year_mon'})
# ------------------------------ Show range ------------------------------
for col in df.columns:
if df[col].dtypes == 'int32' or df[col].dtypes == 'float64':
#
print('{0} \n{1}-{2}'.format(col,
int(df[col].min()), int(df[col].max())))