-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathclient.cfg
89 lines (72 loc) · 2.42 KB
/
client.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Usage:
#
# Copy client.cfg to your.cfg and set LUIGI_CONFIG_PATH=your.cfg.
[ETLAccount]
# Identifies the database account used for ETL and how to reach it.
#
# Account identifier in SQLAlchemy URL format. Be sure this includes
# all and only those details that identify the ETL
# account. Insignificant details such as password and tunnel port go
# in other parameters; including them here would result in
# inconsistent task identifiers.
#
# WRONG:
# oracle://username:password@localhost:5555/database_sid
#
# RIGHT:
account=oracle://grouse_etl_1@dbhost2/database_sid
# NOTE(review): presumably the name under which the account's password
# is looked up (e.g. an environment variable), not the password
# itself -- confirm against the code that reads it.
passkey=GROUSE_ETL_1_ON_DBHOST2
# host:port of the local end of the ssh tunnel to the database listener.
ssh_tunnel=localhost:4768
# 4768 = GROU on phone keypad. Salt to taste.
# To access ssh tunnels from docker containers, the
# `kingsquare/tunnel` docker image is handy (though not on a mac :-/):
#
# docker run --rm --name lsnr -v $SSH_AUTH_SOCK:/ssh-agent -t kingsquare/tunnel \
# *:4768:localhost:1521 USERNAME@DBHOST
#
# Then use...
# ssh_tunnel=lsnr:4768
#
# ... and add a `--link` when running:
#
# docker run --rm --link=lsnr ... -t grouse-etl ...
[CMSExtract]
# What schema is the CMS RIF data stored in?
cms_rif=CMS_DEID
# When was it downloaded?
# e.g. suppose a jenkins download job was http://.../job/cms_syn_dl/8
# and it finished Jul 30, 2015 8:54:25 AM. Then we have:
download_date=1487378515445
# NOTE(review): the sample value above is in milliseconds since the
# epoch and corresponds to 2017-02-18 (UTC), not to the Jul 30, 2015
# example; substitute the value from your own download build.
#
# In order to get build dates from jenkins to luigi, i.e.
# from groovy to python, we use integers, since date interchange
# is a pain.
#
# Using the Jenkins API http://javadoc.jenkins.io/
# a bit of groovy like this gets what we need:
# dlBuild?.timestamp.getTimeInMillis() + dlBuild?.duration
#
# Split work into how many chunks by bene_id?
# (Commented out here; uncomment to set a value explicitly.)
# bene_chunks = 64
[I2B2ProjectCreate]
# Settings for the target i2b2 project.
#
# Where did we (or should we) create an i2b2 project?
# ref i2b2 sources:
# crc_create_datamart_oracle.sql
# crc_create_uploader_oracle.sql
star_schema = GROUSEDATA
# And what is the i2b2 project_id?
project_id = GROUSE
# TODO: add a parameter for the sq_up_encdim_encounternum start value,
# to avoid overlap with 2011-2013 medpar.
[resources]
# NOTE(review): presumably luigi scheduler resource counts, i.e. only
# one unit of each mapping resource is available, so tasks that require
# it do not run concurrently -- confirm against the luigi documentation
# for the [resources] section.
encounter_mapping=1
patient_mapping=1
# Toward scalable i2p with spark
[JDBC4ETL]
# JDBC connection details for the ETL account. The user, host,
# database SID and passkey here repeat the values given in
# [ETLAccount]; keep the two sections in sync when editing either.
db_url = jdbc:oracle:thin:@dbhost2:1521:database_sid
user = grouse_etl_1
passkey = GROUSE_ETL_1_ON_DBHOST2
[spark]
# from ${SPARK_HOME}/bin/pyspark:
#
# export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
# export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH"
#
# The leading `...` in the paths below is a placeholder; replace it
# with the directory where Spark and the Oracle instant client are
# installed on your machine.
spark-submit = .../spark-2.2.1-bin-hadoop2.7/bin/spark-submit
# Spark master URL; local[*] runs Spark locally using all available cores.
master = local[*]
# Oracle JDBC driver jar.
# NOTE(review): presumably passed to spark-submit as its jars option -- confirm.
jars = .../instantclient_11_2/ojdbc6.jar