-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjson2datalad.sh
executable file
·114 lines (99 loc) · 3.32 KB
/
json2datalad.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env bash
###
# CDI = "cryo data ingest"
###
# Expected JSON format
#
# [
# {
# "local_path": "path/to/file1.ext",
# "link": "https://url.nsidc.org/path/to/file1.ext"
# },
# {
# "local_path": "path/to/file2.ext",
# "link": "https://url.nsidc.org/path/to/file2.ext"
# }
# ]
#
# Strict mode: abort on use of an unset variable, on a failure in any stage
# of a pipeline, and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit
# set -o errtrace

# Script name, used as a tag in every log line. Parameter expansion is used
# instead of `basename` so a path containing whitespace or a leading dash
# cannot be word-split or mistaken for an option.
PROGNAME=${0##*/}

# Colour escape sequences; stored unexpanded and interpreted by `echo -e`.
red='\033[0;31m'
orange='\033[0;33m'
green='\033[0;32m'
yellow='\033[0;93m'
nc='\033[0m' # No Color
# Write an INFO line to stdout: timestamp, level tag and program name,
# followed by all arguments joined with spaces, wrapped in ${green}/${nc}.
log_info() {
  # Declared and assigned in one step on purpose: a failing `date` must not
  # abort the script under errexit; it just leaves the timestamp empty.
  local prefix="[$(date --iso-8601=seconds)] [INFO] [${PROGNAME}]"
  echo -e "${green}${prefix} $*${nc}"
}
# Write a WARN line to stdout: timestamp, level tag and program name,
# followed by all arguments joined with spaces, wrapped in ${orange}/${nc}.
log_warn() {
  # Declared and assigned in one step on purpose: a failing `date` must not
  # abort the script under errexit; it just leaves the timestamp empty.
  local prefix="[$(date --iso-8601=seconds)] [WARN] [${PROGNAME}]"
  echo -e "${orange}${prefix} $*${nc}"
}
# Write an ERR line to stderr: timestamp, level tag and program name,
# followed by all arguments joined with spaces, wrapped in ${red}/${nc}.
# Only logs; it does not terminate the script.
log_err() {
  # Declared and assigned in one step on purpose: a failing `date` must not
  # abort the script under errexit; it just leaves the timestamp empty.
  local prefix="[$(date --iso-8601=seconds)] [ERR] [${PROGNAME}]"
  echo -e "${red}${prefix} $*${nc}" >&2
}
# Write a DEBUG line to stdout, wrapped in ${yellow}/${nc}, but only when the
# global `debug` equals 1. Otherwise print nothing and return success.
log_debug() {
  [[ ${debug:-} == 1 ]] || return 0
  # Declared and assigned in one step on purpose: a failing `date` must not
  # abort the script under errexit; it just leaves the timestamp empty.
  local prefix="[$(date --iso-8601=seconds)] [DEBUG] [${PROGNAME}]"
  echo -e "${yellow}${prefix} $*${nc}"
}
# Write an ERR line to stderr, wrapped in ${red}/${nc}, then terminate the
# script with exit status 1. With no arguments the message is "Unknown Error".
err_exit() {
  local reason="${*:-Unknown Error}"
  # Declared and assigned in one step on purpose: a failing `date` must not
  # change the exit path; it just leaves the timestamp empty.
  local prefix="[$(date --iso-8601=seconds)] [ERR] [${PROGNAME}]"
  echo -e "${red}${prefix} ${reason}${nc}" >&2
  exit 1
}
# SIGINT handler: emit three warnings (interrupt caught, process being
# killed, nothing cleaned up), then send this script a terminate signal.
ctrl_c() {
  local note
  for note in "Caught CTRL-C" "Killing process" "No cleanup done..."; do
    log_warn "${note}"
  done
  kill -term $$ # $$ is this script's own PID
}
trap ctrl_c INT # route Ctrl-C (SIGINT) to ctrl_c()
# Print the command-line synopsis and a one-line description of every option
# to stdout. The synopsis shows the invoked script name (PROGNAME), falling
# back to json2datalad.sh; it previously named a different script.
print_usage() {
  cat <<EOF
${PROGNAME:-json2datalad.sh} -j jsonfile -d datalad_dir [-h|--help] [--debug] [-v]
-j|--jsonfile: Input JSON file [keys: local_path & link]
-d|--datalad_dir: Directory where to build dataset
-v|--verbose: Print verbose messages during processing
-h|--help: Print this help
--debug: Print debugging messages
EOF
}
# Check dependencies: abort via err_exit when no `datalad` command can be
# resolved. Uses the `command -v` builtin rather than the external `which`,
# whose presence and output on a miss vary between systems.
# NOTE(review): nothing in the visible part of this script calls this function.
check_shell() {
  if ! command -v datalad >/dev/null 2>&1; then
    err_exit "datalad command not found"
  fi
}
# Parse CLI args.
# Recognised flags set the globals jsonfile, datalad_dir, debug and verbose;
# every other argument is appended to the positional array.
positional=()
while [[ $# -gt 0 ]]; do
  key="$1"
  case "${key}" in
    -h|--help)
      # NOTE(review): help exits with status 1; kept so existing callers see
      # the same exit code.
      print_usage
      exit 1
      ;;
    -j|--jsonfile)
      # Give a readable error instead of a bare nounset "unbound variable"
      # abort when the flag is the last argument.
      if [[ $# -lt 2 ]]; then print_usage; err_exit "${key} requires a value"; fi
      jsonfile="$2"
      shift 2 # past argument and value
      ;;
    -d|--datalad_dir)
      if [[ $# -lt 2 ]]; then print_usage; err_exit "${key} requires a value"; fi
      datalad_dir="$2"
      shift 2 # past argument and value
      ;;
    --debug)
      debug=1
      shift
      ;;
    -v|--verbose)
      verbose=1
      set -o xtrace # trace every command from here on
      shift
      ;;
    *) # unknown option
      positional+=("$1") # save it in an array for later.
      shift
      ;;
  esac
done
### Check inputs, set up environment
# Both -j and -d are mandatory: when one is missing, print the usage text and
# abort; otherwise report the value at debug level.
if [[ -n ${jsonfile:-} ]]; then
  log_debug "JSON file:: ${jsonfile}"
else
  print_usage
  err_exit "-j not set"
fi
if [[ -n ${datalad_dir:-} ]]; then
  log_debug "DATALAD dir: ${datalad_dir}"
else
  print_usage
  err_exit "-d not set"
fi
# Download a dataset into a local datalad repository.
# Globals read: datalad_dir (dataset directory), jsonfile (JSON input file).
# Runs `datalad addurls` with -n first, then again without it, using the
# '{link}' and '{local_path}' templates, and finally `datalad save`.
# All expansions are quoted so paths with spaces or glob characters are
# passed to datalad as single arguments.
cdi_download() {
  log_info "Running datalad addurls (DRYRUN)..."
  datalad addurls -d "${datalad_dir}" -n --fast --nosave "${jsonfile}" '{link}' '{local_path}'

  log_info "Running datalad addurls..."
  datalad addurls -d "${datalad_dir}" --fast --nosave "${jsonfile}" '{link}' '{local_path}'

  log_info "Running datalad save..."
  datalad save "${datalad_dir}" -m "Created ${datalad_dir}"
}
# Create a GitHub remote and push a local datalad repository to it.
# Globals read: datalad_dir (used as repository name, description and source
# directory).
# Calls `gh repo create` for cryo-data/<datalad_dir>, then runs `git push`
# and `datalad push` from inside the dataset directory.
cdi_set_remote() {
  log_info "Creating GitHub repository"
  gh repo create \
    "cryo-data/${datalad_dir}" \
    -d "${datalad_dir}" \
    --public \
    -s "${datalad_dir}"
  # undo: gh repo delete cryo-data/${datalad_dir}

  log_info "Pushing to GitHub"
  # `cd ... &&` makes sure nothing is pushed from the wrong directory when
  # the cd fails, whether or not errexit is active in the calling context.
  (cd "${datalad_dir}" && git push -u origin main)
  (cd "${datalad_dir}" && datalad push)
}
# Run the pipeline: first populate and save the local datalad dataset, then
# create the GitHub repository and push to it. Kept as two plain statements
# (not joined with && or wrapped in `if`) so errexit stays active inside each
# function.
cdi_download
cdi_set_remote