From 5b37230bcd48f0002129bb1371db6e6152f86e4d Mon Sep 17 00:00:00 2001 From: pichuan Date: Wed, 1 Jun 2022 22:12:01 -0700 Subject: [PATCH] Update 2 remaining docs for v1.4.0 release. PiperOrigin-RevId: 452458249 --- docs/deepvariant-training-case-study.md | 59 ++++++++++++++----------- docs/metrics-deeptrio.md | 24 +++++----- 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/docs/deepvariant-training-case-study.md b/docs/deepvariant-training-case-study.md index e176df35..2f86d678 100644 --- a/docs/deepvariant-training-case-study.md +++ b/docs/deepvariant-training-case-study.md @@ -20,8 +20,8 @@ We demonstrated that by training on 1 replicate of BGISEQ-500 whole genome data (everything except for chromosome 20-22), we can significantly improve the accuracy comparing to the WGS model as a baseline: -* Indel F1 :93.4908% --> 98.0862%; -* SNP F1: 99.8838% --> 99.8943%. +* Indel F1: 93.4908% --> 98.1305%; +* SNP F1: 99.8838% --> 99.9011%. Training for 50,000 steps took about 1.5 hours on 1 GPU. Currently we cannot train on multiple GPUs. 
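To put the headline F1 gains above in perspective, the residual error rate (1 - F1) is often more telling than the raw F1 delta. Below is a minimal sketch (not part of the original patch; the helper name `error_reduction` is ours) that computes the fraction of remaining errors eliminated by retraining, using the F1 values quoted above:

```python
def error_reduction(f1_before: float, f1_after: float) -> float:
    """Fraction of the residual error (1 - F1) removed by retraining."""
    return 1 - (1 - f1_after) / (1 - f1_before)

# F1 values quoted in the doc text above (as fractions, not percent).
indel = error_reduction(0.934908, 0.981305)  # Indel: 93.4908% -> 98.1305%
snp = error_reduction(0.998838, 0.999011)    # SNP:   99.8838% -> 99.9011%
print(f"Indel error reduction: {indel:.1%}")
print(f"SNP error reduction:   {snp:.1%}")
```

By this measure, retraining removes roughly 71% of the remaining indel errors, even though the absolute F1 delta looks small.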
@@ -40,7 +40,7 @@ YOUR_PROJECT=REPLACE_WITH_YOUR_PROJECT OUTPUT_GCS_BUCKET=REPLACE_WITH_YOUR_GCS_BUCKET BUCKET="gs://deepvariant" -BIN_VERSION="1.3.0" +BIN_VERSION="1.4.0" MODEL_BUCKET="${BUCKET}/models/DeepVariant/${BIN_VERSION}/DeepVariant-inception_v3-${BIN_VERSION}+data-wgs_standard" GCS_PRETRAINED_WGS_MODEL="${MODEL_BUCKET}/model.ckpt" @@ -94,7 +94,7 @@ gunzip "${DATA_DIR}/ucsc_hg19.fa.gz" ``` sudo apt -y update sudo apt -y install parallel -curl -O https://raw.githubusercontent.com/google/deepvariant/r1.3/scripts/install_nvidia_docker.sh +curl -O https://raw.githubusercontent.com/google/deepvariant/r1.4/scripts/install_nvidia_docker.sh bash -x install_nvidia_docker.sh ``` @@ -155,8 +155,12 @@ Starting in v1.4.0, we added an extra channel in our WGS setting using the ``` $ cat "${OUTPUT_DIR}/training_set.with_label.tfrecord-00000-of-00016.gz.example_info.json" -{"version": "1.3.0", "shape": [100, 221, 7], "channels": [1, 2, 3, 4, 5, 6, 19]} -```` +{"version": "1.4.0", "shape": [100, 221, 7], "channels": [1, 2, 3, 4, 5, 6, 19]} +``` + +Depending on your data type, you might want to tweak the flags for the +`make_examples` step, which can result in a different shape of the output +examples. We will want to shuffle this on Dataflow later, so we copy the data to GCS bucket first: @@ -230,7 +234,7 @@ Then, get the code that shuffles: ``` mkdir -p ${SHUFFLE_SCRIPT_DIR} -wget https://raw.githubusercontent.com/google/deepvariant/r1.3/tools/shuffle_tfrecords_beam.py -O ${SHUFFLE_SCRIPT_DIR}/shuffle_tfrecords_beam.py +wget https://raw.githubusercontent.com/google/deepvariant/r1.4/tools/shuffle_tfrecords_beam.py -O ${SHUFFLE_SCRIPT_DIR}/shuffle_tfrecords_beam.py ``` Next, we shuffle the data using DataflowRunner. Before that, please make sure @@ -281,13 +285,13 @@ In the output, the `tfrecord_path` should be valid paths in gs://. 
``` # Generated by shuffle_tfrecords_beam.py +# class2: 124564 +# class1: 173668 +# class0: 44526 # # --input_pattern_list=OUTPUT_BUCKET/training_set.with_label.tfrecord-?????-of-00016.gz # --output_pattern_prefix=OUTPUT_BUCKET/training_set.with_label.shuffled # -# class2: 124564 -# class1: 173668 -# class0: 44526 name: "HG001" tfrecord_path: "OUTPUT_GCS_BUCKET/training_set.with_label.shuffled-?????-of-?????.tfrecord.gz" @@ -319,13 +323,13 @@ cat "${OUTPUT_DIR}/validation_set.dataset_config.pbtxt" ``` # Generated by shuffle_tfrecords_beam.py +# class0: 5595 +# class1: 31852 +# class2: 21954 # # --input_pattern_list=OUTPUT_DIR/validation_set.with_label.tfrecord-?????-of-00016.gz # --output_pattern_prefix=OUTPUT_DIR/validation_set.with_label.shuffled # -# class2: 21954 -# class1: 31852 -# class0: 5595 name: "HG001" tfrecord_path: "OUTPUT_DIR/validation_set.with_label.shuffled-?????-of-?????.tfrecord.gz" @@ -425,7 +429,7 @@ gsutil cat "${TRAINING_DIR}"/best_checkpoint.txt ``` In my run, this showed that the model checkpoint that performs the best on the -validation set was `${TRAINING_DIR}/model.ckpt-50000`. +validation set was `${TRAINING_DIR}/model.ckpt-33739`. It's possible that training more steps can result in better accuracy. For now let's use this model to do the final evaluation on the test set and see how we @@ -437,7 +441,7 @@ sudo docker run --gpus 1 \ google/deepvariant:"${BIN_VERSION}-gpu" \ /opt/deepvariant/bin/run_deepvariant \ --model_type WGS \ - --customized_model "${TRAINING_DIR}/model.ckpt-50000" \ + --customized_model "${TRAINING_DIR}/model.ckpt-33739" \ --ref "${REF}" \ --reads "${BAM_CHR20}" \ --regions "chr20" \ @@ -445,9 +449,9 @@ sudo docker run --gpus 1 \ --num_shards=${N_SHARDS} ``` -Note that in v1.4.0, by using `--model_type WGS`, it will automatically add -`insert_size` as an extra channel. So we don't need to add it in -`--make_examples_extra_args`. 
+In v1.4.0, by using `--model_type WGS`, `run_deepvariant` will +automatically add `insert_size` as an extra channel in the `make_examples` step. +So we don't need to add it in `--make_examples_extra_args`. Once this is done, we have the final callset in VCF format here: `${OUTPUT_DIR}/test_set.vcf.gz`. Next step is to run `hap.py` to @@ -475,24 +479,25 @@ The output of `hap.py` is here: ``` [I] Total VCF records: 3775119 [I] Non-reference VCF records: 3775119 -[W] overlapping records at chr20:11311221 for sample 0 -[W] Variants that overlap on the reference allele: 1 +[W] overlapping records at chr20:35754687 for sample 0 +[W] Variants that overlap on the reference allele: 3 [I] Total VCF records: 132914 -[I] Non-reference VCF records: 96625 +[I] Non-reference VCF records: 96580 +2022-06-02 00:36:08,582 WARNING Creating template for vcfeval. You can speed this up by supplying a SDF template that corresponds to /home/pichuan_google_com/training-case-study/input/data/ucsc_hg19.fa Benchmarking Summary: Type Filter TRUTH.TOTAL TRUTH.TP TRUTH.FN QUERY.TOTAL QUERY.FP QUERY.UNK FP.gt FP.al METRIC.Recall METRIC.Precision METRIC.Frac_NA METRIC.F1_Score TRUTH.TOTAL.TiTv_ratio QUERY.TOTAL.TiTv_ratio TRUTH.TOTAL.het_hom_ratio QUERY.TOTAL.het_hom_ratio -INDEL ALL 10023 9801 222 19307 167 8940 109 34 0.977851 0.983891 0.463044 0.980862 NaN NaN 1.547658 2.058546 -INDEL PASS 10023 9801 222 19307 167 8940 109 34 0.977851 0.983891 0.463044 0.980862 NaN NaN 1.547658 2.058546 - SNP ALL 66237 66156 81 78318 59 12065 16 2 0.998777 0.999109 0.154051 0.998943 2.284397 2.199583 1.700387 1.797428 - SNP PASS 66237 66156 81 78318 59 12065 16 2 0.998777 0.999109 0.154051 0.998943 2.284397 2.199583 1.700387 1.797428 +INDEL ALL 10023 9806 217 19266 163 8898 107 33 0.978350 0.984279 0.461850 0.981305 NaN NaN 1.547658 2.046311 +INDEL PASS 10023 9806 217 19266 163 8898 107 33 0.978350 0.984279 0.461850 0.981305 NaN NaN 1.547658 2.046311 + SNP ALL 66237 66160 77 78315 54 12065 15 4 0.998838 0.999185 0.154057 0.999011 2.284397 2.200204 1.700387 1.798656 + SNP PASS 66237 66160 77 78315 54 12065 15 4 0.998838 0.999185 0.154057 0.999011 2.284397 2.200204 1.700387 1.798656 ``` To summarize, the accuracy is: Type | # FN | # FP | Recall | Precision | F1\_Score ----- | ---- | ---- | -------- | --------- | --------- -INDEL | 222 | 167 | 0.977851 | 0.983891 | 0.980862 -SNP | 81 | 59 | 0.998777 | 0.999109 | 0.998943 +INDEL | 217 | 163 | 0.978350 | 0.984279 | 0.981305 +SNP | 77 | 54 | 0.998838 | 0.999185 | 0.999011 The baseline we're comparing to is to directly use the WGS model to make the calls, using this command: diff --git a/docs/metrics-deeptrio.md b/docs/metrics-deeptrio.md index 087ad9e8..03d2dacd 100644 --- a/docs/metrics-deeptrio.md +++ b/docs/metrics-deeptrio.md @@ -8,12 +8,12 @@ Runtime is on HG002/HG003/HG004 (all chromosomes). Stage | Wall time (minutes) -------------------------------- | ----------------- -make_examples | ~501m -call_variants for HG002 | ~348m -call_variants for HG003 | ~344m -call_variants for HG004 | ~344m -postprocess_variants (parallel) | ~67m -total | ~1604m = ~26.73 hours +make_examples | ~508m +call_variants for HG002 | ~341m +call_variants for HG003 | ~349m +call_variants for HG004 | ~347m +postprocess_variants (parallel) | ~65m +total | ~1610m = ~26.83 hours ### Accuracy @@ -49,12 +49,12 @@ Runtime is on HG002/HG003/HG004 (all chromosomes). Stage | Wall time (minutes) -------------------------------- | ------------------- -make_examples | ~798m -call_variants for HG002 | ~261m -call_variants for HG003 | ~264m -call_variants for HG004 | ~263m +make_examples | ~829m +call_variants for HG002 | ~263m +call_variants for HG003 | ~266m +call_variants for HG004 | ~268m postprocess_variants (parallel) | ~78m -total | ~1664m = ~27.73 hours +total | ~1704m = ~28.4 hours ### Accuracy @@ -137,7 +137,7 @@ Use `gcloud compute ssh` to log in to the newly created instance. 
Download and run any of the following case study scripts: ``` -curl -O https://raw.githubusercontent.com/google/deepvariant/r1.3/scripts/inference_deeptrio.sh +curl -O https://raw.githubusercontent.com/google/deepvariant/r1.4/scripts/inference_deeptrio.sh # WGS bash inference_deeptrio.sh --model_preset WGS
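As a sanity check on the hap.py benchmarking summary shown earlier in this patch, the headline metrics can be re-derived from the raw counts. The sketch below assumes hap.py's summary semantics (query TP = QUERY.TOTAL - QUERY.FP - QUERY.UNK, recall = TRUTH.TP / TRUTH.TOTAL); the helper name `happy_metrics` is ours:

```python
def happy_metrics(truth_total: int, truth_tp: int,
                  query_total: int, query_fp: int, query_unk: int):
    """Re-derives (recall, precision, F1) from hap.py summary counts."""
    recall = truth_tp / truth_total
    # Query calls that are neither false positives nor unknown (UNK) count as TP.
    query_tp = query_total - query_fp - query_unk
    precision = query_tp / (query_tp + query_fp)
    f1 = 2 * precision * recall / (precision + recall)
    return recall, precision, f1

# Counts from the INDEL ALL and SNP ALL rows of the summary above.
indel = happy_metrics(10023, 9806, 19266, 163, 8898)
snp = happy_metrics(66237, 66160, 78315, 54, 12065)
print("INDEL recall/precision/F1: %.6f %.6f %.6f" % indel)
print("SNP   recall/precision/F1: %.6f %.6f %.6f" % snp)
```

These reproduce the METRIC.Recall, METRIC.Precision, and METRIC.F1_Score columns (0.978350 / 0.984279 / 0.981305 for indels), which is a quick way to catch a mistranscribed number when updating these tables.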