From 77c543183d8c4908fc3802b0e7ff74854972ceb9 Mon Sep 17 00:00:00 2001
From: emrecam
Date: Tue, 10 Sep 2024 15:24:41 +0300
Subject: [PATCH] :wrench: Test(FileDataSourceReader): Add tests for FileDataSourceReader class

---
 .../folder-test/csv/patient1.csv          |   5 +
 .../folder-test/csv/patient2.csv          |   5 +
 .../folder-test/csv/patient3.csv          |   5 +
 .../folder-test/json/patient1.json        |   3 +
 .../folder-test/json/patient2.json        |   3 +
 .../folder-test/json/patient3.json        |   3 +
 .../folder-test/parquet/patients1.parquet | Bin 0 -> 1091 bytes
 .../folder-test/parquet/patients2.parquet | Bin 0 -> 1114 bytes
 .../folder-test/parquet/patients3.parquet | Bin 0 -> 1112 bytes
 .../folder-test/txt-csv/patient1.txt      |   4 +
 .../folder-test/txt-csv/patient2.txt      |   4 +
 .../folder-test/txt-csv/patient3.txt      |   4 +
 .../folder-test/txt-ndjson/patient1.txt   |   3 +
 .../folder-test/txt-ndjson/patient2.txt   |   3 +
 .../folder-test/txt-ndjson/patient3.txt   |   3 +
 .../single-file-test/patients-ndjson.txt  |  11 +
 .../single-file-test/patients.csv         |  12 +
 .../single-file-test/patients.json        |  11 +
 .../single-file-test/patients.parquet     | Bin 0 -> 2065 bytes
 .../single-file-test/patients.tsv         |  11 +
 .../single-file-test/patients.txt         |  12 +
 .../data/read/FileDataSourceReaderTest.scala | 471 ++++++++++++++++++
 22 files changed, 573 insertions(+)
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient1.csv
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient2.csv
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient3.csv
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient1.json
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient2.json
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient3.json
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/parquet/patients1.parquet
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/parquet/patients2.parquet
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/parquet/patients3.parquet
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-csv/patient1.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-csv/patient2.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-csv/patient3.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-ndjson/patient1.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-ndjson/patient2.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/txt-ndjson/patient3.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients-ndjson.txt
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.csv
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.json
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.parquet
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.tsv
 create mode 100644 tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.txt
 create mode 100644 tofhir-engine/src/test/scala/io/tofhir/test/engine/data/read/FileDataSourceReaderTest.scala

diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient1.csv b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient1.csv
new file mode 100644
index 00000000..2bf60499
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient1.csv
@@ -0,0 +1,5 @@
+! This line is a comment to test spark options
+pid,gender,birthDate
+p1,male,2000-05-10
+p2,male,1985-05-08
+p3,male,1997-02
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient2.csv b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient2.csv
new file mode 100644
index 00000000..cf0b2bda
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient2.csv
@@ -0,0 +1,5 @@
+! This line is a comment to test spark options
+pid,gender,birthDate
+p4,male,1999-06-05
+p5,male,1965-10-01
+p6,female,1991-03
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient3.csv b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient3.csv
new file mode 100644
index 00000000..bde9dd4c
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/csv/patient3.csv
@@ -0,0 +1,5 @@
+! This line is a comment to test spark options
+pid,gender,birthDate
+p7,female,1972-10-25
+p8,female,2010-01-10
+p9,female,1999-05-12
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient1.json b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient1.json
new file mode 100644
index 00000000..28e95146
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient1.json
@@ -0,0 +1,3 @@
+{"pid": "p1", "gender": "male", "birthDate": "2000-05-10"},
+{"pid": "p2", "gender": "male", "birthDate": "1985-05-08"},
+{"pid": "p3", "gender": "male", "birthDate": "1997-02"}
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient2.json b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient2.json
new file mode 100644
index 00000000..e9aeb610
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient2.json
@@ -0,0 +1,3 @@
+{"pid": "p4", "gender": "male", "birthDate": "1999-06-05"},
+{"pid": "p5", "gender": "male", "birthDate": "1965-10-01"},
+{"pid": "p6", "gender": "female", "birthDate": "1991-03"}
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient3.json b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient3.json
new file mode 100644
index 00000000..6b5b9e9a
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/json/patient3.json
@@ -0,0 +1,3 @@
+{"pid": "p7", "gender": "female", "birthDate": "1972-10-25"},
+{"pid": "p8", "gender": "female", "birthDate": "2010-01-10"},
+{"pid": "p9", "gender": "female", "birthDate": "1999-05-12"}
\ No newline at end of file
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/parquet/patients1.parquet b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/folder-test/parquet/patients1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..502c56472084891fdb402030faeeb16bbffb735b
GIT binary patch
literal 1091
[base85 binary data omitted; the binary literals for patients2.parquet, patients3.parquet and single-file-test/patients.parquet, along with the diffs for the txt-csv, txt-ndjson, patients-ndjson.txt, patients.csv and patients.json fixtures, are garbled beyond recovery here]
literal 0
HcmV?d00001

diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.tsv b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.tsv
new file mode 100644
index 00000000..987ba87d
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.tsv
@@ -0,0 +1,11 @@
+pid	gender	birthDate	deceasedDateTime	homePostalCode
+p1	male	2000-05-10
+p2	male	1985-05-08	2017-03-10	G02547
+p3	male	1997-02
+p4	male	1999-06-05		H10564
+p5	male	1965-10-01	2019-04-21	G02547
+p6	female	1991-03
+p7	female	1972-10-25		V13135
+p8	female	2010-01-10		Z54564
+p9	female	1999-05-12
+p10	female	2003-11
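The fixtures above and below are built around the Spark CSV options the new tests exercise: a leading "!" comment line and deliberate trailing spaces. As a reference point, here is a minimal plain-Spark sketch of those options (stock Spark behavior only, with illustrative paths; this is not FileDataSourceReader's actual wiring):

import org.apache.spark.sql.SparkSession

object CsvOptionsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("csv-options-sketch").getOrCreate()
    // "comment" -> "!" makes Spark skip the leading "! ..." line,
    // and "ignoreTrailingWhiteSpace" -> "true" strips padding such as "p1 ".
    val txt = spark.read
      .option("header", "true")
      .option("comment", "!")
      .option("ignoreTrailingWhiteSpace", "true")
      .csv("single-file-test/patients.txt") // illustrative path
    // TSV is the same CSV reader with a tab separator.
    val tsv = spark.read
      .option("header", "true")
      .option("sep", "\t")
      .csv("single-file-test/patients.tsv") // illustrative path
    txt.show()
    tsv.show()
    spark.stop()
  }
}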
diff --git a/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.txt b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.txt
new file mode 100644
index 00000000..dc6b243e
--- /dev/null
+++ b/tofhir-engine/src/test/resources/file-data-source-reader-test-data/single-file-test/patients.txt
@@ -0,0 +1,12 @@
+! This file has trailing spaces for testing purposes
+pid,gender,birthDate,deceasedDateTime ,homePostalCode
+p1 ,male,2000-05-10,,
+p2,male,1985-05-08,2017-03-10,G02547
+p3,male,1997-02,,
+p4,male,1999-06-05,,H10564
+p5,male,1965-10-01,2019-04-21,G02547
+p6,female ,1991-03,,
+p7 ,female,1972-10-25,,V13135
+p8,female,2010-01-10,,Z54564
+p9,female,1999-05-12,,
+p10,female,2003-11 ,,
diff --git a/tofhir-engine/src/test/scala/io/tofhir/test/engine/data/read/FileDataSourceReaderTest.scala b/tofhir-engine/src/test/scala/io/tofhir/test/engine/data/read/FileDataSourceReaderTest.scala
new file mode 100644
index 00000000..a6e2be9f
--- /dev/null
+++ b/tofhir-engine/src/test/scala/io/tofhir/test/engine/data/read/FileDataSourceReaderTest.scala
@@ -0,0 +1,471 @@
+package io.tofhir.test.engine.data.read
+
+import io.tofhir.engine.config.ToFhirConfig
+import io.tofhir.engine.data.read.FileDataSourceReader
+import io.tofhir.engine.model.{FileSystemSource, FileSystemSourceSettings, SourceFileFormats}
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.must.Matchers.{contain, include}
+import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
+
+import java.nio.file.Paths
+import java.sql.Timestamp
+import java.text.SimpleDateFormat
+import scala.language.postfixOps
+
+/**
+ * Unit tests for the FileDataSourceReader class.
+ * Tests the functionality of reading files in different formats.
+ */
+class FileDataSourceReaderTest extends AnyFlatSpec with BeforeAndAfterAll {
+
+  /**
+   * SparkSession used for the test cases.
+   */
+  val sparkSession: SparkSession = ToFhirConfig.sparkSession
+  /**
+   * Path to the directory containing test data for FileDataSourceReader.
+   */
+  val testDataFolderPath: String = Paths.get(getClass.getResource("/file-data-source-reader-test-data").toURI).toAbsolutePath.toString
+  /**
+   * Instance of FileDataSourceReader used to read data files during tests.
+   */
+  val fileDataSourceReader = new FileDataSourceReader(sparkSession)
+  /**
+   * Date format used for parsing and formatting date values in test cases.
+   */
+  val dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+  }
+
+  override def afterAll(): Unit = {
+    super.afterAll()
+  }
+
+  /**
+   * Tests that the FileDataSourceReader correctly handles invalid input by throwing the appropriate exceptions.
+   *
+   * The test case covers two scenarios:
+   * 1. Providing a file path instead of a directory for a streaming job should result in an IllegalArgumentException.
+   * 2. Providing unsupported file formats or extensions should result in a NotImplementedError.
+   *
+   * The following configurations are used for the tests:
+   * - `illegalArgumentSourceBinding`: A source binding with a 'file' path to test the directory requirement for streaming jobs.
+   * - `unsupportedFileFormatSourceBinding`: A source binding with an unsupported file format to test the unsupported format handling.
+   * - `unsupportedExtensionSourceBinding`: A source binding with an unsupported file extension to test the unsupported extension handling.
+   * - `streamJobSourceSettings`: Mapping job source settings configured for a streaming job.
+   * - `batchJobSourceSettings`: Mapping job source settings configured for a batch job.
+   *
+   * The test verifies the following:
+   * 1. An IllegalArgumentException is thrown with the expected message when a file path is provided instead of a directory for a streaming job.
+   * 2. A NotImplementedError is thrown for unsupported file formats and extensions, indicating that these cases are not yet implemented or handled.
+   *
+   */
+  it should "throw IllegalArgumentException, NotImplementedError when necessary" in {
+    // Folder containing the test files for this test
+    val folderPath = "/single-file-test"
+
+    // Test case 1: Verify that providing a file path instead of a directory throws an IllegalArgumentException
+    val fileName: String = "patients.csv"
+    val illegalArgumentSourceBinding = FileSystemSource(path = fileName)
+    val streamJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest0", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath), asStream = true)
+    val exception = intercept[IllegalArgumentException] {
+      fileDataSourceReader.read(illegalArgumentSourceBinding, streamJobSourceSettings, Option.empty)
+    }
+    exception.getMessage should include(s"${fileName} is not a directory. For streaming job, you should provide a directory.")
+
+    // Test case 2: Verify that unsupported file formats and extensions throw a NotImplementedError
+    val unsupportedFileFormatSourceBinding = FileSystemSource(path = fileName, fileFormat = Some("UNSUPPORTED"))
+    val unsupportedExtensionSourceBinding = FileSystemSource(path = "patients.UNSUPPORTED")
+    val batchJobSourceSettings = streamJobSourceSettings.copy(asStream = false)
+    assertThrows[NotImplementedError] {
+      fileDataSourceReader.read(unsupportedFileFormatSourceBinding, batchJobSourceSettings, Option.empty)
+    }
+    assertThrows[NotImplementedError] {
+      fileDataSourceReader.read(unsupportedExtensionSourceBinding, batchJobSourceSettings, Option.empty)
+    }
+  }
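A side note on the two assertion styles used in the test above: `intercept` hands back the thrown exception so its message can be inspected, whereas `assertThrows` only verifies the exception type. A self-contained sketch:

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._

class AssertionStylesSpec extends AnyFlatSpec {
  "The two styles" should "differ in what they return" in {
    // intercept returns the exception, allowing follow-up checks on it
    val e = intercept[IllegalArgumentException] {
      require(false, "boom")
    }
    e.getMessage should include("boom")
    // assertThrows only asserts the exception type
    assertThrows[ArithmeticException] {
      1 / 0
    }
  }
}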
+
+  /**
+   * Tests that the FileDataSourceReader correctly reads data from CSV, TSV, and TXT_CSV files.
+   *
+   * This test verifies that the reader can handle different file formats and produce the expected results.
+   * The test covers the following formats:
+   * 1. CSV
+   * 2. TSV
+   * 3. TXT_CSV (Text file with CSV-like format)
+   *
+   * The test uses the following source binding configurations:
+   * FileSystemSource(path = "patients.csv", fileFormat = None, options = Map("ignoreTrailingWhiteSpace" -> "true", "comment" -> "!"))
+   * FileSystemSource(path = "patients.tsv", fileFormat = None, options = Map("ignoreTrailingWhiteSpace" -> "true", "comment" -> "!"))
+   * FileSystemSource(path = "patients.txt", fileFormat = Some(SourceFileFormats.TXT_CSV), options = Map("ignoreTrailingWhiteSpace" -> "true", "comment" -> "!"))
+   *
+   * The expected read result for all file formats:
+   * +---+------+-------------------+----------------+--------------+
+   * |pid|gender|          birthDate|deceasedDateTime|homePostalCode|
+   * +---+------+-------------------+----------------+--------------+
+   * | p1|  male|2000-05-10 00:00:00|            NULL|          NULL|
+   * | p2|  male|1985-05-08 00:00:00|      2017-03-10|        G02547|
+   * | p3|  male|1997-02-01 00:00:00|            NULL|          NULL|
+   * | p4|  male|1999-06-05 00:00:00|            NULL|        H10564|
+   * | p5|  male|1965-10-01 00:00:00|      2019-04-21|        G02547|
+   * | p6|female|1991-03-01 00:00:00|            NULL|          NULL|
+   * | p7|female|1972-10-25 00:00:00|            NULL|        V13135|
+   * | p8|female|2010-01-10 00:00:00|            NULL|        Z54564|
+   * | p9|female|1999-05-12 00:00:00|            NULL|          NULL|
+   * |p10|female|2003-11-01 00:00:00|            NULL|          NULL|
+   * +---+------+-------------------+----------------+--------------+
+   *
+   */
+  it should "correctly read from CSV, TSV, and TXT_CSV files" in {
+    // Folder containing the test files for this test
+    val folderPath = "/single-file-test"
+
+    // Expected values for validation
+    val expectedRowNumber = 10
+    val expectedColumns = Array("pid", "gender", "birthDate", "deceasedDateTime", "homePostalCode")
+    val expectedFirstRow = Row("p1", "male", new Timestamp(dateFormat.parse("2000-05-10").getTime), null, null)
+    val expectedLastRow = Row("p10", "female", new Timestamp(dateFormat.parse("2003-11-01").getTime), null, null)
+
+    // A sequence of file names and their corresponding formats to be tested
+    val sourceBindingConfigurations = Seq(
+      ("patients.csv", None),
+      ("patients.tsv", None),
+      ("patients.txt", Some(SourceFileFormats.TXT_CSV))
+    )
+
+    // Spark options to test if options are working
+    val sparkOptions = Map(
+      "ignoreTrailingWhiteSpace" -> "true",
+      "comment" -> "!"
+    )
+
+    // Loop through each source binding configuration to run the test
+    val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest1", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath))
+    sourceBindingConfigurations.foreach { case (fileName, fileFormat) =>
+      // Read the data using the reader and the defined settings
+      val mappingSourceBinding = FileSystemSource(path = fileName, fileFormat = fileFormat, options = sparkOptions)
+      val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty)
+
+      // Validate the result
+      result.count() shouldBe expectedRowNumber
+      result.columns shouldBe expectedColumns
+      result.first() shouldBe expectedFirstRow
+      result.collect().last shouldBe expectedLastRow
+    }
+  }
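The Timestamp values in the expected rows above imply that CSV-like input goes through Spark's schema inference (an assumption about the reader's internals, not something this patch states). In stock Spark, inference is opt-in and promotes date-like columns roughly as in this sketch:

import org.apache.spark.sql.SparkSession

// Sketch of stock Spark schema inference, independent of FileDataSourceReader.
object InferSchemaSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("infer-schema-sketch").getOrCreate()
    val df = spark.read
      .option("header", "true")
      // With inference on, columns of values like "2000-05-10" come back as
      // TimestampType instead of StringType; how partial dates such as
      // "1997-02" parse depends on the session's time-parser policy.
      .option("inferSchema", "true")
      .csv("single-file-test/patients.csv") // illustrative path
    df.printSchema()
    spark.stop()
  }
}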
+
+  /**
+   * Tests that the FileDataSourceReader correctly reads multiple files from CSV and TXT_CSV folders.
+   *
+   * This test verifies that the reader can handle multiple files across different file formats
+   * and produce the expected results. The test covers reading from folders containing:
+   * 1. CSV files
+   * 2. TXT_CSV (Text files with CSV-like format)
+   *
+   * The test uses the following source binding configurations:
+   * FileSystemSource(path = "csv", fileFormat = Some(SourceFileFormats.CSV), options = Map("ignoreTrailingWhiteSpace" -> "true", "comment" -> "!"))
+   * FileSystemSource(path = "txt-csv", fileFormat = Some(SourceFileFormats.TXT_CSV), options = Map("ignoreTrailingWhiteSpace" -> "true", "comment" -> "!"))
+   *
+   * The expected read result for both folder formats:
+   * +---+------+-------------------+
+   * |pid|gender|          birthDate|
+   * +---+------+-------------------+
+   * | p1|  male|2000-05-10 00:00:00|
+   * | p2|  male|1985-05-08 00:00:00|
+   * | p3|  male|1997-02-01 00:00:00|
+   * | p4|  male|1999-06-05 00:00:00|
+   * | p5|  male|1965-10-01 00:00:00|
+   * | p6|female|1991-03-01 00:00:00|
+   * | p7|female|1972-10-25 00:00:00|
+   * | p8|female|2010-01-10 00:00:00|
+   * | p9|female|1999-05-12 00:00:00|
+   * +---+------+-------------------+
+   * (Rows may appear in different groupings, with each file contributing a distinct set of 3 rows.)
+   *
+   */
+  it should "correctly read multiple files from CSV, TXT_CSV folders" in {
+    // Folder containing the test folders for this test
+    val folderPath = "/folder-test"
+
+    // Expected values for validation
+    val expectedRowNumber = 9
+    val expectedColumns = Array("pid", "gender", "birthDate")
+    val expectedRows = Set( // One row from each file
+      Row("p1", "male", new Timestamp(dateFormat.parse("2000-05-10").getTime)),
+      Row("p4", "male", new Timestamp(dateFormat.parse("1999-06-05").getTime)),
+      Row("p7", "female", new Timestamp(dateFormat.parse("1972-10-25").getTime))
+    )
+
+    // A sequence of folder names and the file formats of the files to be selected
+    val sourceBindingConfigurations = Seq(
+      ("csv", Some(SourceFileFormats.CSV)),
+      ("txt-csv", Some(SourceFileFormats.TXT_CSV))
+    )
+    // Spark options to test if options are working
+    val sparkOptions = Map(
+      "ignoreTrailingWhiteSpace" -> "true",
+      "comment" -> "!"
+    )
+
+    // Loop through each source binding configuration to run the test
+    val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest2", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath))
+    sourceBindingConfigurations.foreach { case (folderName, fileFormat) =>
+      // Read the data using the reader and the defined settings
+      val mappingSourceBinding = FileSystemSource(path = folderName, fileFormat = fileFormat, options = sparkOptions)
+      val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty)
+
+      // Validate the result
+      result.count() shouldBe expectedRowNumber
+      result.columns shouldBe expectedColumns
+      result.collect().toSet should contain allElementsOf expectedRows
+    }
+  }
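Reading a folder instead of a single file is ordinary Spark behavior: a directory path loads every file in it as one DataFrame, and the generic pathGlobFilter option can narrow which files are picked up (one plausible way, though not necessarily the reader's own, to select only .txt files for the txt-csv format). A sketch under those assumptions:

import org.apache.spark.sql.SparkSession

object FolderReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("folder-read-sketch").getOrCreate()
    // A directory path makes Spark read every file in it into a single DataFrame.
    val allCsv = spark.read
      .option("header", "true")
      .option("comment", "!")
      .csv("folder-test/csv") // illustrative path
    // pathGlobFilter restricts which files are read, e.g. only the .txt fixtures.
    val txtCsv = spark.read
      .option("header", "true")
      .option("comment", "!")
      .option("pathGlobFilter", "*.txt")
      .csv("folder-test/txt-csv") // illustrative path
    println(s"rows: ${allCsv.count()} + ${txtCsv.count()}")
    spark.stop()
  }
}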
+
+  /**
+   * Tests that the FileDataSourceReader correctly reads data from JSON and TXT_NDJSON files.
+   *
+   * This test verifies that the reader can handle different file formats and produce the expected results.
+   * The test covers the following formats:
+   * 1. JSON
+   * 2. TXT_NDJSON (Text file with newline-delimited JSON format)
+   *
+   * The test uses the following source binding configurations:
+   * FileSystemSource(path = "patients.json", fileFormat = None, options = Map("allowComments" -> "true"))
+   * FileSystemSource(path = "patients-ndjson.txt", fileFormat = Some(SourceFileFormats.TXT_NDJSON), options = Map("allowComments" -> "true"))
+   *
+   * The expected read result for both file formats:
+   * +----------+----------------+------+--------------+---+
+   * | birthDate|deceasedDateTime|gender|homePostalCode|pid|
+   * +----------+----------------+------+--------------+---+
+   * |2000-05-10|            NULL|  male|          NULL| p1|
+   * |1985-05-08|      2017-03-10|  male|        G02547| p2|
+   * |   1997-02|            NULL|  male|          NULL| p3|
+   * |1999-06-05|            NULL|  male|        H10564| p4|
+   * |1965-10-01|      2019-04-21|  male|        G02547| p5|
+   * |   1991-03|            NULL|female|          NULL| p6|
+   * |1972-10-25|            NULL|female|        V13135| p7|
+   * |2010-01-10|            NULL|female|        Z54564| p8|
+   * |1999-05-12|            NULL|female|          NULL| p9|
+   * |   2003-11|            NULL|female|          NULL|p10|
+   * +----------+----------------+------+--------------+---+
+   *
+   */
+  it should "correctly read from JSON and TXT-NDJSON files" in {
+    // Folder containing the test files
+    val folderPath = "/single-file-test"
+
+    // Define the expected values for validation (Note: Spark reads JSON columns in alphabetical order)
+    val expectedRowNumber = 10
+    val expectedColumns = Array("birthDate", "deceasedDateTime", "gender", "homePostalCode", "pid")
+    val expectedFirstRow = Row("2000-05-10", null, "male", null, "p1")
+    val expectedLastRow = Row("2003-11", null, "female", null, "p10")
+
+    // Define the file names and their corresponding formats to be tested
+    val sourceBindingConfigurations = Seq(
+      ("patients.json", None),
+      ("patients-ndjson.txt", Some(SourceFileFormats.TXT_NDJSON))
+    )
+    // Spark options to test if options are working
+    val sparkOptions = Map(
+      "allowComments" -> "true"
+    )
+
+    // Loop through each source binding configuration to run the test
+    val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest3", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath))
+    sourceBindingConfigurations.foreach { case (fileName, fileFormat) =>
+      // Define the source binding and settings for reading the file
+      val mappingSourceBinding = FileSystemSource(path = fileName, fileFormat = fileFormat, options = sparkOptions)
+      // Read the data from the specified file
+      val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty)
+
+      // Validate the result
+      result.count() shouldBe expectedRowNumber
+      result.columns shouldBe expectedColumns
+      result.first() shouldBe expectedFirstRow
+      result.collect().last shouldBe expectedLastRow
+    }
+  }
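Spark's JSON source reads newline-delimited JSON by default (one object per line) and, as the comment in the test notes, orders the resulting columns alphabetically; the allowComments option additionally tolerates Java/C++-style comments inside records. A minimal sketch, independent of the reader:

import org.apache.spark.sql.SparkSession

object JsonReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("json-read-sketch").getOrCreate()
    val df = spark.read
      .option("allowComments", "true") // tolerate // and /* */ comments in records
      .json("single-file-test/patients-ndjson.txt") // illustrative path
    df.show() // columns appear in alphabetical order: birthDate, ..., pid
    spark.stop()
  }
}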
+
+  /**
+   * Tests that the FileDataSourceReader correctly reads multiple files from JSON and NDJSON folders.
+   *
+   * This test verifies that the reader can handle multiple files across different file formats
+   * and produce the expected results. The test covers reading from folders containing:
+   * 1. JSON (standard JSON files in the "json" folder)
+   * 2. TXT_NDJSON (newline-delimited JSON files in the "txt-ndjson" folder)
+   *
+   * The test uses the following source binding configurations:
+   * FileSystemSource(path = "json", fileFormat = Some(SourceFileFormats.JSON))
+   * FileSystemSource(path = "txt-ndjson", fileFormat = Some(SourceFileFormats.TXT_NDJSON))
+   *
+   * The expected read result for both formats:
+   * +----------+------+---+
+   * | birthDate|gender|pid|
+   * +----------+------+---+
+   * |2000-05-10|  male| p1|
+   * |1985-05-08|  male| p2|
+   * |   1997-02|  male| p3|
+   * |1999-06-05|  male| p4|
+   * |1965-10-01|  male| p5|
+   * |   1991-03|female| p6|
+   * |1972-10-25|female| p7|
+   * |2010-01-10|female| p8|
+   * |1999-05-12|female| p9|
+   * +----------+------+---+
+   * (Rows may appear in different groupings, with each file contributing a distinct set of 3 rows.)
+   */
+  it should "correctly read multiple files from JSON and NDJSON folders" in {
+    // Folder containing the test folders for JSON and NDJSON files
+    val folderPath = "/folder-test"
+
+    // Expected values for validation
+    val expectedRowNumber = 9
+    val expectedColumns = Array("birthDate", "gender", "pid")
+    // Expected rows for validation, one row from each file
+    val expectedRows = Set(
+      Row("2000-05-10", "male", "p1"),
+      Row("1999-06-05", "male", "p4"),
+      Row("1972-10-25", "female", "p7")
+    )
+
+    // A sequence of folder names and the file formats of the files to be selected
+    val sourceBindingConfigurations = Seq(
+      ("json", Some(SourceFileFormats.JSON)),
+      ("txt-ndjson", Some(SourceFileFormats.TXT_NDJSON))
+    )
+
+    // Loop through each source binding configuration to run the test
+    val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest4", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath))
+    sourceBindingConfigurations.foreach { case (folderName, fileFormat) =>
+      // Read the data using the reader and the defined settings
+      val mappingSourceBinding = FileSystemSource(path = folderName, fileFormat = fileFormat)
+      val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty)
+
+      // Validate the result
+      result.count() shouldBe expectedRowNumber
+      result.columns shouldBe expectedColumns
+      result.collect().toSet should contain allElementsOf expectedRows
+    }
+  }
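Because each folder's files may be concatenated in any order, the folder tests assert the row count plus membership rather than positions. ScalaTest's `contain allElementsOf` matcher is what makes that order-insensitive check concise:

import org.scalatest.matchers.should.Matchers._

object AllElementsOfSketch {
  def main(args: Array[String]): Unit = {
    // Passes as long as every expected element is present,
    // regardless of ordering or of extra elements.
    Seq("p2", "p1", "p4", "p7") should contain allElementsOf Set("p1", "p4", "p7")
  }
}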
+
+  /**
+   * Tests that the FileDataSourceReader correctly reads data from a Parquet file.
+   *
+   * This test verifies that the reader can handle the Parquet file format and produce the expected results.
+   * The test covers the following format:
+   * 1. PARQUET
+   *
+   * The test uses the following source binding configuration:
+   * FileSystemSource(path = "patients.parquet", fileFormat = Some(SourceFileFormats.PARQUET))
+   *
+   * The expected read result for the Parquet file is:
+   * +---+------+----------+----------------+--------------+
+   * |pid|gender| birthDate|deceasedDateTime|homePostalCode|
+   * +---+------+----------+----------------+--------------+
+   * | p1|  male|2000-05-10|            null|          null|
+   * | p2|  male|1985-05-08|      2017-03-10|        G02547|
+   * | p3|  male|1997-02-01|            null|          null|
+   * | p4|  male|1999-06-05|            null|        H10564|
+   * | p5|  male|1965-10-01|      2019-04-21|        G02547|
+   * | p6|female|1991-03-01|            null|          null|
+   * | p7|female|1972-10-25|            null|        V13135|
+   * | p8|female|2010-01-10|            null|        Z54564|
+   * | p9|female|1999-05-12|            null|          null|
+   * |p10|female|   2003-11|            null|          null|
+   * +---+------+----------+----------------+--------------+
+   *
+   */
+  it should "correctly read from Parquet file" in {
+    // Folder containing the test files
+    val folderPath = "/single-file-test"
+
+    // Define the expected values for validation
+    val expectedRowNumber = 10
+    val expectedColumns = Array("pid", "gender", "birthDate", "deceasedDateTime", "homePostalCode")
+    val expectedFirstRow = Row("p1", "male", "2000-05-10", null, null)
+    val expectedLastRow = Row("p10", "female", "2003-11", null, null)
+
+    // Define the file name and its corresponding format for Parquet
+    val sourceBindingConfigurations = Seq(
+      ("patients.parquet", Some(SourceFileFormats.PARQUET))
+    )
+
+    // Loop through each source binding configuration to run the test
+    val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest5", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath))
+    sourceBindingConfigurations.foreach { case (fileName, fileFormat) =>
+      // Define the source binding and settings for reading the file
+      val mappingSourceBinding = FileSystemSource(path = fileName, fileFormat = fileFormat)
+      // Read the data from the specified file
+      val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty)
+
+      // Validate the result
+      result.count() shouldBe expectedRowNumber
+      result.columns shouldBe expectedColumns
+      result.first() shouldBe expectedFirstRow
+      result.collect().last shouldBe expectedLastRow
+    }
+  }
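Unlike CSV and JSON, Parquet is self-describing: the schema travels with the file, so columns keep their written order and types without header or inferSchema options, and birthDate stays a plain string here because the fixtures stored it that way. A stock-Spark sketch:

import org.apache.spark.sql.SparkSession

object ParquetReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("parquet-read-sketch").getOrCreate()
    // No header/inferSchema options: the schema is embedded in the file itself.
    val df = spark.read.parquet("single-file-test/patients.parquet") // illustrative path
    df.printSchema() // pid, gender, birthDate, ... in their written order
    spark.stop()
  }
}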
+ */ + it should "correctly read multiple files from Parquet folders" in { + // Folder including the test folders belonging to this test + val folderPath = "/folder-test" + + // Expected values for validation + val expectedRowNumber = 9 + val expectedColumns = Array("pid", "gender", "birthDate") + // Expected rows for validation, one row from each file + val expectedRows = Set( + Row("p1", "male", "2000-05-10"), + Row("p4", "male", "1999-06-05"), + Row("p7", "female", "1972-10-25") + ) + + // A sequence of folder names and file format of the files to be selected + val sourceBindingConfigurations = Seq( + ("parquet", Some(SourceFileFormats.PARQUET)) + ) + + // Loop through each source binding configuration to run the test + val mappingJobSourceSettings = FileSystemSourceSettings(name = "FileDataSourceReaderTest6", sourceUri = "test-uri", dataFolderPath = testDataFolderPath.concat(folderPath)) + sourceBindingConfigurations.foreach { case (folderName, fileFormat) => + // Read the data using the reader and the defined settings + val mappingSourceBinding = FileSystemSource(path = folderName, fileFormat = fileFormat) + val result: DataFrame = fileDataSourceReader.read(mappingSourceBinding, mappingJobSourceSettings, Option.empty) + + // Validate the result + result.count() shouldBe expectedRowNumber + result.columns shouldBe expectedColumns + result.collect().toSet should contain allElementsOf expectedRows + } + } +} \ No newline at end of file