From 4fa92bd983a789cae3c45bb8604f7e233f623c3a Mon Sep 17 00:00:00 2001
From: Yash Mittal <mittal.yash12@hotmail.com>
Date: Sat, 9 Mar 2024 21:21:02 +0530
Subject: [PATCH] Tutorial 21 (#28)

* Prisma: Initial Wiring

* Chores: Remove redundant code

* Chores: Readme for Prisma Adapter; GitHub CI Refactoring

* Chores: Remove random file that got added

* Demo: Fix server URL

* Fix Readme.md

* Fix main readme

* Fix CLI Tests

* Fix CI

* Fix CI

* Fix CI

* Fix CI

* Tutorial-21

* docs: add tutorial for step 21

* fix: generate command

* fix: update workflows to not use node v14

---------

Co-authored-by: Chakshu Gautam <chaks.gautam@gmail.com>
---
 .github/workflows/db-tests.yml    |   3 +-
 .gitignore                        |   4 +-
 docs/tutorials/21.md              | 173 ++++++++++++++++++++
 package-lock.json                 | 150 +++++++++++++++--
 package.json                      |   3 +
 src/csvStorage.js                 |  21 ++-
 src/queryExecuter.js              |  29 +++-
 src/queryParser.js                |  47 +++++-
 tests/appoximateLargeFile.test.js |  63 ++++++++
 tests/queryExecuter.test.js       |  56 +++++++
 tests/queryParser.test.js         | 257 +++++++++++++++++++++++++++---
 util/generateLargeFile.js         |  35 ++++
 12 files changed, 798 insertions(+), 43 deletions(-)
 create mode 100644 docs/tutorials/21.md
 create mode 100644 tests/appoximateLargeFile.test.js
 create mode 100644 util/generateLargeFile.js

diff --git a/.github/workflows/db-tests.yml b/.github/workflows/db-tests.yml
index c0cd8a3..2540e7c 100644
--- a/.github/workflows/db-tests.yml
+++ b/.github/workflows/db-tests.yml
@@ -13,7 +13,7 @@ jobs:
 
     strategy:
       matrix:
-        node-version: [14.x, 16.x, 18.x]
+        node-version: [16.x, 18.x]
 
     steps:
     - uses: actions/checkout@v3
@@ -22,4 +22,5 @@ jobs:
       with:
         node-version: ${{ matrix.node-version }}
     - run: npm i
+    - run: npm run generate
     - run: npm test
diff --git a/.gitignore b/.gitignore
index 656790b..93ee4f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,4 +130,6 @@ dist
 .pnp.*
 
 
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store
+
+student_large.csv
\ No newline at end of file
diff --git a/docs/tutorials/21.md b/docs/tutorials/21.md
new file mode 100644
index 0000000..d662cb6
--- /dev/null
+++ b/docs/tutorials/21.md
@@ -0,0 +1,173 @@
+## Step 21: Add Approximate Counting using `HyperLogLog`
+
+### 21.1 First things first, create a utility to generate large files
+
+Since the effect of `HyperLogLog` is best seen on large files, we need to create a utility function which generates large files, let's say with `10_000_000` data points. To do this, create a file named `generateLargeFile.js` in a `util` folder and add the following logic to it.
+
+```js
+const fs = require('fs');
+const { faker, da } = require('@faker-js/faker');
+const { parse } = require('json2csv');
+
+async function generateLargeCSV(filename) {
+    let data = [];
+    for (let i = 1; i <= 10_000_000; i++) {
+        const record = {
+            id: i,
+            name: faker.person.firstName(),
+            age: faker.number.int({ min: 18, max: 100 }),
+        };
+        data.push(record);
+
+        let rows;
+        if (i % 500_000 === 0) {
+            console.log(`Generated ${i} records`);
+            if (!fs.existsSync(filename)) {
+                rows = parse(data, { header: true });
+            } else {
+                // Rows without headers.
+                rows = parse(data, { header: false });
+            }
+            fs.appendFileSync(filename, rows);
+            data = [];
+        }
+
+    }
+    // Append file function can create new file too.
+
+    // Always add new line if file already exists.
+    fs.appendFileSync(filename, "\r\n");
+}
+
+generateLargeCSV('student_large.csv')
+```
+
+### 21.2 Implement CSV reader for `HyperLogLog`
+
+Since `HyperLogLog` is a data structure which keeps data stored in a hashed format, we implement a separate CSV reader for it. Create a function named `readCSVForHLL` in your `csvStorage.js`.
+Sample logic for it can be found here:
+```js
+function readCSVForHLL(filePath, bitSampleSize = 12, digestSize = 128) {
+    const results = [];
+    var h = hll({ bitSampleSize: bitSampleSize, digestSize: digestSize });
+
+    return new Promise((resolve, reject) => {
+        fs.createReadStream(filePath)
+            .pipe(csv())
+            .on('data', (data) => h.insert(JSON.stringify(data)))
+            .on('end', () => {
+                resolve(h);
+            })
+            .on('error', (error) => {
+                reject(error);
+            });
+    });
+}
+```
+
+### 21.3 Update the `queryParser` implementation to identify `COUNT` and `APPROXIMATE_COUNT` as valid tokens
+
+Since our SQL queries will now accept `COUNT` and `APPROXIMATE_COUNT` as valid tokens, we need to update the logic of our parser to identify and process them accordingly. Update your `queryParser.js` with the required logic (regex) to identify them.
+
+
+### 21.4 Update the `executeSELECTQuery` function to add support for `COUNT` and `APPROXIMATE_COUNT` 
+
+Update the existing logic in the `executeSELECTQuery` function to identify and process the `COUNT` and `APPROXIMATE_COUNT` commands in your SQL query. 
+Some snippets that might be helpful are:
+```js
+// getting approx counts
+if (isApproximateCount && fields.length === 1 && fields[0] === 'COUNT(*)' && whereClauses.length === 0) {
+  let hll = await readCSVForHLL(`${table}.csv`);
+  return [{ 'APPROXIMATE_COUNT(*)': hll.estimate() }];
+}
+
+ // Distinct inside count - example "SELECT COUNT(DISTINCT (name, age)) FROM student"
+            if (isCountDistinct) {
+
+                if (isApproximateCount) {
+                    var h = hll({ bitSampleSize: 12, digestSize: 128 });
+                    orderedResults.forEach(row => h.insert(distinctFields.map(field => row[field]).join('|')));
+                    return [{ [`APPROXIMATE_${fields[0]}`]: h.estimate() }];
+                }
+                else {
+                    let distinctResults = [...new Map(orderedResults.map(item => [distinctFields.map(field => item[field]).join('|'), item])).values()];
+                    return [{ [fields[0]]: distinctResults.length }];
+                }
+            }
+```
+
+
+### 21.5 Write a test case for approximate count
+
+Since we are following `TDD` in this tutorial, we are going to be writing a test case to test our implementation now.
+Create a file named `approximateLargeFile.test.js` in your `tests` folder and add the following test cases:
+
+```js
+const fs = require('fs');
+const { executeSELECTQuery } = require('../src/queryExecuter');
+const jestConsole = console;
+
+beforeEach(() => {
+    global.console = require('console');
+});
+
+afterEach(() => {
+    global.console = jestConsole;
+});
+
+test('Large File Count(*) - Approximate and Exact', async () => {
+    // Test Exact Count
+
+    const startMemoryUsageExact = process.memoryUsage().heapUsed;
+    const startTimeExact = performance.now();
+
+    const queryExact = "SELECT COUNT(*) FROM student_large";
+    const resultExact = await executeSELECTQuery(queryExact);
+    const exactResult = resultExact[0]['COUNT(*)'];
+
+    const endTimeExact = performance.now();
+    const endMemoryUsageExact = process.memoryUsage().heapUsed;
+
+    console.log(`Execution Time for Exact Count: ${(endTimeExact - startTimeExact).toFixed(2)} ms`);
+    console.log(`Start Memory for Exact Count: ${startMemoryUsageExact / 1024 / 1024} MB`);
+    console.log(`End Memory for Exact Count: ${endMemoryUsageExact / 1024 / 1024} MB`);
+    console.log(`Memory Used for Exact Count: ${(endMemoryUsageExact - startMemoryUsageExact) / 1024 / 1024} MB`);
+
+    const startMemoryUsage = process.memoryUsage().heapUsed;
+    const startTime = performance.now();
+
+    const query = "SELECT APPROXIMATE_COUNT(*) FROM student_large";
+    const result = await executeSELECTQuery(query);
+
+    // Expect the approximate count to be within 5% of the actual count
+    expect(result[0]['APPROXIMATE_COUNT(*)']).toBeGreaterThan(exactResult - 0.05 * exactResult);
+    expect(result[0]['APPROXIMATE_COUNT(*)']).toBeLessThan(exactResult + 0.05 * exactResult);
+
+    const endTime = performance.now();
+    const endMemoryUsage = process.memoryUsage().heapUsed;
+
+    console.log(`Execution Time for Approximate Count: ${(endTime - startTime).toFixed(2)} ms`);
+    console.log(`Start Memory: ${startMemoryUsage / 1024 / 1024} MB`);
+    console.log(`End Memory: ${endMemoryUsage / 1024 / 1024} MB`);
+    console.log(`Memory Used for Approximate Count: ${(endMemoryUsage - startMemoryUsage) / 1024 / 1024} MB`);
+
+}, 120000);
+
+test('Execute SQL Query with COUNT with DISTINCT on a column', async () => {
+    const queryExact = "SELECT COUNT(DISTINCT (name, age)) FROM student_large";
+    const resultExact = await executeSELECTQuery(queryExact);
+    console.log({ resultExact });
+    const exactResult = resultExact[0]['COUNT(DISTINCT (name, age))'];
+
+    const query = "SELECT APPROXIMATE_COUNT(DISTINCT (name, age)) FROM student_large";
+    const result = await executeSELECTQuery(query);
+
+    // Expect the approximate count to be within 5% of the actual count
+    expect(result[0]['APPROXIMATE_COUNT(DISTINCT (name, age))']).toBeGreaterThan(exactResult - 0.05 * exactResult);
+    expect(result[0]['APPROXIMATE_COUNT(DISTINCT (name, age))']).toBeLessThan(exactResult + 0.05 * exactResult);
+}, 120000);
+```
+
+### 21.6 Update the tests for other files to test for the updates you made in other parts of the implementation
+
+Since we have made changes to other parts of the implementation, such as `csvStorage.js`, `queryParser.js` and `queryExecuter.js`, we need to update the tests for those files to cover the new functionality.
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index 3afaec3..9b5dd73 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
       "license": "ISC",
       "dependencies": {
         "csv-parser": "^3.0.0",
+        "hll": "^2.0.0",
         "json2csv": "^6.0.0-alpha.2",
         "xterm": "^5.3.0"
       },
@@ -17,6 +18,7 @@
         "stylusdb-cli": "node ./src/cli.js"
       },
       "devDependencies": {
+        "@faker-js/faker": "^8.4.1",
         "jest": "^29.7.0"
       }
     },
@@ -655,6 +657,22 @@
       "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==",
       "dev": true
     },
+    "node_modules/@faker-js/faker": {
+      "version": "8.4.1",
+      "resolved": "https://registry.npmjs.org/@faker-js/faker/-/faker-8.4.1.tgz",
+      "integrity": "sha512-XQ3cU+Q8Uqmrbf2e0cIC/QN43sTBSC8KF12u29Mb47tWrt2hAgBXSgpZMj4Ao8Uk0iJcU99QsOCaIL8934obCg==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/fakerjs"
+        }
+      ],
+      "engines": {
+        "node": "^14.17.0 || ^16.13.0 || >=18.0.0",
+        "npm": ">=6.14.13"
+      }
+    },
     "node_modules/@istanbuljs/load-nyc-config": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@@ -1200,6 +1218,11 @@
         "sprintf-js": "~1.0.2"
       }
     },
+    "node_modules/async": {
+      "version": "3.2.5",
+      "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
+      "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
+    },
     "node_modules/babel-jest": {
       "version": "29.7.0",
       "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz",
@@ -1310,14 +1333,12 @@
     "node_modules/balanced-match": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
-      "dev": true
+      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
     },
     "node_modules/brace-expansion": {
       "version": "1.1.11",
       "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
       "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
-      "dev": true,
       "dependencies": {
         "balanced-match": "^1.0.0",
         "concat-map": "0.0.1"
@@ -1525,8 +1546,7 @@
     "node_modules/concat-map": {
       "version": "0.0.1",
       "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-      "dev": true
+      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg=="
     },
     "node_modules/convert-source-map": {
       "version": "2.0.0",
@@ -1796,8 +1816,7 @@
     "node_modules/fs.realpath": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-      "dev": true
+      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw=="
     },
     "node_modules/fsevents": {
       "version": "2.3.3",
@@ -1865,7 +1884,6 @@
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
       "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
-      "dev": true,
       "dependencies": {
         "fs.realpath": "^1.0.0",
         "inflight": "^1.0.4",
@@ -1917,6 +1935,17 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/hll": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/hll/-/hll-2.0.0.tgz",
+      "integrity": "sha512-PV92xkmczdzgSDIf8ii7zInuvw40X+XFovrtSOaVx7TfuKnb4EWvgjLHTcCVm5FGXMDUNe2v/4g0vyMtQESJdg==",
+      "dependencies": {
+        "murmurhash3": "^0.5.0"
+      },
+      "engines": {
+        "node": ">=10 <15"
+      }
+    },
     "node_modules/html-escaper": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
@@ -1964,7 +1993,6 @@
       "version": "1.0.6",
       "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
       "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-      "dev": true,
       "dependencies": {
         "once": "^1.3.0",
         "wrappy": "1"
@@ -1973,8 +2001,7 @@
     "node_modules/inherits": {
       "version": "2.0.4",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
-      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
-      "dev": true
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
     },
     "node_modules/is-arrayish": {
       "version": "0.2.1",
@@ -2726,6 +2753,17 @@
         "url": "https://github.com/chalk/supports-color?sponsor=1"
       }
     },
+    "node_modules/js-beautify-node": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/js-beautify-node/-/js-beautify-node-1.0.0.tgz",
+      "integrity": "sha512-P5t/LUeK2PUIEzhJ3ehjHiMGQjstXgCBpX4HJkG0Igq6XwnSI0c4HUGsJep34pZK5BEpkCyIzOCcthV/ucSitg==",
+      "engines": [
+        "node >= 0.3.0"
+      ],
+      "bin": {
+        "jsbeautify": "beautify-node.js"
+      }
+    },
     "node_modules/js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2931,7 +2969,6 @@
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
       "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
-      "dev": true,
       "dependencies": {
         "brace-expansion": "^1.1.7"
       },
@@ -2947,12 +2984,41 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/mkdirp": {
+      "version": "0.5.6",
+      "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
+      "integrity": "sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==",
+      "dependencies": {
+        "minimist": "^1.2.6"
+      },
+      "bin": {
+        "mkdirp": "bin/cmd.js"
+      }
+    },
     "node_modules/ms": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
       "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==",
       "dev": true
     },
+    "node_modules/murmurhash3": {
+      "version": "0.5.0",
+      "resolved": "https://registry.npmjs.org/murmurhash3/-/murmurhash3-0.5.0.tgz",
+      "integrity": "sha512-bgvgIBctpwslE9kIurdGH9knDDyYCZca5upcy9MLzsc16afyBdwkfhOJmxN607exbyesZgTQEtAcqs3euEsFlA==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "nan": "^2.14.2",
+        "shipitjs": "^0.3.2"
+      },
+      "engines": {
+        "node": ">=0.12.0"
+      }
+    },
+    "node_modules/nan": {
+      "version": "2.19.0",
+      "resolved": "https://registry.npmjs.org/nan/-/nan-2.19.0.tgz",
+      "integrity": "sha512-nO1xXxfh/RWNxfd/XPfbIfFk5vgLsAxUR9y5O0cHMJu/AW9U95JLXqthYHjEp+8gQ5p96K9jUp8nbVOxCdRbtw=="
+    },
     "node_modules/natural-compare": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
@@ -2996,7 +3062,6 @@
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
       "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
-      "dev": true,
       "dependencies": {
         "wrappy": "1"
       }
@@ -3098,7 +3163,6 @@
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
       "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-      "dev": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -3274,6 +3338,17 @@
         "node": ">=10"
       }
     },
+    "node_modules/rimraf": {
+      "version": "2.6.3",
+      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz",
+      "integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==",
+      "dependencies": {
+        "glob": "^7.1.3"
+      },
+      "bin": {
+        "rimraf": "bin.js"
+      }
+    },
     "node_modules/semver": {
       "version": "6.3.1",
       "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
@@ -3304,6 +3379,33 @@
         "node": ">=8"
       }
     },
+    "node_modules/shipitjs": {
+      "version": "0.3.2",
+      "resolved": "https://registry.npmjs.org/shipitjs/-/shipitjs-0.3.2.tgz",
+      "integrity": "sha512-uLtgZP2PBzxwoCDfEhARWpuQewMwRLGhkrD8O/rqXHdyYjaV8LY0F4aMi5Lr3l4XWm5YiHINsCMhzD0scu7GzQ==",
+      "dependencies": {
+        "async": "",
+        "commander": ">=0.6.1",
+        "js-beautify-node": "",
+        "semver": "^5.5.1",
+        "temp": "",
+        "underscore": ""
+      },
+      "bin": {
+        "shipitjs": "bin/shipitjs"
+      },
+      "engines": {
+        "node": ">=0.4.x"
+      }
+    },
+    "node_modules/shipitjs/node_modules/semver": {
+      "version": "5.7.2",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz",
+      "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==",
+      "bin": {
+        "semver": "bin/semver"
+      }
+    },
     "node_modules/signal-exit": {
       "version": "3.0.7",
       "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
@@ -3455,6 +3557,18 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/temp": {
+      "version": "0.9.4",
+      "resolved": "https://registry.npmjs.org/temp/-/temp-0.9.4.tgz",
+      "integrity": "sha512-yYrrsWnrXMcdsnu/7YMYAofM1ktpL5By7vZhf15CrXijWWrEYZks5AXBudalfSWJLlnen/QUJUB5aoB0kqZUGA==",
+      "dependencies": {
+        "mkdirp": "^0.5.1",
+        "rimraf": "~2.6.2"
+      },
+      "engines": {
+        "node": ">=6.0.0"
+      }
+    },
     "node_modules/test-exclude": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz",
@@ -3517,6 +3631,11 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/underscore": {
+      "version": "1.13.6",
+      "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.6.tgz",
+      "integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A=="
+    },
     "node_modules/undici-types": {
       "version": "5.26.5",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
@@ -3611,8 +3730,7 @@
     "node_modules/wrappy": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
-      "dev": true
+      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
     },
     "node_modules/write-file-atomic": {
       "version": "4.0.2",
diff --git a/package.json b/package.json
index 4eb4c53..ba8cdcc 100644
--- a/package.json
+++ b/package.json
@@ -7,6 +7,7 @@
     "doc": "docs"
   },
   "scripts": {
+    "generate": "node ./util/generateLargeFile.js",
     "test": "jest",
     "test-verbose": "jest --verbose",
     "server": "node ./src/server.js"
@@ -18,10 +19,12 @@
   "author": "Chakshu Gautam",
   "license": "ISC",
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "jest": "^29.7.0"
   },
   "dependencies": {
     "csv-parser": "^3.0.0",
+    "hll": "^2.0.0",
     "json2csv": "^6.0.0-alpha.2",
     "xterm": "^5.3.0"
   }
diff --git a/src/csvStorage.js b/src/csvStorage.js
index 4729599..c4c33b9 100644
--- a/src/csvStorage.js
+++ b/src/csvStorage.js
@@ -1,9 +1,11 @@
 const fs = require('fs');
 const csv = require('csv-parser');
 const { parse } = require('json2csv');
+const hll = require('hll');
 
 function readCSV(filePath) {
     const results = [];
+    var h = hll();
 
     return new Promise((resolve, reject) => {
         fs.createReadStream(filePath)
@@ -18,9 +20,26 @@ function readCSV(filePath) {
     });
 }
 
+function readCSVForHLL(filePath, bitSampleSize = 12, digestSize = 128) {
+    const results = [];
+    var h = hll({ bitSampleSize: bitSampleSize, digestSize: digestSize });
+
+    return new Promise((resolve, reject) => {
+        fs.createReadStream(filePath)
+            .pipe(csv())
+            .on('data', (data) => h.insert(JSON.stringify(data)))
+            .on('end', () => {
+                resolve(h);
+            })
+            .on('error', (error) => {
+                reject(error);
+            });
+    });
+}
+
 async function writeCSV(filename, data) {
     const csv = parse(data);
     fs.writeFileSync(filename, csv);
 }
 
-module.exports = { readCSV, writeCSV };
\ No newline at end of file
+module.exports = { readCSV, readCSVForHLL, writeCSV };
\ No newline at end of file
diff --git a/src/queryExecuter.js b/src/queryExecuter.js
index ce210a3..863cd1a 100644
--- a/src/queryExecuter.js
+++ b/src/queryExecuter.js
@@ -1,5 +1,7 @@
 const { parseSelectQuery, parseInsertQuery, parseDeleteQuery } = require('./queryParser.js');
-const { readCSV, writeCSV } = require('./csvStorage.js');
+const { readCSV, readCSVForHLL, writeCSV } = require('./csvStorage.js');
+const hll = require('hll');
+
 
 function performInnerJoin(data, joinData, joinCondition, fields, table) {
     return data.flatMap(mainRow => {
@@ -203,8 +205,14 @@ function applyGroupBy(data, groupByFields, aggregateFunctions) {
 
 async function executeSELECTQuery(query) {
     try {
+        const { fields, table, whereClauses, joinType, joinTable, joinCondition, groupByFields, hasAggregateWithoutGroupBy, isApproximateCount, orderByFields, limit, isDistinct, distinctFields, isCountDistinct } = parseSelectQuery(query);
+
+
+        if (isApproximateCount && fields.length === 1 && fields[0] === 'COUNT(*)' && whereClauses.length === 0) {
+            let hll = await readCSVForHLL(`${table}.csv`);
+            return [{ 'APPROXIMATE_COUNT(*)': hll.estimate() }];
+        }
 
-        const { fields, table, whereClauses, joinType, joinTable, joinCondition, groupByFields, hasAggregateWithoutGroupBy, orderByFields, limit, isDistinct } = parseSelectQuery(query);
         let data = await readCSV(`${table}.csv`);
 
         // Perform INNER JOIN if specified
@@ -229,6 +237,7 @@ async function executeSELECTQuery(query) {
             ? data.filter(row => whereClauses.every(clause => evaluateCondition(row, clause)))
             : data;
 
+
         let groupResults = filteredData;
         if (hasAggregateWithoutGroupBy) {
             // Special handling for queries like 'SELECT COUNT(*) FROM table'
@@ -293,6 +302,20 @@ async function executeSELECTQuery(query) {
                 });
             }
 
+            // Distinct inside count - example "SELECT COUNT (DISTINCT student.name) FROM student"
+            if (isCountDistinct) {
+
+                if (isApproximateCount) {
+                    var h = hll({ bitSampleSize: 12, digestSize: 128 });
+                    orderedResults.forEach(row => h.insert(distinctFields.map(field => row[field]).join('|')));
+                    return [{ [`APPROXIMATE_${fields[0]}`]: h.estimate() }];
+                }
+                else {
+                    let distinctResults = [...new Map(orderedResults.map(item => [distinctFields.map(field => item[field]).join('|'), item])).values()];
+                    return [{ [fields[0]]: distinctResults.length }];
+                }
+            }
+
             // Select the specified fields
             let finalResults = orderedResults.map(row => {
                 const selectedRow = {};
@@ -303,6 +326,8 @@ async function executeSELECTQuery(query) {
                 return selectedRow;
             });
 
+            // console.log("CP-2", orderedResults)
+
             // Remove duplicates if specified
             let distinctResults = finalResults;
             if (isDistinct) {
diff --git a/src/queryParser.js b/src/queryParser.js
index 4230a15..c81aada 100644
--- a/src/queryParser.js
+++ b/src/queryParser.js
@@ -8,7 +8,25 @@ function parseSelectQuery(query) {
         query = query.trim();
 
         // Initialize distinct flag
-        let isDistinct = false;
+        let isDistinct = false; // Global DISTINCT, not within COUNT
+        let isCountDistinct = false; // New flag for DISTINCT within COUNT
+        let distinctFields = []; // Array to hold fields after DISTINCT within COUNT or APPROXIMATE_COUNT
+
+
+        // Detect APPROXIMATE_COUNT
+        let isApproximateCount = false;
+        const approximateCountRegex = /APPROXIMATE_COUNT\((DISTINCT\s)?(.+?)\)/i;
+        const approximateCountMatch = query.match(approximateCountRegex);
+        if (approximateCountMatch) {
+            isApproximateCount = true;
+            // If DISTINCT is used within APPROXIMATE_COUNT, capture the fields
+            if (approximateCountMatch[1]) {
+                isCountDistinct = true;
+                // distinctFields.push(approximateCountMatch[2].trim());
+            }
+            // Simplify further processing by normalizing to COUNT (adjust as necessary for your logic)
+            query = query.replace(approximateCountRegex, `COUNT(${approximateCountMatch[1] || ''}${approximateCountMatch[2]})`);
+        }
 
         // Check for DISTINCT keyword and update the query
         if (query.toUpperCase().includes('SELECT DISTINCT')) {
@@ -59,13 +77,24 @@ function parseSelectQuery(query) {
         // Extract JOIN information
         const { joinType, joinTable, joinCondition } = parseJoinClause(queryWithoutWhere);
 
+        const countDistinctRegex = /COUNT\((DISTINCT\s\((.*?)\))\)/gi;
+        let countDistinctMatch;
+        while ((countDistinctMatch = countDistinctRegex.exec(query)) !== null) {
+            isCountDistinct = true;
+            if (isApproximateCount) {
+                distinctFields.push(...countDistinctMatch[2].trim().split(',').map(field => field.trim()));
+            } else {
+                distinctFields.push(...countDistinctMatch[2].trim().split(',').map(field => field.trim()));
+            }
+        }
+
         // Parse SELECT part
         const selectRegex = /^SELECT\s(.+?)\sFROM\s(.+)/i;
         const selectMatch = selectPart.match(selectRegex);
         if (!selectMatch) {
             throw new Error('Invalid SELECT format');
         }
-        const [, fields, table] = selectMatch;
+        let [, fields, table] = selectMatch;
 
         // Parse WHERE part if it exists
         let whereClauses = [];
@@ -76,8 +105,17 @@ function parseSelectQuery(query) {
         // Check for aggregate functions without GROUP BY
         const hasAggregateWithoutGroupBy = checkAggregateWithoutGroupBy(query, groupByFields);
 
+        // Temporarily replace commas within parentheses to avoid incorrect splitting
+        const tempPlaceholder = '__TEMP_COMMA__'; // Ensure this placeholder doesn't appear in your actual queries
+        fields = fields.replace(/\(([^)]+)\)/g, (match) => match.replace(/,/g, tempPlaceholder));
+
+        // Now split fields and restore any temporary placeholders
+        const parsedFields = fields.split(',').map(field =>
+            field.trim().replace(new RegExp(tempPlaceholder, 'g'), ','));
+
+
         return {
-            fields: fields.split(',').map(field => field.trim()),
+            fields: parsedFields,
             table: table.trim(),
             whereClauses,
             joinType,
@@ -86,7 +124,10 @@ function parseSelectQuery(query) {
             groupByFields,
             orderByFields,
             hasAggregateWithoutGroupBy,
+            isApproximateCount,
+            isCountDistinct,
             limit,
+            distinctFields,
             isDistinct
         };
     } catch (error) {
diff --git a/tests/appoximateLargeFile.test.js b/tests/appoximateLargeFile.test.js
new file mode 100644
index 0000000..cb419da
--- /dev/null
+++ b/tests/appoximateLargeFile.test.js
@@ -0,0 +1,63 @@
+const fs = require('fs');
+const { executeSELECTQuery } = require('../src/queryExecuter');
+const jestConsole = console;
+// Each test logs through Node's own console; the console captured above is restored afterwards.
+beforeEach(() => {
+    global.console = require('console');
+});
+
+afterEach(() => {
+    global.console = jestConsole;
+});
+
+test('Large File Count(*) - Approximate and Exact', async () => {
+    // Phase 1: exact COUNT(*) as the baseline, with heap usage and wall time sampled around it.
+
+    const startMemoryUsageExact = process.memoryUsage().heapUsed;
+    const startTimeExact = performance.now();
+
+    const queryExact = "SELECT COUNT(*) FROM student_large";
+    const resultExact = await executeSELECTQuery(queryExact);
+    const exactResult = resultExact[0]['COUNT(*)'];
+
+    const endTimeExact = performance.now();
+    const endMemoryUsageExact = process.memoryUsage().heapUsed;
+
+    console.log(`Execution Time for Exact Count: ${(endTimeExact - startTimeExact).toFixed(2)} ms`);
+    console.log(`Start Memory for Exact Count: ${startMemoryUsageExact / 1024 / 1024} MB`);
+    console.log(`End Memory for Exact Count: ${endMemoryUsageExact / 1024 / 1024} MB`);
+    console.log(`Memory Used for Exact Count: ${(endMemoryUsageExact - startMemoryUsageExact) / 1024 / 1024} MB`);
+    // Phase 2: APPROXIMATE_COUNT(*) on the same table, measured the same way.
+    const startMemoryUsage = process.memoryUsage().heapUsed;
+    const startTime = performance.now();
+
+    const query = "SELECT APPROXIMATE_COUNT(*) FROM student_large";
+    const result = await executeSELECTQuery(query);
+
+    // The approximate count must lie strictly within +/-5% of the exact count
+    expect(result[0]['APPROXIMATE_COUNT(*)']).toBeGreaterThan(exactResult - 0.05 * exactResult);
+    expect(result[0]['APPROXIMATE_COUNT(*)']).toBeLessThan(exactResult + 0.05 * exactResult);
+    // NOTE: the end samples below are taken after the assertions, so they include the expect() calls.
+    const endTime = performance.now();
+    const endMemoryUsage = process.memoryUsage().heapUsed;
+
+    console.log(`Execution Time for Approximate Count: ${(endTime - startTime).toFixed(2)} ms`);
+    console.log(`Start Memory: ${startMemoryUsage / 1024 / 1024} MB`);
+    console.log(`End Memory: ${endMemoryUsage / 1024 / 1024} MB`);
+    console.log(`Memory Used for Approximate Count: ${(endMemoryUsage - startMemoryUsage) / 1024 / 1024} MB`);
+    // The trailing 120000 is the per-test timeout in milliseconds.
+}, 120000);
+
+test('Large File COUNT(DISTINCT) - Approximate and Exact', async () => {
+    const queryExact = "SELECT COUNT(DISTINCT (name, age)) FROM student_large";
+    const resultExact = await executeSELECTQuery(queryExact);
+    console.log({ resultExact });
+    const exactResult = resultExact[0]['COUNT(DISTINCT (name, age))'];
+    // Same distinct count requested via APPROXIMATE_COUNT, checked against the exact baseline above.
+    const query = "SELECT APPROXIMATE_COUNT(DISTINCT (name, age)) FROM student_large";
+    const result = await executeSELECTQuery(query);
+
+    // Expect the approximate count to be strictly within 5% of the exact count
+    expect(result[0]['APPROXIMATE_COUNT(DISTINCT (name, age))']).toBeGreaterThan(exactResult - 0.05 * exactResult);
+    expect(result[0]['APPROXIMATE_COUNT(DISTINCT (name, age))']).toBeLessThan(exactResult + 0.05 * exactResult);
+}, 120000);
\ No newline at end of file
diff --git a/tests/queryExecuter.test.js b/tests/queryExecuter.test.js
index 3dc121c..891f062 100644
--- a/tests/queryExecuter.test.js
+++ b/tests/queryExecuter.test.js
@@ -418,3 +418,59 @@ test('LIKE with ORDER BY and LIMIT', async () => {
     // Expecting the first two names alphabetically that contain 'a'
     expect(result).toEqual([{ name: 'Alice' }, { name: 'Jane' }]);
 });
+
+
+test('Execute SQL Query with APPROXIMATE_COUNT Function', async () => {
+    const query = "SELECT APPROXIMATE_COUNT(id) FROM student";
+    const result = await executeSELECTQuery(query);
+    // Assuming APPROXIMATE_COUNT behaves like COUNT for testing
+    // Expecting the count of all student records
+    expect(result).toEqual([{ 'COUNT(id)': 5 }]); // Assuming there are 5 records in student.csv
+});
+
+test('Execute SQL Query with APPROXIMATE_COUNT and GROUP BY Clauses', async () => {
+    const query = "SELECT APPROXIMATE_COUNT(id), course FROM enrollment GROUP BY course";
+    const result = await executeSELECTQuery(query);
+    // Assuming APPROXIMATE_COUNT behaves like COUNT for testing
+    // Expecting the count of enrollment records grouped by course
+    expect(result).toEqual([
+        { 'COUNT(id)': 2, course: 'Mathematics' }, // Assuming 2 students are enrolled in Mathematics
+        { 'COUNT(id)': 2, course: 'Physics' }, // Assuming 2 students are enrolled in Physics
+        { 'COUNT(id)': 1, course: 'Chemistry' }, // Assuming 1 student is enrolled in Chemistry
+        { 'COUNT(id)': 1, course: 'Biology' } // Assuming 1 student is enrolled in Biology
+    ]);
+});
+
+test('Execute SQL Query with APPROXIMATE_COUNT, WHERE, and ORDER BY Clauses', async () => {
+    const query = "SELECT APPROXIMATE_COUNT(id) FROM student WHERE age > '20' ORDER BY age DESC";
+    const result = await executeSELECTQuery(query);
+    // Assuming APPROXIMATE_COUNT behaves like COUNT for testing
+    // Expecting the count of students older than 20, ordered by age in descending order
+    // Note: The ORDER BY clause does not affect the outcome for a single aggregated result
+    expect(result).toEqual([{ 'COUNT(id)': 5 }]); // Assuming all 5 students in student.csv are older than 20
+});
+
+
+test('Execute SQL Query with APPROXIMATE_COUNT only', async () => {
+    const query = "SELECT APPROXIMATE_COUNT(*) FROM student";
+    const result = await executeSELECTQuery(query);
+    expect(result).toEqual([{ 'APPROXIMATE_COUNT(*)': 5 }]);
+});
+
+test('Execute SQL Query with APPROXIMATE_COUNT with DISTINCT on a column', async () => {
+    const query = "SELECT APPROXIMATE_COUNT(DISTINCT (name)) FROM student";
+    const result = await executeSELECTQuery(query);
+    expect(result).toEqual([{ 'APPROXIMATE_COUNT(DISTINCT (name))': 4 }]);
+});
+
+test('Execute SQL Query with COUNT with DISTINCT on a column', async () => {
+    const query = "SELECT COUNT(DISTINCT (name)) FROM student";
+    const result = await executeSELECTQuery(query);
+    expect(result).toEqual([{ 'COUNT(DISTINCT (name))': 4 }]);
+});
+
+test('Execute SQL Query with COUNT with DISTINCT on multiple columns', async () => {
+    // Counts distinct (name, age) pairs: 5 expected here vs. 4 for DISTINCT (name) alone.
+    const result = await executeSELECTQuery("SELECT COUNT(DISTINCT (name, age)) FROM student");
+    expect(result).toEqual([{ 'COUNT(DISTINCT (name, age))': 5 }]);
+});
\ No newline at end of file
diff --git a/tests/queryParser.test.js b/tests/queryParser.test.js
index 664463b..39e12e5 100644
--- a/tests/queryParser.test.js
+++ b/tests/queryParser.test.js
@@ -16,6 +16,9 @@ test('Parse SQL Query', () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -38,6 +41,9 @@ test('Parse SQL Query with WHERE Clause', () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -64,6 +70,9 @@ test('Parse SQL Query with Multiple WHERE Clauses', () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -82,6 +91,9 @@ test('Parse SQL Query with INNER JOIN', async () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     })
 });
 
@@ -100,6 +112,9 @@ test('Parse SQL Query with INNER JOIN and WHERE Clause', async () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     })
 });
 
@@ -160,6 +175,9 @@ test('Parse LEFT Join Query Completely', () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     })
 })
 
@@ -178,6 +196,9 @@ test('Parse LEFT Join Query Completely', () => {
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     })
 })
 
@@ -196,6 +217,9 @@ test('Parse SQL Query with LEFT JOIN with a WHERE clause filtering the main tabl
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -214,6 +238,9 @@ test('Parse SQL Query with LEFT JOIN with a WHERE clause filtering the join tabl
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -232,6 +259,9 @@ test('Parse SQL Query with RIGHT JOIN with a WHERE clause filtering the main tab
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -250,6 +280,9 @@ test('Parse SQL Query with RIGHT JOIN with a WHERE clause filtering the join tab
         orderByFields: null,
         limit: null,
         isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -268,7 +301,10 @@ test('Parse COUNT Aggregate Query', () => {
         joinType: null,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -287,7 +323,10 @@ test('Parse SUM Aggregate Query', () => {
         joinType: null,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -305,7 +344,10 @@ test('Parse AVG Aggregate Query', () => {
         joinType: null,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -323,7 +365,10 @@ test('Parse MIN Aggregate Query', () => {
         joinType: null,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -341,7 +386,10 @@ test('Parse MAX Aggregate Query', () => {
         joinType: null,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -359,7 +407,10 @@ test('Parse basic GROUP BY query', () => {
         hasAggregateWithoutGroupBy: false,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -377,7 +428,10 @@ test('Parse GROUP BY query with WHERE clause', () => {
         hasAggregateWithoutGroupBy: false,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -395,7 +449,10 @@ test('Parse GROUP BY query with multiple fields', () => {
         hasAggregateWithoutGroupBy: false,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -416,7 +473,10 @@ test('Parse GROUP BY query with JOIN and WHERE clauses', () => {
         hasAggregateWithoutGroupBy: false,
         orderByFields: null,
         limit: null,
-        isDistinct: false
+        isDistinct: false,
+        isApproximateCount: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -483,6 +543,7 @@ test('Parse SQL Query with Basic DISTINCT', () => {
         fields: ['age'],
         table: 'student',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [],
         groupByFields: null,
         joinType: null,
@@ -490,7 +551,9 @@ test('Parse SQL Query with Basic DISTINCT', () => {
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -501,6 +564,7 @@ test('Parse SQL Query with DISTINCT and Multiple Columns', () => {
         fields: ['student_id', 'course'],
         table: 'enrollment',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [],
         groupByFields: null,
         joinType: null,
@@ -508,7 +572,9 @@ test('Parse SQL Query with DISTINCT and Multiple Columns', () => {
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -519,6 +585,7 @@ test('Parse SQL Query with DISTINCT and WHERE Clause', () => {
         fields: ['course'],
         table: 'enrollment',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [{ field: 'student_id', operator: '=', value: '"1"' }],
         groupByFields: null,
         joinType: null,
@@ -526,7 +593,9 @@ test('Parse SQL Query with DISTINCT and WHERE Clause', () => {
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -537,6 +606,7 @@ test('Parse SQL Query with DISTINCT and JOIN Operations', () => {
         fields: ['student.name'],
         table: 'student',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [],
         groupByFields: null,
         joinType: 'INNER',
@@ -547,7 +617,9 @@ test('Parse SQL Query with DISTINCT and JOIN Operations', () => {
         },
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -558,6 +630,7 @@ test('Parse SQL Query with DISTINCT, ORDER BY, and LIMIT', () => {
         fields: ['age'],
         table: 'student',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [],
         groupByFields: null,
         joinType: null,
@@ -565,7 +638,9 @@ test('Parse SQL Query with DISTINCT, ORDER BY, and LIMIT', () => {
         joinCondition: null,
         orderByFields: [{ fieldName: 'age', order: 'DESC' }],
         limit: 2,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -576,6 +651,7 @@ test('Parse SQL Query with DISTINCT on All Columns', () => {
         fields: ['*'],
         table: 'student',
         isDistinct: true,
+        isApproximateCount: false,
         whereClauses: [],
         groupByFields: null,
         joinType: null,
@@ -583,7 +659,9 @@ test('Parse SQL Query with DISTINCT on All Columns', () => {
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -595,13 +673,16 @@ test('Parse SQL Query with LIKE Clause', () => {
         table: 'student',
         whereClauses: [{ field: 'name', operator: 'LIKE', value: '%Jane%' }],
         isDistinct: false,
+        isApproximateCount: false,
         groupByFields: null,
         joinType: null,
         joinTable: null,
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -613,13 +694,16 @@ test('Parse SQL Query with LIKE Clause and Wildcards', () => {
         table: 'student',
         whereClauses: [{ field: 'name', operator: 'LIKE', value: 'J%' }],
         isDistinct: false,
+        isApproximateCount: false,
         groupByFields: null,
         joinType: null,
         joinTable: null,
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -634,13 +718,16 @@ test('Parse SQL Query with Multiple LIKE Clauses', () => {
             { field: 'age', operator: 'LIKE', value: '2%' }
         ],
         isDistinct: false,
+        isApproximateCount: false,
         groupByFields: null,
         joinType: null,
         joinTable: null,
         joinCondition: null,
         orderByFields: null,
         limit: null,
-        hasAggregateWithoutGroupBy: false
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
     });
 });
 
@@ -653,6 +740,95 @@ test('Parse SQL Query with LIKE and ORDER BY Clauses', () => {
         whereClauses: [{ field: 'name', operator: 'LIKE', value: '%e%' }],
         orderByFields: [{ fieldName: 'age', order: 'DESC' }],
         isDistinct: false,
+        isApproximateCount: false,
+        groupByFields: null,
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
+    });
+});
+
+test('Parse SQL Query with APPROXIMATE_COUNT Function', () => {
+    const query = "SELECT APPROXIMATE_COUNT(id) FROM student";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(id)'], // Assuming APPROXIMATE_COUNT is replaced with COUNT for simplicity
+        table: 'student',
+        whereClauses: [],
+        isDistinct: false,
+        isApproximateCount: true, // This flag should be true when APPROXIMATE_COUNT is used
+        groupByFields: null,
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        orderByFields: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: true,
+        isCountDistinct: false,
+        distinctFields: []
+    });
+});
+
+test('Parse SQL Query with APPROXIMATE_COUNT and GROUP BY Clauses', () => {
+    const query = "SELECT APPROXIMATE_COUNT(id), course FROM enrollment GROUP BY course";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(id)', 'course'], // Assuming APPROXIMATE_COUNT is replaced with COUNT for simplicity
+        table: 'enrollment',
+        whereClauses: [],
+        isDistinct: false,
+        isApproximateCount: true, // This flag should be true when APPROXIMATE_COUNT is used
+        groupByFields: ['course'],
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        orderByFields: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: false,
+        isCountDistinct: false,
+        distinctFields: []
+    });
+});
+
+test('Parse SQL Query with APPROXIMATE_COUNT, WHERE, and ORDER BY Clauses', () => {
+    const query = "SELECT APPROXIMATE_COUNT(id) FROM student WHERE age > 20 ORDER BY age DESC";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(id)'],
+        table: 'student',
+        whereClauses: [
+            { field: 'age', operator: '>', value: '20' }
+        ],
+        orderByFields: [{ fieldName: 'age', order: 'DESC' }],
+        isDistinct: false,
+        isApproximateCount: true,
+        groupByFields: null,
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: true,
+        isCountDistinct: false,
+        distinctFields: []
+    });
+});
+
+test('Parse SQL Query with APPROXIMATE_COUNT with DISTINCT on a column', () => {
+    const query = "SELECT APPROXIMATE_COUNT(DISTINCT (name)) FROM student";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(DISTINCT (name))'],
+        table: 'student',
+        whereClauses: [],
+        orderByFields: null,
+        isDistinct: false,
+        distinctFields: ['name'],
+        isCountDistinct: true,
+        isApproximateCount: true,
         groupByFields: null,
         joinType: null,
         joinTable: null,
@@ -660,4 +836,47 @@ test('Parse SQL Query with LIKE and ORDER BY Clauses', () => {
         limit: null,
         hasAggregateWithoutGroupBy: false
     });
-});
\ No newline at end of file
+});
+
+test('Parse SQL Query with COUNT with DISTINCT on a column', () => {
+    const query = "SELECT COUNT(DISTINCT (name)) FROM student";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(DISTINCT (name))'],
+        table: 'student',
+        whereClauses: [],
+        orderByFields: null,
+        isDistinct: false,
+        distinctFields: ['name'],
+        isCountDistinct: true,
+        isApproximateCount: false,
+        groupByFields: null,
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: false
+    });
+});
+
+test('Parse SQL Query with COUNT with DISTINCT on multiple column', () => {
+    const query = "SELECT COUNT(DISTINCT (name, age)) FROM student";
+    const parsed = parseSelectQuery(query);
+    expect(parsed).toEqual({
+        fields: ['COUNT(DISTINCT (name, age))'],
+        table: 'student',
+        whereClauses: [],
+        orderByFields: null,
+        isDistinct: false,
+        distinctFields: ['name', 'age'],
+        isCountDistinct: true,
+        isApproximateCount: false,
+        groupByFields: null,
+        joinType: null,
+        joinTable: null,
+        joinCondition: null,
+        limit: null,
+        hasAggregateWithoutGroupBy: false
+    });
+});
+
diff --git a/util/generateLargeFile.js b/util/generateLargeFile.js
new file mode 100644
index 0000000..d70c913
--- /dev/null
+++ b/util/generateLargeFile.js
@@ -0,0 +1,35 @@
+const fs = require('fs');
+const os = require('os');
+const { faker } = require('@faker-js/faker');
+const { parse } = require('json2csv');
+
+const TOTAL_RECORDS = 10_000_000;
+const BATCH_SIZE = 500_000; // rows buffered in memory between disk writes
+
+/**
+ * Writes TOTAL_RECORDS fake student rows (id, name, age) to a CSV file in batches.
+ * The header is emitted only when the file does not exist yet; otherwise rows are appended.
+ * @param {string} filename - Path of the CSV file to create or append to.
+ * @returns {Promise<void>} Resolves after the last batch has been written.
+ */
+async function generateLargeCSV(filename) {
+    let data = [];
+    for (let i = 1; i <= TOTAL_RECORDS; i++) {
+        data.push({
+            id: i,
+            name: faker.person.firstName(),
+            age: faker.number.int({ min: 18, max: 100 }),
+        });
+        // Flush on every full batch and once more for a trailing partial batch.
+        if (i % BATCH_SIZE === 0 || i === TOTAL_RECORDS) {
+            console.log(`Generated ${i} records`);
+            const rows = parse(data, { header: !fs.existsSync(filename) });
+            // parse() emits no trailing line break, so terminate every batch
+            // explicitly; otherwise its last row fuses with the next batch's first.
+            fs.appendFileSync(filename, rows + os.EOL);
+            data = [];
+        }
+    }
+}
+
+generateLargeCSV('student_large.csv');
\ No newline at end of file