From 059e9c651bbe1baaa1e6808cd6f27ef33dca1e84 Mon Sep 17 00:00:00 2001
From: Vincent Hardouin
Date: Mon, 8 Apr 2024 11:18:10 +0200
Subject: [PATCH] feat: add extract data script

---
 scripts/extract-data.js    | 204 ++++++++++++++++++++++++++++++++++++-
 tests/extract-data.spec.js |  32 ++++++-
 2 files changed, 233 insertions(+), 3 deletions(-)

diff --git a/scripts/extract-data.js b/scripts/extract-data.js
index e3d0d86..9cf65bb 100644
--- a/scripts/extract-data.js
+++ b/scripts/extract-data.js
@@ -1,4 +1,10 @@
-import { knex } from '../db/knex-database-connection.js';
+import { access, constants, readFile, writeFile } from 'node:fs/promises';
+import path from 'node:path';
+import os from 'node:os';
+import url from 'node:url';
+import process from 'node:process';
+import { disconnect, knex } from '../db/knex-database-connection.js';
+import { parseData } from './parse-data.js';

 async function getRoutes() {
   const routes = await knex('routes').select('*').where('route_type', 1);
@@ -73,4 +79,198 @@ async function getAdjacentStations() {
     .groupBy('from_stop_id', 'to_stop_id');
 }

-export { getRoutes, getStops, getAdjacentStations };
+/**
+ * Load the transit line traces GeoJSON from the user's Downloads folder and keep the subway features.
+ *
+ * @returns {Promise<object[]>} features whose `properties.route_type` is `'Subway'`
+ * @throws {Error} when the GeoJSON file is not readable
+ */
+async function getRoutesPaths() {
+  const filePath = path.resolve(`${os.homedir()}/Downloads/traces-des-lignes-de-transport-en-commun-idfm.geojson`);
+  await _verifyPath(filePath);
+  const file = await readFile(filePath, 'utf8');
+  const routePaths = JSON.parse(file);
+  return routePaths.features.filter(feature => feature.properties.route_type === 'Subway');
+}
+
+/**
+ * Ensure the given path can be read by the current process.
+ *
+ * @param {string} dirPath - path to check
+ * @throws {Error} when the path is not readable; the underlying error is kept as `cause`
+ */
+async function _verifyPath(dirPath) {
+  try {
+    await access(dirPath, constants.R_OK);
+  }
+  catch (e) {
+    throw new Error(`The path ${dirPath} is not accessible.`, { cause: e });
+  }
+}
+
+/**
+ * Attach to every adjacent-station pair the slice of route geometry linking its two stops.
+ *
+ * A pair gets `path: null` (and no `route_id`) when one of its stops is unknown, when its
+ * stops are on different routes, when no route path matches their route, or when no line
+ * of the route geometry contains the exact coordinates of both stops.
+ *
+ * @param {object} params
+ * @param {object[]} params.adjacentStations - pairs with `from_stop_id`, `to_stop_id` and `time`
+ * @param {object[]} params.stations - stops with `stop_id`, `route_id`, `stop_lon` and `stop_lat`
+ * @param {object[]} params.routePaths - features with `properties.route_id` and `geometry.coordinates`
+ * @returns {Promise<object[]>} one entry per pair, in input order
+ */
+async function fillPathsInAdjacentStation({ adjacentStations, stations, routePaths }) {
+  const stationsMap = Object.fromEntries(stations.map(station => [station.stop_id, station]));
+
+  return adjacentStations.map((adjacentStation) => {
+    const withoutPath = {
+      from_stop_id: adjacentStation.from_stop_id,
+      to_stop_id: adjacentStation.to_stop_id,
+      duration: adjacentStation.time,
+      path: null,
+    };
+    const fromStation = stationsMap[adjacentStation.from_stop_id];
+    const toStation = stationsMap[adjacentStation.to_stop_id];
+
+    // Unknown stop or pair spanning two routes: there is no single route geometry to slice.
+    if (!fromStation || !toStation || fromStation.route_id !== toStation.route_id)
+      return withoutPath;
+
+    const routePath = routePaths.find(candidate => candidate.properties.route_id === fromStation.route_id);
+    if (!routePath)
+      return withoutPath;
+
+    for (const line of routePath.geometry.coordinates) {
+      const fromIndex = line.findIndex(coord => coord[0] === fromStation.stop_lon && coord[1] === fromStation.stop_lat);
+      const toIndex = line.findIndex(coord => coord[0] === toStation.stop_lon && coord[1] === toStation.stop_lat);
+      if (fromIndex === -1 || toIndex === -1)
+        continue;
+
+      return {
+        ...withoutPath,
+        // Keep the points in line order, whichever way the pair is travelled.
+        path: line.slice(Math.min(fromIndex, toIndex), Math.max(fromIndex, toIndex) + 1),
+        route_id: fromStation.route_id,
+      };
+    }
+
+    return withoutPath;
+  });
+}
+
+/**
+ * Load the stations CSV from the user's Downloads folder and keep the metro stops.
+ *
+ * @returns {Promise<object[]>} stops with `stop_unique_id`, `stop_name`, `stop_lon` and `stop_lat`
+ * @throws {Error} when the CSV file is not readable
+ */
+async function getUniqueStops() {
+  const filePath = path.resolve(`${os.homedir()}/Downloads/emplacement-des-gares-idf-data-generalisee.csv`);
+  await _verifyPath(filePath);
+  const file = await readFile(filePath, 'utf8');
+  const stops = await parseData(file);
+
+  return stops.data
+    .filter(stop => stop.metro === '1' || stop.mode_.includes('METRO'))
+    .map((stop) => {
+      // NOTE(review): assumes 'Geo Point' is "lon,lat" - confirm against the CSV (some exports use "lat,lon").
+      const [lon, lat] = stop['Geo Point'].split(',').map(Number);
+      return {
+        stop_unique_id: stop.codeunique,
+        stop_name: stop.nom_long,
+        stop_lon: lon,
+        stop_lat: lat,
+      };
+    });
+}
+
+/**
+ * Build the data set (routes, stops, adjacent stations, unique stops) and save it with `saveData`.
+ *
+ * @throws {Error} when a stop has no counterpart in the unique stops
+ */
+async function main() {
+  const stops = await getStops();
+  const adjacentStations = await getAdjacentStations();
+  const routePaths = await getRoutesPaths();
+  const routes = routePaths.map(_keepOnlyRouteProperties);
+  const adjacentStationsWithPath = await fillPathsInAdjacentStation({ adjacentStations, stations: stops, routePaths });
+  const uniqueStops = await getUniqueStops();
+  // Strip diacritics, whitespace, dashes, underscores and apostrophes, then lowercase, before comparing names.
+  const normalizedStopName = stop => stop.stop_name.normalize('NFD').replace(/[\u0300-\u036F]/g, '').replace(/[\s-–_'’]/g, '').toLowerCase();
+  const stopsWithUniqueId = stops.map((stop) => {
+    const stopName = normalizedStopName(stop);
+    // NOTE(review): names are used as unescaped regex patterns, so '.' or '(' act as regex syntax - confirm this is intended.
+    const uniqueStop = uniqueStops.find((uniqueStop) => {
+      const uniqueStopName = normalizedStopName(uniqueStop);
+      return new RegExp(stopName, 'i').test(uniqueStopName) || new RegExp(uniqueStopName, 'i').test(stopName);
+    });
+
+    if (!uniqueStop)
+      throw new Error(`Stop ${stop.stop_name} not found in unique stops`);
+
+    return {
+      ...stop,
+      stop_unique_id: uniqueStop.stop_unique_id,
+    };
+  });
+  const adjacentStationsWithPathAndWithUniqueId = adjacentStationsWithPath.map((adjacentStation) => {
+    const fromStop = stopsWithUniqueId.find(stop => stop.stop_id === adjacentStation.from_stop_id);
+    const toStop = stopsWithUniqueId.find(stop => stop.stop_id === adjacentStation.to_stop_id);
+    return {
+      ...adjacentStation,
+      from_stop_unique_id: fromStop.stop_unique_id,
+      to_stop_unique_id: toStop.stop_unique_id,
+    };
+  });
+  await saveData({
+    routes,
+    stops: stopsWithUniqueId,
+    adjacentStations: adjacentStationsWithPathAndWithUniqueId,
+    uniqueStops,
+  });
+}
+
+/**
+ * Reduce a route path feature to a shallow copy of its `properties`.
+ *
+ * @param {object} route - feature with a `properties` object
+ * @returns {object} copy of `route.properties`
+ */
+function _keepOnlyRouteProperties(route) {
+  return {
+    ...route.properties,
+  };
+}
+
+/**
+ * Write the data as indented JSON to `./assets/data.json` (relative to the current working directory).
+ *
+ * @param {object} data - JSON-serializable data
+ */
+async function saveData(data) {
+  await writeFile('./assets/data.json', JSON.stringify(data, null, 2));
+}
+
+const modulePath = url.fileURLToPath(import.meta.url);
+const isLaunchedFromCommandLine = process.argv[1] === modulePath;
+
+// Run the extraction only when this file is the script given to node, not when it is imported.
+(async () => {
+  if (isLaunchedFromCommandLine) {
+    try {
+      await main();
+    }
+    catch (error) {
+      console.error(error);
+      process.exitCode = 1;
+    }
+    finally {
+      await disconnect();
+    }
+  }
+})();
+
+export { getRoutes, getStops, getAdjacentStations, getRoutesPaths, fillPathsInAdjacentStation };
diff --git a/tests/extract-data.spec.js b/tests/extract-data.spec.js
index 5127c59..5707539 100644
--- a/tests/extract-data.spec.js
+++ b/tests/extract-data.spec.js
@@ -1,6 +1,12 @@
 import { afterEach, describe, expect, it } from 'vitest';
 import { saveRoutes, saveStopTimes, saveStops, saveTransfers, saveTrips } from '../scripts/save-data.js';
-import { getAdjacentStations, getRoutes, getStops } from '../scripts/extract-data.js';
+import {
+  fillPathsInAdjacentStation,
+  getAdjacentStations,
+  getRoutes,
+  getRoutesPaths,
+  getStops,
+} from '../scripts/extract-data.js';
 import { emptyAllTables, knex } from '../db/knex-database-connection.js';

 describe('extract-data', () => {
@@ -407,4 +413,28 @@ describe('extract-data', () => {
       expect(adjacentStations).to.deep.equal(expectedAdjacentStations);
     });
   });
+
+  describe('#getRoutePath', () => {
+    it('should extract route path', async () => {
+      const routePaths = await getRoutesPaths();
+
+      // TODO: placeholder expectation - it does not have the mapped { route_id, coordinates } shape yet.
+      expect(routePaths.map(path => ({
+        route_id: path.properties.route_id,
+        coordinates: path.geometry.coordinates.length,
+      }))).to.deep.equal([{ ID: 'IDFM:C01371' }]);
+    });
+  });
+
+  describe('#fillPath', () => {
+    it('should fill path', async () => {
+      const stops = await getStops();
+      const adjacentStations = await getAdjacentStations();
+      const routePaths = await getRoutesPaths();
+
+      const result = await fillPathsInAdjacentStation({ adjacentStations, stations: stops, routePaths });
+
+      expect(result).to.deep.equal([]);
+    });
+  });
 });