Skip to content

Commit

Permalink
feat: add extract data script
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentHardouin committed Apr 8, 2024
1 parent 7af5c9a commit 059e9c6
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 3 deletions.
171 changes: 169 additions & 2 deletions scripts/extract-data.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { knex } from '../db/knex-database-connection.js';
import { access, constants, readFile, writeFile } from 'node:fs/promises';
import path from 'node:path';
import os from 'node:os';
import url from 'node:url';
import process from 'node:process';
import { disconnect, knex } from '../db/knex-database-connection.js';
import { parseData } from './parse-data.js';

async function getRoutes() {
const routes = await knex('routes').select('*').where('route_type', 1);
Expand Down Expand Up @@ -73,4 +79,165 @@ async function getAdjacentStations() {
.groupBy('from_stop_id', 'to_stop_id');
}

export { getRoutes, getStops, getAdjacentStations };
/**
 * Loads the IDFM transit line traces GeoJSON file and keeps only its subway features.
 *
 * @param {string} [filePath] - Location of the GeoJSON file. Defaults to
 *   `traces-des-lignes-de-transport-en-commun-idfm.geojson` in the current user's `Downloads` folder.
 * @returns {Promise<object[]>} The features whose `properties.route_type` is exactly `'Subway'`.
 * @throws {Error} When the file is not readable or does not contain a `features` array.
 * @throws {SyntaxError} When the file content is not valid JSON.
 */
async function getRoutesPaths(
  filePath = path.resolve(os.homedir(), 'Downloads', 'traces-des-lignes-de-transport-en-commun-idfm.geojson'),
) {
  await _verifyPath(filePath);
  const file = await readFile(filePath, 'utf8');
  const routePaths = JSON.parse(file);
  const features = routePaths?.features;

  if (!Array.isArray(features))
    throw new Error(`The file ${filePath} does not contain a GeoJSON "features" array.`);

  // A GeoJSON feature may carry `properties: null`, hence the optional chaining.
  return features.filter(feature => feature.properties?.route_type === 'Subway');
}

/**
 * Ensures the given path can be read by the current process.
 *
 * @param {string} dirPath - File or directory path to check.
 * @returns {Promise<void>} Resolves when the path is readable.
 * @throws {Error} When the path cannot be accessed; the underlying filesystem
 *   error (ENOENT, EACCES, ...) is attached as `cause` so it is not lost.
 */
async function _verifyPath(dirPath) {
  try {
    await access(dirPath, constants.R_OK);
  }
  catch (cause) {
    throw new Error(`The path ${dirPath} is not accessible.`, { cause });
  }
}

/**
 * Normalizes a GeoJSON geometry into a list of lines (each line being a list of positions).
 * A `LineString` holds a single line directly in `coordinates`; any other geometry is
 * treated like a `MultiLineString`, whose `coordinates` is already a list of lines.
 */
function _getGeometryLines(geometry) {
  if (!geometry || !Array.isArray(geometry.coordinates))
    return [];

  return geometry.type === 'LineString' ? [geometry.coordinates] : geometry.coordinates;
}

/**
 * Returns the portion of `line` lying between the two stations (both ends included),
 * or `null` when the line does not contain the exact coordinates of both stations.
 */
function _slicePathBetween(line, fromStation, toStation) {
  // Coordinates are compared strictly: a station is only found when its
  // lon/lat are exactly equal to one of the positions of the line.
  const indexOfStation = station => line.findIndex(coord => coord[0] === station.stop_lon && coord[1] === station.stop_lat);

  const fromIndex = indexOfStation(fromStation);
  const toIndex = indexOfStation(toStation);
  if (fromIndex === -1 || toIndex === -1)
    return null;

  // The slice always follows the order of the line itself, whichever station comes first.
  return line.slice(Math.min(fromIndex, toIndex), Math.max(fromIndex, toIndex) + 1);
}

/**
 * Attaches to every adjacent-station link the slice of route geometry joining its two stations.
 *
 * A link gets `path: null` (and no `route_id`) when its two stations are on different routes,
 * when no route path exists for their route, or when no single line of the route geometry
 * contains the exact coordinates of both stations.
 *
 * @param {object} params
 * @param {object[]} params.adjacentStations - Links with `from_stop_id`, `to_stop_id` and `time`.
 * @param {object[]} params.stations - Stations with `stop_id`, `route_id`, `stop_lon` and `stop_lat`.
 * @param {object[]} params.routePaths - Features exposing `properties.route_id` and `geometry`.
 * @returns {Promise<object[]>} One entry per link: `{ from_stop_id, to_stop_id, duration, path }`,
 *   plus `route_id` when a path was found.
 * @throws {Error} When a link references a stop id that is absent from `stations`.
 */
async function fillPathsInAdjacentStation({ adjacentStations, stations, routePaths }) {
  // Keys are stringified so that lookups behave like plain-object property access.
  const stationsById = new Map(stations.map(station => [String(station.stop_id), station]));

  // Index route paths once instead of scanning the list for every link; the first
  // feature seen for a route id wins, as a linear search would.
  const routePathsById = new Map();
  for (const routePath of routePaths) {
    const routeId = routePath.properties?.route_id;
    if (!routePathsById.has(routeId))
      routePathsById.set(routeId, routePath);
  }

  return adjacentStations.map((adjacentStation) => {
    const { from_stop_id, to_stop_id, time } = adjacentStation;
    const fromStation = stationsById.get(String(from_stop_id));
    const toStation = stationsById.get(String(to_stop_id));

    if (!fromStation || !toStation)
      throw new Error(`Adjacent stations ${from_stop_id} -> ${to_stop_id} reference a stop that is not in the stations list.`);

    const linkWithoutPath = { from_stop_id, to_stop_id, duration: time, path: null };

    if (fromStation.route_id !== toStation.route_id)
      return linkWithoutPath;

    const lines = _getGeometryLines(routePathsById.get(fromStation.route_id)?.geometry);
    for (const line of lines) {
      const segment = _slicePathBetween(line, fromStation, toStation);
      if (segment)
        return { ...linkWithoutPath, path: segment, route_id: fromStation.route_id };
    }

    return linkWithoutPath;
  });
}

/**
 * Loads the IDF stations CSV export and returns its metro stations in a compact shape.
 *
 * A row is kept when its `metro` column is `'1'` or when its `mode_` column contains `'METRO'`.
 *
 * @param {string} [filePath] - Location of the CSV file. Defaults to
 *   `emplacement-des-gares-idf-data-generalisee.csv` in the current user's `Downloads` folder.
 * @returns {Promise<object[]>} Entries shaped `{ stop_unique_id, stop_name, stop_lon, stop_lat }`.
 * @throws {Error} When the file is not readable.
 */
async function getUniqueStops(
  filePath = path.resolve(os.homedir(), 'Downloads', 'emplacement-des-gares-idf-data-generalisee.csv'),
) {
  await _verifyPath(filePath);
  const file = await readFile(filePath, 'utf8');
  const stops = await parseData(file);

  return stops.data
    // `mode_` may be absent on a row; such a row is simply not a metro station.
    .filter(stop => stop.metro === '1' || stop.mode_?.includes('METRO'))
    .map((stop) => {
      // NOTE(review): the first comma-separated value of "Geo Point" is used as the longitude
      // and the second as the latitude — confirm this matches the column order of the CSV export.
      const [lon, lat] = stop['Geo Point'].split(',').map(Number);
      return {
        stop_unique_id: stop.codeunique,
        stop_name: stop.nom_long,
        stop_lon: lon,
        stop_lat: lat,
      };
    });
}

/**
 * Builds the complete data set and hands it to `saveData`.
 *
 * Steps: load stops and adjacent stations, load the route paths, attach a path to every
 * adjacent-station link, match every stop with a "unique stop" by name, then propagate the
 * unique ids onto the links.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a stop has no matching unique stop, or when a link references an unknown stop.
 */
async function main() {
  const stops = await getStops();
  const adjacentStations = await getAdjacentStations();
  const routePaths = await getRoutesPaths();
  const routes = routePaths.map(_keepOnlyRouteProperties);
  const adjacentStationsWithPath = await fillPathsInAdjacentStation({ adjacentStations, stations: stops, routePaths });
  const uniqueStops = await getUniqueStops();

  // Strips diacritics, whitespace, dashes, underscores and apostrophes, then lower-cases.
  const normalizedStopName = stop => stop.stop_name.normalize('NFD').replace(/[\u0300-\u036F]/g, '').replace(/[\s\-_']/g, '').toLowerCase();

  // Normalize every unique stop name once rather than once per stop.
  const namedUniqueStops = uniqueStops.map(uniqueStop => ({ uniqueStop, name: normalizedStopName(uniqueStop) }));

  const stopsWithUniqueId = stops.map((stop) => {
    const stopName = normalizedStopName(stop);
    // Plain substring checks: building a RegExp from a stop name would throw or
    // mis-match as soon as the name contains a regex metacharacter such as "(" or ".".
    const match = namedUniqueStops.find(({ name }) => name.includes(stopName) || stopName.includes(name));

    if (!match)
      throw new Error(`Stop ${stop.stop_name} not found in unique stops`);

    return {
      ...stop,
      stop_unique_id: match.uniqueStop.stop_unique_id,
    };
  });

  // Index stops by id once; the first stop seen for an id wins, as a linear search would.
  const stopsById = new Map();
  for (const stop of stopsWithUniqueId) {
    if (!stopsById.has(stop.stop_id))
      stopsById.set(stop.stop_id, stop);
  }

  const adjacentStationsWithPathAndWithUniqueId = adjacentStationsWithPath.map((adjacentStation) => {
    const fromStop = stopsById.get(adjacentStation.from_stop_id);
    const toStop = stopsById.get(adjacentStation.to_stop_id);

    if (!fromStop || !toStop)
      throw new Error(`Adjacent stations ${adjacentStation.from_stop_id} -> ${adjacentStation.to_stop_id} reference a stop that is not in the stops list.`);

    return {
      ...adjacentStation,
      from_stop_unique_id: fromStop.stop_unique_id,
      to_stop_unique_id: toStop.stop_unique_id,
    };
  });

  await saveData({
    routes,
    stops: stopsWithUniqueId,
    adjacentStations: adjacentStationsWithPathAndWithUniqueId,
    uniqueStops,
  });
}

/**
 * Reduces a route feature to a shallow copy of its `properties` object.
 *
 * @param {object} route - Feature carrying a `properties` field.
 * @returns {object} A new object holding the own enumerable entries of `route.properties`.
 */
function _keepOnlyRouteProperties(route) {
  const { properties } = route;
  return { ...properties };
}

/**
 * Writes `data` as JSON indented with two spaces to `./assets/data.json`.
 * The path is relative, so it is resolved against the current working directory.
 *
 * @param {*} data - Any JSON-serializable value.
 * @returns {Promise<void>}
 */
async function saveData(data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile('./assets/data.json', serialized);
}

// Filesystem path of this module, derived from its file:// URL.
const modulePath = url.fileURLToPath(import.meta.url);
// True when this file is the script given to node, false when it is merely imported.
const isLaunchedFromCommandLine = modulePath === process.argv[1];

/**
 * Runs `main` when the module is executed directly.
 * A failure is logged and reported through the process exit code, and the
 * database connection is closed whether `main` succeeded or not.
 */
async function runWhenLaunchedFromCommandLine() {
  if (!isLaunchedFromCommandLine)
    return;

  try {
    await main();
  }
  catch (error) {
    console.error(error);
    process.exitCode = 1;
  }
  finally {
    await disconnect();
  }
}

runWhenLaunchedFromCommandLine();

export { getRoutes, getStops, getAdjacentStations, getRoutesPaths, fillPathsInAdjacentStation };
31 changes: 30 additions & 1 deletion tests/extract-data.spec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { afterEach, describe, expect, it } from 'vitest';
import { saveRoutes, saveStopTimes, saveStops, saveTransfers, saveTrips } from '../scripts/save-data.js';
import { getAdjacentStations, getRoutes, getStops } from '../scripts/extract-data.js';
import {
fillPathsInAdjacentStation,
getAdjacentStations,
getRoutes,
getRoutesPaths,
getStops,
} from '../scripts/extract-data.js';
import { emptyAllTables, knex } from '../db/knex-database-connection.js';

describe('extract-data', () => {
Expand Down Expand Up @@ -407,4 +413,27 @@ describe('extract-data', () => {
expect(adjacentStations).to.deep.equal(expectedAdjacentStations);
});
});

// Exercises getRoutesPaths() and compares a per-route summary with an expected list.
// NOTE(review): getRoutesPaths() in scripts/extract-data.js returns GeoJSON features, and the rest of
// that script reads `properties.route_id` and `geometry.coordinates` from them; `path.route_id` and
// `path.coordinates` below therefore look like they would be undefined (and `.length` would then
// throw) — confirm the intended accessors.
// NOTE(review): the mapped objects have `route_id`/`coordinates` keys while the expectation uses an
// `ID` key, so this assertion looks like a placeholder — confirm the expected value.
// NOTE(review): getRoutesPaths() reads a file from the user's Downloads folder, so this test depends
// on that file being present on the machine running it.
describe('#getRoutePath', () => {
it('should extract route path', async () => {
const routePaths = await getRoutesPaths();

expect(routePaths.map(path => ({
route_id: path.route_id,
coordinates: path.coordinates.length,
}))).to.deep.equal([{ ID: 'IDFM:C01371' }]);
});
});

// Exercises fillPathsInAdjacentStation() with the stops, adjacent stations and route paths
// produced by the other extract functions.
describe('#fillPath', () => {
  it('should fill path', async () => {
    const stops = await getStops();
    const adjacentStations = await getAdjacentStations();
    const routePaths = await getRoutesPaths();

    // fillPathsInAdjacentStation takes ONE options object whose station list is named
    // `stations`; passing three positional arguments leaves every option undefined.
    const result = await fillPathsInAdjacentStation({ adjacentStations, stations: stops, routePaths });

    // NOTE(review): expecting [] presumes there are no adjacent stations in the database
    // at this point — confirm, or seed data and assert on the resulting paths.
    expect(result).to.deep.equal([]);
  });
});
});

0 comments on commit 059e9c6

Please sign in to comment.