feat : add new analysis output #266

Open
wants to merge 9 commits into develop
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

**Under development**

- feat: add population analysis output
- fix: avoid regenerating OSM when population changes
- feat: add municipality information to households and activities
- chore: update to `eqasim-java` commit `ece4932`
13 changes: 10 additions & 3 deletions analysis/grid/comparison_flow_volume.py
@@ -1,8 +1,9 @@
import pandas as pd
import geopandas as gpd
import os

import plotly.express as px

ANALYSIS_FOLDER = "compare_flow_volume"

SAMPLING_RATE = 0.05

@@ -84,6 +85,12 @@ def execute(context):
df_grids = stat_grid(df_trips_comp,df_locations_comp,df_persons_comp,df_grid)
point = df_grid.unary_union.centroid # changed with ploy_dep
print("Printing grids...")

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

for prefix, figure in figures.items():
df_select_age = df_stats[df_stats["age"].between(figure["min_age"],figure["max_age"])]
df_select_age = df_select_age.dissolve(by=["id_carr_1km","following_purpose"],aggfunc="count").reset_index()
@@ -103,14 +110,14 @@ def execute(context):
df_select = df_select[df_select["count"] != 0]
fig = px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="count", opacity= 0.7,color_continuous_scale='reds',
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Localisation flow distribution for {prefix} group with {purpose} purpose")
fig.write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
fig.write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')
else :
df_grids_select = gpd.sjoin(df_grids_select,df_grid,how='right',predicate="contains").fillna(0)
df_select = gpd.sjoin(df_select,df_grids_select.drop(columns=[ 'index_left']),how='right',predicate="contains").rename(columns={"count_left":"volume_studied_simu","count_right":"volume_compared_simu"}).fillna(0)
df_select["volume_difference"] = df_select["volume_studied_simu"] - df_select["volume_compared_simu"]
df_select = df_select[(df_select["volume_studied_simu"] != 0 )| (df_select["volume_compared_simu"] != 0)]
df_select["pourcentage_vol"] = df_select["volume_difference"] / df_select["volume_compared_simu"]
px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="volume_difference", opacity= 0.7,color_continuous_scale="picnic", color_continuous_midpoint= 0,hover_name="id_carr_1km_right", hover_data=["volume_studied_simu", "volume_compared_simu","pourcentage_vol"],
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')


127 changes: 127 additions & 0 deletions analysis/synthesis/population.py
@@ -0,0 +1,127 @@

import os
import numpy as np
import pandas as pd
import geopandas as gpd
from analysis.marginals import NUMBER_OF_VEHICLES_LABELS
from shapely import distance
AGE_CLASS = [0, 10, 14, 17, 25, 50, 65, np.inf]
NUMBER_OF_VEHICLES= [0,1,2,3,np.inf]
NAME_AGE_CLASS = ["0-10","11-14","15-17","18-25","26-50","51-65","65+"]
ANALYSIS_FOLDER = "analysis_population"
def configure(context):

context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("sampling_rate")

context.stage("synthesis.population.trips")
context.stage("synthesis.population.enriched")
context.stage("synthesis.population.spatial.locations")

context.stage("data.census.filtered", alias = "census")
context.stage("data.hts.selected", alias = "hts")

def execute(context):

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

prefix = context.config("output_prefix")
sampling_rate = context.config("sampling_rate")
df_person_eq = context.stage("synthesis.population.enriched")
df_trip_eq = context.stage("synthesis.population.trips")
df_location_eq = context.stage("synthesis.population.spatial.locations")[["person_id", "activity_index", "geometry"]]

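# reproject locations to metric EPSG:2154 and derive the indices of the activities surrounding each trip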
df_location_eq = df_location_eq.to_crs("EPSG:2154")
df_trip_eq["preceding_activity_index"] = df_trip_eq["trip_index"]
df_trip_eq["following_activity_index"] = df_trip_eq["trip_index"] + 1

df_census = context.stage("census")
df_hts_households, df_hts_person, df_hts_trip = context.stage("hts")
df_hts_person["person_weight"] *=df_census["weight"].sum()/df_hts_person["person_weight"].sum()
df_hts_households["household_weight"] *=df_census["weight"].sum()/df_hts_households["household_weight"].sum()
# get age class
df_person_eq["age_class"] = pd.cut(df_person_eq["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_census["age_class"] = pd.cut(df_census["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_hts_person["age_class"] = pd.cut(df_hts_person["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)

# get vehicule class
df_person_eq["vehicles_class"] = pd.cut(df_person_eq["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)
df_hts_households["vehicles_class"] = pd.cut(df_hts_households["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)


df_eq_travel = pd.merge(df_trip_eq,df_person_eq[["person_id","age_class"]],on=["person_id"])
df_hts_travel = pd.merge(df_hts_trip,df_hts_person[["person_id","age_class","person_weight"]],on=["person_id"])

print("Generate tables ...")
# Age purpose analysis
analysis_age_purpose = pd.pivot_table(df_eq_travel,"person_id",index="age_class",columns="following_purpose",aggfunc="count")
analysis_age_purpose = analysis_age_purpose/sampling_rate
analysis_age_purpose.to_csv(f"{analysis_output_path}/{prefix}age_purpose.csv")

# Compare age volume
analysis_age_class = pd.concat([df_census.groupby("age_class")["weight"].sum(),df_person_eq.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_age_class.columns = ["Age class","INSEE","EQASIM"]
analysis_age_class["Proportion_INSEE"] = analysis_age_class["INSEE"] /df_census["weight"].sum()
analysis_age_class["Proportion_EQASIM"] = analysis_age_class["EQASIM"] /len(df_person_eq)
analysis_age_class["EQASIM"] = analysis_age_class["EQASIM"]/sampling_rate
analysis_age_class.to_csv(f"{analysis_output_path}/{prefix}age.csv")

# Compare vehicle volume
analysis_vehicles_class = pd.concat([df_hts_households.groupby("vehicles_class")["household_weight"].sum(),df_person_eq.groupby("vehicles_class")["household_id"].nunique()],axis=1).reset_index()
analysis_vehicles_class.columns = ["Number of vehicles class","HTS","EQASIM"]
analysis_vehicles_class["Proportion_HTS"] = analysis_vehicles_class["HTS"] / df_hts_households["household_weight"].sum()
analysis_vehicles_class["Proportion_EQASIM"] = analysis_vehicles_class["EQASIM"] / df_person_eq["household_id"].nunique()
analysis_vehicles_class.to_csv(f"{analysis_output_path}/{prefix}nbr_vehicle.csv")

# Compare license volume
analysis_license_class = pd.concat([df_hts_person.groupby("has_license")["person_weight"].sum(),df_person_eq.groupby("has_license")["person_id"].count()],axis=1).reset_index()
analysis_license_class.columns = ["Possession of license","HTS","EQASIM"]
analysis_license_class["Proportion_HTS"] = analysis_license_class["HTS"] /df_hts_person["person_weight"].sum()
analysis_license_class["Proportion_EQASIM"] = analysis_license_class["EQASIM"] /len(df_person_eq)
analysis_license_class["EQASIM"] = analysis_license_class["EQASIM"]/sampling_rate
analysis_license_class.to_csv(f"{analysis_output_path}/{prefix}license.csv")

# Compare travel volume
analysis_travel = pd.concat([df_hts_travel.groupby("age_class")["person_weight"].sum(),df_eq_travel.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_travel.columns = ["Age class","HTS","EQASIM"]
analysis_travel["Proportion_HTS"] = analysis_travel["HTS"] /df_hts_travel["person_weight"].sum()
analysis_travel["Proportion_EQASIM"] = analysis_travel["EQASIM"] /len(df_eq_travel)
analysis_travel["EQASIM"] = analysis_travel["EQASIM"]/sampling_rate
analysis_travel.to_csv(f"{analysis_output_path}/{prefix}travel.csv")

# Compare distance
df_hts_travel["routed_distance"] = df_hts_travel["routed_distance"]/1000 if "routed_distance" in df_hts_travel.columns else df_hts_travel["euclidean_distance"]/1000
df_hts_travel["distance_class"] = pd.cut(df_hts_travel["routed_distance"],list(np.arange(100))+[np.inf])

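# attach the preceding and following activity locations to each trip, then compute crow-fly distances in km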
df_spatial = pd.merge(df_trip_eq, df_location_eq.rename(columns = {
"activity_index": "preceding_activity_index",
"geometry": "preceding_geometry"
}), how = "left", on = ["person_id", "preceding_activity_index"])

df_spatial = pd.merge(df_spatial, df_location_eq.rename(columns = {
"activity_index": "following_activity_index",
"geometry": "following_geometry"
}), how = "left", on = ["person_id", "following_activity_index"])
df_spatial["distance"] = df_spatial.apply(lambda x:distance( x["preceding_geometry"],x["following_geometry"])/1000,axis=1)
df_spatial["distance_class"] = pd.cut(df_spatial["distance"],list(np.arange(100))+[np.inf])

analysis_distance = pd.concat([df_hts_travel.groupby("distance_class")["person_weight"].sum(),df_spatial.groupby("distance_class")["person_id"].count()],axis=1).reset_index()
analysis_distance.columns = ["Distance class","HTS","EQASIM"]
analysis_distance["Proportion_HTS"] = analysis_distance["HTS"] / analysis_distance["HTS"].sum()
analysis_distance["Proportion_EQASIM"] = analysis_distance["EQASIM"] / len(df_spatial)
analysis_distance["EQASIM"] = analysis_distance["EQASIM"]/ sampling_rate
analysis_distance.to_csv(f"{analysis_output_path}/{prefix}distance.csv")










8 changes: 8 additions & 0 deletions docs/population.md
@@ -450,3 +450,11 @@ folder as: `{output_prefix}_{age group}_{trip purpose}.html`

Note:
With `analysis_from_file` set to `False`, the most recent synthetic population is analysed by default. If `output_prefix` and `comparison_file_prefix` refer to the same outputs, or `comparison_file_prefix` is not specified, only a volume visualisation of that single population is produced.
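For illustration, a minimal sketch of how these options could look in `config.yml` (assuming they sit in the `config` block alongside the other parameters; the prefixes are placeholders):

```yaml
config:
  analysis_from_file: false
  output_prefix: ile_de_france_
  comparison_file_prefix: previous_run_
```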


### Comparing the population to source data

Using the population pipeline in the `analysis` directory, you can generate several tables comparing the composition of the synthetic population to its source data. The tables currently compare: population volume by age range, household volume by number of vehicles, population volume with and without a license, trip volume by age range, and trip volume by length. In addition, a table of population volume by age range and trip purpose is generated from the synthetic population alone. All tables are written as CSV files, prefixed with `output_prefix`, to an `analysis_population` sub-folder of the output directory.

To use this pipeline, you must already have created a synthetic population. Then open `config.yml` and add the `analysis.synthesis.population` stage to the `run` section, as sketched below.
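A minimal sketch of the corresponding `config.yml` excerpt (assuming the pipeline's usual `run` list and `config` block; the values shown are placeholders):

```yaml
run:
  - synthesis.output
  - analysis.synthesis.population

config:
  output_path: output
  output_prefix: ile_de_france_
  sampling_rate: 0.05
```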
7 changes: 5 additions & 2 deletions synthesis/output.py
@@ -6,8 +6,10 @@
import sqlite3
import math
import numpy as np
from analysis.synthesis.population import ANALYSIS_FOLDER

def configure(context):

context.stage("synthesis.population.enriched")

context.stage("synthesis.population.activities")
@@ -22,7 +24,8 @@ def configure(context):
context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("output_formats", ["csv", "gpkg"])

context.config("sampling_rate")

if context.config("mode_choice", False):
context.stage("matsim.simulation.prepare")

@@ -270,4 +273,4 @@ def execute(context):
clean_gpkg(path)
if "geoparquet" in output_formats:
path = "%s/%strips.geoparquet" % (output_path, output_prefix)
df_spatial.to_parquet(path)
df_spatial.to_parquet(path)