.Rhistory

'strongGravityMode': False                     # ?boolean false
}
# Construct the sigma graph and customize visualization
Sigma.write_html(G,
#layout_settings        = layout_settings,                                       # Set layout settings
default_edge_type      = "arrow",                                                # Set default edge type
fullscreen             = True,                                                   # Display in fullscreen mode
label_density          = 2,                                                      # Increase this to have more labels appear
label_font             = "Helvetica Neue",                                       # Set label font
max_categorical_colors = 30,                                                     # Max categorical colors for communities
node_border_color_from = 'node',                                                 # Set node border color from 'node' attribute
node_color             = "community",                                            # Set node colors
node_label             = "author",                                               # Set node label from 'author' attribute
node_label_size        = G.in_degree,                                            # Set node label size
node_label_size_range  = (12, 36),                                               # Set node label size range
node_metrics           = {"community": {"name": "louvain", "resolution": 2}},    # Specify node metrics
node_size              = G.in_degree,                                            # Set node size based on the in_degree attribute
node_size_range        = (3, 30),                                                # Set node size range
path                   = f"networks/references/{period_label}_sigmadefault.html",       # Specify the output file path
start_layout           = 10                                                       # Start the layout algorithm automatically and lasts 5 seconds
#node_border_color     = "black",                                                # Set node border color
#edge_color            = "source",                                               # Set edge color from 'source' attribute
)
return G
# NOTE(review): the variable is named after 2022-2023, but the call asks for the years 2011 and 2012.
TEST20222023 = sigma_graph_references(list_references_standardized, 2011, 2012)
#| label: citations-construct-dataframes
def get_citations_df(df, start_year=None, end_year=None):
    """
    Select the citation-network columns of a DataFrame, optionally restricted to a range of years.

    Parameters:
    - df: DataFrame containing at least the columns 'citing_art', 'scopus_id', 'sourcetitle',
      'title', 'citedby_count', 'citations_per_year', 'author' and 'year'
    - start_year: Optional, first year to keep (inclusive)
    - end_year: Optional, last year to keep (inclusive)

    Returns:
    - New DataFrame holding only the columns listed above, with 'citedby_count' renamed
      to 'citations'. Rows are filtered on 'year' only when both start_year and end_year
      are provided; the input DataFrame is not modified.

    Raises:
    - KeyError: if one of the required columns is missing from df
    """
    network_columns = [
        'citing_art', 'scopus_id', 'sourcetitle', 'title',
        'citedby_count', 'citations_per_year', 'author', 'year',
    ]
    # A single bound is ignored: the year filter needs both ends of the range
    if start_year is not None and end_year is not None:
        # Series.between includes both bounds by default
        df = df[df['year'].between(start_year, end_year)]
    return df[network_columns].rename(columns={'citedby_count': 'citations'})
def sort_dict(dict):
    """
    Return a new dictionary with the items of the given mapping ordered by key.

    :param dict: Mapping whose keys can be compared with each other. The name shadows the
        builtin but is kept so that existing keyword callers keep working.
    :return: New dictionary with the same items in ascending key order; the input is left unchanged.
    """
    return {key: dict[key] for key in sorted(dict)}
def get_info_references_dict(df, key, column):
    """
    Create a dictionary that maps the values of one DataFrame column to the values of another.

    :param df: Input DataFrame.
    :param key: Column name to be used as keys in the resulting dictionary.
    :param column: Column name to be used as values in the resulting dictionary.
    :return: Dictionary ordered by key. When a key appears in several rows, the value of
        the last of those rows is kept.
    :raises ValueError: If key or column is not a column of df.
    """
    if key not in df.columns or column not in df.columns:
        raise ValueError("The required columns are not present in the DataFrame.")
    return sort_dict(df.set_index(key)[column].to_dict())
def sigma_graph_references(df, start_year=None, end_year=None):
# Builds a directed citation graph with networkx, attaches reference metadata and
# centrality metrics as node attributes, writes the graph to an HTML file through
# Sigma.write_html and returns the graph.
# Reads names that are not defined in this function: list_references_standardized,
# dfs, columns_to_extract, data, nx and Sigma.
# NOTE(review): the indentation of this block is missing, so the nesting of the
# loops below cannot be confirmed from the text alone.
citations_df = get_citations_df(list_references_standardized, start_year, end_year)# NOTE(review): reads list_references_standardized; the df parameter is not used for the edge list
# Initialize the output dictionary
dict_references = {}
# Retrieve the information for each period and each column
# NOTE(review): this loop rebinds the name df, hiding the df parameter
for period, df in dfs.items():
dict_references[period] = {}
for column in columns_to_extract:
# This check is important in case all columns are not present across all dataframes
if column in df.columns:
dict_references[period][column] = get_info_references_dict(df, 'scopus_id', column)
# Get the citing_art dictionary from 'data' DataFrame
for column in columns_to_extract:
if column in data.columns:
citing_art_dict = get_info_references_dict(data, 'citing_art', column)
# Add to dict_references only if key is not already present
# It means that the article in our data has been cited by others and is then already present in the references dataframe
for key, value in citing_art_dict.items():
if key not in dict_references[period].get(column, {}):
dict_references[period].setdefault(column, {})[key] = value
# Edges go from the 'citing_art' column to the 'scopus_id' column
G = nx.from_pandas_edgelist(citations_df, 'citing_art', 'scopus_id', create_using=nx.DiGraph())
# Create the period label with start_year and end_year
period_label = "{}_{}".format(start_year, end_year) if start_year and end_year else "overall"
# Fetch attributes for the given period from dict_references (a local dictionary built above)
attributes_dict = dict_references.get(period_label, {})
# Set the attributes from dict_references to the nodes of the graph
for attribute, attribute_dict in attributes_dict.items():
nx.set_node_attributes(G, attribute_dict, name=attribute)
# Set edge colors for visualization
for u, v in G.edges:
G[u][v]["color"] = "#7D7C7C"
# Calculate the degree of each node
# NOTE(review): node_degree is not used anywhere else in this function
node_degree = dict(G.degree)
# Compute multiple centrality metrics for nodes
node_degree_centrality = nx.degree_centrality(G)
node_degree_betweenness = nx.betweenness_centrality(G)
node_degree_closeness = nx.closeness_centrality(G)
# NOTE(review): this calls closeness_centrality again, so the 'eigenvector centrality'
# attribute below receives closeness values
node_degree_eigenvector = nx.closeness_centrality(G)
#node_degree_constraint_weighted = nx.constraint(G, weight="value")
node_degree_constraint_unweighted = nx.constraint(G)
# Set node attributes for various metrics
nx.set_node_attributes(G, node_degree_centrality, 'centrality')
nx.set_node_attributes(G, node_degree_betweenness, 'betweenness')
nx.set_node_attributes(G, node_degree_closeness, 'closeness')
nx.set_node_attributes(G, node_degree_eigenvector, 'eigenvector centrality')
#nx.set_node_attributes(G, node_degree_constraint_weighted, 'burt\'s constraint weighted')
nx.set_node_attributes(G, node_degree_constraint_unweighted, 'burt constraint unweighted')
# Layout settings of graphology  https://graphology.github.io/standard-library/layout-forceatlas2#settings
# Some experiments of the different settings: https://observablehq.com/@mef/forceatlas2-layout-settings-visualized
# NOTE(review): layout_settings is built but not passed to Sigma.write_html (the argument is commented out below)
layout_settings = {
'adjustSizes': False,                          # ?boolean false: should the node's sizes be taken into account?
'barnesHutOptimize': True,                     # ?boolean false: whether to use the Barnes-Hut approximation to compute repulsion in O(n*log(n)) rather than default O(n^2), n being the number of nodes.
'barnesHutTheta': 0.5,                         # ?number 0.5: Barnes-Hut approximation theta parameter.
'edgeWeightInfluence': 1,                      # ?number 1: influence of the edge's weights on the layout. To consider edge weight, don't forget to pass weighted as true when applying the synchronous layout or when instantiating the worker.
'gravity': 5,                                 # ?number 1: strength of the layout's gravity.
'linLogMode': True,                            # ?boolean false: whether to use Noack's LinLog model.
'outboundAttractionDistribution': False,       # ?boolean false
'scalingRatio': 6,                             # ?number 1
'slowDown': 1,                                 # ?number 1
'strongGravityMode': False                     # ?boolean false
}
# Construct the sigma graph and customize visualization
Sigma.write_html(G,
#layout_settings        = layout_settings,                                       # Set layout settings
default_edge_type      = "arrow",                                                # Set default edge type
fullscreen             = True,                                                   # Display in fullscreen mode
label_density          = 2,                                                      # Increase this to have more labels appear
label_font             = "Helvetica Neue",                                       # Set label font
max_categorical_colors = 30,                                                     # Max categorical colors for communities
node_border_color_from = 'node',                                                 # Set node border color from 'node' attribute
node_color             = "community",                                            # Set node colors
node_label             = "author",                                               # Set node label from 'author' attribute
node_label_size        = G.in_degree,                                            # Set node label size
node_label_size_range  = (12, 36),                                               # Set node label size range
node_metrics           = {"community": {"name": "louvain", "resolution": 2}},    # Specify node metrics
node_size              = G.in_degree,                                            # Set node size based on the in_degree attribute
node_size_range        = (3, 30),                                                # Set node size range
path                   = f"networks/references/{period_label}_sigmadefault.html",       # Specify the output file path
start_layout           = 10                                                       # Start the layout algorithm automatically; NOTE(review): the value is 10, presumably seconds - confirm against the ipysigma documentation
#node_border_color     = "black",                                                # Set node border color
#edge_color            = "source",                                               # Set edge color from 'source' attribute
)
return G
# NOTE(review): the variable is named after 2022-2023, but the call asks for the years 2011 and 2012.
TEST20222023 = sigma_graph_references(list_references_standardized, 2011, 2012)
#| label: citations-construct-dataframes
def get_citations_df(df, start_year=None, end_year=None):
    """
    Select the citation-network columns of a DataFrame, optionally restricted to a range of years.

    Parameters:
    - df: DataFrame containing at least the columns 'citing_art', 'scopus_id', 'sourcetitle',
      'title', 'citedby_count', 'citations_per_year', 'author' and 'year'
    - start_year: Optional, first year to keep (inclusive)
    - end_year: Optional, last year to keep (inclusive)

    Returns:
    - New DataFrame holding only the columns listed above, with 'citedby_count' renamed
      to 'citations'. Rows are filtered on 'year' only when both start_year and end_year
      are provided; the input DataFrame is not modified.

    Raises:
    - KeyError: if one of the required columns is missing from df
    """
    network_columns = [
        'citing_art', 'scopus_id', 'sourcetitle', 'title',
        'citedby_count', 'citations_per_year', 'author', 'year',
    ]
    # A single bound is ignored: the year filter needs both ends of the range
    if start_year is not None and end_year is not None:
        # Series.between includes both bounds by default
        df = df[df['year'].between(start_year, end_year)]
    return df[network_columns].rename(columns={'citedby_count': 'citations'})
def sort_dict(dict):
    """
    Return a new dictionary with the items of the given mapping ordered by key.

    :param dict: Mapping whose keys can be compared with each other. The name shadows the
        builtin but is kept so that existing keyword callers keep working.
    :return: New dictionary with the same items in ascending key order; the input is left unchanged.
    """
    return {key: dict[key] for key in sorted(dict)}
def get_info_references_dict(df, key, column):
    """
    Create a dictionary that maps the values of one DataFrame column to the values of another.

    :param df: Input DataFrame.
    :param key: Column name to be used as keys in the resulting dictionary.
    :param column: Column name to be used as values in the resulting dictionary.
    :return: Dictionary ordered by key. When a key appears in several rows, the value of
        the last of those rows is kept.
    :raises ValueError: If key or column is not a column of df.
    """
    if key not in df.columns or column not in df.columns:
        raise ValueError("The required columns are not present in the DataFrame.")
    return sort_dict(df.set_index(key)[column].to_dict())
def sigma_graph_references(df, start_year=None, end_year=None):
    """
    Build the directed citation graph of a period and export it as an interactive Sigma HTML page.

    Relies on names defined elsewhere in the file: get_citations_df,
    get_info_references_dict, the 'data' DataFrame, nx (networkx) and Sigma.

    Parameters:
    - df: DataFrame of references with the columns required by get_citations_df
    - start_year: Optional, first year of the period
    - end_year: Optional, last year of the period; the period is applied only when both
      years are given, otherwise the output is labelled "overall"

    Returns:
    - The networkx DiGraph whose edges go from 'citing_art' to 'scopus_id'
    """
    import warnings

    columns_to_extract = ['sourcetitle', 'title', 'citedby_count', 'citations_per_year', 'author', 'year']

    # Same "both bounds given" rule as get_citations_df, so label and filter always agree
    has_period = start_year is not None and end_year is not None
    period_label = f"{start_year}_{end_year}" if has_period else "overall"

    # Build the edge list from the DataFrame passed by the caller, not from the
    # global list_references_standardized, so the df argument is honoured
    citations_df = get_citations_df(df, start_year, end_year)

    # Node metadata: one {node id: value} dictionary per extracted column
    node_attributes = {}
    for column in columns_to_extract:
        if column in df.columns:
            node_attributes[column] = get_info_references_dict(df, 'scopus_id', column)
        if column in data.columns:
            # Citing articles only fill the gaps: ids already described by df keep their value
            column_values = node_attributes.setdefault(column, {})
            for key, value in get_info_references_dict(data, 'citing_art', column).items():
                column_values.setdefault(key, value)

    G = nx.from_pandas_edgelist(citations_df, 'citing_art', 'scopus_id', create_using=nx.DiGraph())
    for attribute, values in node_attributes.items():
        nx.set_node_attributes(G, values, name=attribute)

    # Same grey for every edge
    nx.set_edge_attributes(G, "#7D7C7C", "color")

    # Centrality metrics, stored as node attributes
    nx.set_node_attributes(G, nx.degree_centrality(G), 'centrality')
    nx.set_node_attributes(G, nx.betweenness_centrality(G), 'betweenness')
    nx.set_node_attributes(G, nx.closeness_centrality(G), 'closeness')
    # Eigenvector centrality used to be filled with closeness values (copy-paste slip).
    # The power iteration can fail (empty graph, no convergence); in that case the
    # attribute is skipped with a warning instead of aborting the export.
    try:
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException as error:
        warnings.warn(f"Eigenvector centrality not computed for {period_label}: {error}")
    else:
        nx.set_node_attributes(G, eigenvector, 'eigenvector centrality')
    nx.set_node_attributes(G, nx.constraint(G), 'burt constraint unweighted')

    # ForceAtlas2 settings of graphology: https://graphology.github.io/standard-library/layout-forceatlas2#settings
    # Experiments with the settings: https://observablehq.com/@mef/forceatlas2-layout-settings-visualized
    # Currently not passed to Sigma.write_html (the layout_settings argument is disabled below)
    layout_settings = {
        'adjustSizes': False,                     # default false: take node sizes into account?
        'barnesHutOptimize': True,                # default false: Barnes-Hut approximation, O(n*log(n)) instead of O(n^2)
        'barnesHutTheta': 0.5,                    # default 0.5: Barnes-Hut theta parameter
        'edgeWeightInfluence': 1,                 # default 1: influence of the edge weights on the layout
        'gravity': 5,                             # default 1: strength of the layout's gravity
        'linLogMode': True,                       # default false: use Noack's LinLog model
        'outboundAttractionDistribution': False,  # default false
        'scalingRatio': 6,                        # default 1
        'slowDown': 1,                            # default 1
        'strongGravityMode': False,               # default false
    }

    # Write the interactive visualization to an HTML file
    Sigma.write_html(
        G,
        # layout_settings=layout_settings,
        default_edge_type="arrow",
        fullscreen=True,
        label_density=2,                      # increase to show more labels
        label_font="Helvetica Neue",
        max_categorical_colors=30,            # upper bound of distinct community colors
        node_border_color_from='node',
        node_color="community",               # color by the Louvain community computed below
        node_label="author",
        node_label_size=G.in_degree,
        node_label_size_range=(12, 36),
        node_metrics={"community": {"name": "louvain", "resolution": 2}},
        node_size=G.in_degree,
        node_size_range=(3, 30),
        path=f"networks/references/{period_label}_sigmadefault.html",
        start_layout=10,                      # start the layout automatically and stop it after 10 seconds
    )
    return G
# NOTE(review): the variable is named after 2022-2023, but the call asks for the years 2011 and 2012.
TEST20222023 = sigma_graph_references(list_references_standardized, 2011, 2012)
#| label: citations-construct-network-2022-2023
# NOTE(review): the string "2022_2023" is passed as start_year while end_year keeps its
# default, and the result is unpacked into two names although the preceding
# sigma_graph_references definition ends with `return G`.
G_2022_2023_references, df_2022_2023_references = sigma_graph_references(list_references_standardized, "2022_2023")
#| label: citations-construct-dataframes
def get_citations_df(df, start_year=None, end_year=None):
    """
    Select the citation-network columns of a DataFrame, optionally restricted to a range of years.

    Parameters:
    - df: DataFrame containing at least the columns 'citing_art', 'scopus_id', 'sourcetitle',
      'title', 'citedby_count', 'citations_per_year', 'author' and 'year'
    - start_year: Optional, first year to keep (inclusive)
    - end_year: Optional, last year to keep (inclusive)

    Returns:
    - New DataFrame holding only the columns listed above, with 'citedby_count' renamed
      to 'citations'. Rows are filtered on 'year' only when both start_year and end_year
      are provided; the input DataFrame is not modified.

    Raises:
    - KeyError: if one of the required columns is missing from df
    """
    network_columns = [
        'citing_art', 'scopus_id', 'sourcetitle', 'title',
        'citedby_count', 'citations_per_year', 'author', 'year',
    ]
    # A single bound is ignored: the year filter needs both ends of the range
    if start_year is not None and end_year is not None:
        # Series.between includes both bounds by default
        df = df[df['year'].between(start_year, end_year)]
    return df[network_columns].rename(columns={'citedby_count': 'citations'})
def sort_dict(dict):
    """
    Return a new dictionary with the items of the given mapping ordered by key.

    :param dict: Mapping whose keys can be compared with each other. The name shadows the
        builtin but is kept so that existing keyword callers keep working.
    :return: New dictionary with the same items in ascending key order; the input is left unchanged.
    """
    return {key: dict[key] for key in sorted(dict)}
def get_info_references_dict(df, key, column):
    """
    Create a dictionary that maps the values of one DataFrame column to the values of another.

    :param df: Input DataFrame.
    :param key: Column name to be used as keys in the resulting dictionary.
    :param column: Column name to be used as values in the resulting dictionary.
    :return: Dictionary ordered by key. When a key appears in several rows, the value of
        the last of those rows is kept.
    :raises ValueError: If key or column is not a column of df.
    """
    if key not in df.columns or column not in df.columns:
        raise ValueError("The required columns are not present in the DataFrame.")
    return sort_dict(df.set_index(key)[column].to_dict())
def sigma_graph_references(df, start_year=None, end_year=None):
    """
    Build the directed citation graph of a period and export it as an interactive Sigma HTML page.

    Relies on names defined elsewhere in the file: get_citations_df,
    get_info_references_dict, the 'data' DataFrame, nx (networkx) and Sigma.

    Parameters:
    - df: DataFrame of references with the columns required by get_citations_df
    - start_year: Optional, first year of the period
    - end_year: Optional, last year of the period; the period is applied only when both
      years are given, otherwise the output is labelled "overall"

    Returns:
    - Tuple (G, citations_df): the networkx DiGraph whose edges go from 'citing_art' to
      'scopus_id', and the DataFrame returned by get_citations_df that the edges were built from
    """
    import warnings

    columns_to_extract = ['title', 'sourcetitle', 'author', 'year', 'citedby_count', 'citations_per_year']

    # Same "both bounds given" rule as get_citations_df, so label and filter always agree
    has_period = start_year is not None and end_year is not None
    period_label = f"{start_year}_{end_year}" if has_period else "overall"

    # Build the edge list from the DataFrame passed by the caller, not from the
    # global list_references_standardized, so the df argument is honoured
    citations_df = get_citations_df(df, start_year, end_year)

    # Node metadata: one {node id: value} dictionary per extracted column
    node_attributes = {}
    for column in columns_to_extract:
        if column in df.columns:
            node_attributes[column] = get_info_references_dict(df, 'scopus_id', column)
        if column in data.columns:
            # Citing articles only fill the gaps: ids already described by df keep their value
            column_values = node_attributes.setdefault(column, {})
            for key, value in get_info_references_dict(data, 'citing_art', column).items():
                column_values.setdefault(key, value)

    G = nx.from_pandas_edgelist(citations_df, 'citing_art', 'scopus_id', create_using=nx.DiGraph())
    for attribute, values in node_attributes.items():
        nx.set_node_attributes(G, values, name=attribute)

    # Same grey for every edge
    nx.set_edge_attributes(G, "#7D7C7C", "color")

    # Centrality metrics, stored as node attributes
    nx.set_node_attributes(G, nx.degree_centrality(G), 'centrality')
    nx.set_node_attributes(G, nx.betweenness_centrality(G), 'betweenness')
    nx.set_node_attributes(G, nx.closeness_centrality(G), 'closeness')
    # Eigenvector centrality used to be filled with closeness values (copy-paste slip).
    # The power iteration can fail (empty graph, no convergence); in that case the
    # attribute is skipped with a warning instead of aborting the export.
    try:
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException as error:
        warnings.warn(f"Eigenvector centrality not computed for {period_label}: {error}")
    else:
        nx.set_node_attributes(G, eigenvector, 'eigenvector centrality')
    nx.set_node_attributes(G, nx.constraint(G), 'burt constraint unweighted')

    # ForceAtlas2 settings of graphology: https://graphology.github.io/standard-library/layout-forceatlas2#settings
    # Experiments with the settings: https://observablehq.com/@mef/forceatlas2-layout-settings-visualized
    # Currently not passed to Sigma.write_html (the layout_settings argument is disabled below)
    layout_settings = {
        'adjustSizes': False,                     # default false: take node sizes into account?
        'barnesHutOptimize': True,                # default false: Barnes-Hut approximation, O(n*log(n)) instead of O(n^2)
        'barnesHutTheta': 0.5,                    # default 0.5: Barnes-Hut theta parameter
        'edgeWeightInfluence': 1,                 # default 1: influence of the edge weights on the layout
        'gravity': 5,                             # default 1: strength of the layout's gravity
        'linLogMode': True,                       # default false: use Noack's LinLog model
        'outboundAttractionDistribution': False,  # default false
        'scalingRatio': 6,                        # default 1
        'slowDown': 1,                            # default 1
        'strongGravityMode': False,               # default false
    }

    # Write the interactive visualization to an HTML file
    Sigma.write_html(
        G,
        # layout_settings=layout_settings,
        default_edge_type="arrow",
        fullscreen=True,
        label_density=2,                      # increase to show more labels
        label_font="Helvetica Neue",
        max_categorical_colors=30,            # upper bound of distinct community colors
        node_border_color_from='node',
        node_color="community",               # color by the Louvain community computed below
        node_label="author",
        node_label_size=G.in_degree,
        node_label_size_range=(12, 36),
        node_metrics={"community": {"name": "louvain", "resolution": 2}},
        node_size=G.in_degree,
        node_size_range=(3, 30),
        path=f"networks/references/{period_label}_sigmadefault.html",
        start_layout=10,                      # start the layout automatically and stop it after 10 seconds
    )
    return G, citations_df
#| label: citations-construct-dataframes
def get_citations_df(df, start_year=None, end_year=None):
    """
    Select the citation-network columns of a DataFrame, optionally restricted to a range of years.

    Parameters:
    - df: DataFrame containing at least the columns 'citing_art', 'scopus_id', 'sourcetitle',
      'title', 'citedby_count', 'citations_per_year', 'author' and 'year'
    - start_year: Optional, first year to keep (inclusive)
    - end_year: Optional, last year to keep (inclusive)

    Returns:
    - New DataFrame holding only the columns listed above, with 'citedby_count' renamed
      to 'citations'. Rows are filtered on 'year' only when both start_year and end_year
      are provided; the input DataFrame is not modified.

    Raises:
    - KeyError: if one of the required columns is missing from df
    """
    network_columns = [
        'citing_art', 'scopus_id', 'sourcetitle', 'title',
        'citedby_count', 'citations_per_year', 'author', 'year',
    ]
    # A single bound is ignored: the year filter needs both ends of the range
    if start_year is not None and end_year is not None:
        # Series.between includes both bounds by default
        df = df[df['year'].between(start_year, end_year)]
    return df[network_columns].rename(columns={'citedby_count': 'citations'})
def sort_dict(dict):
    """
    Return a new dictionary with the items of the given mapping ordered by key.

    :param dict: Mapping whose keys can be compared with each other. The name shadows the
        builtin but is kept so that existing keyword callers keep working.
    :return: New dictionary with the same items in ascending key order; the input is left unchanged.
    """
    return {key: dict[key] for key in sorted(dict)}
def get_info_references_dict(df, key, column):
    """
    Create a dictionary that maps the values of one DataFrame column to the values of another.

    :param df: Input DataFrame.
    :param key: Column name to be used as keys in the resulting dictionary.
    :param column: Column name to be used as values in the resulting dictionary.
    :return: Dictionary ordered by key. When a key appears in several rows, the value of
        the last of those rows is kept.
    :raises ValueError: If key or column is not a column of df.
    """
    if key not in df.columns or column not in df.columns:
        raise ValueError("The required columns are not present in the DataFrame.")
    return sort_dict(df.set_index(key)[column].to_dict())
def sigma_graph_references(df, start_year=None, end_year=None):
    """
    Build the directed citation graph of a period and export it as an interactive Sigma HTML page.

    Relies on names defined elsewhere in the file: get_citations_df,
    get_info_references_dict, the 'data' DataFrame, nx (networkx) and Sigma.

    Parameters:
    - df: DataFrame of references with the columns required by get_citations_df
    - start_year: Optional, first year of the period
    - end_year: Optional, last year of the period; the period is applied only when both
      years are given, otherwise the output is labelled "overall"

    Returns:
    - Tuple (G, citations_df): the networkx DiGraph whose edges go from 'citing_art' to
      'scopus_id', and the DataFrame returned by get_citations_df that the edges were built from
    """
    import warnings

    columns_to_extract = ['title', 'sourcetitle', 'author', 'year', 'citedby_count', 'citations_per_year']

    # Same "both bounds given" rule as get_citations_df, so label and filter always agree
    has_period = start_year is not None and end_year is not None
    period_label = f"{start_year}_{end_year}" if has_period else "overall"

    # Build the edge list from the DataFrame passed by the caller, not from the
    # global list_references_standardized, so the df argument is honoured
    citations_df = get_citations_df(df, start_year, end_year)

    # Node metadata: one {node id: value} dictionary per extracted column
    node_attributes = {}
    for column in columns_to_extract:
        if column in df.columns:
            node_attributes[column] = get_info_references_dict(df, 'scopus_id', column)
        if column in data.columns:
            # Citing articles only fill the gaps: ids already described by df keep their value
            column_values = node_attributes.setdefault(column, {})
            for key, value in get_info_references_dict(data, 'citing_art', column).items():
                column_values.setdefault(key, value)

    G = nx.from_pandas_edgelist(citations_df, 'citing_art', 'scopus_id', create_using=nx.DiGraph())
    for attribute, values in node_attributes.items():
        nx.set_node_attributes(G, values, name=attribute)

    # Same grey for every edge
    nx.set_edge_attributes(G, "#7D7C7C", "color")

    # Centrality metrics, stored as node attributes
    nx.set_node_attributes(G, nx.degree_centrality(G), 'centrality')
    nx.set_node_attributes(G, nx.betweenness_centrality(G), 'betweenness')
    nx.set_node_attributes(G, nx.closeness_centrality(G), 'closeness')
    # Eigenvector centrality used to be filled with closeness values (copy-paste slip).
    # The power iteration can fail (empty graph, no convergence); in that case the
    # attribute is skipped with a warning instead of aborting the export.
    try:
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException as error:
        warnings.warn(f"Eigenvector centrality not computed for {period_label}: {error}")
    else:
        nx.set_node_attributes(G, eigenvector, 'eigenvector centrality')
    nx.set_node_attributes(G, nx.constraint(G), 'burt constraint unweighted')

    # ForceAtlas2 settings of graphology: https://graphology.github.io/standard-library/layout-forceatlas2#settings
    # Experiments with the settings: https://observablehq.com/@mef/forceatlas2-layout-settings-visualized
    # Currently not passed to Sigma.write_html (the layout_settings argument is disabled below)
    layout_settings = {
        'adjustSizes': False,                     # default false: take node sizes into account?
        'barnesHutOptimize': True,                # default false: Barnes-Hut approximation, O(n*log(n)) instead of O(n^2)
        'barnesHutTheta': 0.5,                    # default 0.5: Barnes-Hut theta parameter
        'edgeWeightInfluence': 1,                 # default 1: influence of the edge weights on the layout
        'gravity': 5,                             # default 1: strength of the layout's gravity
        'linLogMode': True,                       # default false: use Noack's LinLog model
        'outboundAttractionDistribution': False,  # default false
        'scalingRatio': 6,                        # default 1
        'slowDown': 1,                            # default 1
        'strongGravityMode': False,               # default false
    }

    # Write the interactive visualization to an HTML file
    Sigma.write_html(
        G,
        # layout_settings=layout_settings,
        default_edge_type="arrow",
        fullscreen=True,
        label_density=2,                      # increase to show more labels
        label_font="Helvetica Neue",
        max_categorical_colors=30,            # upper bound of distinct community colors
        node_border_color_from='node',
        node_color="community",               # color by the Louvain community computed below
        node_label="author",
        node_label_size=G.in_degree,
        node_label_size_range=(12, 36),
        node_metrics={"community": {"name": "louvain", "resolution": 2}},
        node_size=G.in_degree,
        node_size_range=(3, 30),
        path=f"networks/references/{period_label}_sigmadefault.html",
        start_layout=10,                      # start the layout automatically and stop it after 10 seconds
    )
    return G, citations_df
#| label: citations-construct-network-2022-2023
# NOTE(review): the string "2022_2023" is passed as start_year while end_year keeps its
# default; the next call passes the years 2022 and 2023 as separate arguments.
G_2022_2023_references, df_2022_2023_references = sigma_graph_references(list_references_standardized, "2022_2023")
#| label: citations-construct-network-2022-2023
G_2022_2023_references, df_2022_2023_references = sigma_graph_references(list_references_standardized, 2022, 2023)
# The remaining lines appear to be R console commands rather than Python.
View(df_2022_2023_references)
install.packages("gtExtras")
quit
install.packages("gtExtras")