Imports and Login
import pandas as pd
from ipycytoscape import *
import networkx as nx
from karateclub import EgoNetSplitter
from funcy import select, walk_values
This heuristic approach to detecting research groups within co-author networks follows the example of SciSight and uses the Ego-Splitting Framework to create overlapping clusters of authors.
import pandas as pd
from ipycytoscape import *
import networkx as nx
from karateclub import EgoNetSplitter
from funcy import select, walk_values
# Load the edge list from CSV.
df_edges = pd.read_csv(INPUT_FILE_EDGES)

# Build a networkx graph from the pandas edge list. No column names are
# passed, so networkx's defaults for the endpoint columns apply.
G = nx.from_pandas_edgelist(df_edges)

# Get the largest connected component and extract it as a standalone subgraph
# (copied so it is independent of G).
components = sorted(nx.connected_components(G), key=len, reverse=True)
S = G.subgraph(components[0]).copy()

# Relabel nodes as integers, keeping each original label in the
# "openalex_id" node attribute so it can be recovered after clustering.
Sp = nx.convert_node_labels_to_integers(S, label_attribute="openalex_id")

# Fit the EgoNetSplitter model on the integer-labelled graph.
model = EgoNetSplitter(resolution=EGO_NET_SPLITTER_RES)
model.fit(Sp)
ms = model.get_memberships()

# Keep only entries whose value list has at least MIN_SIZE_RG elements, then
# translate the integers in each list back to their "openalex_id" labels.
# NOTE(review): this treats `ms` as {cluster_id: [member nodes]}; the
# karateclub docs describe get_memberships() as per-node cluster
# memberships -- confirm the mapping direction before relying on the output.
rgs = select(lambda x: len(x[1]) >= MIN_SIZE_RG, ms)
rgs = walk_values(lambda ns: [Sp.nodes[n]["openalex_id"] for n in ns], rgs)

# Write one row per retained entry, without the DataFrame index column.
(
    pd.DataFrame(
        list(rgs.items()),
        columns=["cluster_id", "oa_researcher_ids"],
    ).to_csv(OUTPUT_FILE_RG_CLUSTER, index=False)
)