# Imports and Login
import pandas as pd
from ipycytoscape import *
import networkx as nx
from karateclub import EgoNetSplitter
from funcy import select, walk_values

# This heuristic approach to the detection of research groups within co-author
# networks follows the example of SciSight and uses the Ego-Splitting Framework
# to create overlapping clusters of authors.
import pandas as pd
from ipycytoscape import *
import networkx as nx
from karateclub import EgoNetSplitter
from funcy import select, walk_values

# Load the edge list from the CSV file at INPUT_FILE_EDGES.
df_edges = pd.read_csv(INPUT_FILE_EDGES)

# build networkx graph from pandas edge list
# (no column names are passed, so networkx's default edge columns are used)
G = nx.from_pandas_edgelist(df_edges)

# get largest connected component and extract subgraph
components = sorted(nx.connected_components(G), key=len, reverse=True)
# .copy() detaches S from G so it is an independent graph, not a view
S = G.subgraph(components[0]).copy()

# Relabel nodes with consecutive integers; the original node label is kept
# on each node under the "openalex_id" attribute so it can be restored later.
Sp = nx.convert_node_labels_to_integers(S, label_attribute="openalex_id")
# fit EgoNetSplitter Model
model = EgoNetSplitter(resolution=EGO_NET_SPLITTER_RES)
model.fit(Sp)

# NOTE: karateclub's get_memberships() is keyed by *node*:
# {node label in Sp: [ids of the clusters that node belongs to]}.
ms = model.get_memberships()

# Invert to {cluster id: [member node labels]} so that the size filter below
# counts cluster members and the values really are node labels of Sp.
clusters = {}
for node, cluster_ids in ms.items():
    for cluster_id in cluster_ids:
        clusters.setdefault(cluster_id, []).append(node)

# keep only clusters with at least MIN_SIZE_RG members
rgs = select(lambda x: len(x[1]) >= MIN_SIZE_RG, clusters)
# translate the integer node labels back to the ids stored under "openalex_id"
rgs = walk_values(lambda ns: [Sp.nodes[n]["openalex_id"] for n in ns], rgs)
# Write one CSV row per entry of `rgs`: the key goes into "cluster_id" and the
# value into "oa_researcher_ids". No index column is written.
pd.DataFrame(
    list(rgs.items()),
    columns=["cluster_id", "oa_researcher_ids"],
).to_csv(OUTPUT_FILE_RG_CLUSTER, index=False)