# Imports and Login
import pandas as pd
from itertools import combinations
from funcy import lmapcat, ldistinct
from ipycytoscape import *

# Disambiguated author information from OpenAlex can be used as the basis for
# calculating co-authorship networks, in which authors are the nodes and
# co-authorship of a publication defines the edges between nodes.
import pandas as pd
from itertools import combinations
from funcy import lmapcat, ldistinct
from ipycytoscape import *

# Read the input CSV into a DataFrame. INPUT_FILE is not defined in this
# part of the file and must be set before this statement runs.
df = pd.read_csv(INPUT_FILE)
# Work on an independent copy of the first SAMPLE_SIZE rows so the sample can
# be processed without touching the full table.
df_sample = df[:SAMPLE_SIZE].copy()


def calc_coauthor_network(df):
    """Derive an undirected co-authorship network from author/publication rows.

    Authors are the nodes; two authors are connected by an edge when they
    share at least one publication (rows are grouped by the ``"doi"`` column).

    Args:
        df: DataFrame providing the columns ``"doi"`` and ``"author.id"``.

    Returns:
        A ``(nodes, edges)`` tuple. ``nodes`` is the list of unique,
        non-missing author ids. ``edges`` is a list of 2-tuples of author
        ids, one per co-author pair, each tuple in sorted order.
    """
    # Rows without an author id cannot be part of the network: a NaN would
    # end up as a node and would break sorting against real (string) ids.
    known_authors = df.dropna(subset=["author.id"])

    nodes = list(known_authors["author.id"].unique())
    print("number of nodes", len(nodes))

    authors_by_publication = list(
        known_authors.groupby(by="doi")["author.id"].apply(set)
    )

    # Sorting each publication's authors before pairing gives every pair a
    # canonical orientation, so the result no longer depends on set iteration
    # order and is reproducible across runs.
    edges = lmapcat(
        lambda authors: combinations(sorted(authors), 2),
        authors_by_publication,
    )
    print("number of directional edges", len(edges))

    # Co-authorship is not directional. Because every pair is already in
    # sorted order, plain de-duplication collapses repeated pairs.
    edges = ldistinct(edges)
    print("number of undirectional edges", len(edges))
    return nodes, edges
# Compute the network once for the full table and once for the sample.
nodes, edges = calc_coauthor_network(df)
nodes_sample, edges_sample = calc_coauthor_network(df_sample)

# Write the full network to CSV: one file for nodes, one for edges.
# index=False keeps the DataFrame's row index out of the files.
pd.DataFrame(nodes, columns=["node"]).to_csv(OUTPUT_FILE_NODES, index=False)
pd.DataFrame(edges, columns=["source", "target"]).to_csv(
    OUTPUT_FILE_EDGES, index=False
)

# Convert the sample nodes into element dicts for the Cytoscape widget. The
# display name is the last "/"-separated segment of the node id.
# NOTE(review): 'classes' is nested inside 'data' here; Cytoscape element JSON
# normally carries `classes` as a sibling of `data` - confirm this is intended.
cytoscape_nodes = [
    {'data': {'id': node_id, 'name': node_id.split("/")[-1], 'classes': 'node'}}
    for node_id in nodes_sample
]
# Convert the sample edges into Cytoscape element dicts and bundle them with
# the node elements into the JSON structure the widget consumes.
cytoscape_edges = [
    {'data': {'source': edge[0], 'target': edge[1]}}
    for edge in edges_sample
]
cytoscape_json = {'nodes': cytoscape_nodes, 'edges': cytoscape_edges}

# Create the widget, use each element's 'name' data field as the tooltip
# source, and load the sample graph.
cytoscapeobj = CytoscapeWidget()
cytoscapeobj.set_tooltip_source('name')
cytoscapeobj.graph.add_graph_from_json(cytoscape_json)
cytoscapeobj.set_layout(name="random")
cytoscapeobj.set_style([
    {
        'selector': 'edge',
        'style': {'width': 1, "opacity": 0.5, 'curve-style': 'bezier'},
    },
    {
        'selector': 'node',
        'style': {
            'background-color': 'blue',
            'border-color': 'black',
            'border-width': 1,
            'width': 10,
            'height': 10,
        },
    },
])
# A bare expression as the last statement displays the widget in a notebook.
cytoscapeobj