Imports and Login
import pandas as pd
from itertools import combinations
from funcy import lmapcat, ldistinct
from ipycytoscape import *
Disambiguated author information from OpenAlex can be used as the basis for calculating networks of co-authorship, where authors define the nodes and co-authorship of a publication defines the edges between nodes.
import pandas as pd
from itertools import combinations
from funcy import lmapcat, ldistinct
from ipycytoscape import *
# Load the author/publication table. INPUT_FILE and SAMPLE_SIZE are not
# defined in this part of the file -- presumably set in an earlier
# parameters cell (TODO confirm).
df = pd.read_csv(INPUT_FILE)

# Independent copy of the first SAMPLE_SIZE rows, so that later changes to
# the sample never write back into the full table.
df_sample = df[:SAMPLE_SIZE].copy()
def calc_coauthor_network(df):
    """Build an undirected co-authorship network from an author table.

    Args:
        df: DataFrame with one row per (publication, author) pair. The
            columns "doi" and "author.id" are read.

    Returns:
        A ``(nodes, edges)`` tuple. ``nodes`` is the list of unique
        "author.id" values in order of first appearance. ``edges`` is a
        list of 2-tuples of author ids that share at least one "doi";
        every unordered pair appears exactly once.
    """
    nodes = list(df["author.id"].unique())
    print("number of nodes", len(nodes))

    # One set of author ids per publication (duplicates within a
    # publication collapse here).
    authors_by_publication = list(
        df.groupby(by="doi")["author.id"].apply(set)
    )

    # Sorting each author set before pairing makes every edge tuple
    # canonical (smaller id first), so the result does not depend on the
    # arbitrary iteration order of a set.
    edges = [
        pair
        for authors in authors_by_publication
        for pair in combinations(sorted(authors), 2)
    ]
    print("number of directional edges", len(edges))

    # coauthorship is not directional, remove duplicates; the pairs are
    # already canonical, so an order-preserving de-duplication suffices.
    edges = list(dict.fromkeys(edges))
    print("number of undirectional edges", len(edges))

    return nodes, edges
# Network over the complete table (used for the CSV export).
nodes, edges = calc_coauthor_network(df)

# Network over the row sample only (used for the interactive plot).
nodes_sample, edges_sample = calc_coauthor_network(df_sample)
# Write the full network to disk as two CSV files without the row index:
# a single-column node list and a two-column (source, target) edge list.
# OUTPUT_FILE_NODES / OUTPUT_FILE_EDGES are not defined in this part of the
# file -- presumably set in an earlier parameters cell (TODO confirm).
pd.DataFrame(nodes, columns=["node"]).to_csv(OUTPUT_FILE_NODES, index=False)
pd.DataFrame(edges, columns=["source", "target"]).to_csv(
    OUTPUT_FILE_EDGES, index=False
)
# Convert the sampled network into the nodes/edges dictionary that is later
# handed to add_graph_from_json. Each node keeps its full id and gets a
# shorter 'name' taken from the last "/"-separated segment of that id.
# NOTE(review): 'classes' is stored inside 'data' here, exactly as in the
# original notebook; confirm whether it was meant to be a sibling of 'data'.
cytoscape_nodes = [
    {'data': {'id': node_id, 'name': node_id.split("/")[-1], 'classes': 'node'}}
    for node_id in nodes_sample
]
cytoscape_edges = [
    {'data': {'source': edge[0], 'target': edge[1]}}
    for edge in edges_sample
]
cytoscape_json = {'nodes': cytoscape_nodes, 'edges': cytoscape_edges}
# Build the interactive graph widget from the sampled network.
cytoscapeobj = CytoscapeWidget()
# Presumably makes the tooltip show each element's 'name' data field
# (TODO confirm against the ipycytoscape documentation).
cytoscapeobj.set_tooltip_source('name')
cytoscapeobj.graph.add_graph_from_json(cytoscape_json)
cytoscapeobj.set_layout(name="random")
cytoscapeobj.set_style([
    {
        'selector': 'edge',
        'style': {'width': 1, "opacity": 0.5, 'curve-style': 'bezier'},
    },
    {
        'selector': 'node',
        'style': {
            'background-color': 'blue',
            'border-color': 'black',
            'border-width': 1,
            'width': 10,
            'height': 10,
        },
    },
])
# Bare expression as the last statement: in a notebook cell this renders
# the widget.
cytoscapeobj