Information about Researchers

Introduction

We will use the researchers data source from Dimensions to get more detailed information about authors and their affiliations.

Imports and Login
import dimcli
import pandas as pd
from clj import mapcat
from funcy import chunks
import json


# Authenticate against the Dimensions API. No credentials are passed here, so
# dimcli presumably picks them up from its own configuration -- TODO confirm.
dimcli.login()
# Client object used below to run the DSL queries.
dsl = dimcli.Dsl()
Read input data
# Load the input table and collect the values of its `id` column as a plain
# list, so the IDs can be chunked and JSON-encoded into the queries below.
df = pd.read_csv(INPUT_FILE)
pub_ids = list(df["id"].values)

Query information about researchers

Query author and affiliation data
# Number of publication IDs sent per query. The same value is used as the
# query's `limit`, so the result cap is never smaller than the chunk itself.
CHUNK_SIZE = 300

# Query template: the `{}` placeholder receives a JSON-encoded list of IDs.
q = "search publications where id in {} return publications [id + doi + authors]" + f" limit {CHUNK_SIZE}"

# One authors/affiliations dataframe per chunk of publication IDs.
data = []

for ids in chunks(CHUNK_SIZE, pub_ids):
    pub_results = dsl.query(q.format(json.dumps(ids)))
    data.append(pub_results.as_dataframe_authors_affiliations())

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty dataframe when there were no IDs to query. ignore_index=True replaces
# the per-chunk row labels (which restart for every chunk) with one
# continuous index, avoiding duplicate labels in the combined frame.
auth_aff_df = pd.concat(data, ignore_index=True) if data else pd.DataFrame()
0 1 2 3 4
aff_city Lans
aff_city_id 2772960.0
aff_country Austria
aff_country_code AT
aff_id
aff_name Instituto Nacional de Cardiología Ignacio Chávez Instituto Nacional de Cardiología Ignacio Chávez Guarner-Lans Instituto Nacional de Cardiología Ignacio Chávez Instituto Nacional de Cardiología Ignacio Chávez
aff_raw_affiliation Instituto Nacional de Cardiología Ignacio Chávez Instituto Nacional de Cardiología Ignacio Chávez Guarner-Lans Instituto Nacional de Cardiología Ignacio Chávez Instituto Nacional de Cardiología Ignacio Chávez
aff_state Tyrol
aff_state_code
pub_id pub.1139066291 pub.1139066291 pub.1139066291 pub.1139066291 pub.1139066291
researcher_id ur.01365723444.19 ur.015146751111.39 ur.01030625573.19
first_name Elizabeth María Elena Gustavo Mario
last_name Soria-Castro Soto Verónica Rojas Perezpeña-Diazconti
Save results
# Write the combined authors/affiliations table to disk without the row index.
auth_aff_df.to_csv(OUTPUT_FILE, index=False)