Imports and Login
# Dependencies for the cells below: the Dimensions client, pandas, and helpers
# for chunking and JSON serialization.
import dimcli
import pandas as pd
from clj import mapcat  # NOTE(review): not used in the visible code — confirm before removing
from funcy import chunks
import json
# Log in before a Dsl client is created.
# NOTE(review): called without arguments, so credentials presumably come from
# dimcli's local configuration — confirm.
dimcli.login()
dsl = dimcli.Dsl()

We will use the researchers data source from Dimensions to get more detailed information about authors and their affiliations.
# NOTE(review): these imports and the login call repeat the earlier setup block
# verbatim — presumably a duplicated notebook cell; confirm whether this section
# is meant to run standalone before removing it.
import dimcli
import pandas as pd
from clj import mapcat
from funcy import chunks
import json
dimcli.login()
# Client object used below to send DSL queries.
dsl = dimcli.Dsl()

# Load the publications to look up. INPUT_FILE is defined outside this section
# and must point to a CSV that has an `id` column.
df = pd.read_csv(INPUT_FILE)
# .tolist() yields plain Python values, which json.dumps can always serialize.
pub_ids = df["id"].tolist()

# Number of publication IDs sent per query; the same value is used as the
# query's `limit`.
CHUNK_SIZE = 300
# `{}` is filled per chunk (via str.format) with a JSON array of IDs. The limit
# is appended as a separate f-string so the `{}` placeholder stays intact.
q = "search publications where id in {} return publications [id + doi + authors]" + f" limit {CHUNK_SIZE}"

# Collect one authors/affiliations dataframe per chunk of IDs.
data = []
for ids in chunks(CHUNK_SIZE, pub_ids):
    pub_results = dsl.query(q.format(json.dumps(ids)))
    data.append(pub_results.as_dataframe_authors_affiliations())
auth_aff_df = pd.concat(data)

| | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| aff_city | Lans | ||||
| aff_city_id | 2772960.0 | ||||
| aff_country | Austria | ||||
| aff_country_code | AT | ||||
| aff_id | |||||
| aff_name | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez | Guarner-Lans | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez |
| aff_raw_affiliation | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez | Guarner-Lans | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez |
| aff_state | Tyrol | ||||
| aff_state_code | |||||
| pub_id | pub.1139066291 | pub.1139066291 | pub.1139066291 | pub.1139066291 | pub.1139066291 |
| researcher_id | ur.01365723444.19 | ur.015146751111.39 | ur.01030625573.19 | ||
| first_name | Elizabeth | María Elena | Gustavo | Mario | |
| last_name | Soria-Castro | Soto | Verónica | Rojas | Perezpeña-Diazconti |
# Write the combined authors/affiliations table to OUTPUT_FILE (defined outside
# this section). `index` is documented as a bool, so pass False explicitly to
# omit the row index instead of relying on None being falsy.
auth_aff_df.to_csv(OUTPUT_FILE, index=False)