Imports and Login
import dimcli
import pandas as pd
from clj import mapcat
from funcy import chunks
import json
# Authenticate against the Dimensions API. login() is called with no
# arguments, so credentials are expected to come from dimcli's own
# configuration (NOTE(review): confirm the credentials setup for this
# environment).
dimcli.login()

# Query client used by the cells below to run DSL queries.
dsl = dimcli.Dsl()
We will use the researchers data source from Dimensions to get more detailed information about authors and their affiliations.
import dimcli
import pandas as pd
from clj import mapcat
from funcy import chunks
import json
# Authenticate and create the DSL client used for every query below.
dimcli.login()
dsl = dimcli.Dsl()

# Load the publication ids to look up. INPUT_FILE is defined outside this
# cell; the CSV is expected to contain an `id` column.
df = pd.read_csv(INPUT_FILE)
pub_ids = df["id"].tolist()

# Number of publication ids sent per query. The same value is used as the
# query's `limit`, presumably so that every publication of a chunk fits in
# a single response (TODO confirm against the API's limits).
CHUNK_SIZE = 300

# `{}` is filled in per chunk with the JSON-encoded list of ids.
q = (
    "search publications where id in {} return publications [id + doi + authors]"
    + f" limit {CHUNK_SIZE}"
)

# One authors/affiliations DataFrame per chunk of ids.
data = []

for ids in chunks(CHUNK_SIZE, pub_ids):
    pub_results = dsl.query(q.format(json.dumps(ids)))
    data.append(pub_results.as_dataframe_authors_affiliations())

# Combine the per-chunk frames into a single table.
auth_aff_df = pd.concat(data)
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
aff_city | Lans | ||||
aff_city_id | 2772960.0 | ||||
aff_country | Austria | ||||
aff_country_code | AT | ||||
aff_id | |||||
aff_name | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez | Guarner-Lans | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez |
aff_raw_affiliation | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez | Guarner-Lans | Instituto Nacional de Cardiología Ignacio Chávez | Instituto Nacional de Cardiología Ignacio Chávez |
aff_state | Tyrol | ||||
aff_state_code | |||||
pub_id | pub.1139066291 | pub.1139066291 | pub.1139066291 | pub.1139066291 | pub.1139066291 |
researcher_id | ur.01365723444.19 | ur.015146751111.39 | ur.01030625573.19 | ||
first_name | Elizabeth | María Elena | Gustavo | Mario | |
last_name | Soria-Castro | Soto | Verónica | Rojas | Perezpeña-Diazconti |
# Write the combined authors/affiliations table to OUTPUT_FILE (defined
# outside this cell) without the DataFrame index column.
auth_aff_df.to_csv(OUTPUT_FILE, index=False)