Imports and Login
import dimcli
import pandas as pd
from funcy import chunks
from clj import mapcat
import json
# Authenticate against the Dimensions API, then create the DSL client
# object used to run queries later in the script.
dimcli.login()
dsl = dimcli.Dsl()
An interesting question that arises for preprints is whether they are later published in a peer-reviewed journal. Dimensions enriches the metadata of preprints with the DOI of the resulting publication.
import dimcli
import pandas as pd
from funcy import chunks
from clj import mapcat
import json
# Authenticate against the Dimensions API, then create the DSL client
# object used to run queries later in the script.
dimcli.login()
dsl = dimcli.Dsl()
# Load the preprint records; INPUT_FILE is expected to be defined elsewhere.
preprints_df = pd.read_csv(INPUT_FILE)

# Keep only the rows that carry a resulting-publication DOI and collect
# those DOIs into a plain list so they can be chunked and JSON-encoded.
resulting_publication_dois = list(
    preprints_df
    .dropna(subset=["resulting_publication_doi"])
    .resulting_publication_doi
    .values
)
The additional metadata for the resulting publications is queried below using their DOIs.
# Number of DOIs sent to the API in a single query.
CHUNK_SIZE = 400
# Accumulates one query result per chunk of DOIs.
data = []

# Query template; the `{}` placeholder is filled with a JSON-encoded list of DOIs.
q = "search publications where doi in {} return publications [id + doi + title + year + journal + altmetric + times_cited + recent_citations + type + issn + linkout]"
# Cap each response at twice the number of DOIs requested in the chunk.
q = q + f" limit {CHUNK_SIZE*2}"

for dois in chunks(CHUNK_SIZE, resulting_publication_dois):
    results = dsl.query(q.format(json.dumps(dois)))
    data.append(results)

# Flatten the "publications" entries of every result into a single table.
resulting_publications_df = pd.DataFrame(
    mapcat(lambda x: x["publications"], data)
)
# Expand the nested `journal` value of each publication into flat
# "journal."-prefixed columns, then drop the original nested column.
#
# NOTE(review): `eval` is applied to the string form of values that come
# from an external API response. This is unsafe on untrusted data; consider
# using the value directly when it is already a dict, or `ast.literal_eval`
# when it is a string.
resulting_publications_df = resulting_publications_df\
    .join(
        pd.json_normalize(
            resulting_publications_df.journal.apply(
                lambda x: eval(str(x)) if x and not pd.isna(x) else x))
        .add_prefix("journal."))\
    .drop(columns="journal")

# Write the enriched table without the index column; OUTPUT_FILE is
# expected to be defined elsewhere.
resulting_publications_df.to_csv(OUTPUT_FILE, index=None)