In [953]:
import numpy as np
import pandas as pd

In [954]:
# Load the Wellcome APC spreadsheet export; the file is decoded as 'mac_roman'.
raw = pd.read_csv(
    'WELLCOME_APC.csv',
    encoding='mac_roman',
)

In [955]:
# Summary of the untouched data: non-null count, number of distinct values,
# most frequent value and its frequency for every column.
raw.describe()

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,COST (£) charged to Wellcome (inc VAT when charged)
count,1928,2127,2126,2127,2127
unique,1880,299,984,2126,1402
top,Not yet available,Elsevier,PLoS One,"Exclusive breastfeeding, diarrhoel morbidity a...",£2040.00
freq,7,387,92,2,94


In [956]:
# All cleaning happens on a copy so that `raw` stays as loaded.
df = raw.copy()
cols = df.columns.tolist()
cols

['PMID/PMCID',
 'Publisher',
 'Journal title',
 'Article title',
 'COST (£) charged to Wellcome (inc VAT when charged)']

In [957]:
# The fifth header ('COST (£) charged to Wellcome ...') is unwieldy; call it 'Cost'.
cost_position = 4
cols[cost_position] = 'Cost'
df.columns = cols

In [958]:
# Trim leading/trailing whitespace from every free-text column so that values
# differing only by stray spaces compare equal.
for text_column in ('Publisher', 'Journal title', 'Article title', 'Cost'):
    df.loc[:, text_column] = df[text_column].str.strip()

In [959]:
# Re-check the per-column summary now that surrounding whitespace is gone.
df.describe()

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,Cost
count,1928,2127,2126,2127,2127
unique,1880,278,953,2126,1402
top,Not yet available,Elsevier,PLoS One,"Exclusive breastfeeding, diarrhoel morbidity a...",£2040.00
freq,7,395,92,2,94


In [960]:
# Show every row whose article title occurs more than once (all copies, not
# just the later ones).
repeated_title = df.duplicated(subset='Article title', keep=False)
df[repeated_title]

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,Cost
1490,Pending,Public Library of Science,PLoS One,"Exclusive breastfeeding, diarrhoel morbidity a...",£825.68
1496,,Public Library of Science,PLoS One,"Exclusive breastfeeding, diarrhoel morbidity a...",£825.68


In [961]:
# Keep the first row for each article title.  The index is deliberately NOT
# reset: later cells address individual rows by their original labels.
df = df.drop_duplicates(subset='Article title', keep='first')

In [962]:
# Let long listings (up to 1000 rows) render in full instead of being truncated.
pd.options.display.max_rows = 1000

In [963]:
# One row per distinct publisher name, in order of first appearance.
pd.DataFrame(df['Publisher'].drop_duplicates().values)

Unnamed: 0,0
0,CUP
1,ACS
2,ACS (Amercian Chemical Society) Publications
3,ACS Publications
4,AGA Institute
5,AMBSB
6,American Association of Immunologists
7,American Chemical Society
8,AMERICAN CHEMICAL SOCIETY
9,American Chemical Society Publications


Starting with this basic list of unique publisher names, correct errors and remove unnecessary words to combine duplicates.

In [964]:
# Misspellings and abbreviations found in the raw publisher names, mapped to
# the spelling used by the rest of the clean-up.  Keys are regular expressions.
# The acronyms OUP and NPG are anchored with word boundaries: as a bare
# substring 'OUP' also matches inside an upper-case 'GROUP', turning it into
# 'GROxford', which the later Oxford rule would then claim for Oxford.
related = {
    'AMBSB': 'ASBMB', 'Elseveier': 'Elsevier', 'Haematology': 'Hematology', 'Hamatology': 'Hematology',
    'Benthan': 'Bentham', 'Byophysical': 'Biophysical', 'Biosciences': 'Bioscience', 'Harbour': 'Harbor',
    'Darmouth': 'Dartmouth', r'\bOUP\b': 'Oxford', 'Genermal': 'General', 'Habour': 'Harbor',
    'Neurosciences': 'Neuroscience', 'NEURO SCIENCES': 'Neuroscience', 'Biolgists': 'Biologists',
    r'\bNPG\b': 'Nature', 'Camdus': 'Cadmus', 'Socety': 'Society', 'Endrocrine': 'Endocrine',
    'Wliey': 'Wiley',
}

# regex=True is passed explicitly: the default of `regex` differs between
# pandas versions, and the anchored keys above must be treated as patterns.
for pattern, replacement in related.items():
    df['Publisher'] = df['Publisher'].str.replace(pattern, replacement, regex=True)

In [965]:
# Filler words and punctuation that do not help identify a publisher.  Each
# pattern is deleted from the name so that variants such as "X Ltd" and "X"
# collapse together.  Patterns are regular expressions, applied in list order.
#
# Changes from the first draft of this list:
#   * '(?i)' now comes before '^': Python 3.11+ rejects a global flag that is
#     not at the start of the pattern.
#   * the full stop is escaped: an unescaped '.' is the match-anything regex
#     and would erase every name when single characters are not special-cased.
# NOTE(review): '(iucr)' is a regex *group*, so it matches the bare lower-case
# text 'iucr' and leaves any surrounding parentheses in place -- confirm that
# this is what was intended.
# NOTE(review): the patterns are not word-anchored, so e.g. '(?i)inc' and
# '(?i)press' also delete those letters from inside longer words.
words = ['Laboratory', r'(?i)limited', r'(?i)ltd', r'(?i)press', r'(?i)services', r'(?i)service',
         r'(?i)publishing', r'(?i)corporation', r'(?i)LLC', 'communications', '(iucr)',
         r'(?i)inc', r'(?i)group', r'(?i)publications', r'(?i)publication', r'(?i)publishers',
         r'(?i)publisher', r'(?i)^the', r'(?i)journals', r'(?i)journal', r'^J ', r'\.', ',']
null_out = {word: '' for word in words}

# regex=True is explicit because the default of `regex` changed between pandas
# versions; with regex=False none of the '(?i)...' patterns would match.
for key, replacement in null_out.items():
    df['Publisher'] = df['Publisher'].str.replace(key, replacement, regex=True)

In [966]:
# Silence SettingWithCopyWarning: it flags false positives on the assignments used here.
pd.set_option('mode.chained_assignment', None)

In [967]:
# Ordered (pattern, canonical name) rules.  Any publisher whose current name
# matches the regular expression `pattern` is overwritten with the canonical
# name.  Rules run top to bottom, so a later rule sees the result of the
# earlier ones.  Matching is case-sensitive unless the pattern starts with
# '(?i)'.
publisher_rules = [
    ('ACS', 'American Chemical Society'),
    ('ASBM', 'ASBMB'),
    ('Biochemistry and Molecular', 'ASBMB'),
    ('ASM', 'American Society for Microbiology'),
    ('BMJ', 'BMJ'),
    ('Cambridge', 'Cambridge'),
    ('Company of Bio', 'Company of Biologists'),
    ('Elsevier', 'Elsevier'),
    ('Federation of', 'FASEB'),
    ('Frontiers', 'Frontiers'),
    ('MIT', 'MIT'),
    ('National Academy', 'National Academy of Sciences'),
    ('Nature', 'Nature'),
    (r'(?i)Oxford', 'Oxford'),
    (r'(?i)PLoS', 'Public Library of Science'),
    ('PNAS', 'National Academy of Sciences'),
    ('RSC', 'Royal Society of Chemistry'),
    (r'(?i)Springer', 'Springer-Verlag'),
    ('Taylor', 'Taylor & Francis'),
    ('T&F', 'Taylor & Francis'),
    ('Visualized', 'JoVE'),
    (r'(?i)Wiley', 'Wiley-Blackwell'),
    ('Wolters', 'Wolters Kluwer'),
]

for pattern, canonical in publisher_rules:
    # na=False: a missing publisher never matches (and cannot poison the mask).
    mask = df['Publisher'].str.contains(pattern, na=False)
    df.loc[mask, 'Publisher'] = canonical

In [988]:
# Deleting filler words can leave stray spaces at either end; trim them again.
for trimmed_column in ('Publisher', 'Journal title'):
    df.loc[:, trimmed_column] = df[trimmed_column].str.strip()

In [989]:
df['Publisher'] = df['Publisher'].str.upper()  # eliminate duplicates due to mixed case

I waited until now to convert the names to upper case. The searches above are deliberately case-sensitive: a case-insensitive search for a short, distinctive upper/lower-case combination could unexpectedly match the same letters inside an ordinary word.

In [990]:
# Check how many distinct publisher names remain after case folding.
df.describe()

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,Cost
count,1928,2126,2125,2126,2126
unique,1880,111,953,2126,1402
top,Not yet available,ELSEVIER,PLoS One,Orthobunyavirus Ultrastructure and the Curious...,£2040.00
freq,7,409,91,1,94


Down to 111 unique publishers. Keep going.

In [991]:
# Alphabetical list of the remaining distinct publisher names.
publisher_names = sorted(set(df['Publisher']))
pd.DataFrame(publisher_names)

Unnamed: 0,0
0,AGA INSTITUTE
1,AMERICAN ASSOCIATION OF IMMUNOLOGISTS
2,AMERICAN CHEMICAL SOCIETY
3,AMERICAN CHEMICAL SOCIETY PETROLEUM RESEARCH T...
4,AMERICAN COLLEGE OF CHEST PHYSICIANS
5,AMERICAN PHYSIOLOGICAL SOCIETY
6,AMERICAN PSYCHIATRIC
7,AMERICAN PSYCHIATRIC ASSOCIATION
8,AMERICAN PSYCHOLOGICAL ASSOCIATION
9,AMERICAN PUBLIC HEALTH ASSOCIATION


In [992]:
# Second pass over the (now upper-case) publisher names.  Each rule overwrites
# every name matching the regular expression with one canonical name.  Order
# matters: 'BMC' is rewritten to 'BIOMED CENTRAL' before the '^BM' rule runs,
# so that rule no longer sees names containing 'BMC'.
upper_case_rules = [
    ('AMERICAN CHEMICAL', 'AMERICAN CHEMICAL SOCIETY'),
    ('AMERICAN PSYCHIATRIC', 'AMERICAN PSYCHIATRIC ASSOCIATION'),
    ('BMC', 'BIOMED CENTRAL'),
    (r'^BM', 'BMJ'),
    ('BRITISH MEDICAL', 'BMJ'),
    ('CADMUS', 'CADMUS'),
    ('CRYSTALLO', 'INTERNATIONAL UNION OF CRYSTALLOGRAPHY'),
    ('JOVE', 'JOVE'),
]

for pattern, canonical in upper_case_rules:
    # na=False: a missing publisher never matches (and cannot poison the mask).
    mask = df['Publisher'].str.contains(pattern, na=False)
    df.loc[mask, 'Publisher'] = canonical

In [1000]:
# Treat "SOCIETY OF ..." and "SOCIETY FOR ..." as the same name by rewriting
# every "SOCIETY OF" to "SOCIETY FOR".
current_publishers = df['Publisher']
df['Publisher'] = current_publishers.str.replace('SOCIETY OF', 'SOCIETY FOR')

In [1001]:
# Spell out any remaining 'PLOS' abbreviation inside a publisher name.
current_publishers = df['Publisher']
df['Publisher'] = current_publishers.str.replace('PLOS', 'PUBLIC LIBRARY OF SCIENCE')

In [1002]:
# Distinct-publisher count after the second round of merges.
df.describe()

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,Cost
count,1928,2126,2125,2126,2126
unique,1880,98,953,2126,1402
top,Not yet available,ELSEVIER,PLoS One,Orthobunyavirus Ultrastructure and the Curious...,£2040.00
freq,7,409,91,1,94


In [1003]:
# Alphabetical list of the publisher names that survive the clean-up.
final_publisher_names = sorted(set(df['Publisher']))
pd.DataFrame(final_publisher_names)

Unnamed: 0,0
0,AGA INSTITUTE
1,AMERICAN ASSOCIATION OF IMMUNOLOGISTS
2,AMERICAN CHEMICAL SOCIETY
3,AMERICAN COLLEGE OF CHEST PHYSICIANS
4,AMERICAN PHYSIOLOGICAL SOCIETY
5,AMERICAN PSYCHIATRIC ASSOCIATION
6,AMERICAN PSYCHOLOGICAL ASSOCIATION
7,AMERICAN PUBLIC HEALTH ASSOCIATION
8,AMERICAN SOCIETY FOR HEMATOLOGY
9,AMERICAN SOCIETY FOR INVESTIGATIVE PATHOLOGY


Now repeat the winnowing process on the journal titles. Note that I have removed several displays used in the process, which are long and intrusive.

In [1004]:
# Collapse the Royal Society's journal-title variants onto one label each.
is_royal_society = df['Publisher'] == 'ROYAL SOCIETY'


def _royal_society_title_has(fragment):
    """Mask of Royal Society rows whose journal title contains `fragment`.

    na=False makes rows with a missing journal title evaluate to False
    instead of NaN, so the result is always a clean boolean mask.
    """
    return is_royal_society & df['Journal title'].str.contains(fragment, na=False)


df.loc[_royal_society_title_has('Trans'), 'Journal title'] = 'Transactions B'
df.loc[1864, 'Journal title'] = 'Transactions B'  # single row addressed by its index label
df.loc[_royal_society_title_has('Interface'), 'Journal title'] = 'Interface'
df.loc[_royal_society_title_has('Proceedings'), 'Journal title'] = 'Proceedings B'

In [1008]:
# Normalise every capitalisation of "plos one" to 'PLoS One'.  The pattern
# uses the inline (?i) flag, so it must be applied as a regular expression;
# regex=True is explicit because with regex=False the pattern is taken
# literally and nothing is replaced.
df['Journal title'] = df['Journal title'].str.replace(r'(?i)plos one', 'PLoS One', regex=True)

In [1009]:
# to display all rows (same setting as applied earlier in the notebook)
pd.options.display.max_rows = 1000

In [1010]:
# Misspellings, abbreviations and case variants found in the journal titles,
# mapped to the form used for the rest of the clean-up.  Keys are regular
# expressions (several rely on the inline (?i) flag or the '$' anchor) and are
# applied in the order written.
# NOTE(review): 'Negected' is replaced by the whole phrase 'Neglected Tropical
# Diseases'; if the misspelt title already continues with 'Tropical Diseases'
# the phrase will be doubled -- confirm against the data.
related = {'Americal': 'American', 'Agfents': 'Agents', '&': 'and', 'Antimicobial': 'Antimicrobial', 'Am ': 'American ',
          'Journals': 'Journal', 'Biinformatics': 'Bioinformatics', 'Inyernational': 'International', r'(?i)open': 'Open',
          r'(?i)behaviour': 'Behavior', 'BIOCHEM SOC TRANS': 'Biochemical Society Transactions', r'(?i)biol ': 'Biological ',
          r'(?i)chem$': 'Chemistry', 'Biologicial': 'Biological', 'Britsh': 'British', 'Brt': 'British', 'Opthalmology': 'Ophthalmology',
          r'(?i) j ': ' ', 'Epigentics': 'Epigenetics', 'Epidemology': 'Epidemiology', 'Heptology': 'Hepatology', 'Jounral': 'Journal',
          'NTD': 'Neglected Tropical Diseases', 'Negected': 'Neglected Tropical Diseases', 'Public Library of Science': 'PLoS',
          'Experiements': 'Experiments', 'expermiments': 'Experiments'}

# regex=True is explicit: with regex=False the '(?i)...' and '...$' keys are
# taken literally and never match.
for pattern, replacement in related.items():
    df['Journal title'] = df['Journal title'].str.replace(pattern, replacement, regex=True)

In [1013]:
# Filler such as "Journal of", a leading "The"/"J" or a trailing "J" is
# deleted from every journal title.  Patterns are regular expressions and run
# in list order.
#
# Changes from the first draft of this list:
#   * '(?i)' now comes before '^': Python 3.11+ rejects a global flag that is
#     not at the start of the pattern.
#   * the full stop is escaped so that it matches a literal '.', not any
#     character.
#   * the leading-"j" pattern used to be listed twice; because the patterns
#     were collected as dict keys the second copy never ran, so it is listed
#     once here.  NOTE(review): if a second pass after removing "the" was
#     intended, iterate over a list and repeat the pattern.
words = [r'(?i)journal of ', r'(?i)journal for ', r'(?i)jnl *', r'(?i)^j +', r'\.', r'(?i)online',
         r'(?i)^the +', r'(?i)journal *', r' *J$']
null_out = {word: '' for word in words}

# regex=True is explicit because the default of `regex` differs between pandas
# versions; with regex=False none of these patterns would match.
for key, replacement in null_out.items():
    df['Journal title'] = df['Journal title'].str.replace(key, replacement, regex=True)

df.loc[:, 'Journal title'] = df['Journal title'].str.strip()

In [1014]:
# Manual corrections to journal titles and publishers, applied strictly in the
# order written (several later steps depend on earlier ones).  Bare integers
# are row index labels taken from the original file.
#
# Publisher values are always written in upper case: the Publisher column was
# upper-cased earlier, so a mixed-case assignment would create a separate
# "publisher" that no longer groups with its upper-case twin.


def _publisher_is(name):
    """Mask of rows whose publisher equals `name` exactly."""
    return df['Publisher'] == name


def _publisher_has(pattern):
    """Mask of rows whose publisher matches regex `pattern` (missing -> False)."""
    return df['Publisher'].str.contains(pattern, na=False)


def _title_is(title):
    """Mask of rows whose journal title equals `title` exactly."""
    return df['Journal title'] == title


def _title_has(pattern):
    """Mask of rows whose journal title matches regex `pattern` (missing -> False)."""
    return df['Journal title'].str.contains(pattern, na=False)


def _retitle(rows, title):
    """Set the journal title of `rows` (a boolean mask, a label or a list of labels)."""
    df.loc[rows, 'Journal title'] = title


def _republish(rows, publisher):
    """Set the publisher of `rows` (a boolean mask, a label or a list of labels)."""
    df.loc[rows, 'Publisher'] = publisher


_retitle(_publisher_is('WOLTERS KLUWER') & _title_has(r'(?i)AIDS'), 'AIDS')
_retitle(_publisher_is('WOLTERS KLUWER') & _title_has(r'(?i)acquired'), 'AIDS')

_retitle(_publisher_has('CRYSTALLOGRAPHY') & _title_has('F'), 'Acta Crystallographica F')
_retitle(_publisher_has('CRYSTALLOGRAPHY') & _title_has('D'), 'Acta Crystallographica D')

_retitle(_publisher_is('SPRINGER-VERLAG') & _title_has('Acta Neuropathol'), 'Acta Neuropathologica')
_retitle(_publisher_is('BIOMED CENTRAL') & _title_has('Genom'), 'BMC Genomics')
_retitle(_publisher_is('WILEY-BLACKWELL') & _title_has('Angew'), 'Angewandte Chemie')

_republish(_publisher_is('CADMUS') & _title_has('Antimicro'), 'AMERICAN SOCIETY FOR MICROBIOLOGY')

_republish(173, 'PORTLAND')
_retitle(173, 'Biochemical Journal')

_retitle(_publisher_is('ELSEVIER') & _title_has('- Mole'),
         'Biochimica et Biophysica Acta - Molecular Basis of Disease')

_republish(_title_is('Biological Chemistry'), 'ASBMB')
_republish(1026, 'PALGRAVE MACMILLAN')

_retitle(_publisher_is('WILEY-BLACKWELL') & _title_has('Defects'),
         'Birth Defects Research Part A: Clinical and Molecular Teratology')

_retitle(_publisher_has('HEMATOLOGY') & _title_has('Blood'), 'Blood')

_retitle(1205, 'British Medical Bulletin')
_retitle(288, 'BMJ')

# Company of Biologists: the three tests run in this order on purpose.
_retitle(_publisher_is('COMPANY OF BIOLOGISTS') & _title_has('Development'), 'Development')
_retitle(_publisher_is('COMPANY OF BIOLOGISTS') & _title_has('Cell'), 'Cell Science')
_retitle(_publisher_is('COMPANY OF BIOLOGISTS') & _title_has('Open'), 'Biology Open')

# Literal prefix, not a pattern.
df['Journal title'] = df['Journal title'].str.replace('Cell Press - ', '', regex=False)
_republish(_title_is('Cell Reports'), 'ELSEVIER')

_retitle(1732, 'Cellular and Molecular Life Sciences')
_retitle(1183, 'Cerebral Cortex')
_retitle(1849, 'Clinical Endocrinology and Metabolism')
_republish(376, 'ENDOCRINE SOCIETY')

_retitle(_publisher_is('OXFORD') & _title_has('Infectious'), 'Clinical Infectious Diseases')

_republish(1621, 'ELSEVIER')
_retitle(841, 'Current Biology')
_retitle(530, 'Developmental Cell')

_retitle(_publisher_is('NATURE') & _title_has('EMBO'), 'EMBO')
_republish(_title_is('Hepatology'), 'WILEY-BLACKWELL')

_retitle(1184, 'Human Molecular Genetics')
_retitle(_publisher_is('OXFORD') & _title_has('Epidemiol'), 'Epidemiology')

_republish(1622, 'BIOMED CENTRAL')
_retitle([599, 659], 'Molecular biology')
_retitle([1988, 1989], 'Movement Disorders')
_republish(1105, 'NATURE')
_retitle([721, 722], 'NeuroImage')
_retitle(734, 'Neuropsychologia')
_republish(373, 'AMERICAN PHYSIOLOGICAL SOCIETY')
_retitle([1238, 1239, 1240], 'Nucleic Acids Research')

_retitle(_publisher_is('PUBLIC LIBRARY OF SCIENCE') & _title_has('1'), 'PLoS One')

# The publisher must be fixed first: the next step selects rows by publisher.
_republish([429, 430, 431, 436], 'NATIONAL ACADEMY OF SCIENCES')
_retitle(_publisher_is('NATIONAL ACADEMY OF SCIENCES'), 'Proceedings')

_retitle(1615, 'Neglected Tropical Disease')

# Was 'Cambridge' (mixed case), which did not match the upper-cased 'CAMBRIDGE'.
_republish(_title_is('Psychological Medicine'), 'CAMBRIDGE')

_retitle([1787, 1788], 'Social Psychiatry and Psychiatric Epidemiology')
_retitle(2045, 'Tropical Medicine and International Health')
_retitle(377, 'Virology')

# Was 'Elsevier' (mixed case), which did not match the upper-cased 'ELSEVIER'.
_republish([225, 1837], 'ELSEVIER')

In [1064]:
# Fold journal titles to upper case so that case variants group together.
df['Journal title'] = df['Journal title'].str.upper()
# Fold the publisher again as well, so that any name assigned in mixed case
# since the earlier upper-casing step still groups with its upper-case form.
df['Publisher'] = df['Publisher'].str.upper()

At this point, all the journals with significant counts have been processed. There are still duplicates and some indeterminate cases but they all have possible totals less than 10.

In [1065]:
# Count articles per (journal, publisher) pair and keep the five largest.
pair_counts = (
    df.groupby(['Journal title', 'Publisher'])
      .size()
      .reset_index(name='count')
      .sort_values('count', ascending=False)
)
highest = pair_counts.head(5)
highest

Unnamed: 0,Journal title,Publisher,count
638,PLOS ONE,PUBLIC LIBRARY OF SCIENCE,199
100,BIOLOGICAL CHEMISTRY,ASBMB,68
645,PROCEEDINGS,NATIONAL ACADEMY OF SCIENCES,40
560,NEUROIMAGE,ELSEVIER,31
588,NUCLEIC ACIDS RESEARCH,OXFORD,29


The five most common journals, with the total number of articles for each.

In [1066]:
# Plain Python lists of the top-five journal titles and their publishers.
journals = highest['Journal title'].tolist()
publishers = highest['Publisher'].tolist()

In [1067]:
# Rows belonging to the five most common journals.
# NOTE(review): the two isin() tests are independent, so a listed journal
# paired with a *different* listed publisher would also be selected.
in_top_5 = df['Publisher'].isin(publishers) & df['Journal title'].isin(journals)
top_5 = df[in_top_5].copy()  # explicit copy: the Cost column is overwritten below

# Parse '£1234.56' as pounds, then convert to whole pence.  Simply deleting the
# decimal point gives the same number only when a value has exactly two
# decimals; '£2040' or '£2040.5' would be mis-scaled.  astype(float) still
# raises on anything that is not a plain £ amount.
pounds = top_5['Cost'].str.replace('£', '', regex=False).astype(float)
top_5['Cost'] = (pounds * 100).round()

# Anything at or above £190,000 (expressed in pence) is treated as an outlier.
MAX_PLAUSIBLE_COST_PENCE = 19000000
reasonable = top_5[top_5['Cost'] < MAX_PLAUSIBLE_COST_PENCE]

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,Cost
16,22610094,ASBMB,BIOLOGICAL CHEMISTRY,Annexin-1 interaction with FPR2/ALX,£265.67
60,PMC3576085,ASBMB,BIOLOGICAL CHEMISTRY,Understanding how noncatalytic carbohydrate bi...,£1100.00
61,23239883 PMC3561570,ASBMB,BIOLOGICAL CHEMISTRY,Molecular architecture and functional analysis...,£2259.64
64,PMCID: PMC3642348,ASBMB,BIOLOGICAL CHEMISTRY,Human and viral golgi anti-apoptotic protein (...,£1019.71
65,PMID: 22992744 PMC3493908,ASBMB,BIOLOGICAL CHEMISTRY,Dynamic exchange of myosin VI on endocytic str...,£1119.61
66,PMCID: PMC3531748,ASBMB,BIOLOGICAL CHEMISTRY,Uncoupling proteostasis and development in vit...,£1131.01
67,PMCID: PMC3436574\n,ASBMB,BIOLOGICAL CHEMISTRY,Structural Requirements for Recognition of Maj...,£1137.51
68,PMID: 23223336 PMC3543027,ASBMB,BIOLOGICAL CHEMISTRY,Visualization of structural changes accompanyi...,£1152.72
69,PMCID: PMC3795252,ASBMB,BIOLOGICAL CHEMISTRY,SLP-76 sterile alpha motif (SAM) and individua...,£1160.09
70,PMCID: PMC3724652,ASBMB,BIOLOGICAL CHEMISTRY,?-Amino-3-hydroxy-5-methyl-4-isoxazole propion...,£1166.85


Create a smaller dataframe of just the entries for the top five journals, remove the '£' sign from the Cost column (none of these journals had dollar pricing) and remove the decimal point, effectively getting the cost in pennies. Then remove the few unrealistic outliers: one with a cost of £192,000 and several with a cost of £999,999.

In [1070]:
# Mean, median and standard deviation of Cost for each (journal, publisher) pair.
cost_by_journal = reasonable.groupby(['Journal title', 'Publisher'])['Cost']
stats = cost_by_journal.agg(['mean', 'median', 'std'])

In [1071]:
# Display the statistics in the same units as the Cost column they were computed from.
stats

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,std
Journal title,Publisher,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BIOLOGICAL CHEMISTRY,ASBMB,137134.712121,128730.5,40162.090457
NEUROIMAGE,ELSEVIER,221218.129032,232643.0,27319.324435
NUCLEIC ACIDS RESEARCH,OXFORD,116234.482759,85200.0,44215.093382
PLOS ONE,PUBLIC LIBRARY OF SCIENCE,93941.047619,89934.0,19895.609102
PROCEEDINGS,NATIONAL ACADEMY OF SCIENCES,83988.333333,73400.0,45531.982256


In [1072]:
# Round to a whole number first, then divide by 100.
cost_stats = stats[['mean', 'median', 'std']]
cost_stats.round() / 100  # convert back to £

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,std
Journal title,Publisher,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BIOLOGICAL CHEMISTRY,ASBMB,1371.35,1287.3,401.62
NEUROIMAGE,ELSEVIER,2212.18,2326.43,273.19
NUCLEIC ACIDS RESEARCH,OXFORD,1162.34,852.0,442.15
PLOS ONE,PUBLIC LIBRARY OF SCIENCE,939.41,899.34,198.96
PROCEEDINGS,NATIONAL ACADEMY OF SCIENCES,839.88,734.0,455.32


The mean, median and standard deviation of the cost per article, in £, for the five most common journals.