In [1]:
import pandas as pd

## Requête de récupération des référecences bibliographiques
```sql
SELECT
 bib_refs.id, 
 bib_refs.ref_infos, 
 bib_refs.year, 
 ne3.name auteur,
 bib_refs.old_publicity, 
 nameable_entities1.name ref_type, 
 notes.content notes, 
 nameable_entities2.name, 
 bib_referencements_types.type_id 
FROM bib_refs
LEFT JOIN bib_referencements
ON bib_referencements.reference_id=bib_refs.id
LEFT JOIN bib_referencements_types
ON bib_referencements.id = bib_referencements_types.referencement_id 
LEFT JOIN  bib_ref_ling_types 
ON bib_referencements_types.type_id = bib_ref_ling_types.id
LEFT JOIN nameable_entities as nameable_entities1
ON bib_ref_ling_types.ref_type_id = nameable_entities1.id
LEFT JOIN commentable_entities_notes
ON bib_refs.id = commentable_entities_notes.entity_id
LEFT JOIN notes
ON commentable_entities_notes.note_id = notes.id
LEFT JOIN nameable_entities as nameable_entities2
ON bib_refs.id = nameable_entities2.id
LEFT JOIN bib_ranked_authors
ON bib_ranked_authors.reference_id = bib_refs.id
LEFT JOIN nameable_entities as ne3
ON bib_rankend_authors.author_id = ne3.id
-- ne pas mettre cette condition si on veut aussi les refs non exportables
-- WHERE bib_refs.old_publicity = 1 
WHERE bib_refs.ref_infos is not NULL 
ORDER BY bib_refs.year; 
```

In [2]:
df = pd.read_csv("../PROJETS/RSPA/ref_biblio_all.csv")
df.fillna("", inplace=True)

def convert_to_int(x):
    return (int(x) if x else "")

df.year = df.year.apply(convert_to_int)
df.old_publicity = df.old_publicity.apply(convert_to_int)

df.rename(columns={'ref_infos': 'publication',
                   'ref_type.1': 'titre'},
          inplace=True, errors='raise')

df = df[["id","titre","auteur","year","publication","ref_type","notes","old_publicity"]]

df.head()

Unnamed: 0,id,titre,auteur,year,publication,ref_type,notes,old_publicity
0,00041d507b2d742fd713b80828e2189f4cf3351c,"Organon. 4, Seconds analytiques / Aristote; in...","Pellegrin, Pierre",2005,"Paris: Flammarion, 2005, 432 p. Coll. « GF », ...",traduction,"Suit principalement l’édition de W.D. Ross, n°...",1
1,001be92168018fcd37606ddc2f33426cfbe61574,Rethinking early greek philosophy: Hippolytus ...,"Osborne, Catherine",1987,"Londres: Duckworth, 1987, viii-383 p.",,"Pour Hippolyte, suit principalement l’édition ...",1
2,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,"Raven, John Earle",1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",édition,,1
3,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,"De Weck, Hélène-Alix",1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",édition,,1
4,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,"Schofield, Malcolm",1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",édition,,1


In [3]:
df.shape

(14922, 8)

In [4]:
len(df.id.unique())

6194

In [5]:
df_grouped_authors = df.groupby(['id'])['auteur'].apply(lambda x: '/'.join(x)).reset_index()

In [6]:
df_grouped_authors.shape

(6194, 2)

In [7]:
df_grouped_types = df.groupby(['id'])['ref_type'].apply(lambda x: ', '.join(x)).reset_index()

In [8]:
df_grouped_types.shape

(6194, 2)

In [9]:
df_grouped_types.head(10)

Unnamed: 0,id,ref_type
0,00041d507b2d742fd713b80828e2189f4cf3351c,traduction
1,001be92168018fcd37606ddc2f33426cfbe61574,
2,003229cbcccc97401cd119ea27558bea12cbb837,"édition, édition, édition, édition, édition, c..."
3,0048b30b02159daed1bd2cd8b0f10161f7edf8ac,"édition, traduction"
4,0049009ae4af035b08310cef4930fe673e5e91a1,traduction
5,004a25aef1a854c52131848c7ff3f3df3b8bd9fb,"édition, traduction"
6,005b35c41de3903d386ab1348fdc18ffb1d3da73,édition critique
7,00846a34a52093e0ce0b732590dde683fb43c44f,"édition, commentaire"
8,009760968578745831843715565036faabb62ad9,édition critique
9,00aea3c021babdada6fada757f4480cd54e6e328,"édition, traduction"


In [10]:
def reduce_types(x):
    return ",".join(set(map(str.strip,x.split(','))))
test = "édition, traduction, édition, commentaire"
reduce_types(test)

def reduce_authors(x):
    return "/".join(set(map(str.strip,x.split('/'))))

In [11]:
df_grouped_types["concat_ref_types"] = df_grouped_types.ref_type.apply(reduce_types)

In [12]:
df_grouped_authors["concat_authors"] = df_grouped_authors.auteur.apply(reduce_authors)

In [13]:
df_grouped_types.tail()

Unnamed: 0,id,ref_type,concat_ref_types
6189,ff7f04c037603086417d5cdf57c471753e003b39,"traduction, traduction",traduction
6190,ff97e7e246ef7f75cca9ca5f7e37714d6a3fcfc4,édition critique,édition critique
6191,ffc3418c40391b4462c5706cdc7b0cc5493e0584,"édition critique, édition critique",édition critique
6192,ffeb9822d72c69179f86dd5c2e7d864e9469de7c,"édition critique, édition critique",édition critique
6193,ffef7b3b07e80e945f0ce4aa8cdaff4b476825f4,"édition, commentaire","édition,commentaire"


In [14]:
df_grouped_authors.tail()

Unnamed: 0,id,auteur,concat_authors
6189,ff7f04c037603086417d5cdf57c471753e003b39,"Morand, Anne-France/Lukinovich, Alessandra","Lukinovich, Alessandra/Morand, Anne-France"
6190,ff97e7e246ef7f75cca9ca5f7e37714d6a3fcfc4,"Meiser, Karl","Meiser, Karl"
6191,ffc3418c40391b4462c5706cdc7b0cc5493e0584,"Bernabé, Alberto/Olmos, Ricardo","Bernabé, Alberto/Olmos, Ricardo"
6192,ffeb9822d72c69179f86dd5c2e7d864e9469de7c,"Gigon, Olof/Bekker, Immanuel","Bekker, Immanuel/Gigon, Olof"
6193,ffef7b3b07e80e945f0ce4aa8cdaff4b476825f4,"Marx, Friedrich/Marx, Friedrich","Marx, Friedrich"


In [15]:
df.drop(['auteur', 'ref_type'], axis = 1, inplace = True) 

In [16]:
df.drop_duplicates(subset=['id'], inplace=True)

In [17]:
df.shape

(6194, 6)

In [18]:
df.head()

Unnamed: 0,id,titre,year,publication,notes,old_publicity
0,00041d507b2d742fd713b80828e2189f4cf3351c,"Organon. 4, Seconds analytiques / Aristote; in...",2005,"Paris: Flammarion, 2005, 432 p. Coll. « GF », ...","Suit principalement l’édition de W.D. Ross, n°...",1.0
1,001be92168018fcd37606ddc2f33426cfbe61574,Rethinking early greek philosophy: Hippolytus ...,1987,"Londres: Duckworth, 1987, viii-383 p.","Pour Hippolyte, suit principalement l’édition ...",1.0
2,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",,1.0
17,0048b30b02159daed1bd2cd8b0f10161f7edf8ac,"Plato. 2, Laches ; Protagoras ; Meno ; Euthyde...",1990,Cambridge (Mass.); Londres:: Harvard universit...,Texte grec et trad. anglaise en regard. - Bibl...,
19,0049009ae4af035b08310cef4930fe673e5e91a1,[Fragmenta et testimonia] / Bro(n)tino,1969,"Cf. G. Giannantoni, n° [[49]], I, p. 134-135",,1.0


In [19]:
df1 = pd.merge(df, df_grouped_authors, how="inner", on="id")

In [20]:
df1.shape

(6194, 8)

In [21]:
df_final = pd.merge(df1, df_grouped_types, how="inner", on="id")

In [22]:
df_final.shape

(6194, 10)

In [23]:
df_final.head()

Unnamed: 0,id,titre,year,publication,notes,old_publicity,auteur,concat_authors,ref_type,concat_ref_types
0,00041d507b2d742fd713b80828e2189f4cf3351c,"Organon. 4, Seconds analytiques / Aristote; in...",2005,"Paris: Flammarion, 2005, 432 p. Coll. « GF », ...","Suit principalement l’édition de W.D. Ross, n°...",1.0,"Pellegrin, Pierre","Pellegrin, Pierre",traduction,traduction
1,001be92168018fcd37606ddc2f33426cfbe61574,Rethinking early greek philosophy: Hippolytus ...,1987,"Londres: Duckworth, 1987, viii-383 p.","Pour Hippolyte, suit principalement l’édition ...",1.0,"Osborne, Catherine","Osborne, Catherine",,
2,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",,1.0,"Raven, John Earle/De Weck, Hélène-Alix/Schofie...","De Weck, Hélène-Alix/Raven, John Earle/O’Meara...","édition, édition, édition, édition, édition, c...","édition,traduction,commentaire"
3,0048b30b02159daed1bd2cd8b0f10161f7edf8ac,"Plato. 2, Laches ; Protagoras ; Meno ; Euthyde...",1990,Cambridge (Mass.); Londres:: Harvard universit...,Texte grec et trad. anglaise en regard. - Bibl...,,"Lamb, Walter Rangeley Maitland/Lamb, Walter Ra...","Lamb, Walter Rangeley Maitland","édition, traduction","édition,traduction"
4,0049009ae4af035b08310cef4930fe673e5e91a1,[Fragmenta et testimonia] / Bro(n)tino,1969,"Cf. G. Giannantoni, n° [[49]], I, p. 134-135",,1.0,"Maddalena, Antonio","Maddalena, Antonio",traduction,traduction


In [24]:
df_final.drop(["auteur", "ref_type"], axis=1, inplace=True)

In [25]:
df_final.head()

Unnamed: 0,id,titre,year,publication,notes,old_publicity,concat_authors,concat_ref_types
0,00041d507b2d742fd713b80828e2189f4cf3351c,"Organon. 4, Seconds analytiques / Aristote; in...",2005,"Paris: Flammarion, 2005, 432 p. Coll. « GF », ...","Suit principalement l’édition de W.D. Ross, n°...",1.0,"Pellegrin, Pierre",traduction
1,001be92168018fcd37606ddc2f33426cfbe61574,Rethinking early greek philosophy: Hippolytus ...,1987,"Londres: Duckworth, 1987, viii-383 p.","Pour Hippolyte, suit principalement l’édition ...",1.0,"Osborne, Catherine",
2,003229cbcccc97401cd119ea27558bea12cbb837,[Fragments et témoignages] / Les cosmogonies o...,1995,"Cf. G. S. Kirk, J. E. Raven, M. Schofield, H.-...",,1.0,"De Weck, Hélène-Alix/Raven, John Earle/O’Meara...","édition,traduction,commentaire"
3,0048b30b02159daed1bd2cd8b0f10161f7edf8ac,"Plato. 2, Laches ; Protagoras ; Meno ; Euthyde...",1990,Cambridge (Mass.); Londres:: Harvard universit...,Texte grec et trad. anglaise en regard. - Bibl...,,"Lamb, Walter Rangeley Maitland","édition,traduction"
4,0049009ae4af035b08310cef4930fe673e5e91a1,[Fragmenta et testimonia] / Bro(n)tino,1969,"Cf. G. Giannantoni, n° [[49]], I, p. 134-135",,1.0,"Maddalena, Antonio",traduction


In [26]:
df_final.to_csv("export_ref_biblio_6194.csv", sep=";", encoding="utf-8", index=False)