In [8]:
import os
import re

import pandas as pd
from pymongo import MongoClient

# Open a client on the local MongoDB server, then select the database and
# the collection that the rest of the notebook queries.
client = MongoClient(host='localhost', port=27017)
db = client.get_database('Big_Data')
collection = db.get_collection('patents')

# Connection test: ask the server for its info and report success or failure.
try:
    client.server_info()
except Exception as e:
    print("Erreur lors de la connexion à la base de données :", e)
else:
    print("Connexion à la base de données réussie !")


Connexion à la base de données réussie !


Dans ce Jupyter Notebook, on a développé des requêtes de recherche pour interroger notre base de données de brevets. Ces requêtes sont flexibles, permettant à l'utilisateur de rechercher par ID, titre, inventeur, date, et bien plus encore. Pour les titres, il suffit à l'utilisateur d'entrer un mot-clé et le système affichera tous les résultats correspondants.

De plus, on a créé des tables de brevets organisées par langue, offrant ainsi à l'utilisateur la possibilité de rechercher des brevets spécifiquement dans la langue de son choix. Cette fonctionnalité rend la recherche et la lecture des brevets plus conviviales et accessibles à un public plus large.








In [9]:
# Fetch every document of the collection (an empty filter matches all of them)
cursor = collection.find({})

# Materialise the cursor into a pandas DataFrame, one row per document
df = pd.DataFrame([document for document in cursor])

# Preview the first five rows of the DataFrame
df.head(5)

Unnamed: 0,_id,Document Number,Document Type,Publication Date,Title,Abstract,Inventor Name,Assignee,Filing Date
0,662fc4ed19b05a444f63337d,DE102022133524A1,DE,2023-06-22,Luftfahrzeugantriebssystem,Ein Antriebssystem (4) für ein Luftfahrzeug (1...,SWANN PETER (GB); BEMMENT CRAIG W (GB); HOBDAY...,"ROLLS ROYCE PLC (London, GB)",2022-12-15
1,662fc4ed19b05a444f63337e,DE102022133535A1,DE,2023-06-22,BESTIMMUNG VON KRAFTSTOFFMERKMALEN,Die vorliegende Anmeldung offenbart ein Verfah...,SWANN PETER (GB); BEAVEN DAVID M (GB); BEMMENT...,"ROLLS ROYCE PLC (London, GB)",2022-12-15
2,662fc4ed19b05a444f63337f,DE102022133677A1,DE,2023-06-22,BESTIMMUNG VON KRAFTSTOFFMERKMALEN,Die vorliegende Anmeldung offenbart ein Verfah...,SWANN PETER (GB); BEAVEN DAVID M (GB); BEMMENT...,"ROLLS ROYCE PLC (London, GB)",2022-12-16
3,662fc4ed19b05a444f633380,DE102022133862A1,DE,2023-06-22,LEISTUNGSPARAMETER,Die vorliegende Anmeldung offenbart ein Verfah...,SWANN PETER (GB); BEAVEN DAVID M (GB); BEMMENT...,"ROLLS ROYCE PLC (London, GB)",2022-12-19
4,662fc4ed19b05a444f633381,DE102022133870A1,DE,2023-06-22,BESTIMMUNG VON KRAFTSTOFFMERKMALEN,Ein Verfahren (1040) zur Bestimmung eines oder...,SWANN PETER (GB); BEAVEN DAVID M (GB); BEMMENT...,"ROLLS ROYCE PLC (London, GB)",2022-12-19


In [11]:
# Dimensions of the full patents DataFrame as (rows, columns)
df.shape

(3499, 9)

In [18]:
def search_patents_by_title(title_query, regex=False):
    """Return the patents whose 'Title' contains ``title_query``.

    The match is case-insensitive and may occur anywhere in the title.

    Args:
        title_query: Keyword or phrase to look for. By default it is matched
            literally, so characters such as ``(``, ``+`` or ``.`` are safe
            to use in a keyword.
        regex: Set to True to have ``title_query`` sent to MongoDB as a raw
            regular expression instead of a literal keyword.

    Returns:
        A pandas DataFrame with one row per matching document; it is empty
        when nothing matches.
    """
    # Escape the keyword unless the caller explicitly asked for regex
    # semantics; an unescaped "(" or "+" would otherwise be interpreted as
    # regex syntax and either fail or match the wrong titles.
    pattern = title_query if regex else re.escape(title_query)
    results = collection.find({'Title': {'$regex': pattern, '$options': 'i'}})

    return pd.DataFrame(list(results))

In [41]:
results_df = search_patents_by_title('aviation')

# Leave the DataFrame as the cell's last expression so the notebook renders
# it as a table instead of the wrapped plain-text output of print()
results_df

                         _id   Document Number Document Type  \
0   662fc4ed19b05a444f63338b       EP3184611A1            EP   
1   662fc4ed19b05a444f63338c       EP3184611B1            EP   
2   662fc4ed19b05a444f63338e       EP3507348A1            EP   
3   662fc4ed19b05a444f633390       EP4202436A1            EP   
4   662fc4ed19b05a444f633391       EP4330350A1            EP   
5   662fc4ed19b05a444f633397     JP2022151754A            JP   
6   662fc4ed19b05a444f63339b  WO/2012/012855A1          WIPO   
7   662fc4ed19b05a444f63339c  WO/2017/108529A1          WIPO   
8   662fc4ed19b05a444f63339e  WO/2018/045397A1          WIPO   
9   662fc4ed19b05a444f63339f  WO/2019/023607A1          WIPO   
10  662fc4ed19b05a444f6333a0  WO/2019/047407A1          WIPO   
11  662fc4ed19b05a444f6333a2  WO/2022/256443A1          WIPO   
12  662fc4ed19b05a444f6333a3  WO/2023/064150A2          WIPO   
13  662fc4ed19b05a444f6333a4  WO/2023/066738A1          WIPO   
14  662fc4ed19b05a444f6333a6  WO/2023/11

In [39]:
from datetime import datetime  
def _search_patents_by_year(date_field, year_query):
    """Return the documents whose ``date_field`` falls within ``year_query``.

    Args:
        date_field: Name of the document field to filter on.
        year_query: Calendar year as an int.

    Returns:
        A pandas DataFrame with one row per matching document; it is empty
        when nothing matches.

    Raises:
        ValueError: If ``year_query`` or ``year_query + 1`` is outside the
            range supported by ``datetime``.
    """
    # Half-open range [Jan 1 of the year, Jan 1 of the next year). An
    # inclusive upper bound of datetime(year, 12, 31) is midnight at the start
    # of Dec 31 and would drop any document timestamped later that day.
    # NOTE(review): assumes the field is stored as a date value, not a string.
    year_start = datetime(year_query, 1, 1)
    next_year_start = datetime(year_query + 1, 1, 1)

    results = collection.find({date_field: {'$gte': year_start, '$lt': next_year_start}})

    return pd.DataFrame(list(results))


def search_patents_by_year_pd(year_query):
    """Return the patents whose 'Publication Date' falls in ``year_query``.

    Args:
        year_query: Calendar year as an int, e.g. 2023.

    Returns:
        A pandas DataFrame with one row per matching document.
    """
    return _search_patents_by_year('Publication Date', year_query)


def search_patents_by_year_fd(year_query):
    """Return the patents whose 'Filing Date' falls in ``year_query``.

    Args:
        year_query: Calendar year as an int, e.g. 2022.

    Returns:
        A pandas DataFrame with one row per matching document.
    """
    return _search_patents_by_year('Filing Date', year_query)



In [35]:
def search_patents_by_inventor(inventor_query, regex=False):
    """Return the patents whose 'Inventor Name' contains ``inventor_query``.

    The match is case-insensitive and may occur anywhere in the field.

    Args:
        inventor_query: Name (or part of a name) to look for. By default it
            is matched literally, so a value copied from the data such as
            ``"SWANN PETER (GB)"`` matches as written.
        regex: Set to True to have ``inventor_query`` sent to MongoDB as a
            raw regular expression instead of a literal string.

    Returns:
        A pandas DataFrame with one row per matching document; it is empty
        when nothing matches.
    """
    # Inventor values contain parentheses (country codes). Unescaped, they
    # would be read as a regex group and the literal "(GB)" would never match.
    pattern = inventor_query if regex else re.escape(inventor_query)
    results = collection.find({'Inventor Name': {'$regex': pattern, '$options': 'i'}})

    return pd.DataFrame(list(results))

In [40]:
# Search for patents published in 2023 (this queries the publication date,
# not the filing date)
results_year_df = search_patents_by_year_pd(2023)
print("Results for year query:")
# Last expression of the cell: rendered as a table by the notebook
results_year_df



Results for year query:
                          _id   Document Number Document Type  \
0    662fc4ed19b05a444f63337d  DE102022133524A1            DE   
1    662fc4ed19b05a444f63337e  DE102022133535A1            DE   
2    662fc4ed19b05a444f63337f  DE102022133677A1            DE   
3    662fc4ed19b05a444f633380  DE102022133862A1            DE   
4    662fc4ed19b05a444f633381  DE102022133870A1            DE   
..                        ...               ...           ...   
426  662fc4ed19b05a444f6340c4     US20230002037            US   
427  662fc4ed19b05a444f6340ca     US20230306218            US   
428  662fc4ed19b05a444f6340d8     US20230390744            US   
429  662fc4ed19b05a444f6340e5          11825863            US   
430  662fc4ed19b05a444f634104     US20230136731            US   

       Publication Date                                              Title  \
0   2023-06-22 00:00:00                         Luftfahrzeugantriebssystem   
1   2023-06-22 00:00:00                

In [43]:

def select_data_by_language(language, data_dir="C:\\Users\\hp\\id2\\bg\\idiomas"):
    """Load the titles CSV for one language, print a preview and return it.

    Args:
        language: Language code used as the file-name prefix, e.g. ``'ja'``
            selects ``ja_titles.csv``.
        data_dir: Directory that holds the ``<language>_titles.csv`` files.
            The default is a machine-specific absolute path; pass your own
            directory when running the notebook elsewhere.

    Returns:
        The loaded pandas DataFrame, or None when no CSV file exists for
        ``language`` (a message is printed in that case).
    """
    csv_file_path = os.path.join(data_dir, f"{language}_titles.csv")

    try:
        df = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        print(f"Le fichier CSV pour la langue '{language}' n'a pas été trouvé.")
        return None

    print(df.head())
    return df

# Keep the loaded frame for later use; assigning it also stops the notebook
# from echoing the whole frame after the preview printed by the function.
ja_titles = select_data_by_language('ja')


  Document Number Document Type Publication Date  \
0   JP2012519768A            JP       2012-08-30   
1   JP2019518823A            JP       2019-07-04   
2   JP2019529613A            JP       2019-10-17   
3   JP2020511350A            JP       2020-04-16   
4   JP2024506450A            JP       2024-02-14   

                                               Title  \
0                           バイオマスからの有機化合物を一部含有する航空燃料   
1          バイオ再生可能ケロシン、ジェット燃料、ジェット燃料ブレンドストック、および製造方法   
2           合成由来の代替航空タービン燃料である、合成パラフィンケロシン（ＳＰＫ）の製造方法   
3                                無人航空車両のための電力システムの冷却   
4  炭素回収を利用する化学物質および燃料を製造するプラントのための、コンピュータで実行されるモニ...   

                                            Abstract  \
0  The invention relates to an aviation fuel cont...   
1  本技術は、他の驚くべき特徴の中でも、ディーゼル燃料、航空燃料、ジェット燃料ブレンドストック、...   
2  本発明は、航空タービン燃料の製造方法を提供する。本方法は、高温フィッシャー・トロプシュ法に由...   
3  無人航空車両は、回転するように少なくとも１つのプロペラを駆動するように構成される、少なくとも...   
4  本開示は、ブループラントのためのコンピュータで実行されるモニタリング方法であって、前記プラン...   

             

In [44]:
def search_patents_by_document_type(document_type_query):
    """Return the patents whose 'Document Type' matches ``document_type_query``.

    The query is used as a case-insensitive regular expression on the
    'Document Type' field.

    Args:
        document_type_query: Pattern to look for, e.g. ``"WIPO"``.

    Returns:
        A pandas DataFrame with one row per matching document.
    """
    type_filter = {'$regex': document_type_query, '$options': 'i'}
    matching_docs = [doc for doc in collection.find({'Document Type': type_filter})]
    return pd.DataFrame(matching_docs)


In [49]:
# Search for patents by document type
document_type_query = "WIPO"
document_type_results = search_patents_by_document_type(document_type_query)


In [50]:
# Preview the first rows of the document-type search results
document_type_results.head()

Unnamed: 0,_id,Document Number,Document Type,Publication Date,Title,Abstract,Inventor Name,Assignee,Filing Date
0,662fc4ed19b05a444f63339b,WO/2012/012855A1,WIPO,2012-02-02,CATALYTIC HYDROGENATION OF HYDROXYCYCLOALKANES...,The present patent application relates to a ne...,FRAGA MARCO ANDRE (BR); BORGES LUIZ EDUARDO PI...,INT INST NAC DE TECNOLOGIA (BR); INST MILITAR ...,2011-07-28
1,662fc4ed19b05a444f63339c,WO/2017/108529A1,WIPO,2017-06-29,AVIATION FUEL COMPOSITION,The present invention relates to an aviation f...,SANDBERG KATI (FI); KIISKI ULLA (FI),NESTE CORP (FI),2016-12-14
2,662fc4ed19b05a444f63339d,WO/2017/197017A1,WIPO,2017-11-16,"BIORENEWABLE KEROSENE, JET FUEL, JET FUEL BLEN...",The present technology provides compositions t...,ABHARI RAMIN (US); SLADE DAVID A (US); TOMLINS...,REG SYNTHETIC FUELS LLC (US),2017-05-10
3,662fc4ed19b05a444f63339e,WO/2018/045397A1,WIPO,2018-03-08,METHOD TO PRODUCE AN ALTERNATIVE SYNTHETICALLY...,The invention provides a process for the produ...,MDLELENI MASIKANA MILLAN (ZA); KNOTTENBELT CYR...,THE PETROLEUM OIL & GAS CORP OF SOUTH AFRICA P...,2017-08-28
4,662fc4ed19b05a444f63339f,WO/2019/023607A1,WIPO,2019-01-31,AVIATION GASOLINE ENGINE COOLANT INJECTION SYSTEM,A set of apparatus to inject distilled-water i...,D'ACOSTA CHRIS (US); STIRM BRIAN (US); ALBUZAT...,DACOSTA CHRIS (US); STIRM BRIAN (US); ALBUZAT ...,2018-07-27


In [51]:
# Search for patents by document type again, this time with a lowercase
# query: the search is case-insensitive ('i' option), so "wipo" returns the
# same rows as "WIPO"
document_type_query = "wipo"
document_type_results = search_patents_by_document_type(document_type_query)
document_type_results.head()

Unnamed: 0,_id,Document Number,Document Type,Publication Date,Title,Abstract,Inventor Name,Assignee,Filing Date
0,662fc4ed19b05a444f63339b,WO/2012/012855A1,WIPO,2012-02-02,CATALYTIC HYDROGENATION OF HYDROXYCYCLOALKANES...,The present patent application relates to a ne...,FRAGA MARCO ANDRE (BR); BORGES LUIZ EDUARDO PI...,INT INST NAC DE TECNOLOGIA (BR); INST MILITAR ...,2011-07-28
1,662fc4ed19b05a444f63339c,WO/2017/108529A1,WIPO,2017-06-29,AVIATION FUEL COMPOSITION,The present invention relates to an aviation f...,SANDBERG KATI (FI); KIISKI ULLA (FI),NESTE CORP (FI),2016-12-14
2,662fc4ed19b05a444f63339d,WO/2017/197017A1,WIPO,2017-11-16,"BIORENEWABLE KEROSENE, JET FUEL, JET FUEL BLEN...",The present technology provides compositions t...,ABHARI RAMIN (US); SLADE DAVID A (US); TOMLINS...,REG SYNTHETIC FUELS LLC (US),2017-05-10
3,662fc4ed19b05a444f63339e,WO/2018/045397A1,WIPO,2018-03-08,METHOD TO PRODUCE AN ALTERNATIVE SYNTHETICALLY...,The invention provides a process for the produ...,MDLELENI MASIKANA MILLAN (ZA); KNOTTENBELT CYR...,THE PETROLEUM OIL & GAS CORP OF SOUTH AFRICA P...,2017-08-28
4,662fc4ed19b05a444f63339f,WO/2019/023607A1,WIPO,2019-01-31,AVIATION GASOLINE ENGINE COOLANT INJECTION SYSTEM,A set of apparatus to inject distilled-water i...,D'ACOSTA CHRIS (US); STIRM BRIAN (US); ALBUZAT...,DACOSTA CHRIS (US); STIRM BRIAN (US); ALBUZAT ...,2018-07-27
