# Étape 1 : Installation de Java et Hadoop




In [3]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
!tar -xzf hadoop-3.3.6.tar.gz

# Étape 2 : Définition des variables d’environnement :

In [4]:
import os

os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["HADOOP_HOME"] = "/content/hadoop-3.3.6"
os.environ["PATH"] = os.environ["HADOOP_HOME"] + "/bin:" + os.environ["PATH"]

# Étape 3 : Configuration minimale de Hadoop

In [7]:
core_site = """
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
"""

with open("/content/hadoop-3.3.6/etc/hadoop/core-site.xml", "w") as f:
    f.write(core_site)
hdfs_site = """
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///content/hadoop_tmp/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///content/hadoop_tmp/dfs/data</value>
    </property>
</configuration>
"""

with open("/content/hadoop-3.3.6/etc/hadoop/hdfs-site.xml", "w") as f:
    f.write(hdfs_site)


# Étape 4 : Formater le système de fichiers HDFS :


In [8]:
!hdfs namenode -format

2025-04-09 22:49:41,280 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = 666a58e84549/172.28.0.12
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 3.3.6
STARTUP_MSG:   classpath = /content/hadoop-3.3.6/etc/hadoop:/content/hadoop-3.3.6/share/hadoop/common/lib/checker-qual-2.5.2.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/netty-codec-stomp-4.1.89.Final.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/kerb-client-1.0.1.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/failureaccess-1.0.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/kerby-pkix-1.0.1.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/commons-logging-1.1.3.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/httpclient-4.5.13.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/netty-transport-classes-epoll-4.1.89.Final.jar:/content/hadoop-3.3.6/share/hadoop/common/lib/netty-codec-mqtt-4.1.89.Final.jar:/content/

# Étape 5 : Démarrer HDFS :


In [1]:
!/content/hadoop-3.3.6/bin/hdfs namenode &
!/content/hadoop-3.3.6/bin/hdfs datanode &

ERROR: JAVA_HOME is not set and could not be found.
ERROR: JAVA_HOME is not set and could not be found.


# Étape 6 : Créer un répertoire dans HDFS


In [2]:
!hdfs dfs -mkdir -p /tp_hdfs

/bin/bash: line 1: hdfs: command not found


# Étape 7 : Créer un fichier local et l’envoyer à HDFS

In [3]:
!echo "Bonjour HDFS depuis Google Colab!" > fichier_test.txt
!hdfs dfs -put fichier_test.txt /tp_hdfs

/bin/bash: line 1: hdfs: command not found


# Étape 8 : Lire et afficher un fichier dans HDFS


In [4]:
!hdfs dfs -ls /tp_hdfs
!hdfs dfs -cat /tp_hdfs/fichier_test.txt

/bin/bash: line 1: hdfs: command not found
/bin/bash: line 1: hdfs: command not found


# Étape 9 : Supprimer un fichier et un dossier

In [5]:
!hdfs dfs -rm /tp_hdfs/fichier_test.txt
!hdfs dfs -rmdir /tp_hdfs

/bin/bash: line 1: hdfs: command not found
/bin/bash: line 1: hdfs: command not found
