In [1]:
# =============================
# ðŸ§© STEP 1: Install Java
# =============================
!apt-get install openjdk-11-jdk-headless -qq > /dev/null

# =============================
# ðŸ§© STEP 2: Download & Setup Hadoop 3.3
# =============================
!wget -q https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
!tar -xzf hadoop-3.3.6.tar.gz
!mv hadoop-3.3.6 /usr/local/hadoop

# =============================
# ðŸ§© STEP 3: Configure Environment Variables
# =============================
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
os.environ["HADOOP_HOME"] = "/usr/local/hadoop"
os.environ["PATH"] = f"{os.environ['HADOOP_HOME']}/bin:{os.environ['JAVA_HOME']}/bin:" + os.environ["PATH"]

# Verify versions
!java -version
!hadoop version


openjdk version "11.0.28" 2025-07-15
OpenJDK Runtime Environment (build 11.0.28+6-post-Ubuntu-1ubuntu122.04.1)
OpenJDK 64-Bit Server VM (build 11.0.28+6-post-Ubuntu-1ubuntu122.04.1, mixed mode, sharing)
Hadoop 3.3.6
Source code repository https://github.com/apache/hadoop.git -r 1be78238728da9266a4f88195058f08fd012bf9c
Compiled by ubuntu on 2023-06-18T08:22Z
Compiled on platform linux-x86_64
Compiled with protoc 3.7.1
From source with checksum 5652179ad55f76cb287d9c633bb53bbd
This command was run using /usr/local/hadoop/share/hadoop/common/hadoop-common-3.3.6.jar


In [2]:
# Format namenode
!hdfs namenode -format


2025-11-11 15:17:26,372 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = 7dc7ebdfbbd2/172.28.0.12
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 3.3.6
STARTUP_MSG:   classpath = /usr/local/hadoop/etc/hadoop:/usr/local/hadoop/share/hadoop/common/lib/netty-resolver-4.1.89.Final.jar:/usr/local/hadoop/share/hadoop/common/lib/netty-transport-classes-epoll-4.1.89.Final.jar:/usr/local/hadoop/share/hadoop/common/lib/netty-codec-socks-4.1.89.Final.jar:/usr/local/hadoop/share/hadoop/common/lib/commons-beanutils-1.9.4.jar:/usr/local/hadoop/share/hadoop/common/lib/kerby-config-1.0.1.jar:/usr/local/hadoop/share/hadoop/common/lib/kerb-identity-1.0.1.jar:/usr/local/hadoop/share/hadoop/common/lib/jetty-security-9.4.51.v20230217.jar:/usr/local/hadoop/share/hadoop/common/lib/netty-all-4.1.89.Final.jar:/usr/local/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/usr/local/hadoop/share/had

In [3]:
# Write core-site.xml and hdfs-site.xml so HDFS runs as a single-node
# cluster that stores its data under ~/hdfs on the local disk.
import os  # also imported by the setup cell; repeated so this cell runs on its own

# Derive every path from one place. The original created the directories
# under "~" but hard-coded /root/... in the XML, which only agree when the
# notebook runs as root.
hdfs_root = os.path.join(os.path.expanduser("~"), "hdfs")
namenode_dir = os.path.join(hdfs_root, "namenode")
datanode_dir = os.path.join(hdfs_root, "datanode")
hadoop_conf_dir = os.path.join(
    os.environ.get("HADOOP_HOME", "/usr/local/hadoop"), "etc", "hadoop"
)

# Create folders for namenode/datanode
for storage_dir in (namenode_dir, datanode_dir):
    os.makedirs(storage_dir, exist_ok=True)

# core-site.xml: address clients use to reach the NameNode
core_site = """<?xml version="1.0"?>
<configuration>
 <property>
   <name>fs.defaultFS</name>
   <value>hdfs://localhost:9000</value>
 </property>
</configuration>
"""
with open(os.path.join(hadoop_conf_dir, "core-site.xml"), "w") as f:
    f.write(core_site)

# hdfs-site.xml: local storage directories, and replication factor 1
# because there is only a single DataNode.
# The directories are absolute paths, so "file://" + path yields file:///...
hdfs_site = f"""<?xml version="1.0"?>
<configuration>
 <property>
   <name>dfs.namenode.name.dir</name>
   <value>file://{namenode_dir}</value>
 </property>
 <property>
   <name>dfs.datanode.data.dir</name>
   <value>file://{datanode_dir}</value>
 </property>
 <property>
   <name>dfs.replication</name>
   <value>1</value>
 </property>
</configuration>
"""
with open(os.path.join(hadoop_conf_dir, "hdfs-site.xml"), "w") as f:
    f.write(hdfs_site)


In [6]:
# Start HDFS (namenode + datanode)
!hdfs --daemon start namenode
!hdfs --daemon start datanode

# Create a directory in HDFS
!hdfs dfs -mkdir /mydata

# Create sample local text files (for upload & testing)
!echo "This is the first Hadoop sample file." > localfile1.txt
!echo "This is another sample text file for HDFS operations." > localfile2.txt

# Upload local file to HDFS
!hdfs dfs -put localfile1.txt /mydata/

# Verify uploaded file
!hdfs dfs -ls /mydata

# Copy file inside HDFS
!hdfs dfs -cp /mydata/localfile1.txt /mydata/localfile_copy.txt

# Move file within HDFS
!hdfs dfs -mv /mydata/localfile_copy.txt /mydata/movedfile.txt

# Download file from HDFS to local system
!hdfs dfs -get /mydata/movedfile.txt downloaded.txt

# Display downloaded file content
!cat downloaded.txt

# Remove a file from HDFS
!hdfs dfs -rm /mydata/movedfile.txt

# Remove the directory completely
!hdfs dfs -rm -r /mydata


mkdir: Call From 7dc7ebdfbbd2/172.28.0.12 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
put: Call From 7dc7ebdfbbd2/172.28.0.12 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
ls: Call From 7dc7ebdfbbd2/172.28.0.12 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
cp: Call From 7dc7ebdfbbd2/172.28.0.12 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
mv: Call From 7dc7ebdfbbd2/172.28.0.12 to localhost:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wi

In [5]:
!hdfs --daemon stop namenode
!hdfs --daemon stop datanode
