<a href="https://colab.research.google.com/github/ykitaguchi77/github-tree-explorer/blob/main/github-tree-explorer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **📂 GitHub-Tree-Explorer**

## 🚀 Overview
`GitHub-Tree-Explorer` is a tool that visualizes the file hierarchy of a GitHub repository and exports it in XML format. It lets you review the contents of a repository at a glance and clearly understand its structure.

## 📌 Key Features
- Clone the contents of a GitHub repository to local storage
- Visually display the file hierarchy
- Export the directory structure in XML format

## 💡 Future Application
- The output text file contains the full source code of the repository wrapped in an XML structure, which makes it possible to analyze a GitHub repository with a large language model (LLM) such as Claude 3.






In [1]:
!pip install gitpython treelib

Collecting gitpython
  Downloading GitPython-3.1.42-py3-none-any.whl (195 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting treelib
  Downloading treelib-1.7.0-py3-none-any.whl (18 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython)
  Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython)
  Downloading smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected packages: treelib, smmap, gitdb, gitpython
Successfully installed gitdb-4.0.11 gitpython-3.1.42 smmap-5.0.1 treelib-1.7.0


In [3]:
import os
from git import Repo
from treelib import Node, Tree

# GitHub repository URL
repository_url = "https://github.com/ykitaguchi77/github-tree-explorer.git" # enter the URL of the repository to explore

# Path of the directory to clone into
clone_dir = "/content/repo"
os.makedirs(clone_dir, exist_ok=True)

# Change into clone_dir first so that `git clone` creates the repository folder inside it
%cd $clone_dir
!git clone $repository_url

# Directory names to leave out when the cloned files are traversed
exclude_dirs = [".git"]

/content/repo
Cloning into 'github-tree-explorer'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (10/10), 5.51 KiB | 2.76 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [12]:
from treelib import Tree
import os

# Function to display the file hierarchy
def display_file_hierarchy(directory, exclude_dirs=None):
    """Build, save and print a tree view of ``directory``.

    The tree is written to ``tree_structure.txt`` inside ``directory`` and the
    saved file is then echoed to standard output.

    Args:
        directory: Path of the directory to walk; it becomes the root node.
        exclude_dirs: Directory names that are neither listed nor descended
            into. ``None`` (the default) excludes nothing.
    """
    if exclude_dirs is None:
        exclude_dirs = []

    tree = Tree()
    tree.create_node(directory, directory)  # Root node: the path is both label and identifier

    # Every node is identified by its full path, so the `root` yielded by
    # os.walk is always the identifier of a node that has already been added.
    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk does not descend into excluded directories
        dirs[:] = [name for name in dirs if name not in exclude_dirs]
        for entry_name in dirs + files:
            tree.create_node(entry_name, os.path.join(root, entry_name), parent=root)

    # Define a fixed file name for the output
    tree_file = os.path.join(directory, "tree_structure.txt")

    # Start from an empty file before saving. save2file is expected to append to
    # an existing file (treelib 1.x behaviour - confirm for other versions).
    with open(tree_file, 'w', encoding='utf-8'):
        pass

    # Save the tree structure to the text file
    tree.save2file(tree_file)
    print(f"Directory structure saved to {tree_file}")

    # The rendered tree contains non-ASCII box-drawing characters, so decode it
    # explicitly as UTF-8 instead of relying on the locale's default encoding.
    with open(tree_file, 'r', encoding='utf-8') as saved_tree:
        print(saved_tree.read())

# Show the hierarchy of everything below clone_dir, leaving out the .git directory
display_file_hierarchy(directory=clone_dir, exclude_dirs=[".git"])

Directory structure saved to /content/repo/tree_structure.txt
/content/repo
├── directory_structure_with_content.xml
├── github-tree-explorer
│   ├── LICENSE
│   ├── README.md
│   └── github-tree-explorer.py
└── tree_structure.txt



In [18]:
import os
import xml.etree.ElementTree as ET

# Function to read the content of a file
def read_file_content(file_path):
    """Return the UTF-8 decoded text of ``file_path``.

    If opening or reading raises an ``Exception`` (missing file, undecodable
    bytes, ...), the exception's message is returned in place of the content.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as source:
            text = source.read()
    except Exception as error:
        text = str(error)
    return text

# Function to convert a directory structure into an XML format
def directory_to_xml(directory, exclude_dirs=None):
    """Convert a directory tree into an XML element tree, file contents included.

    Args:
        directory: Path of the directory to convert.
        exclude_dirs: Directory names to leave out at every level.
            ``None`` (the default) excludes nothing.

    Returns:
        An ``xml.etree.ElementTree.Element`` tagged ``directory`` whose ``name``
        attribute is the last component of ``directory``. Sub-directories
        become nested ``directory`` elements and files become ``file`` elements
        whose text is whatever ``read_file_content`` returns for them.
    """
    if exclude_dirs is None:
        exclude_dirs = []

    # normpath drops a trailing separator first; without it basename("repo/")
    # is "" and the root element would end up with an empty name.
    root_name = os.path.basename(os.path.normpath(directory))
    root_element = ET.Element("directory", name=root_name)

    # Only the top level is taken from os.walk; append_files_and_dirs recurses
    # into each sub-directory. A directory that cannot be listed yields nothing.
    _, dirs, files = next(os.walk(directory, topdown=True), (directory, [], []))

    # Sorted so the generated XML does not depend on the file system's listing order
    for dir_name in sorted(name for name in dirs if name not in exclude_dirs):
        sub_element = ET.SubElement(root_element, "directory", name=dir_name)
        append_files_and_dirs(sub_element, os.path.join(directory, dir_name), exclude_dirs)

    for file_name in sorted(files):
        file_element = ET.SubElement(root_element, "file", name=file_name)
        file_element.text = read_file_content(os.path.join(directory, file_name))

    return root_element

# Function to append files and directories to the XML structure recursively
def append_files_and_dirs(parent_element, path, exclude_dirs):
    """Recursively add the contents of ``path`` as children of ``parent_element``.

    Directories become ``directory`` elements and are descended into; regular
    files become ``file`` elements whose text is the value returned by
    ``read_file_content``. Entries that are neither are ignored.

    Args:
        parent_element: XML element that receives the new child elements.
        path: Directory whose entries are added.
        exclude_dirs: Directory names to skip at every level; ``None`` is
            treated as "exclude nothing".
    """
    if exclude_dirs is None:
        exclude_dirs = []

    # os.listdir returns entries in arbitrary order; sort for reproducible XML
    for item in sorted(os.listdir(path)):
        item_path = os.path.join(path, item)
        # NOTE: os.path.isdir follows symbolic links, so linked directories are descended into
        if os.path.isdir(item_path):
            if item in exclude_dirs:
                continue
            dir_element = ET.SubElement(parent_element, "directory", name=item)
            append_files_and_dirs(dir_element, item_path, exclude_dirs)
        elif os.path.isfile(item_path):
            file_element = ET.SubElement(parent_element, "file", name=item)
            file_element.text = read_file_content(item_path)

# Function to write the XML structure to a file
def write_xml_to_file(element, file_name):
    """Serialize ``element`` to ``file_name`` as UTF-8 XML with an XML declaration."""
    ET.ElementTree(element).write(file_name, xml_declaration=True, encoding='utf-8')

# Function to write the XML data to a text file
def write_xml_to_text_file(xml_element, text_file_name):
    """Write ``xml_element`` to ``text_file_name`` as UTF-8 text.

    The element is serialized with ``ET.tostring(..., encoding='unicode')``, so
    the file holds the XML markup as a plain string.
    """
    with open(text_file_name, mode='w', encoding='utf-8') as sink:
        serialized = ET.tostring(xml_element, encoding='unicode')
        sink.write(serialized)

# Example of using the functions to generate an XML representation of a directory structure
# `git clone` names the folder after the last URL component without its ".git" suffix.
# str.rstrip('.git') would instead strip any trailing '.', 'g', 'i' or 't' characters
# (e.g. "widget.git" -> "widge"), so remove the exact suffix only.
repo_name = os.path.basename(repository_url.rstrip('/'))
if repo_name.endswith('.git'):
    repo_name = repo_name[:-len('.git')]
root_dir = f"{clone_dir}/{repo_name}"  # Path of the directory to be analyzed
exclude_dirs = ['.git']
root_element = directory_to_xml(root_dir, exclude_dirs)
# Optional: uncomment this and the write_xml_to_file call below to also write a standalone XML file
#xml_file_name = "directory_structure_with_content.xml"
# Relative path: the file is created in the current working directory
text_file_name = "directory_structure_with_content.txt"

#write_xml_to_file(root_element, xml_file_name)
write_xml_to_text_file(root_element, text_file_name)
