In [41]:
import numpy as np
import os
import logging
from anytree import Node, RenderTree

class DataStorage:
    def __init__(self):
        self.data = {}
        self.key_map = {}

    def load_data(self, np_data):
        """从numpy数组加载数据"""
        if isinstance(np_data, np.ndarray) and np_data.size == 1 and isinstance(np_data.item(), dict):
            self.data = np_data.item()
            self._create_key_map()
        else:
            raise ValueError("Invalid data format. Expected a numpy array containing a dictionary.")

    def _create_key_map(self):
        """创建 key_map，将每个键映射到唯一的数字索引"""
        self.key_map = {key: idx for idx, key in enumerate(self.data.keys())}

    def get_data_by_key(self, key):
        """通过键获取数据"""
        return self.data.get(key, None)

    def get_data_by_index(self, index):
        """通过索引获取数据"""
        key = self.get_key_by_index(index)
        return self.get_data_by_key(key)

    def get_key_by_index(self, index):
        """通过索引获取键"""
        return next((key for key, idx in self.key_map.items() if idx == index), None)

    def get_all_keys(self):
        """获取所有数据键"""
        return list(self.data.keys())

    def print_data(self):
        """打印所有数据"""
        for key, value in self.data.items():
            print(f"{key}: {value}")
            

def build_forest(keys):
    # 按键长度排序
    sorted_keys = sorted(keys, key=len)
    nodes = {}
    
    for key in sorted_keys:
        # 初始化节点
        node = Node(key)
        nodes[key] = node
        
        # 找到最长匹配前缀作为父节点
        for i in range(len(key)-1, 0, -1):
            parent_key = key[:i]
            if parent_key in nodes:
                node.parent = nodes[parent_key]
                break

    return nodes

def print_subtree(node):
    """打印以指定节点为根的子树"""
    for pre, fill, child in RenderTree(node):
        print(f"{pre}{child.name}")
        if hasattr(child, 'data') and child.data:
            print(f"{pre}Data: {child.data}")
            
HOME = os.environ['HOME'] + '/'
datapath = f'{HOME}dataset/zhaokedata'
file_list = os.listdir(datapath)
# remove all '._' files
file_list = [f for f in file_list if not f.startswith('._')]

logging.basicConfig(level=logging.INFO)
logging.info(f'file_list: {file_list}')

# INFO:root:file_list: ['200113plc1p2.npy', '200113plc1p2_TD.npy', '200323plc1p1_Names.npy', '200113plc1p2_Names.npy', '200323plc1p1_TD.npy']

# get size of each file
for f in file_list:
    np_data = np.load(f'{datapath}/{f}', allow_pickle=True)
    logging.info(f'{f}: {np_data.shape}')
    

INFO:root:file_list: ['200113plc1p2.npy', '200113plc1p2_TD.npy', '200323plc1p1_Names.npy', '200113plc1p2_Names.npy', '200323plc1p1_TD.npy']
INFO:root:200113plc1p2.npy: ()
INFO:root:200113plc1p2_TD.npy: (1288, 1288)
INFO:root:200323plc1p1_Names.npy: (1208,)
INFO:root:200113plc1p2_Names.npy: (1288,)
INFO:root:200323plc1p1_TD.npy: (1208, 1208)


In [42]:
np_data = np.load(
    '/home/zihan/codes/zhaokedata/200113plc1p21.npy', allow_pickle=True
)
# 打印data的类型
print("Type of data:", type(np_data))

# 打印常用属性
print("Number of dimensions (ndim):", np_data.ndim)
print("Shape of the array (shape):", np_data.shape)
print("Total number of elements (size):", np_data.size)
print("Data type of elements (dtype):", np_data.dtype)
print("Size of each element (itemsize):", np_data.itemsize, "bytes")
print("Total bytes of the array (nbytes):", np_data.nbytes, "bytes")


Type of data: <class 'numpy.ndarray'>
Number of dimensions (ndim): 0
Shape of the array (shape): ()
Total number of elements (size): 1
Data type of elements (dtype): object
Size of each element (itemsize): 8 bytes
Total bytes of the array (nbytes): 8 bytes


In [43]:
# 创建DataStorage实例并加载数据
data_storage = DataStorage()
data_storage.load_data(np_data)

# 获取所有键并生成森林
all_keys = data_storage.get_all_keys()
nodes = build_forest(all_keys)

# 将数据添加到树节点中
for key, data in data_storage.data.items():
    nodes[key].data = data

# 找到所有根节点
roots = [node for node in nodes.values() if node.is_root]

# 打印森林结构和数据
# for root in roots:
#     for pre, fill, node in RenderTree(root):
#         print(f"{pre}{node.name}")
#         if hasattr(node, 'data') and node.data:
#             print(f"{pre}Data: {node.data}")
# 打印某个节点及其子树
print("\nSubtree rooted at 'AB':")
print_subtree(nodes['AB'])



Subtree rooted at 'AB':
AB
Data: [[125.0, 141.0, 91.0]]
├── ABa
├── Data: [[115.0, 143.0, 98.0], [115.0, 143.0, 96.0], [107.0, 139.0, 97.0], [105.0, 137.0, 101.0], [106.0, 120.0, 95.0], [108.0, 109.0, 86.0], [110.0, 113.0, 91.0], [117.0, 114.0, 96.0], [127.0, 111.0, 95.0], [136.0, 119.0, 106.0]]
│   ├── ABal
│   ├── Data: [[133.0, 122.0, 86.0], [117.0, 115.0, 86.0], [106.0, 115.0, 89.0], [100.0, 109.0, 84.0], [110.0, 107.0, 70.0], [118.0, 111.0, 69.0], [110.0, 107.0, 68.0], [97.0, 105.0, 78.0], [101.0, 100.0, 88.0], [101.0, 99.0, 101.0], [106.0, 106.0, 112.0]]
│   │   ├── ABala
│   │   ├── Data: [[126.0, 100.0, 95.0], [118.0, 93.0, 96.0], [122.0, 89.0, 98.0], [123.0, 84.0, 100.0], [125.0, 85.0, 95.0], [123.0, 86.0, 86.0], [119.0, 90.0, 80.0], [114.0, 92.0, 77.0], [114.0, 93.0, 80.0], [118.0, 89.0, 85.0], [120.0, 90.0, 90.0], [124.0, 92.0, 98.0]]
│   │   │   ├── ABalaa
│   │   │   ├── Data: [[124.0, 81.0, 100.0], [124.0, 78.0, 95.0], [128.0, 81.0, 87.0], [125.0, 77.0, 86.0], [125.0, 78

In [44]:
# save key map to file that can read
with open('key_map.txt', 'w') as f:
    for k, v in data_storage.key_map.items():
        f.write(f'{k} {v}\n')