In [1]:
import ezdxf
import numpy as np
import pandas as pd
from scipy.spatial import distance as spd

In [2]:
# Input DXF drawings, processed in this order by the loading loop.
paths = [
    r'F:\02_projects\cads\dxfs\1-150.dxf',
    r'F:\02_projects\cads\dxfs\151-250.dxf'
]

# Offset applied to the selection polygons on every scan step:
# 420 drawing units along x, none along y.
vector = np.array([420.0, 0])

# Directory that receives the exported JSON files.
export_path = r'F:\02_projects\cads\export'

# Layer-type code as it appears in the drawing text -> name used for the
# exported file of that layer type.
config = {
    '1': '人工填土',
    '2': '植物层',
    '3': '粉质粘土',
    '4': '粉质粘土_2',
    '5': '粉质粘土含圆砾',
    '6': '粉质粘土_3',
    '7': '强风化泥质粉砂岩',
    '8': '中风化泥质粉砂岩',
    '81': '强风化泥质粉砂岩_2',
    '82': '中风化泥质粉砂岩_2',
}

In [3]:
def load_texts(space=None):
    """Collect every non-blank TEXT entity of a DXF layout into a DataFrame.

    Args:
        space: Layout to read from (any object exposing ``query``). When
            omitted, the notebook-level ``msp`` variable is used, so existing
            ``load_texts()`` calls keep working.

    Returns:
        pandas.DataFrame with the columns ``cad_x`` and ``cad_y`` (insertion
        point of the text) and ``text``. The columns exist even when no text
        was found, so column lookups on the result never raise ``KeyError``.
    """
    if space is None:
        space = msp  # modelspace assigned by the loading loop

    records = list()

    for entity in space.query('TEXT'):
        content = entity.dxf.text

        # Blank labels carry no information; skip them.
        if content in ('', ' ', None):
            continue

        # Only the planar part of the insertion point is used.
        x, y = list(entity.dxf.insert)[:2]
        records.append({'cad_x': x, 'cad_y': y, 'text': content})

    return pd.DataFrame(records, columns=['cad_x', 'cad_y', 'text'])

In [4]:
def load_selections(space=None):
    """Read the outlines drawn on the ``bounder`` layer as lists of vertices.

    Args:
        space: Layout to read from (any object exposing ``query``). When
            omitted, the notebook-level ``msp`` variable is used, so existing
            ``load_selections()`` calls keep working.

    Returns:
        A list with one entry per entity on the layer, in query order. Each
        entry is a list of ``[x, y]`` pairs taken from the first two values
        of every point returned by the entity's ``get_points()``.
    """
    if space is None:
        space = msp  # modelspace assigned by the loading loop

    return [
        [list(point)[:2] for point in outline.get_points()]
        for outline in space.query('*[layer=="bounder"]')
    ]

In [5]:
class MaskSpace:
    """A group of selection polygons used to pick texts out of a drawing.

    The polygons can be translated with :meth:`move`; every call to
    :meth:`get` stores, per polygon, the texts whose insertion point lies
    strictly inside that polygon's bounding box. Subclasses turn the stored
    frames into a result table by overriding :meth:`manage_data`.
    """

    def __init__(self, polygons):
        """
        Args:
            polygons: Sequence of polygons, each a sequence of ``[x, y]``
                vertices. The polygons are stacked into one array, so they
                are expected to share the same number of vertices.
        """
        polygons = np.array(polygons)
        # Polygons are ordered by their per-vertex coordinate sums, compared
        # lexicographically (the constant divisor does not affect the order).
        ordered = sorted(polygons, key=lambda poly: (np.sum(poly, axis=1) / 4).tolist())
        # Stored as an ndarray so that move() is ordinary array arithmetic
        # regardless of the type of the offset passed in.
        self._polygons = np.array(ordered)
        self._original_data = list()
        self._core = pd.DataFrame()

    @property
    def base_point(self):
        """``[min_x, min_y]`` over all vertices at the current position."""
        arr = np.asarray(self._polygons)
        return [np.min(arr[..., 0]), np.min(arr[..., 1])]

    @property
    def data(self):
        """Copy of the result table, rebuilt through :meth:`manage_data` on every access."""
        self.manage_data()
        return self._core.copy()

    def manage_data(self):
        """Hook for subclasses: rebuild ``self._core`` from ``self._original_data``.

        The base implementation does nothing.
        """
        pass

    def move(self, distance):
        """Translate every polygon by ``distance``.

        Args:
            distance: ``[dx, dy]`` offset; an ndarray, list or tuple.
        """
        # Explicit array addition: ``list += list`` would extend the polygon
        # list instead of shifting the vertices.
        self._polygons = np.asarray(self._polygons) + np.asarray(distance)

    def get(self, texts):
        """Store the texts that fall inside each polygon's bounding box.

        Args:
            texts: DataFrame with the columns ``cad_x``, ``cad_y`` and ``text``.

        For every polygon, in stored order, one frame is appended to
        ``self._original_data``. Exact duplicates are removed, rows are
        sorted by ``cad_x`` then ``cad_y``, and the previous index is kept
        in an ``index`` column.
        """
        for geo in self._polygons:
            xmin, ymin = np.min(geo, axis=0)
            xmax, ymax = np.max(geo, axis=0)

            # Strict comparison: points on the box edge are not selected.
            inside = (
                (texts['cad_x'] > xmin) &
                (texts['cad_x'] < xmax) &
                (texts['cad_y'] > ymin) &
                (texts['cad_y'] < ymax)
            )
            picked = (
                texts[inside]
                .drop_duplicates(subset=['cad_x', 'cad_y', 'text'])
                .sort_values(by=['cad_x', 'cad_y'])
                .reset_index()
            )
            self._original_data.append(picked)

In [16]:
class MyMaskSpace(MaskSpace):
    """MaskSpace whose collected selections come in groups of four.

    Within each group the frames hold, in order: the type labels, the
    heights, the thicknesses, and a frame whose first two rows carry the
    ``y`` and ``x`` values of the record.
    """

    def __init__(self, polygons):
        super().__init__(polygons)

    @staticmethod
    def get_distances(xs, ys):
        """Pairwise Euclidean distances between the points ``(xs[i], ys[i])``.

        Returns:
            Square DataFrame with default integer index and columns.
        """
        points = np.column_stack([xs, ys])
        return pd.DataFrame(spd.cdist(points, points))

    @staticmethod
    def check_index(dist, tor=2.25):
        """Pair up the rows that have another point closer than ``tor``.

        Args:
            dist: Square distance table as returned by :meth:`get_distances`.
            tor: Distance below which two distinct points count as close.
                Zero distances (a point to itself) are ignored.

        Returns:
            List of ``[first, second]`` row positions. The flagged positions
            are split in half and the k-th position of the first half is
            paired with the k-th position of the second half; with an odd
            count the last flagged position stays unpaired.
        """
        values = np.asarray(dist)
        has_neighbour = ((values > 0) & (values < tor)).any(axis=1)
        indices = np.flatnonzero(has_neighbour).tolist()

        half = len(indices) // 2
        return [[indices[k], indices[k + half]] for k in range(half)]

    @staticmethod
    def merge_labels_by_indices(df, indices):
        """Concatenate the ``text`` of paired rows and drop the second row of each pair.

        Args:
            df: DataFrame with a ``text`` column.
            indices: ``[first, second]`` row positions, e.g. from
                :meth:`check_index`.

        Returns:
            New DataFrame in which each ``first`` row holds
            ``text[first] + text[second]`` and the ``second`` rows are
            removed. ``df`` itself is left unmodified.
        """
        merged = df.copy()
        text_col = merged.columns.get_loc('text')
        drop_positions = list()

        for first, second in indices:
            # Purely positional access, so the result does not depend on the
            # index labels (DataFrame.set_value no longer exists in pandas).
            merged.iloc[first, text_col] = (
                merged.iloc[first, text_col] + merged.iloc[second, text_col]
            )
            drop_positions.append(second)

        return merged.drop(merged.index[drop_positions])

    def manage_data(self):
        """Rebuild ``self._core`` from the collected selections.

        Selections are consumed four at a time. When the number of type
        labels differs from the number of heights, type labels lying close
        together are merged first and the merged frame replaces the stored
        one. A group that cannot be processed is reported on stdout and
        skipped. The result has the columns ``type``, ``height``,
        ``thickness``, ``x`` and ``y``.
        """
        frames = list()

        for i in range(0, len(self._original_data), 4):
            # Defined before the try block so the error report below can
            # always print them.
            a = pd.DataFrame()
            heights = None

            try:
                types = self._original_data[i]['text'].values
                heights = self._original_data[i+1]['text'].values
                thickness = self._original_data[i+2]['text'].values
                x = self._original_data[i+3].iloc[1]['text']
                y = self._original_data[i+3].iloc[0]['text']

                if len(types) != len(heights):
                    df = self._original_data[i]
                    distances = self.get_distances(df['cad_x'], df['cad_y'])
                    indices = self.check_index(distances)
                    df = self.merge_labels_by_indices(df, indices)
                    self._original_data[i] = df
                    types = df['text'].values

                a['type'] = types
                a['height'] = heights
                a['thickness'] = thickness
                a['x'], a['y'] = x, y
                frames.append(a)

            except Exception as e:
                print(f'>>>> {i} error!', e)
                print(a)
                print(heights)

        if frames:
            self._core = pd.concat(frames, ignore_index=True)
        else:
            # Nothing usable was collected: keep the expected columns so that
            # downstream column lookups still work.
            self._core = pd.DataFrame(columns=['type', 'height', 'thickness', 'x', 'y'])

In [17]:
# Result table of every processed file; concatenated into ``df`` below.
data = list()

for path in paths:
    doc = ezdxf.readfile(path)
    # ``msp`` has to remain a notebook-level name: load_texts() and
    # load_selections() look it up as a global.
    msp = doc.modelspace()

    df_texts = load_texts()
    selections = load_selections()

    ms = MyMaskSpace(selections)

    # Step the selection polygons along x until they have passed the
    # right-most text. The limit does not change inside the loop, so it is
    # computed once.
    x_limit = df_texts['cad_x'].max()
    while ms.base_point[0] < x_limit:
        ms.get(df_texts)
        ms.move(vector)

    data.append(ms.data)

df = pd.concat(data, ignore_index=True)
df

Unnamed: 0,type,height,thickness,x,y
0,8,27.00,22.40,86004.29,50083.42
1,7,49.40,1.90,86004.29,50083.42
2,6,51.30,0.80,86004.29,50083.42
3,4,52.10,2.30,86004.29,50083.42
4,2,54.40,0.50,86004.29,50083.42
5,1,54.90,3.50,86004.29,50083.42
6,8,29.70,18.70,86012.86,50104.35
7,7,48.40,2.60,86012.86,50104.35
8,6,51.00,1.00,86012.86,50104.35
9,4,52.00,2.40,86012.86,50104.35


In [18]:
# export data
# One JSON file per entry of ``config``: the key selects the rows by their
# ``type`` value, the value names the file. The rows come from ``df``, the
# table concatenated over all input files; ``ms`` only holds the file that
# was processed last.
for key, value in config.items():
    p = f'{export_path}\\{value}.json'
    dt = df[df['type'] == key]
    dt.to_json(p)