In [1]:
import pandas as pd
import re

# 데이터를 저장할 리스트 초기화
data = []

# 텍스트 파일 읽기 (인코딩 지정)
with open('./data/GPSDATA1.TXT', 'r', encoding='UTF-8') as file:
    lines = file.readlines()

# 데이터 파싱
current_data = {}
for line in lines:
    line = line.strip()
    if re.match(r'^시간:', line):
        if current_data:
            data.append(current_data)
        current_data = {}
        current_data['시간'] = re.search(r'\d+:\d+:\d+', line).group()
    elif re.match(r'^위도:', line):
        current_data['위도'] = float(re.search(r'[-+]?\d*\.\d+|\d+', line).group())
    elif re.match(r'^경도:', line):
        current_data['경도'] = float(re.search(r'[-+]?\d*\.\d+|\d+', line).group())
    elif re.match(r'^가속도:', line):
        current_data['가속도'] = tuple(map(float, re.findall(r'[-+]?\d*\.\d+|\d+', line)))
    elif re.match(r'^자이로:', line):
        current_data['자이로'] = tuple(map(float, re.findall(r'[-+]?\d*\.\d+|\d+', line)))
    elif re.match(r'^지자계:', line):
        current_data['지자계'] = tuple(map(float, re.findall(r'[-+]?\d*\.\d+|\d+', line)))
    elif re.match(r'^속도 \(km/h\):', line):
        current_data['속도 (km/h)'] = float(re.search(r'[-+]?\d*\.\d+|\d+', line).group())

# 마지막 데이터 추가
if current_data:
    data.append(current_data)

# 데이터 프레임 생성
df = pd.DataFrame(data)


In [3]:
df

Unnamed: 0,시간,위도,경도,가속도,자이로,지자계,속도 (km/h)
0,0:0:0,0.000000,0.000000,"(-0.07, -0.09, 0.97)","(2.69, 1.22, -1.77)","(16.43, -27.54, -100.22)",0.00
1,0:0:0,0.000000,0.000000,"(-0.08, -0.12, 0.97)","(2.01, 0.73, -1.04)","(16.64, -26.78, -100.18)",0.00
2,0:0:0,0.000000,0.000000,"(-0.08, -0.15, 0.95)","(3.11, 2.44, 2.01)","(16.26, -25.59, -100.93)",0.00
3,0:0:0,0.000000,0.000000,"(-0.17, -0.03, 0.99)","(-7.32, 13.0, -19.96)","(17.19, -24.6, -100.52)",0.00
4,0:0:0,0.000000,0.000000,"(-0.05, -0.1, 1.0)","(-1.46, 4.58, -13.55)","(22.28, -28.27, -98.38)",0.00
...,...,...,...,...,...,...,...
325,8:57:12,37.629597,127.078903,"(-0.05, -0.06, 0.96)","(6.04, 9.34, 5.0)","(-33.28, 5.81, -99.23)",0.44
326,8:57:13,37.629597,127.078903,"(0.07, 0.04, 1.03)","(-3.3, 6.41, 0.0)","(-34.01, 2.88, -101.72)",0.35
327,8:57:14,37.629597,127.078903,"(-0.05, 0.0, 0.98)","(1.1, 1.22, -0.79)","(-34.25, 3.43, -102.55)",0.56
328,8:57:15,37.629601,127.078903,"(-0.05, 0.0, 0.98)","(0.92, 1.46, -0.67)","(-34.2, 3.47, -102.67)",0.06


In [4]:
# 위도와 경도가 0인 행 제거
df = df[(df['위도'] != 0) & (df['경도'] != 0)]

# 인덱스 재설정
df.reset_index(drop=True, inplace=True)


In [5]:
# 열 이름 변경
df.rename(columns={
    '시간': 'time',
    '위도': 'lat',
    '경도': 'long',
    '가속도': 'accel',
    '자이로': 'gyro',
    '지자계': 'mag',
    '속도 (km/h)': 'speed'
}, inplace=True)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219 entries, 0 to 218
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   time    219 non-null    object 
 1   lat     219 non-null    float64
 2   long    219 non-null    float64
 3   accel   219 non-null    object 
 4   gyro    219 non-null    object 
 5   mag     219 non-null    object 
 6   speed   219 non-null    float64
dtypes: float64(3), object(4)
memory usage: 12.1+ KB


In [7]:
# accel, gyro, mag 열의 데이터 형식을 리스트로 변경
df['accel'] = df['accel'].apply(lambda x: list(x))
df['gyro'] = df['gyro'].apply(lambda x: list(x))
df['mag'] = df['mag'].apply(lambda x: list(x))

# 변경된 데이터 형식 확인
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['accel'] = df['accel'].apply(lambda x: list(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['gyro'] = df['gyro'].apply(lambda x: list(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mag'] = df['mag'].apply(lambda x: list(x))


Unnamed: 0,time,lat,long,accel,gyro,mag,speed
0,8:53:41,37.630974,127.079552,"[-0.06, -0.12, 0.84]","[32.41, -26.73, 13.18]","[12.49, -24.72, -97.35]",4.78
1,8:53:42,37.630959,127.079529,"[0.05, -0.11, 0.82]","[16.3, -5.86, 18.43]","[14.34, -23.75, -98.75]",4.72
2,8:53:43,37.630947,127.079529,"[-0.1, 0.07, 1.08]","[-5.43, -4.7, 14.04]","[15.12, -26.23, -94.13]",5.01
3,8:53:44,37.630939,127.079544,"[-0.06, 0.01, 1.12]","[0.43, -0.43, 14.77]","[14.07, -25.79, -94.21]",3.68
4,8:53:45,37.630928,127.079544,"[-0.08, 0.01, 1.18]","[-26.49, -2.14, -6.59]","[14.23, -24.3, -94.42]",3.80
...,...,...,...,...,...,...,...
214,8:57:12,37.629597,127.078903,"[-0.05, -0.06, 0.96]","[6.04, 9.34, 5.0]","[-33.28, 5.81, -99.23]",0.44
215,8:57:13,37.629597,127.078903,"[0.07, 0.04, 1.03]","[-3.3, 6.41, 0.0]","[-34.01, 2.88, -101.72]",0.35
216,8:57:14,37.629597,127.078903,"[-0.05, 0.0, 0.98]","[1.1, 1.22, -0.79]","[-34.25, 3.43, -102.55]",0.56
217,8:57:15,37.629601,127.078903,"[-0.05, 0.0, 0.98]","[0.92, 1.46, -0.67]","[-34.2, 3.47, -102.67]",0.06


In [11]:
print(df.head())

      time        lat        long           accel     gyro   
0  8:53:41  37.630974  127.079552  [-0.06, -0.12]  [13.18]  \
1  8:53:42  37.630959  127.079529   [0.05, -0.11]  [18.43]   
2  8:53:43  37.630947  127.079529    [-0.1, 0.07]  [14.04]   
3  8:53:44  37.630939  127.079544   [-0.06, 0.01]  [14.77]   
4  8:53:45  37.630928  127.079544   [-0.08, 0.01]  [-6.59]   

                       mag  speed  
0  [12.49, -24.72, -97.35]   4.78  
1  [14.34, -23.75, -98.75]   4.72  
2  [15.12, -26.23, -94.13]   5.01  
3  [14.07, -25.79, -94.21]   3.68  
4   [14.23, -24.3, -94.42]   3.80  


In [10]:
df.to_csv("./data/processed__gps.csv")