![01_table_dataframe](https://pandas.pydata.org/docs/_images/01_table_dataframe.svg)


In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

# Build a one-dimensional Series; the np.nan entry makes pandas store it as float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print("Series:")
print(s)
print()

# Six consecutive daily timestamps starting 2023-01-01, reused for column D below
# instead of building the same range a second time.
dates = pd.date_range('20230101', periods=6)

# Build a DataFrame in which every column holds a different kind of data.
# NOTE: no random seed is set in this cell, so columns A and E differ between
# runs unless NumPy's global generator was seeded earlier in the session.
df = pd.DataFrame({
    'A': np.random.randn(6),  # Random floats
    'B': ['one', 'two', 'three', 'four', 'five', 'six'],  # Strings
    'C': pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']),  # Categories
    'D': dates,  # Dates
    'E': np.random.randint(0, 100, 6),  # Random integers in [0, 100)
})

print("DataFrame:")
print(df)


Series:
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

DataFrame:
          A      B  C          D   E
0 -1.497990    one  a 2023-01-01  59
1 -0.720751    two  b 2023-01-02   6
2 -1.409876  three  c 2023-01-03  27
3  0.035614   four  a 2023-01-04  28
4 -0.205097   five  b 2023-01-05  89
5  0.298994    six  c 2023-01-06  45


![02_io_readwrite](https://pandas.pydata.org/docs/_images/02_io_readwrite.svg)


In [4]:
import pandas as pd

# Read the CSV file into a DataFrame.
file_path = 'data/600519.csv'
df = pd.read_csv(file_path)

# Take the first three rows once so the printed preview and the exported
# file are guaranteed to contain the same data.
df_preview = df.head(3)
print(df_preview)

# Save the preview to output/test.csv. The target folder is created first
# because to_csv does not create missing parent directories and would
# otherwise fail on a fresh checkout.
output_path = 'output/test.csv'
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_preview.to_csv(output_path, index=False)

print(f"前三行数据已保存到 {output_path}")

         day  STOCK_CODE   open  close  maximum  minimum  volume    TURNOVER
0  2001/8/27     6005191  34.51  35.55    37.78    32.85  406318  1410347008
1  2001/8/28     6005191  34.99  36.86    37.00    34.61  129647   463463008
2  2001/8/28     6005191  34.99  36.86    37.00    34.61  129647   463463008
前三行数据已保存到 output/test.csv


In [5]:
# Read the SPSS (.sav) file into a DataFrame.
df_spss = pd.read_spss('data/t-test.sav')

# Show the first three rows.
print(df_spss.head(3))

# Save the whole table to an Excel file. The target folder is created first
# because to_excel does not create missing parent directories.
excel_path = 'output/test.xlsx'
Path(excel_path).parent.mkdir(parents=True, exist_ok=True)
df_spss.to_excel(excel_path, index=False)

# Confirm the export; the cell's recorded output shows this message, but the
# statement that prints it was missing from the code.
print(f"数据已保存到 {excel_path}")


   性别     年龄  生活压力  社会支持  身心倦怠
0  男生  25岁以下  15.0  40.0  18.0
1  男生  25岁以下  20.0  32.0  22.0
2  男生  25岁以下  24.0  42.0  20.0
数据已保存到 output/test.xlsx


![03_subset_columns](https://pandas.pydata.org/docs/_images/03_subset_columns.svg)


In [9]:
# Read the CSV file.
df = pd.read_csv('data/600519.csv')

# Keep rows whose close value lies strictly between 50 and 100,
# then display the first 10 matches.
close_in_band = (df['close'] > 50) & (df['close'] < 100)
filtered_df = df.loc[close_in_band].head(10)
filtered_df


Unnamed: 0,day,STOCK_CODE,open,close,maximum,minimum,volume,TURNOVER
860,2005/4/6,6005191,50.17,51.16,51.29,49.59,5675,28795816
861,2005/4/7,6005191,51.2,50.08,52.49,50.0,12438,64340829
873,2005/4/25,6005191,49.1,52.0,52.5,48.61,12704,65092003
874,2005/4/26,6005191,51.99,52.27,53.53,51.8,13609,71600714
875,2005/4/27,6005191,51.88,52.82,53.12,51.81,7473,39437614
876,2005/4/28,6005191,52.82,53.0,53.5,52.23,8521,45118705
877,2005/4/29,6005191,52.3,53.0,53.15,52.01,6825,35950860
878,2005/5/9,6005191,52.5,51.52,52.95,51.31,7656,39655697
879,2005/5/10,6005191,51.0,50.66,51.3,49.8,6447,32384041
908,2005/6/20,6005191,49.0,51.8,53.1,48.5,12369,62906075


![04_plot_overview](https://pandas.pydata.org/docs/_images/04_plot_overview.svg)


In [11]:
# Plotly Express draws the bar chart; imported at the top of the cell so the
# dependency is visible before any work is done.
import plotly.express as px

# Number of leading rows to plot. The chart title is derived from the rows
# actually selected, so the title can no longer disagree with the data.
N_PLOT_ROWS = 50

# Read the CSV file.
df = pd.read_csv('data/600519.csv')

# Take the leading rows for plotting.
df_plot = df.head(N_PLOT_ROWS)

# Bar chart of close value per row. The title says "Rows" rather than "Days"
# because the same date can appear in more than one row of this file.
fig = px.bar(
    df_plot,
    x='day',
    y='close',
    title=f'Close Price for First {len(df_plot)} Rows',
    labels={'day': 'Date', 'close': 'Close Price'},
)

fig.show()
