

大家好,我是正在实战各种AI项目的程序员晚枫。
今天学习时间序列数据处理,这是金融、销售、运营等领域必备的技能。
无论是分析日活趋势、计算同比环比,还是预测未来销量,都需要掌握时间序列的处理方法。
DatetimeIndex:时间索引
创建时间序列
```python
import pandas as pd
import numpy as np

# Build a DatetimeIndex from a list of date strings
dates = pd.to_datetime(['2024-01-01', '2024-01-02', '2024-01-03'])

# 10 consecutive calendar days
dates = pd.date_range(start='2024-01-01', periods=10, freq='D')
# Month-end dates in 2024 (pandas >= 2.2 deprecates 'M' here in favour of 'ME')
dates = pd.date_range(start='2024-01-01', end='2024-12-31', freq='M')
# 12 month-start dates
dates = pd.date_range(start='2024-01-01', periods=12, freq='MS')
```
创建时间序列DataFrame
```python
df = pd.DataFrame({
    '日期': pd.date_range('2024-01-01', periods=365, freq='D'),
    '销售额': np.random.randint(1000, 5000, 365)
})

# Use the date column as the index so the frame gets a DatetimeIndex
df.set_index('日期', inplace=True)
print(df.head())
```
时间索引切片
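```python
# Select one month. Use .loc: df['2024-01'] (a single partial-date string
# passed to []) raises KeyError on pandas >= 2.0.
df.loc['2024-01']
# String slices on the DatetimeIndex select row ranges (both ends inclusive)
df['2024-01':'2024-03']
df['2024-01-15':'2024-01-20']

# Whole year
df.loc['2024']

# Whole month
df.loc['2024-02']
```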
提取时间组件
```python
df['年'] = df.index.year
df['月'] = df.index.month
df['日'] = df.index.day
df['星期'] = df.index.dayofweek          # Monday=0 ... Sunday=6
df['季度'] = df.index.quarter
df['是否周末'] = df.index.dayofweek >= 5  # Saturday or Sunday

# First and last timestamp of the month each row belongs to
df['月初'] = df.index.to_period('M').to_timestamp()
df['月末'] = df.index.to_period('M').to_timestamp(how='end')
```
重采样Resample
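```python
# Note: pandas >= 2.2 deprecates the offset aliases 'M' and 'Q'
# in favour of 'ME' and 'QE'.

# Monthly total / monthly mean
monthly = df.resample('M').sum()
monthly = df.resample('M').mean()

# Weekly total (default weeks end on Sunday) / weeks ending on Monday
weekly = df.resample('W').sum()
weekly = df.resample('W-MON').sum()

# Quarterly total
quarterly = df.resample('Q').sum()

# Several aggregations at once
stats = df.resample('M').agg({
    '销售额': ['sum', 'mean', 'max', 'min']
})
```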
移动窗口计算
```python
# 7-day moving average
df['MA7'] = df['销售额'].rolling(window=7).mean()

# 30-day moving average
df['MA30'] = df['销售额'].rolling(window=30).mean()

# Exponentially weighted moving average
df['EMA'] = df['销售额'].ewm(span=7).mean()

# Rolling 7-day sum and standard deviation
df['7天总和'] = df['销售额'].rolling(window=7).sum()
df['7天标准差'] = df['销售额'].rolling(window=7).std()
```
滞后与差分
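```python
# Value from the previous row (previous day for daily data)
df['昨日销售'] = df['销售额'].shift(1)

# Value from 7 rows earlier (same weekday of the previous week)
df['上周同日'] = df['销售额'].shift(7)

# Percentage change versus the previous day
df['日环比'] = (df['销售额'] - df['昨日销售']) / df['昨日销售'] * 100

# Percentage change versus the same weekday of the previous week
df['周同比'] = (df['销售额'] - df['上周同日']) / df['上周同日'] * 100

# Absolute change versus the previous day
df['日增量'] = df['销售额'].diff()
```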
实战:完整的销售时间分析
```python
import pandas as pd
import numpy as np

# Simulated daily sales: base level + linear trend + yearly seasonality + noise
np.random.seed(42)
dates = pd.date_range('2024-01-01', '2024-12-31', freq='D')
base_sales = 3000
trend = np.linspace(0, 1000, len(dates))
seasonal = 500 * np.sin(2 * np.pi * np.arange(len(dates)) / 365.25)
noise = np.random.normal(0, 200, len(dates))

sales = base_sales + trend + seasonal + noise
sales = np.maximum(sales, 1000)  # floor at 1000

df = pd.DataFrame({'销售额': sales}, index=dates)

print("=== 时间序列分析报告 ===\n")

print("【年度概览】")
print(f"总销售额: ¥{df['销售额'].sum():,.0f}")
print(f"日均销售: ¥{df['销售额'].mean():,.0f}")
print(f"最高单日: ¥{df['销售额'].max():,.0f} ({df['销售额'].idxmax().date()})")
print(f"最低单日: ¥{df['销售额'].min():,.0f} ({df['销售额'].idxmin().date()})")

print("\n【月度销售】")
# pandas >= 2.2 deprecates 'M' here in favour of 'ME'
monthly = df.resample('M')['销售额'].sum()
print(monthly)

print("\n【星期销售模式】")
df['星期'] = df.index.day_name()
weekly_pattern = df.groupby('星期')['销售额'].mean()
print(weekly_pattern)

print("\n【7日移动平均(最近10天)】")
df['MA7'] = df['销售额'].rolling(7).mean()
print(df[['销售额', 'MA7']].tail(10))
```
性能对比:不同时间处理方式
```python
import pandas as pd
import numpy as np

# 100,000 hourly observations (pandas >= 2.2 deprecates 'H' in favour of 'h')
dates = pd.date_range('2020-01-01', periods=100000, freq='H')
df = pd.DataFrame({'value': np.random.randn(100000)}, index=dates)

# %timeit is an IPython/Jupyter magic; it does not run in a plain .py script
%timeit df.resample('D').mean()

%timeit df.rolling('7D').mean()
```
进阶用法
重采样详解
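```python
# Aggregate each OHLCV column with its own function
# (pandas >= 2.2 deprecates 'M' in favour of 'ME')
monthly = df.resample('M').agg({
    'open': 'first',
    'close': 'last',
    'high': 'max',
    'low': 'min',
    'volume': 'sum'
})

df.resample('2W')   # every two weeks
df.resample('QS')   # quarter start
df.resample('BA')   # business year end (alias deprecated in pandas >= 2.2; use 'BYE' there)
```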
滚动窗口高级用法
```python
# Several statistics over the same 20-row window
rolling_stats = df['price'].rolling(20).agg({
    'mean': 'mean',
    'std': 'std',
    'min': 'min',
    'max': 'max'
})

# Exponentially weighted moving averages
df['ema_12'] = df['price'].ewm(span=12).mean()
df['ema_26'] = df['price'].ewm(span=26).mean()

# Expanding window: running maximum
df['cummax'] = df['price'].expanding().max()
```
避坑指南
❌ 坑1:时区问题
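```python
# A Timestamp created without tz information is naive
ts = pd.Timestamp('2025-01-01 10:00')
print(ts.tz)  # None

# Attach a time zone to the naive timestamp
ts = ts.tz_localize('Asia/Shanghai')

# Convert the tz-aware timestamp to another time zone
ts_utc = ts.tz_convert('UTC')
print(ts_utc)
```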
❌ 坑2:resample的label和closed
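```python
# For month-end frequency the defaults are already closed='right' and
# label='right', so these three calls return the same result. The two
# parameters matter for frequencies such as 'D' or 'h', whose default is 'left'.
# (pandas >= 2.2 deprecates 'M' in favour of 'ME'.)
df.resample('M').mean()

df.resample('M', label='right').mean()

df.resample('M', closed='right', label='right').mean()
```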
实战案例:分析股票价格数据
```python
import pandas as pd
import numpy as np

np.random.seed(42)

# Simulated prices on business days: cumulative product of random daily returns
dates = pd.bdate_range('2024-01-01', '2025-12-31')
n = len(dates)
price = 100 * np.cumprod(1 + np.random.normal(0.001, 0.02, n))

df = pd.DataFrame({
    'open': price * (1 + np.random.uniform(-0.01, 0.01, n)),
    'close': price,
    'high': price * (1 + np.abs(np.random.uniform(0, 0.02, n))),
    'low': price * (1 - np.abs(np.random.uniform(0, 0.02, n))),
    'volume': np.random.randint(1000000, 10000000, n)
}, index=dates)

# Technical indicators
df['MA5'] = df['close'].rolling(5).mean()
df['MA20'] = df['close'].rolling(20).mean()
df['EMA12'] = df['close'].ewm(span=12).mean()
df['EMA26'] = df['close'].ewm(span=26).mean()
df['MACD'] = df['EMA12'] - df['EMA26']
df['returns'] = df['close'].pct_change()

# Monthly OHLCV bars (pandas >= 2.2 deprecates 'M' in favour of 'ME')
monthly = df.resample('M').agg({
    'open': 'first',
    'close': 'last',
    'high': 'max',
    'low': 'min',
    'volume': 'sum'
})
monthly['change_pct'] = monthly['close'].pct_change() * 100

print("=== 月度收益 ===")
print(monthly[['close', 'change_pct']].tail(12).round(2))

# 20-day rolling volatility, annualised with sqrt(252)
df['volatility_20d'] = df['returns'].rolling(20).std() * np.sqrt(252)
print(f"\n当前20日年化波动率: {df['volatility_20d'].iloc[-1]*100:.1f}%")
```
时间序列处理速查
```python
# Create a date column and use it as the index (df must already have 365 rows)
df['date'] = pd.date_range('2025-01-01', periods=365, freq='D')
df = df.set_index('date')

# Extract date components
df['year'] = df.index.year
df['month'] = df.index.month
df['day'] = df.index.day
df['weekday'] = df.index.day_name()
df['is_weekend'] = df.index.dayofweek >= 5
df['quarter'] = df.index.quarter

# Date arithmetic
df['yesterday'] = df.index - pd.Timedelta(days=1)
df['next_month'] = df.index + pd.DateOffset(months=1)
df['month_end'] = df.index + pd.offsets.MonthEnd(0)

# Whole days between each date and now
df['days_since'] = (pd.Timestamp('today') - df.index).days
```
时区处理
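```python
# Attach a time zone to a naive index
df.index = df.index.tz_localize('Asia/Shanghai')

# Convert to another time zone
df.index = df.index.tz_convert('UTC')

# Drop the time zone information again
df.index = df.index.tz_localize(None)
```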
时间序列处理场景速查
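```python
# Parse strings into datetimes
df['date'] = pd.to_datetime(df['date'])

# Extract components from a datetime column via .dt
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['weekday'] = df['date'].dt.day_name()

# Whole days between each date and now
df['days_since'] = (pd.Timestamp('today') - df['date']).dt.days

# Monthly total using a column instead of the index
# (pandas >= 2.2 deprecates 'M' in favour of 'ME')
df.resample('M', on='date')['sales'].sum()

# 7-row moving average
df['ma7'] = df['sales'].rolling(7).mean()

# Change versus 12 rows earlier: year-over-year only when each row is one month
df['yoy'] = df['sales'].pct_change(12)

# Change versus the previous row
df['mom'] = df['sales'].pct_change(1)

# Business days (Monday to Friday) in 2025
dates = pd.date_range('2025-01-01', '2025-12-31', freq='B')
```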
下节预告
下一课我们将学习字符串处理技巧,掌握文本数据的分析方法。
👉 继续阅读:Pandas字符串处理技巧
💬 加入学习交流群
扫码加入Python学习交流群,和数千名同学一起进步:
👉 点击加入交流群
群里不定期分享:
- 数据分析实战案例
- Python学习资料
- 求职面试经验
- 行业最新动态
推荐:AI Python数据分析实战营
🎁 限时福利:送《利用Python进行数据分析》实体书
👉 点击了解详情
课程导航
上一篇: Pandas数据变换-分组聚合groupby
下一篇: Pandas字符串处理技巧
PS:时间序列是数据分析的重要领域。掌握这些技巧,你就能分析任何带时间戳的数据。
📚 推荐教材
主教材:《Excel+Python 飞速搞定数据分析与处理(图灵出品)》
💬 联系我
主营业务:AI 编程培训、企业内训、技术咨询
🎓 AI 编程实战课程
想系统学习 AI 编程?程序员晚枫的 AI 编程实战课 帮你从零上手!