1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
| import pandas as pd import numpy as np
# Fix the global NumPy seed so every random column below is reproducible.
np.random.seed(42)
n_orders = 5000

# Synthetic order fact table, one row per order:
#   order_id   - consecutive ids starting at 10001
#   user_id    - random integer in [2001, 2999] (randint's upper bound is exclusive)
#   product_id - random integer in [3001, 3049]
#   quantity   - random integer in [1, 4]
#   order_date - one timestamp every two hours starting 2025-01-01
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases; 'h' is accepted by older ones too.
orders = pd.DataFrame({
    'order_id': range(10001, 10001 + n_orders),
    'user_id': np.random.randint(2001, 3000, n_orders),
    'product_id': np.random.randint(3001, 3050, n_orders),
    'quantity': np.random.randint(1, 5, n_orders),
    'order_date': pd.date_range('2025-01-01', periods=n_orders, freq='2h'),
})
# Synthetic user dimension table: one row per user_id in 2001..2999 (999 rows).
users = pd.DataFrame({'user_id': range(2001, 3000)})
# Display names are numbered from 0, independently of user_id.
users['name'] = [f'用户{idx}' for idx in range(len(users))]
# City is drawn uniformly from the five listed cities.
users['city'] = np.random.choice(['北京', '上海', '广州', '深圳', '成都'], len(users))
# VIP level 0..3 is drawn with the given probabilities (0 is the most common).
users['vip_level'] = np.random.choice([0, 1, 2, 3], len(users), p=[0.5, 0.3, 0.15, 0.05])
# Synthetic product dimension table: one row per product_id in 3001..3049 (49 rows).
products = pd.DataFrame({'product_id': range(3001, 3050)})
# Product names carry a zero-padded three-digit sequence number starting at 000.
products['name'] = [f'商品{seq:03d}' for seq in range(len(products))]
# Category is drawn uniformly from the four listed categories.
products['category'] = np.random.choice(['电子产品', '服装', '食品', '家居'], len(products))
# Unit price is drawn uniformly between 50 and 5000, rounded to 2 decimals.
products['price'] = np.random.uniform(50, 5000, len(products)).round(2)
# Enrich every order with its user's attributes; a left join keeps all orders.
df = orders.merge(users, how='left', on='user_id')
# Attach product attributes. Both sides now carry a 'name' column, so the
# suffixes disambiguate them: 'name_order' is the name that came from the
# users table and 'name_product' is the product name.
df = df.merge(products, how='left', on='product_id', suffixes=('_order', '_product'))

# Line total for each order: unit price times ordered quantity.
df['amount'] = df['price'] * df['quantity']
# Per-city sales summary:
#   total_amount - sum of order amounts
#   order_count  - number of orders
#   avg_amount   - mean order amount
#   vip_ratio    - share of orders placed by users with vip_level > 0
city_sales = df.groupby('city').agg(
    total_amount=('amount', 'sum'),
    order_count=('order_id', 'count'),
    avg_amount=('amount', 'mean'),
    vip_ratio=('vip_level', lambda levels: (levels > 0).mean()),
)
# Round all reported figures to two decimals.
city_sales = city_sales.round(2)

# Emit the report header followed by the summary table.
print("各城市销售分析:", city_sales, sep="\n")
|