In [1]:
import pandas as pd
import numpy as np
import random
import json
from datetime import datetime, timedelta
import chinese_random

# Generate county-level mock data
def generate_county_data(n_counties=100):
    """Build a list of mock county records.

    Each record is a dict with the keys ``county_id``, ``county_name``,
    ``province``, ``city``, ``gdp``, ``population``,
    ``medical_institutions``, ``disposable_income`` and
    ``hospital_density``.

    ``hospital_density`` is derived from ``medical_institutions`` and
    ``population`` instead of being drawn independently, so the three
    fields of a record always agree with each other.

    Args:
        n_counties: Number of county records to generate.

    Returns:
        A list of ``n_counties`` dicts (empty when ``n_counties`` is 0).
    """
    cities_by_province = {
        '浙江省': ['杭州市', '宁波市', '温州市', '嘉兴市'],
        '江苏省': ['南京市', '苏州市', '无锡市', '常州市'],
        '广东省': ['广州市', '深圳市', '东莞市', '佛山市'],
        '山东省': ['济南市', '青岛市', '烟台市', '潍坊市'],
        '河南省': ['郑州市', '洛阳市', '开封市', '许昌市'],
    }
    provinces = list(cities_by_province)

    counties = []
    for i in range(n_counties):
        province = random.choice(provinces)
        city = random.choice(cities_by_province[province])
        population = round(random.uniform(30, 150), 2)  # unit: 10k people
        medical_institutions = random.randint(50, 300)
        counties.append({
            'county_id': f'C{str(i+1).zfill(3)}',
            # City name without its last character, plus an ordinal 1..4.
            'county_name': f'{city[:-1]}第{i%4 + 1}县',
            'province': province,
            'city': city,
            'gdp': round(random.uniform(100, 1000), 2),  # unit: 100M yuan
            'population': population,
            'medical_institutions': medical_institutions,
            'disposable_income': round(random.uniform(2, 6), 2),  # unit: 10k yuan/year
            # Institutions per 10k people; population is never below 30,
            # so the division is safe.
            'hospital_density': round(medical_institutions / population, 2),
        })
    return counties

# Generate county-master (county-level agent) mock data
def generate_master_data(n_masters=200):
    """Build a list of mock county-master profiles.

    Args:
        n_masters: Number of profiles to generate.

    Returns:
        A list of dicts with the keys ``master_id``, ``name``, ``age``,
        ``gender``, ``education``, ``experience_years``,
        ``medical_background`` and ``industry_resources``.
    """
    degree_options = ['大专', '本科', '硕士', '博士']
    career_options = ['医疗器械销售', '医院管理', '诊所经营', '医药代表', '其他']

    def _build_profile(seq_no):
        # The random draws below happen in a fixed order on purpose, so a
        # seeded generator yields a reproducible sequence of profiles.
        age = random.randint(25, 55)
        career_start_age = random.randint(22, 25)
        # Experience counts from the career start age and is never negative.
        years_worked = max(0, age - career_start_age)
        gender = random.choice(['男', '女'])
        education = random.choice(degree_options)
        background = random.choice(career_options)
        resource_score = random.randint(1, 10)  # industry resources score
        return {
            'master_id': f'M{seq_no:03d}',
            'name': f'张{seq_no}',
            'age': age,
            'gender': gender,
            'education': education,
            'experience_years': years_worked,
            'medical_background': background,
            'industry_resources': resource_score,
        }

    return [_build_profile(seq_no) for seq_no in range(1, n_masters + 1)]

# Generate recruitment history mock data
def generate_recruitment_data(counties, masters, start_date='2022-01-01', n_records=300):
    """Build a list of mock recruitment attempts.

    Every record pairs a randomly chosen county with a randomly chosen
    master and marks the attempt as successful with a probability that
    starts at 0.3 and grows with favourable county and master traits.

    Args:
        counties: County dicts providing ``county_id``, ``gdp``,
            ``medical_institutions`` and ``disposable_income``.
        masters: Master dicts providing ``master_id``, ``age``,
            ``medical_background`` and ``experience_years``.
        start_date: First possible recruitment date, as ``YYYY-MM-DD``.
        n_records: Number of records to generate.

    Returns:
        A list of dicts with the keys ``recruitment_id``, ``date``,
        ``county_id``, ``master_id`` and ``success``.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    favoured_backgrounds = ('医疗器械销售', '医院管理')

    records = []
    for seq_no in range(1, n_records + 1):
        county = random.choice(counties)
        master = random.choice(masters)

        # (condition, bonus) pairs: county factors first, then master factors.
        adjustments = (
            (county['gdp'] > 500, 0.1),
            (county['medical_institutions'] > 200, 0.1),
            (county['disposable_income'] > 4, 0.1),
            (35 <= master['age'] <= 45, 0.15),
            (master['medical_background'] in favoured_backgrounds, 0.15),
            (master['experience_years'] > 10, 0.1),
        )
        success_prob = 0.3  # base probability
        for applies, bonus in adjustments:
            if applies:
                success_prob += bonus

        # Draw the date offset before the success roll to keep the random
        # stream in a fixed order.
        day_offset = random.randint(0, 365)
        succeeded = random.random() < success_prob

        records.append({
            'recruitment_id': f'R{seq_no:03d}',
            'date': (first_day + timedelta(days=day_offset)).strftime('%Y-%m-%d'),
            'county_id': county['county_id'],
            'master_id': master['master_id'],
            'success': succeeded,
        })
    return records

# Generate sales history mock data
def generate_sales_data(recruitments, start_date='2022-01-01', n_records=500):
    """Build a list of mock sales tied to successful recruitments.

    Only recruitments whose ``success`` flag is truthy are sampled.
    Amount and quantity are drawn uniformly at random; they do not depend
    on the master or the county.

    A sale date is drawn between ``start_date`` + 30 days and
    ``start_date`` + 730 days, and never earlier than the ``date`` of the
    recruitment it belongs to (when that record carries one).

    Args:
        recruitments: Recruitment dicts providing ``success``,
            ``county_id``, ``master_id`` and, optionally, ``date``
            (``YYYY-MM-DD``).
        start_date: Reference date for the sales window, as ``YYYY-MM-DD``.
        n_records: Number of sales to generate.

    Returns:
        A list of dicts with the keys ``sale_id``, ``date``, ``county_id``,
        ``master_id``, ``amount`` and ``quantity``. The list is empty when
        no recruitment succeeded.
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')

    successful_recruitments = [r for r in recruitments if r['success']]
    if not successful_recruitments:
        # Nothing to sample from; random.choice would raise IndexError.
        return []

    sales = []
    for i in range(n_records):
        recruitment = random.choice(successful_recruitments)

        # A sale cannot happen before the recruitment that enabled it.
        earliest_offset = 30
        recruited_on = recruitment.get('date')
        if recruited_on:
            recruited_offset = (datetime.strptime(recruited_on, '%Y-%m-%d') - start).days
            earliest_offset = max(earliest_offset, recruited_offset)
        # Keep the window non-empty even if the recruitment is later than
        # the default end of the window.
        latest_offset = max(730, earliest_offset)

        sales.append({
            'sale_id': f'S{str(i+1).zfill(3)}',
            'date': (start + timedelta(days=random.randint(earliest_offset, latest_offset))).strftime('%Y-%m-%d'),
            'county_id': recruitment['county_id'],
            'master_id': recruitment['master_id'],
            'amount': round(random.uniform(50, 200), 2),  # unit: 10k yuan
            'quantity': random.randint(1, 5),  # units sold
        })
    return sales

# Generate every dataset; later ones depend on earlier ones
counties = generate_county_data()
masters = generate_master_data()
recruitments = generate_recruitment_data(counties, masters)
sales = generate_sales_data(recruitments)

# Bundle the datasets into one dict keyed by section name
mock_data = dict(
    counties=counties,
    masters=masters,
    recruitments=recruitments,
    sales=sales,
)

# Save as a UTF-8 JSON file, keeping Chinese characters unescaped
with open('/data/chats/5ovr7/workspace/mock_data.json', 'w', encoding='utf-8') as f:
    json.dump(mock_data, f, ensure_ascii=False, indent=2)

ModuleNotFoundError: No module named 'chinese_random'

In [2]:
import pandas as pd
import numpy as np
import random
import json
from datetime import datetime, timedelta

# Generate county-level mock data
def generate_county_data(n_counties=100):
    """Build a list of mock county records.

    Each record is a dict with the keys ``county_id``, ``county_name``,
    ``province``, ``city``, ``gdp``, ``population``,
    ``medical_institutions``, ``disposable_income`` and
    ``hospital_density``.

    ``hospital_density`` is derived from ``medical_institutions`` and
    ``population`` instead of being drawn independently, so the three
    fields of a record always agree with each other.

    Args:
        n_counties: Number of county records to generate.

    Returns:
        A list of ``n_counties`` dicts (empty when ``n_counties`` is 0).
    """
    cities_by_province = {
        '浙江省': ['杭州市', '宁波市', '温州市', '嘉兴市'],
        '江苏省': ['南京市', '苏州市', '无锡市', '常州市'],
        '广东省': ['广州市', '深圳市', '东莞市', '佛山市'],
        '山东省': ['济南市', '青岛市', '烟台市', '潍坊市'],
        '河南省': ['郑州市', '洛阳市', '开封市', '许昌市'],
    }
    provinces = list(cities_by_province)

    counties = []
    for i in range(n_counties):
        province = random.choice(provinces)
        city = random.choice(cities_by_province[province])
        population = round(random.uniform(30, 150), 2)  # unit: 10k people
        medical_institutions = random.randint(50, 300)
        counties.append({
            'county_id': f'C{str(i+1).zfill(3)}',
            # City name without its last character, plus an ordinal 1..4.
            'county_name': f'{city[:-1]}第{i%4 + 1}县',
            'province': province,
            'city': city,
            'gdp': round(random.uniform(100, 1000), 2),  # unit: 100M yuan
            'population': population,
            'medical_institutions': medical_institutions,
            'disposable_income': round(random.uniform(2, 6), 2),  # unit: 10k yuan/year
            # Institutions per 10k people; population is never below 30,
            # so the division is safe.
            'hospital_density': round(medical_institutions / population, 2),
        })
    return counties

# Generate county-master (county-level agent) mock data
def generate_master_data(n_masters=200):
    """Build a list of mock county-master profiles.

    Names are a random common surname followed by a random number 1..99.

    Args:
        n_masters: Number of profiles to generate.

    Returns:
        A list of dicts with the keys ``master_id``, ``name``, ``age``,
        ``gender``, ``education``, ``experience_years``,
        ``medical_background`` and ``industry_resources``.
    """
    degree_options = ['大专', '本科', '硕士', '博士']
    career_options = ['医疗器械销售', '医院管理', '诊所经营', '医药代表', '其他']
    # Common surnames
    family_names = ['张', '王', '李', '赵', '陈', '周', '吴', '刘', '孙', '朱']

    def _build_profile(seq_no):
        # The random draws below happen in a fixed order on purpose, so a
        # seeded generator yields a reproducible sequence of profiles.
        age = random.randint(25, 55)
        career_start_age = random.randint(22, 25)
        # Experience counts from the career start age and is never negative.
        years_worked = max(0, age - career_start_age)
        family_name = random.choice(family_names)
        name_suffix = random.randint(1, 99)
        gender = random.choice(['男', '女'])
        education = random.choice(degree_options)
        background = random.choice(career_options)
        resource_score = random.randint(1, 10)  # industry resources score
        return {
            'master_id': f'M{seq_no:03d}',
            'name': f'{family_name}{name_suffix}',
            'age': age,
            'gender': gender,
            'education': education,
            'experience_years': years_worked,
            'medical_background': background,
            'industry_resources': resource_score,
        }

    return [_build_profile(seq_no) for seq_no in range(1, n_masters + 1)]

# Generate recruitment history mock data
def generate_recruitment_data(counties, masters, start_date='2022-01-01', n_records=300):
    """Build a list of mock recruitment attempts.

    Every record pairs a randomly chosen county with a randomly chosen
    master and marks the attempt as successful with a probability that
    starts at 0.3 and grows with favourable county and master traits.

    Args:
        counties: County dicts providing ``county_id``, ``gdp``,
            ``medical_institutions`` and ``disposable_income``.
        masters: Master dicts providing ``master_id``, ``age``,
            ``medical_background`` and ``experience_years``.
        start_date: First possible recruitment date, as ``YYYY-MM-DD``.
        n_records: Number of records to generate.

    Returns:
        A list of dicts with the keys ``recruitment_id``, ``date``,
        ``county_id``, ``master_id`` and ``success``.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    favoured_backgrounds = ('医疗器械销售', '医院管理')

    records = []
    for seq_no in range(1, n_records + 1):
        county = random.choice(counties)
        master = random.choice(masters)

        # (condition, bonus) pairs: county factors first, then master factors.
        adjustments = (
            (county['gdp'] > 500, 0.1),
            (county['medical_institutions'] > 200, 0.1),
            (county['disposable_income'] > 4, 0.1),
            (35 <= master['age'] <= 45, 0.15),
            (master['medical_background'] in favoured_backgrounds, 0.15),
            (master['experience_years'] > 10, 0.1),
        )
        success_prob = 0.3  # base probability
        for applies, bonus in adjustments:
            if applies:
                success_prob += bonus

        # Draw the date offset before the success roll to keep the random
        # stream in a fixed order.
        day_offset = random.randint(0, 365)
        succeeded = random.random() < success_prob

        records.append({
            'recruitment_id': f'R{seq_no:03d}',
            'date': (first_day + timedelta(days=day_offset)).strftime('%Y-%m-%d'),
            'county_id': county['county_id'],
            'master_id': master['master_id'],
            'success': succeeded,
        })
    return records

# Generate sales history mock data
def generate_sales_data(recruitments, start_date='2022-01-01', n_records=500):
    """Build a list of mock sales tied to successful recruitments.

    Only recruitments whose ``success`` flag is truthy are sampled.
    Amount and quantity are drawn uniformly at random; they do not depend
    on the master or the county.

    A sale date is drawn between ``start_date`` + 30 days and
    ``start_date`` + 730 days, and never earlier than the ``date`` of the
    recruitment it belongs to (when that record carries one).

    Args:
        recruitments: Recruitment dicts providing ``success``,
            ``county_id``, ``master_id`` and, optionally, ``date``
            (``YYYY-MM-DD``).
        start_date: Reference date for the sales window, as ``YYYY-MM-DD``.
        n_records: Number of sales to generate.

    Returns:
        A list of dicts with the keys ``sale_id``, ``date``, ``county_id``,
        ``master_id``, ``amount`` and ``quantity``. The list is empty when
        no recruitment succeeded.
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')

    successful_recruitments = [r for r in recruitments if r['success']]
    if not successful_recruitments:
        # Nothing to sample from; random.choice would raise IndexError.
        return []

    sales = []
    for i in range(n_records):
        recruitment = random.choice(successful_recruitments)

        # A sale cannot happen before the recruitment that enabled it.
        earliest_offset = 30
        recruited_on = recruitment.get('date')
        if recruited_on:
            recruited_offset = (datetime.strptime(recruited_on, '%Y-%m-%d') - start).days
            earliest_offset = max(earliest_offset, recruited_offset)
        # Keep the window non-empty even if the recruitment is later than
        # the default end of the window.
        latest_offset = max(730, earliest_offset)

        sales.append({
            'sale_id': f'S{str(i+1).zfill(3)}',
            'date': (start + timedelta(days=random.randint(earliest_offset, latest_offset))).strftime('%Y-%m-%d'),
            'county_id': recruitment['county_id'],
            'master_id': recruitment['master_id'],
            'amount': round(random.uniform(50, 200), 2),  # unit: 10k yuan
            'quantity': random.randint(1, 5),  # units sold
        })
    return sales

# Generate every dataset; later ones depend on earlier ones
counties = generate_county_data()
masters = generate_master_data()
recruitments = generate_recruitment_data(counties, masters)
sales = generate_sales_data(recruitments)

# Bundle the datasets into one dict keyed by section name
mock_data = dict(
    counties=counties,
    masters=masters,
    recruitments=recruitments,
    sales=sales,
)

# Save as a UTF-8 JSON file, keeping Chinese characters unescaped
with open('/data/chats/5ovr7/workspace/mock_data.json', 'w', encoding='utf-8') as f:
    json.dump(mock_data, f, ensure_ascii=False, indent=2)