本转换脚本适用于微信小程序 **v1.3.2**、Web App 后端 **v0.2.0**

**本脚本会抹除后端数据库中所有数据！！！！！！**如需要，请提前做好备份！！！

1. 首先需要将微信小程序云数据库的 `activity_info`、`album_info`、`comment_info` 和 `user_info` 表分别导出为 `activity_info.json`、`album_info.json`、`comment_info.json` 和 `user_info.json`，放在该 Jupyter Notebook 的同一目录。
2. 请提前做好对 `user_info.json` 的数据清洗。（可在行首使用 `//` 将有问题的记录整行注释掉）
3. 如需迁移图片，还需要与 Django Server 进行交互，所以请提前在 `localhost:8000` 运行 Django Server。

In [1]:
# Superuser settings -- change these before running.
# superuser_name is matched against the migrated users' first_name to pick the
# account to promote; superuser_email then becomes that account's username and
# superuser_password (MD5-hashed later) its password.
superuser_name = '刘俨晖'
superuser_email = 'lyh543@outlook.com'
superuser_password = 'superuser'

# Base URL of the backend HTTP API used for the login and photo-upload requests.
baseUrl = 'http://localhost:8000/api'

In [2]:
import os
# Path of the Django project
DJANGO_PROJECT_PATH = r"C:\Tools\Git\uestcmsc-webapp\uestcmsc_webapp_backend"
# Remember the directory that holds the exported JSON files.
# First run: WX_DATA_PATH is undefined, so the NameError branch records the
# current directory. Re-run (after a later cell has chdir'd into the Django
# project): WX_DATA_PATH already exists, so we simply chdir back to it.
try:
    os.chdir(WX_DATA_PATH)
except NameError:
    WX_DATA_PATH = os.path.abspath('.')

In [3]:
import json
def read_wx_json(filename: str) -> list[dict]:
    """Read a WeChat cloud-database export that stores one JSON document per line.

    Lines whose first non-blank characters are ``//`` are treated as comments
    and skipped, which lets broken records be disabled by hand. Blank lines
    are skipped as well, and a leading UTF-8 byte-order mark is tolerated.

    Args:
        filename: Path of the exported ``.json`` file.

    Returns:
        The decoded records, in file order.

    Raises:
        json.JSONDecodeError: If a non-comment, non-blank line is not valid JSON.
    """
    records = []
    # 'utf-8-sig' reads plain UTF-8 too, but strips a BOM if an editor added one.
    with open(filename, encoding='utf-8-sig') as f:
        for line in f:
            text = line.strip()
            # Nothing to decode on an empty line or a commented-out record.
            if not text or text.startswith('//'):
                continue
            records.append(json.loads(text))
    return records

# Load the exported tables; the relative names are resolved against the
# current working directory.
wx_activity_info = read_wx_json('activity_info.json')
wx_gallery_info = read_wx_json('album_info.json')
# The comment export is not read; none of the cells shown here use it.
# wx_comment_info = read_wx_json('comment_info.json')
wx_user_info = read_wx_json('user_info.json')

In [4]:
# Configure Django
import django
# Switch into the project directory before Django is initialised.
os.chdir(DJANGO_PROJECT_PATH)
os.environ["DJANGO_SETTINGS_MODULE"]="uestcmsc_webapp_backend.settings"
# Allow synchronous ORM calls from the notebook; Django otherwise rejects them
# when it detects a running event loop (see Django's async-safety docs).
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

In [5]:
from users.models import User, UserProfile
from activities.models import Activity
from activities_photos.models import ActivityPhoto
from activities_files.models import ActivityFile, ActivityFolder
from activities_links.models import ActivityLink

from cloud.onedrive import onedrive_activity_directory

In [6]:
from datetime import date, time, datetime
def str2date(string: str) -> date:
    """Parse a ``YYYY-MM-DD`` string and return it as a ``date``."""
    parsed = datetime.strptime(string, '%Y-%m-%d')
    return parsed.date()
def str2time(string: str) -> time:
    """Parse an ``HH:MM`` string and return it as a ``time``."""
    parsed = datetime.strptime(string, '%H:%M')
    return parsed.time()
def str2datetime(string_date: str, string_time: str) -> datetime:
    """Combine a ``YYYY-MM-DD`` date string and an ``HH:MM`` time string into one ``datetime``."""
    day = str2date(string_date)
    clock = str2time(string_time)
    return datetime.combine(day, clock)

In [7]:
# Users
# WARNING: this script wipes ALL data in the backend database!
import warnings
# Silence every warning from here on (presumably to keep the import output
# readable -- confirm nothing important is being hidden).
warnings.filterwarnings('ignore')

# Start from an empty user table.
User.objects.all().delete()
# Maps a mini-program user `_id` to the User row created for it; later cells
# use it to resolve presenters, attendees and photo uploaders.
webapp_user = dict()

for wx_user in wx_user_info:
    # The mini-program `_id` is stored as the username and the display name as
    # first_name; the admin flags map onto is_staff / is_superuser.
    u = User(username=wx_user['_id'],
             is_staff=wx_user['is_admin'],
             first_name=wx_user['username'],
             is_superuser=wx_user['can_grant_admin'],
             date_joined=str2date(wx_user['register_date']))
    u.save()
    # The profile carries the student id and the experience points.
    up = UserProfile(user=u,
                     student_id=wx_user['student_id'],
                     experience=wx_user['exp'])
    up.save()
    webapp_user[wx_user['_id']] = u

In [8]:
# Configure the superuser account
from hashlib import md5
# Take the first migrated user whose display name matches; this raises
# IndexError if no such user was imported.
superuser = User.objects.filter(first_name=superuser_name)[0]
# Replace the mini-program `_id` username with the e-mail address.
superuser.username = superuser_email
superuser.is_superuser = True
superuser.is_staff = True
# The stored password is derived from the MD5 hex digest of the plain
# password; the same digest is what the login request sends later on.
md5_password = md5(superuser_password.encode()).hexdigest()
superuser.set_password(md5_password)
superuser.save()

In [9]:
# Activities
# WARNING: this script wipes ALL data in the backend database!
Activity.objects.all().delete()
# Maps a mini-program activity `_id` to the Activity row created for it.
webapp_activity = dict()

for wx_activity in wx_activity_info:
    # Hidden activities are not migrated.
    if wx_activity['is_hidden']:
        continue
    activity = Activity(datetime = str2datetime(wx_activity['date'], wx_activity['time']),
                        location = wx_activity['location'],
                        title = wx_activity['title'],)
    activity.save()
    webapp_activity[wx_activity['_id']] = activity

    # Presenters are looked up without a guard, so a presenter id missing from
    # webapp_user raises KeyError here.
    for wx_presenter_id in wx_activity['presenter_list']:
        activity.presenter.add(webapp_user[wx_presenter_id])
    for wx_attender_id in wx_activity['check_in_list']:
        if wx_attender_id in webapp_user: # a user may have checked in without ever binding their info; such a check-in can only be dropped
            activity.attender.add(webapp_user[wx_attender_id])

In [10]:
def convert_wxcloud_url(wxcloud_url: str, host_suffix: str = '.tcb.qcloud.la') -> str:
    """Convert a WeChat cloud file id (``cloud://...``) into an HTTPS download URL.

    Example:
        ``cloud://uestc-msc-activities.7565-uestc-msc-activities-1302714164/album/x.jpg``
        becomes
        ``https://7565-uestc-msc-activities-1302714164.tcb.qcloud.la/album/x.jpg``

    The host keeps everything after its first dot (the leading label is
    dropped), ``host_suffix`` is appended, and the path is kept unchanged.
    A URL without any path yields a URL without a path.

    Args:
        wxcloud_url: File id starting with ``cloud://``.
        host_suffix: Domain suffix appended to the host; it may need to be
            changed to match your own cloud configuration.

    Returns:
        The corresponding ``https://`` URL.

    Raises:
        ValueError: If ``wxcloud_url`` does not start with ``cloud://``.
    """
    scheme = 'cloud://'
    if not wxcloud_url.startswith(scheme):
        raise ValueError(f'not a cloud:// URL: {wxcloud_url!r}')

    # partition() keeps the host intact when there is no '/', unlike find(),
    # whose -1 result used to slice the last character off the host.
    host, slash, rest = wxcloud_url[len(scheme):].partition('/')
    path = slash + rest

    # Drop the first dot-separated label; a host without a dot is kept whole.
    host_without_appname = host.split('.', 1)[-1]
    return f'https://{host_without_appname}{host_suffix}{path}'

# Self-test: the conversion is correct if these asserts produce no output.
assert convert_wxcloud_url("cloud://uestc-msc-activities.7565-uestc-msc-activities-1302714164/album/1b64dd7b5f92dd3801e7dfc40da7bd69/1603500082323_o42P84rWrKnSMDk-kskAJZXuwzlM_0.jpg") == "https://7565-uestc-msc-activities-1302714164.tcb.qcloud.la/album/1b64dd7b5f92dd3801e7dfc40da7bd69/1603500082323_o42P84rWrKnSMDk-kskAJZXuwzlM_0.jpg"
assert convert_wxcloud_url("cloud://uestc-msc-activities.7565-uestc-msc-activities-1302714164/album/1b64dd7b5f92dd3801e7dfc40da7bd69/1603627997528_o42P84g3DmWZIj-qWwPg7tRCT77o_0.jpg") == "https://7565-uestc-msc-activities-1302714164.tcb.qcloud.la/album/1b64dd7b5f92dd3801e7dfc40da7bd69/1603627997528_o42P84g3DmWZIj-qWwPg7tRCT77o_0.jpg"

In [11]:
import requests
# Download every photo up front and keep the bytes in memory, stored on the
# photo record itself under 'content', so the upload step never has to fetch
# them again.
for index, photo in enumerate(wx_gallery_info):
    print(f'下载 {index+1} / {len(wx_gallery_info)}')
    if 'content' in photo:
        # Already downloaded by an earlier run of this cell.
        continue
    url = convert_wxcloud_url(photo['url'])
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)
    # Raise an HTTPError carrying the status and URL instead of a bare assert.
    response.raise_for_status()
    photo['content'] = response.content

下载 1 / 26
下载 2 / 26
下载 3 / 26
下载 4 / 26
下载 5 / 26
下载 6 / 26
下载 7 / 26
下载 8 / 26
下载 9 / 26
下载 10 / 26
下载 11 / 26
下载 12 / 26
下载 13 / 26
下载 14 / 26
下载 15 / 26
下载 16 / 26
下载 17 / 26
下载 18 / 26
下载 19 / 26
下载 20 / 26
下载 21 / 26
下载 22 / 26
下载 23 / 26
下载 24 / 26
下载 25 / 26
下载 26 / 26


In [12]:
# Wipe the activity directory on the cloud drive first.
# WARNING: this script wipes ALL data in the backend database!
response = onedrive_activity_directory.delete(fail_silently=True)
# 204 and 404 are the only accepted outcomes (presumably "deleted" and
# "nothing to delete"); report any other status instead of failing silently.
if response.status_code not in (204, 404):
    raise Exception(
        f'deleting the OneDrive activity directory failed with HTTP {response.status_code}'
    )

In [13]:
# Empty the file, photo and folder tables before photos are re-imported.
ActivityFile.objects.all().delete()
ActivityPhoto.objects.all().delete()
ActivityFolder.objects.all().delete()

(0, {})

In [14]:
# Log in!
session = requests.Session()
# The password field carries the MD5 hex digest computed earlier, not the
# plain-text password.
response = session.post(baseUrl + '/accounts/login/', {'username': superuser_email, 'password': md5_password})
assert response.ok
# Send the CSRF cookie back as a header on every later request of this session.
session.headers['X-CSRFToken'] = session.cookies['csrftoken']

In [15]:
def _decode_payload(text: str):
    """Decode *text* as JSON, falling back to a Python literal, without executing any code."""
    import ast
    import json

    try:
        return json.loads(text)
    except ValueError:
        # Tolerate a Python-repr payload (single quotes etc.) that eval() used
        # to accept; literal_eval only builds literals and never runs code.
        return ast.literal_eval(text)


def upload_picture(file_data: bytes, activity_id: int, filename="俱乐部早期珍贵影像.jpg") -> str:
    """Upload one photo through the backend and attach it to an activity.

    The function asks the backend for an upload session, PUTs the whole file
    to the returned ``uploadUrl`` in a single request, and finally registers
    the resulting file id with the activity.

    Args:
        file_data: Raw bytes of the picture; must be non-empty and at most 60 MiB.
        activity_id: Id of the activity the photo belongs to.
        filename: Name passed to the backend when the upload session is created.

    Returns:
        The ``id`` reported by the upload endpoint.

    Raises:
        ValueError: If ``file_data`` is empty or larger than 60 MiB.
        TypeError: If the upload-session response does not decode to a dict.
        requests.HTTPError: If any of the three HTTP requests is rejected.
    """
    # Validate the payload before creating an upload session that would
    # otherwise be left behind unused.
    file_length = len(file_data)
    if file_length == 0:
        raise ValueError("cannot upload an empty file")
    if file_length > 60 * (2 ** 20):
        raise ValueError("上传文件应当需要小于 60 MiB")

    # Create the upload session.
    response = session.post(baseUrl + '/cloud/file/', {"filename": filename})
    response.raise_for_status()
    # SECURITY: this body used to be passed to eval(); it is data received over
    # HTTP, so it is parsed instead (eval also fails on true/false/null).
    content = _decode_payload(response.text)
    if isinstance(content, str):
        # The body is sometimes an encoded string that itself holds the
        # object, so decode it once more.
        content = _decode_payload(content)
    else:
        print('content is dict')
    if not isinstance(content, dict):
        raise TypeError(f'unexpected upload session payload: {type(content).__name__}')
    upload_url = content['uploadUrl']

    # Upload the file as one range covering every byte. This request goes
    # through plain `requests`, not the logged-in session.
    headers = {"Content-Range": f"bytes 0-{file_length - 1}/{file_length}"}
    response = requests.put(upload_url, data=file_data, headers=headers)
    response.raise_for_status()
    file_id = response.json()['id']

    # Record the photo in the backend database.
    response = session.post(baseUrl + '/activities/photo/', {"file_id": file_id, "activity_id": activity_id})
    response.raise_for_status()
    return file_id

In [16]:
# Photos: uploaded through the Django server with requests, authenticated as
# the superuser.
# `i` is the index of the next photo to handle. It is initialised here rather
# than in the upload loop, so re-running the loop continues from where it
# stopped instead of starting over.
i = 0
# Maps a mini-program photo `_id` to the ActivityPhoto row created for it.
webapp_photo = dict()

In [21]:
# Upload every downloaded photo, starting at index `i`, so that re-running
# this cell after an interruption resumes instead of starting over.
while i < len(wx_gallery_info):
    print(f'尝试上传第 {i+1} 张图片...', end='')
    photo = wx_gallery_info[i]
    if photo['album_id'] not in webapp_activity:
        # The photo's activity was not migrated, so there is nothing to attach it to.
        print('活动不存在，跳过')
        i += 1
        continue
    activity_id = webapp_activity[photo['album_id']].id
    for retry in range(5):
        try:
            # Upload the photo first.
            photo_id = upload_picture(photo['content'], activity_id)
        # requests raises its own ConnectionError, which is NOT a subclass of
        # the builtin one, so both must be named for the retry to trigger.
        except (ConnectionError, requests.exceptions.ConnectionError) as err:
            if retry == 4:
                print(f'\n第 {i+1} 张图片第 {retry+1} 次上传失败，退出')
                raise Exception(
                    f'photo {i+1} could not be uploaded after {retry+1} attempts'
                ) from err
            print(f'\n第 {i+1} 张图片第 {retry+1} 次上传失败，重试')
        else:
            webapp_photo[photo['_id']] = ActivityPhoto.objects.get(id=photo_id)
            # Set the real uploader directly in the database; it stays empty
            # when the uploader was not migrated.
            u = webapp_user.get(photo['_openid'])
            webapp_photo[photo['_id']].user = u
            webapp_photo[photo['_id']].save()
            print('成功')
            i += 1
            break

尝试上传第 1 张图片...成功
尝试上传第 2 张图片...活动不存在，跳过
尝试上传第 3 张图片...成功
尝试上传第 4 张图片...成功
尝试上传第 5 张图片...

KeyboardInterrupt: 