从 hexo 转移到 Typecho

最近想从 hexo 转移到 typecho，但是 typecho 没有一键导入 md 文章的功能，手动导入又很烦，怎么办呢？于是我就想用 Python 写一个自动解析 md 并导入 typecho 的脚本。

于是就开始编码了。首先用正则表达式提取元数据：hexo 的 md 头部是 YAML 格式的，只要解析 title、date、tags、categories 就行了。其中 tags 和 categories 比较难解析：它们可能不止一个值，也可能一个都没有。

对于上面的解析，我采用两个判断，以及捕获异常的方式解析，tags和category的方式有一些不同。实现如下：

 # Extract the title from the YAML front matter held in `s`.
title = re.search(r'title: (.*?)\n', s, re.S).group(1)
# Convert the front-matter date into a Unix timestamp (time.mktime
# interprets the parsed struct_time as local time).
date = re.search(r'date: (.*?)\n', s, re.S).group(1)
date = time.strptime(date, "%Y-%m-%d %H:%M:%S")
date = int(time.mktime(date))

# Tags come in two shapes: an inline value on the `tags:` line (kept as a
# string) or a YAML list on the following lines (collected into a list).
# When the `tags:` key is missing, re.search returns None, `.group` raises
# AttributeError, and tags falls back to ''.
try:
    tags = re.search(r'tags:[ ]*(.*?)\n', s).group(1)
    if not tags:
        # Nothing inline: look for a list that runs up to the line starting
        # with "ca" (the `categories:` key is expected to follow `tags:`).
        block = re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S)
        if block:
            items = block.group(1)
            # `items` ends without a trailing newline, so match each entry
            # up to its end of line; requiring '\n' after every entry would
            # silently drop the last tag.
            tags = re.findall(r'- (.*)', items)
        else:
            tags = ''
except AttributeError as e:
    print(e)
    tags = ''

# Categories follow the same two shapes; the list form runs up to the
# closing `---` of the front matter.
try:
    categories = re.search(r'categories:[ ]*(.*?)\n', s).group(1)
    if not categories:
        block = re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S)
        if block:
            items = block.group(1)
            # Same end-of-line matching as for tags, so the last category
            # is not lost.
            categories = re.findall(r'- (.*)', items)
        else:
            categories = ''
except AttributeError as e:
    print(e)
    categories = ''
# Extract the post body: everything after the first `---` that is followed
# by a blank line.
post = re.search(r'---\n\n(.*?)$', s, re.S).group(1)

这里的踩坑点主要是正则中的 \s：它不仅匹配空格，也会匹配换行符等空白字符，所以我采用了 [ ]* 来匹配多个空格。如果匹配不到，re.search 会返回 None，再调用 .group 时就会抛出 AttributeError；捕获这个异常后，让 tags（或 categories）= '' 就行了。

然后是插表方面，连接之后，批量解析文件，将字段插入表中，这里需要插入三个表，分别是 typecho_metas typecho_contents typecho_relationships

向 typecho_contents 中插入文章内容，向 typecho_metas 中插入分类和标签，在 typecho_relationships 中建立文章与分类、标签之间的关系。

这里需要为 typecho_metas 表中的 name、type 两个字段建立联合唯一键（UNIQUE KEY），避免重复插入。在重复插入时进行更新操作，使得 count = count + 1 。

def insert_post(self, file):
       """Parse one Hexo post and insert it into typecho_contents.

       The tuple returned by ``self.parse_hexo_md`` is stored on
       ``self.data`` before the insert is attempted. Indexes 0, 1 and 4 of
       that tuple are written as the title, created timestamp and text; the
       row is inserted as a published post with authorId 1.

       Args:
           file: File name of the post inside the ``_posts/`` directory.
       """
       data = self.parse_hexo_md(file)
       self.data = data
       db = self.db
       cur = self.cur
       # The file's mtime becomes the post's "modified" timestamp.
       modified = int(time.mktime(time.localtime(os.stat('_posts/' + file).st_mtime)))
       # Strip only a trailing ".md". Splitting on ".md" would cut the slug
       # at the first ".md" anywhere in the name ("notes.mdx.md" -> "notes").
       slug = file[:-len('.md')] if file.endswith('.md') else file
       sql = '''
       INSERT INTO typecho_contents(title,slug, created,modified, text,type,status,allowComment,allowFeed,allowPing,authorId) VALUES (%s,%s,%s,%s,%s,'post','publish',1,1,1,1) 
       '''

       try:
           cur.execute(sql, (data[0], slug, data[1], modified, data[4]))
           db.commit()
       except Exception as e:
           # Best effort: report the failure and undo the pending insert so
           # that importing the remaining files can continue.
           print(e)
           db.rollback()

   def insert_tags_category(self):
       """Upsert the current post's tags and categories into typecho_metas.

       Reads ``self.data``, laid out as
       (title, date, tags, categories, '<!--markdown-->' + post). Index 2
       holds the tags and index 3 the categories; each is either a list of
       names or a single string (an empty value inserts nothing). Every name
       is inserted with count 1, or has its count incremented when the row
       already exists, and is committed individually.
       """
       meta = self.data
       cursor = self.cur
       # ON DUPLICATE KEY UPDATE relies on a unique key over (name, type), e.g.
       # ALTER TABLE typecho_metas ADD UNIQUE KEY(name,type)
       tag_sql = '''
       INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,'tag',1) ON DUPLICATE KEY UPDATE count = count + 1
       '''
       category_sql = '''
               INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,'category',1) ON DUPLICATE KEY UPDATE count = count + 1
             '''

       # Tags first, then categories; each kind has its own error handling so
       # a database failure in one does not prevent the other from running.
       for statement, position in ((tag_sql, 2), (category_sql, 3)):
           try:
               value = meta[position]
               if isinstance(value, list):
                   names = value
               elif value:
                   names = [value]
               else:
                   names = []
               for name in names:
                   # The name doubles as the slug.
                   cursor.execute(statement, (name, name))
                   self.db.commit()
           except pymysql.DatabaseError as e:
               print(e)
               self.db.rollback()

   def relationships(self):
       """Link the current post to its tags and categories.

       Reads ``self.data``, laid out as
       (title, date, tags, categories, '<!--markdown-->' + post). The post's
       cid is looked up by title, then one typecho_relationships row is
       inserted for every tag (index 2) and category (index 3) whose mid can
       be found in typecho_metas. Tags and categories may each be a list of
       names or a single string; empty values are skipped.

       Tags and categories are handled independently: a missing tag or a
       database error in the tag section no longer prevents the categories
       from being linked. Each section is committed on success, matching
       insert_post and insert_tags_category, and rolled back on a
       database error.
       """
       db = self.db
       cur = self.cur
       data = self.data
       print('tag = ', data[2], 'type = ', type(data[2]), 'cet = ', data[3])

       select_mid = 'SELECT mid FROM typecho_metas WHERE name = %s AND type = %s'
       select_cid = 'SELECT cid FROM typecho_contents WHERE title = %s'
       add_relationship = 'INSERT INTO typecho_relationships(cid,mid) VALUES (%s,%s)'

       # Resolve the post id first; without it nothing can be linked.
       try:
           cur.execute(select_cid, (data[0],))
           rows = cur.fetchall()
       except pymysql.DatabaseError as e:
           print(e)
           db.rollback()
           return
       if not rows:
           print('不能建立关系', data[2])
           return
       cid = rows[0][0]

       for meta_type, value in (('tag', data[2]), ('category', data[3])):
           # Normalise "single string" and "list" into a list of non-empty
           # names, so a post without tags does not abort the whole method.
           candidates = value if isinstance(value, list) else [value]
           names = [name for name in candidates if name]
           try:
               for name in names:
                   cur.execute(select_mid, (name, meta_type))
                   found = cur.fetchall()
                   if not found:
                       # Unknown meta: report it and keep linking the rest.
                       print('不能建立关系', name)
                       continue
                   cur.execute(add_relationship, (cid, found[0][0]))
               db.commit()
           except pymysql.DatabaseError as e:
               print(e)
               db.rollback()

全部源码见： https://github.com/Innei/move-hexo-to-typecho

欢迎交流