从 hexo 转移到 Typecho
最近想从hexo转移到typecho,但是typecho是没有一键导入md文章,手动导入又很烦,怎么办呢。于是我就想用 Python 来写一个自动解析md导入typecho的脚本。
于是就开始编码了,首先是用正则表达式提取,hexo的md头部是YAML格式的,只要解析 title
date
tags
categories
就行了,这里难解析的是 tags 和 categories:它们可能不止一个值,也可能为空。
对于上面的解析,我采用两个判断,以及捕获异常的方式解析,tags和category的方式有一些不同。实现如下:
# Extract the title from the YAML front matter.
title = re.search(r'title: (.*?)\n', s, re.S).group(1)
# Convert the "date:" field into a Unix timestamp (local time).
date = re.search(r'date: (.*?)\n', s, re.S).group(1)
date = time.strptime(date, "%Y-%m-%d %H:%M:%S")
date = int(time.mktime(date))

# tags: either an inline value ("tags: foo") or a YAML list on the lines
# that follow. `[ ]*` is used instead of `\s*` because `\s` would also
# swallow the newline and run into the next line.
try:
    # Raises AttributeError (caught below) when there is no "tags:" line.
    inline_tags = re.search(r'tags:[ ]*(.*?)\n', s).group(1)
    if inline_tags:
        tags = inline_tags
    else:
        # NOTE: the list is delimited by a following line that starts with
        # "ca" (i.e. "categories:"), so tags must precede categories.
        tag_block = re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S)
        if tag_block:
            # The captured block has no trailing newline, so accept
            # end-of-string as a terminator too; otherwise the last item
            # of the list would be silently dropped.
            tags = re.findall(r'- (.*?)(?:\n|$)', tag_block.group(1))
        else:
            tags = ''
except AttributeError as e:
    print(e)
    tags = ''

# categories: same two forms as tags; the list ends at the closing "---".
try:
    inline_categories = re.search(r'categories:[ ]*(.*?)\n', s).group(1)
    if inline_categories:
        categories = inline_categories
    else:
        category_block = re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S)
        if category_block:
            # Same end-of-string handling as for tags.
            categories = re.findall(
                r'- (.*?)(?:\n|$)', category_block.group(1)
            )
        else:
            categories = ''
except AttributeError as e:
    print(e)
    categories = ''

# Extract the post body: everything after the front matter's closing
# "---" and the blank line that follows it.
post = re.search(r'---\n\n(.*?)$', s, re.S).group(1)
这里的踩坑点主要是正则中的 \s
它不仅匹配空格,也会匹配换行符等其他空白字符,所以我采用了 [ ]*
匹配零个或多个空格。如果匹配不到,re.search 会返回 None,调用 .group() 时就会抛出异常 AttributeError
然后捕获异常,让 tags(categories) = ''
就行了。
然后是插表方面,连接之后,批量解析文件,将字段插入表中,这里需要插入三个表,分别是 typecho_metas
typecho_contents
typecho_relationships
向 typecho_contents
中插入文章内容,向 typecho_metas
中插入分类和标签,在 typecho_relationships
中建立文章与分类、标签的关系。
这里需要为 typecho_metas
表中的 name
type
建立联合唯一索引(UNIQUE KEY),避免重复插入。在重复插入时进行更新操作,使得 count = count + 1
。
def insert_post(self, file):
    """Parse one Hexo Markdown file and insert it into ``typecho_contents``.

    The parsed tuple is also stored on ``self.data`` so that later steps
    can reuse it. Its layout is
    ``(title, date, tags, categories, text)``.

    Args:
        file: File name of the post inside the ``_posts/`` directory.
    """
    data = self.parse_hexo_md(file)
    self.data = data
    db = self.db
    cur = self.cur
    # Last-modified time of the source file, as whole seconds.
    modified = int(os.stat('_posts/' + file).st_mtime)
    # Strip only a trailing ".md"; splitting on ".md" would truncate names
    # that contain ".md" somewhere in the middle.
    slug = file[:-len('.md')] if file.endswith('.md') else file
    sql = '''
        INSERT INTO typecho_contents(title,slug, created,modified, text,type,status,allowComment,allowFeed,allowPing,authorId) VALUES (%s,%s,%s,%s,%s,'post','publish',1,1,1,1)
    '''
    try:
        cur.execute(sql, (data[0], slug, data[1], modified, data[4]))
        db.commit()
    except Exception as e:
        # Best effort: report the failure and keep going with other posts.
        print(e)
        db.rollback()
def insert_tags_category(self):
    """Insert the current post's tags and categories into ``typecho_metas``.

    Reads ``self.data``, laid out as
    ``(title, date, tags, categories, text)``. Tags and categories may each
    be a list of names, a single non-empty string, or an empty value
    (nothing is inserted).

    The statement relies on ``ON DUPLICATE KEY UPDATE`` to bump ``count``
    for names that already exist; that only triggers if the table has a
    unique key covering ``(name, type)``, e.g.
    ``ALTER TABLE typecho_metas ADD UNIQUE KEY(name,type)``.
    """
    data = self.data
    cur = self.cur
    sql = '''
        INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,%s,1) ON DUPLICATE KEY UPDATE count = count + 1
    '''
    # Tags and categories are handled identically; only the type differs.
    for meta_type, value in (('tag', data[2]), ('category', data[3])):
        if isinstance(value, list):
            names = value
        elif value:
            names = [value]
        else:
            names = []
        try:
            for name in names:
                # The name doubles as the slug.
                cur.execute(sql, (name, name, meta_type))
                self.db.commit()
        except pymysql.DatabaseError as e:
            print(e)
            self.db.rollback()
def relationships(self):
    """Link the current post to its tags and categories.

    Reads ``self.data``, laid out as
    ``(title, date, tags, categories, text)``. The post's ``cid`` is looked
    up by title, each tag/category ``mid`` is looked up by name and type,
    and one row per pair is inserted into ``typecho_relationships``.

    Tags and categories may each be a list of names, a single non-empty
    string, or an empty value. An empty value is skipped, so a post
    without tags still gets its category links.
    """
    db = self.db
    cur = self.cur
    data = self.data
    print('tag = ', data[2], 'type = ', type(data[2]), 'cet = ', data[3])
    select_mid = '''
        SELECT mid FROM typecho_metas WHERE name = %s AND type = %s
    '''
    select_cid = '''
        SELECT cid FROM typecho_contents WHERE title = %s
    '''
    add_relationship = '''
        INSERT INTO typecho_relationships(cid,mid) VALUES (%s,%s)
    '''
    # Without the post's cid nothing can be linked, so bail out early.
    try:
        cur.execute(select_cid, (data[0],))
        cid = cur.fetchall()[0][0]
    except pymysql.DatabaseError as e:
        print(e)
        db.rollback()
        return
    except IndexError:
        print('不能建立关系', data[0])
        return

    # Tags and categories are linked the same way; only the type differs.
    for meta_type, value in (('tag', data[2]), ('category', data[3])):
        if isinstance(value, list):
            names = value
        elif value:
            names = [value]
        else:
            names = []
        try:
            for name in names:
                cur.execute(select_mid, (name, meta_type))
                rows = cur.fetchall()
                if not rows:
                    # Unknown meta: skip it but keep linking the rest.
                    print('不能建立关系', name)
                    continue
                cur.execute(add_relationship, (cid, rows[0][0]))
            # Persist the new rows; nothing in this method committed them
            # before, so they depended on a later commit elsewhere.
            db.commit()
        except pymysql.DatabaseError as e:
            print(e)
            db.rollback()
全部源码见: https://github.com/Innei/move-hexo-to-typecho
欢迎交流