-
Notifications
You must be signed in to change notification settings - Fork 6
/
日语注音使用html上标注音格式ruby和rt(拖拽,递归,多个文件或文件夹).py
100 lines (91 loc) · 3.23 KB
/
日语注音使用html上标注音格式ruby和rt(拖拽,递归,多个文件或文件夹).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# encoding:utf-8
# https://github.com/wangandi520/andyspythonscript
# by Andy
# v0.1
# pip install pykakasi
from pathlib import Path
from pykakasi import kakasi
from collections import Counter
import sys
import re
def readfile(filename):
    """Read a UTF-8 text file and return its lines.

    Args:
        filename: Path of the file to read (anything ``open`` accepts).

    Returns:
        list[str]: The lines of the file; each line keeps its trailing
        newline when it has one.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'UTF-8' but additionally
    # drops a leading byte-order mark, so a BOM never ends up as the first
    # character of the first line.
    with open(filename, mode='r', encoding='utf-8-sig') as file:
        return file.readlines()
def writefile(filename, filereadlines):
    """Write lines to a file as UTF-8, replacing any existing content.

    Args:
        filename: Path of the file to create or truncate.
        filereadlines: Iterable of strings written back to back; no line
            separators are added, so each string carries its own newline.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The context manager closes the handle even when writelines() raises.
    with open(filename, mode='w', encoding='UTF-8') as newfile:
        newfile.writelines(filereadlines)
def ifIsChinese(eachChar):
    """Tell whether a character falls in the range U+4E00..U+9FFF.

    Args:
        eachChar: The string to classify, normally a single character. The
            check is an ordinary string comparison against the two bounds.

    Returns:
        bool: True when eachChar compares inside the inclusive range,
        False otherwise.
    """
    return '\u4e00' <= eachChar <= '\u9fff'
def _kata_to_hira(text):
    """Return text with katakana U+30A1..U+30F6 replaced by hiragana."""
    # The two kana blocks are laid out in parallel, 0x60 code points apart,
    # so the mapping is one-to-one and keeps every character position.
    return ''.join(
        chr(ord(ch) - 0x60) if '\u30a1' <= ch <= '\u30f6' else ch
        for ch in text
    )


def _common_prefix_len(left, right):
    """Return how many leading characters left and right have in common."""
    count = 0
    for leftChar, rightChar in zip(left, right):
        if leftChar != rightChar:
            break
        count += 1
    return count


def convertLine(kakasiPart):
    """Build the ruby markup for one line of converted text.

    Every token is split into three parts by comparing its text with its
    reading (katakana is treated as equal to the matching hiragana):

    * leading characters that already equal the reading,
    * a core whose reading differs from its spelling,
    * trailing characters that already equal the reading.

    The leading and trailing characters are emitted one by one, each
    followed by an empty ``<rt></rt>``. The core is emitted once, followed
    by ``<rt>reading</rt>``. Kana enclosed between two differing parts stay
    inside the core and share its annotation.

    Args:
        kakasiPart: Iterable of mappings with the keys ``'orig'`` (source
            text of the token) and ``'hira'`` (its reading).

    Returns:
        str: The concatenated markup of all tokens, without enclosing
        ``<ruby>`` tags. Processing stops at the first token whose
        ``'orig'`` is exactly a newline.

    Raises:
        KeyError: If a token lacks ``'orig'`` or ``'hira'``.
    """
    pieces = []
    for eachPart in kakasiPart:
        orig = eachPart['orig']
        hira = eachPart['hira']
        if orig == '\n':
            # A bare newline token ends the line.
            break
        origKey = _kata_to_hira(orig)
        hiraKey = _kata_to_hira(hira)
        # Match the prefix first and the suffix only on what is left, so
        # the two matches can never overlap.
        head = _common_prefix_len(origKey, hiraKey)
        tail = _common_prefix_len(origKey[head:][::-1], hiraKey[head:][::-1])
        coreEnd = len(orig) - tail
        core = orig[head:coreEnd]
        reading = hira[head:len(hira) - tail]
        pieces.extend(eachChar + '<rt></rt>' for eachChar in orig[:head])
        if core:
            # Without base text there is nothing to annotate, so a reading
            # left over next to an empty core is not emitted.
            pieces.append(core + '<rt>' + reading + '</rt>')
        pieces.extend(eachChar + '<rt></rt>' for eachChar in orig[coreEnd:])
    return ''.join(pieces)
def convertToHTML(filename, *, addPTag=False):
    """Convert one text file into an HTML file with ruby markup.

    Every line of the input that is not a bare newline is passed through
    pykakasi and ``convertLine`` and wrapped in ``<ruby>...</ruby>``; bare
    newline lines are copied through unchanged. The result is written next
    to the input under the same stem with the suffix ``.html``.

    An existing output file is never overwritten. In particular an input
    that already ends in ``.html`` maps onto itself and is skipped.

    Args:
        filename: Path (str or Path) of the file to convert.
        addPTag: When True, each converted line is additionally wrapped in
            ``<p>...</p>``.

    Returns:
        None
    """
    sourcePath = Path(filename)
    newFileName = sourcePath.parent.joinpath(sourcePath.stem + '.html')
    # Check the target first: when it already exists nothing may be written,
    # so reading and converting the input would be wasted work and could
    # abort the run on a file that is not touched anyway.
    if newFileName.exists():
        return
    if addPTag:
        openTag, closeTag = '<p><ruby>', '</ruby></p>\n'
    else:
        openTag, closeTag = '<ruby>', '</ruby>\n'
    mykakasi = kakasi()
    outputFileContent = []
    for eachLine in readfile(sourcePath):
        if eachLine == '\n':
            outputFileContent.append('\n')
            continue
        # Newlines that came through with the tokens are removed; the
        # closing tag supplies the single line terminator.
        rubyText = convertLine(mykakasi.convert(eachLine)).replace('\n', '')
        outputFileContent.append(openTag + rubyText + closeTag)
    writefile(newFileName, outputFileContent)
def main(inputPath):
    """Convert every supported file named in an argv-style list.

    Args:
        inputPath: Sequence shaped like ``sys.argv``. Element 0 is ignored;
            every later element is a file or directory path. Directories
            are searched recursively. Paths that are neither an existing
            file nor an existing directory are skipped.

    Returns:
        None
    """
    fileType = ('.txt', '.md', '.html')
    for aPath in inputPath[1:]:
        target = Path(aPath)
        if target.is_dir():
            # Snapshot the tree before converting: conversion creates new
            # .html files inside the directory that is being walked.
            # Sorting makes the processing order deterministic.
            candidates = sorted(target.glob('**/*'))
        elif target.is_file():
            candidates = [target]
        else:
            continue
        for eachFile in candidates:
            # The glob also yields directories; one named like "x.txt" must
            # not be handed to the converter.
            if eachFile.suffix in fileType and eachFile.is_file():
                convertToHTML(eachFile)
# Script entry point: every command-line argument (for example a file or
# folder dropped onto the script) is handed to main().
if __name__ == '__main__':
    if len(sys.argv) >= 2:
        try:
            main(sys.argv)
        except IndexError as error:
            # Report the failure instead of discarding it silently, so an
            # aborted conversion is visible to the user.
            print('Conversion aborted by IndexError: {!r}'.format(error),
                  file=sys.stderr)
            sys.exit(1)