-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtxt2md.py
107 lines (87 loc) · 3.07 KB
/
txt2md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
from umihico.io_ import load_from_txt
import re
import codecs
import itertools
def gen_header(html_title):
    """Return the YAML front matter block for a page titled *html_title*.

    The result is a ``---`` delimited header containing ``layout: default``
    and the page title, terminated by a newline so page content can be
    appended directly.

    The title is written as a double-quoted scalar.  A raw title containing
    ``: ``, ``#`` or quotes (common in book titles such as
    "Title: Subtitle") would otherwise make the front matter invalid YAML.

    Args:
        html_title: Page title text.

    Returns:
        The front matter as a string.
    """
    # Local import keeps this block self-contained.  A JSON string literal
    # is also a valid YAML double-quoted scalar; ensure_ascii=False keeps
    # non-ASCII titles (e.g. Japanese) human-readable.
    import json

    quoted_title = json.dumps(html_title, ensure_ascii=False)
    return f"---\nlayout: default\ntitle: {quoted_title}\n---\n"
def _write_md(filename, md_text):
    """Write *md_text* to *filename* as UTF-8, overwriting any existing file.

    Characters that cannot be encoded as UTF-8 are dropped rather than
    raising (``errors='ignore'``), which is the original best-effort
    behaviour.

    Args:
        filename: Path of the file to create or overwrite.
        md_text: Text to write.
    """
    # Built-in open() replaces codecs.open(), which is deprecated since
    # Python 3.14.  newline='' disables newline translation so the bytes
    # written are the same as with codecs.open() (which wrote in binary mode).
    with open(filename, 'w', encoding='utf-8', errors='ignore',
              newline='') as f:
        f.write(md_text)
def _beautify_date(raw_date):
    """Convert a raw date string into a sortable integer.

    Two layouts are handled, told apart by the presence of the character
    '年':

    * '2015年7月15日'        -> numbers appear as year, month, day
    * '水曜日 7月 15, 2015'  -> numbers appear as month, day, year

    The result is the year followed by the month and day, each zero-padded
    to two digits, e.g. 20150715.  Only the first three digit runs found
    in *raw_date* are used; fewer than three raises ``IndexError``.
    """
    parts = re.findall(r'[0-9]+', raw_date)
    # Positions of (year, month, day) within the extracted digit runs.
    order = (0, 1, 2) if '年' in raw_date else (2, 0, 1)
    year, month, day = (parts[i] for i in order)
    return int(year + month.zfill(2) + day.zfill(2))
def _test_bautify_date():
    """Print the parsed value of one sample date per supported layout."""
    for sample in ['水曜日 7月 15, 2015', '2015年7月15日']:
        parsed = _beautify_date(sample)
        print(parsed)
# _test_bautify_date()
# raise
def _gen_index(txt_dirname, filenames):
    """Write ``index.md``: a table of book covers linking to each book page.

    Every name in *filenames* is loaded from *txt_dirname* with
    ``load_from_txt``.  The loaded dicts are read for the keys ``date``,
    ``asin`` and ``amazon_image_url``.  Books are sorted newest first
    (by ``_beautify_date``) and laid out three per table row, each book
    taking two cells: the linked cover image and its numeric date.

    Args:
        txt_dirname: Directory containing the source files.
        filenames: Names of the files inside *txt_dirname* to index.
    """
    dicts = [load_from_txt(txt_dirname + '/' + filename)
             for filename in filenames]
    dicts.sort(key=lambda d: _beautify_date(d['date']), reverse=True)
    md_text = gen_header("my kindle-highlights")
    # Six columns: three books per row, two cells (cover, date) per book.
    md_text += "| | | | | | |\n"
    md_text += "|---|---|---|---|---|---|\n"
    row_texts = []
    for col, d in zip(itertools.cycle([0, 1, 2]), dicts):
        url = f"http://umihi.co/kindle-highlights/md/{d['asin']}.html"
        imgurl = d['amazon_image_url']
        image = f"[![]({imgurl})]({url})"
        date = _beautify_date(d['date'])
        row_texts.append(f"{image}|{date}")
        if col == 2:
            # Third book of the row: emit the row and start a new one.
            md_text += f"|{'|'.join(row_texts)}|\n"
            row_texts.clear()
    # Emit the last, partially filled row.  The original tested
    # ``not row_texts``, which dropped those books and instead wrote an
    # empty row whenever the book count was a multiple of three.
    if row_texts:
        md_text += f"|{'|'.join(row_texts)}|\n"
    _write_md('index.md', md_text)
def _each_txt2md(dirname, filename):
    """Convert one highlights file into a Markdown page under ``md/``.

    The file ``dirname/filename`` is loaded with ``load_from_txt``.  The
    loaded dict is read for the keys ``asin``, ``amazon_image_url``,
    ``author``, ``booktitle``, ``date`` and ``highlights`` (a mapping of
    position -> text), plus the optional ``amazon_url``, which defaults to
    the amazon.co.jp product page for the ASIN.

    The page holds the linked cover image, author/title/date headings and
    every highlight ordered by its integer position.  Highlights stored
    under an empty-string key are skipped.  The output name is *filename*
    with its extension replaced by ``.md``.

    Args:
        dirname: Directory containing the source file.
        filename: Name of the source file inside *dirname*.
    """
    d = load_from_txt(dirname + '/' + filename)
    asin = d['asin']
    amazon_url = d.get('amazon_url', "https://www.amazon.co.jp/dp/" + asin)
    amazon_image_url = d['amazon_image_url']
    author = d['author']
    title = d['booktitle']
    date = d['date']
    highlights = d['highlights']

    cover = f"[![cover_img]({amazon_image_url})]({amazon_url})"
    md_texts = [
        cover + ' ',
        f"### Author:{author} ",
        f"### Title:{title} ",
        f"### Date:{date}, {len(highlights)} highlights",
    ]
    # Positions are string keys; order them numerically, not lexically.
    tupled_highlights = [x for x in highlights.items() if x[0] != '']
    for page_pos, text in sorted(tupled_highlights, key=lambda x: int(x[0])):
        md_texts.append(' ')
        md_texts.append(f'@{page_pos} ')
        md_texts.append(f'{text} ')
        md_texts.append('----')

    md_text = gen_header(title + ' by ' + author)
    md_text += '\n'.join(md_texts)

    # Make sure the output directory exists instead of failing on write.
    os.makedirs('md', exist_ok=True)
    # Swap only the extension: str.replace('.txt', '.md') would also
    # rewrite any '.txt' occurring elsewhere in the file name.
    stem, _ext = os.path.splitext(filename)
    _write_md('md/' + stem + '.md', md_text)
def txt2md():
    """Convert every entry of the ``txt`` directory, then rebuild the index.

    The directory is listed once; the same list of names is used both for
    the per-file conversion and for generating ``index.md``.
    """
    source_dir = 'txt'
    entries = os.listdir(source_dir)
    for entry in entries:
        _each_txt2md(source_dir, entry)
    _gen_index(source_dir, entries)
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    txt2md()