- Before
- After
- Code
<ul><li>level 1</li><li>level 2</li><ul><li>level 21</li><li>level 22</li></ul></ul>
<ul>
<li>level 1</li>
<li>level 2</li>
<ul>
<li>level 21</li>
<li>level 22</li>
</ul>
</ul>
input_file = r'z:\input.html'
output_file = r'z:\output.html'
encoding = 'utf-8'
with open(input_file, 'r', encoding=encoding) as f1:
ss = [] # strings
le = True # last end
dn = False # do nothing
for s in f1.read().split('<'):
if s.startswith('pre>'):
dn = True
ss.append('')
if dn:
ss[-1] += '<'+s
else:
s = s.strip()
if s:
if s[0] != '/':
if s.startswith('br'):
ss[-1] += '<'+s
le = True
else:
ss.append('<'+s)
le = False
else:
if le:
ss.append('<'+s)
else:
ss[-1] += '<'+s
le = True
if s.startswith('/pre>'):
le = True
dn = False
with open(output_file, 'w', encoding=encoding) as f2:
f2.write('\n'.join(ss))
No comments:
Post a Comment