engl-2311-blog/regex-chicanery.py

25 lines
678 B
Python
Raw Permalink Normal View History

2024-08-25 22:52:09 -05:00
#!/usr/bin/env python3
import re
import sys
2024-08-26 09:12:25 -05:00
# TODO: rewrite in bash
2024-08-25 22:52:09 -05:00
# Add a title attribute next to every alt attribute and unwrap <figure>
# elements down to the <img> tag they contain. The file named on the command
# line is rewritten in place.

# Matches an alt attribute; group 1 is the alt text. Without DOTALL the
# non-greedy body cannot span a newline.
ALT_ATTR_RE = re.compile(r'alt="(.*?)"')

# Matches a <figure> element containing a self-closing <img ... /> tag;
# group 1 is that <img> tag. DOTALL lets the figure span several lines.
FIGURE_RE = re.compile(r'<figure>.*?(<img.*?/>).*?</figure>', re.DOTALL)


def add_titles(html):
    """Return *html* with ``title="<alt text>"`` inserted before each alt attribute.

    Every match is rewritten exactly once in a single pass, so two tags that
    share the same alt text each receive one title attribute (a per-match
    ``str.replace`` would rewrite both tags on every iteration and stack
    duplicate titles).
    """
    # A callable replacement keeps backslashes in the alt text literal;
    # a template string would interpret them as escapes.
    return ALT_ATTR_RE.sub(
        lambda m: f'title="{m.group(1)}" {m.group(0)}', html
    )


def unwrap_figures(html):
    """Return *html* with each matched <figure> element replaced by its <img> tag."""
    return FIGURE_RE.sub(lambda m: m.group(1), html)


def transform(contents):
    """Apply the title pass, then the figure-unwrapping pass, and return the result."""
    return unwrap_figures(add_titles(contents))


def main(argv=None):
    """Rewrite the file named by the first command-line argument in place.

    *argv* defaults to ``sys.argv[1:]``. Exits with a usage message when no
    filename is supplied; additional arguments are ignored.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f'usage: {sys.argv[0]} FILE')
    filename = args[0]

    # Read everything first so the file can be safely truncated on write.
    with open(filename, 'r') as f:
        contents = f.read()

    contents = transform(contents)

    with open(filename, 'wt') as f:
        f.write(contents)


if __name__ == '__main__':
    main()