Compilation du PDF via un script Python

Permet de gérer automatiquement le nombre de pages de la table des matières et le nombre total de pages du PDF, et de ne régénérer le PDF que si le nombre de pages a changé dans le CSS.
This commit is contained in:
Youen 2023-05-10 23:39:35 +02:00
parent 1e1fb9c195
commit 2b5e828c59
4 changed files with 83 additions and 70 deletions

View file

@ -1,25 +0,0 @@
import subprocess
import re
import sys
# Number of pages added to the document after this script runs
# (the table of contents is inserted later).
additional_pages = 2


def parse_page_count(pdfinfo_output):
    """Return the page count found in the text printed by ``pdfinfo``.

    Args:
        pdfinfo_output: decoded standard output of ``pdfinfo <file>``.

    Returns:
        The integer that follows the ``Pages:`` label.

    Raises:
        ValueError: if the output contains no ``Pages:`` line.
    """
    pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo_output)
    if pages_match is None:
        raise ValueError('no "Pages:" line found in pdfinfo output')
    return int(pages_match.group(1))


def set_css_total_pages(css, total_pages):
    """Return *css* with the total of every page counter rule replaced.

    Every occurrence of ``content: counter(page) "/<digits>";`` gets its
    digits replaced by *total_pages*; the rest of the text is left untouched.
    """
    return re.sub(
        r'content: counter\(page\) "/[0-9]+";',
        'content: counter(page) "/' + str(total_pages) + '";',
        css,
    )


def main(argv):
    """Count the pages of a PDF and write the total into a CSS file.

    Args:
        argv: command line; ``argv[1]`` is the PDF to measure and ``argv[2]``
            the CSS file to rewrite in place.

    Raises:
        subprocess.CalledProcessError: if ``pdfinfo`` exits with an error.
        ValueError: if ``pdfinfo`` printed no page count.
    """
    pdf_filename = argv[1]
    css_filename = argv[2]

    # Count pages in index.pdf; check=True stops on a pdfinfo failure instead
    # of going on to parse empty output.
    pdfinfo = subprocess.run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE, check=True)
    num_pages = parse_page_count(pdfinfo.stdout.decode())
    print('index.pdf: ' + str(num_pages) + ' pages')
    # Account for the table of contents that will be added later.
    num_pages = num_pages + additional_pages

    # Update the CSS file with the correct number of pages.
    with open(css_filename) as css_file:
        css = css_file.read()
    css = set_css_total_pages(css, num_pages)
    with open(css_filename, 'w') as css_file:
        css_file.write(css)


if __name__ == '__main__':
    main(sys.argv)

82
sphinx-tools/make_pdf.py Normal file
View file

@ -0,0 +1,82 @@
import sys
import subprocess
import re
import os
# The table of contents is inserted right after this page of index.pdf.
insert_toc_after_page = 1


def parse_page_count(pdfinfo_output):
    """Return the page count found in the text printed by ``pdfinfo``.

    Raises:
        ValueError: if the output contains no ``Pages:`` line.
    """
    pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo_output)
    if pages_match is None:
        raise ValueError('no "Pages:" line found in pdfinfo output')
    return int(pages_match.group(1))


def count_pdf_pages(pdf_filename):
    """Return the number of pages of *pdf_filename* as reported by ``pdfinfo``.

    Raises:
        subprocess.CalledProcessError: if ``pdfinfo`` exits with an error.
        ValueError: if ``pdfinfo`` printed no page count.
    """
    pdfinfo = subprocess.run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE, check=True)
    return parse_page_count(pdfinfo.stdout.decode())


def set_css_total_pages(css, total_pages):
    """Return *css* with the total of every page counter rule replaced.

    Every occurrence of ``content: counter(page) "/<digits>";`` gets its
    digits replaced by *total_pages*; the rest of the text is left untouched.
    """
    return re.sub(
        r'content: counter\(page\) "/[0-9]+";',
        'content: counter(page) "/' + str(total_pages) + '";',
        css,
    )


def toc_insertion_ranges(insert_after):
    """Return the ``pdftk cat`` page ranges that splice B into A.

    Handle ``A`` is the document and ``B`` the table of contents: pages
    1..insert_after of A, then all of B, then the remainder of A.
    """
    return [
        'A1-' + str(insert_after),
        'B',
        'A' + str(insert_after + 1) + '-end',
    ]


def offset_bookmark_pages(metadata, offset, after_page):
    """Shift ``BookmarkPageNumber`` entries located after *after_page*.

    Args:
        metadata: text containing ``BookmarkPageNumber: <n>`` entries (here,
            the output of ``pdftk dump_data``).
        offset: number of pages inserted into the document.
        after_page: last page number that is NOT shifted.

    Returns:
        The metadata text with every page number greater than *after_page*
        increased by *offset*.
    """
    def shift(match):
        page = int(match.group(1))
        if page > after_page:
            page += offset
        return 'BookmarkPageNumber: ' + str(page)

    return re.sub(r'BookmarkPageNumber:\s+([0-9]+)', shift, metadata)


def main(argv):
    """Build the final PDF: render, add the table of contents, fix bookmarks.

    Args:
        argv: command line; ``argv[1]`` is the source directory (holding
            ``css/print-theme.css``) and ``argv[2]`` the build directory
            (holding ``weasyprint/index.html``).

    Raises:
        subprocess.CalledProcessError: if an external tool fails.
        ValueError: if ``pdfinfo`` printed no page count.
    """
    source_dir = argv[1]
    build_dir = argv[2]
    output_dir = build_dir + '/weasyprint'
    index_pdf_filename = output_dir + '/index.pdf'
    toc_tex_filename = output_dir + '/toc.tex'
    toc_pdf_filename = output_dir + '/toc.pdf'
    css_filename = source_dir + '/css/print-theme.css'
    # abspath: dirname(__file__) is '' when the script is started from its own
    # directory, which would make the helper path resolve from the root.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    weasyprint_command = ['weasyprint', output_dir + '/index.html', index_pdf_filename, '-s', css_filename]

    # External commands use check=True rather than assert: assert statements
    # (and the commands inside them) are skipped entirely under `python -O`.

    # Compile PDF
    subprocess.run(weasyprint_command, check=True)

    # Generate table of content (TOC). The output is redirected by Python so
    # that no shell command line is built from unescaped paths.
    pdftoc_to_latex = os.path.join(script_dir, '..', 'pdftoc-to-latex')
    with open(toc_tex_filename, 'wb') as toc_tex_file:
        subprocess.run([pdftoc_to_latex, index_pdf_filename], stdout=toc_tex_file, check=True)
    pdflatex = subprocess.run(['pdflatex', '-interaction', 'nonstopmode', '-output-directory=' + output_dir, toc_tex_filename])
    # NOTE(review): the previous version required pdflatex to exit with
    # status 1 here, presumably because the generated TOC triggers recoverable
    # LaTeX errors - confirm. A clean run (0) is accepted as well; a missing
    # toc.pdf is caught by pdfinfo just below.
    if pdflatex.returncode not in (0, 1):
        raise subprocess.CalledProcessError(pdflatex.returncode, pdflatex.args)

    # Count TOC pages
    toc_num_pages = count_pdf_pages(toc_pdf_filename)
    print('toc.pdf: ' + str(toc_num_pages) + ' page(s)')

    # Count pages in index.pdf
    num_pages = count_pdf_pages(index_pdf_filename)
    print('index.pdf: ' + str(num_pages) + ' pages')
    # Account for the table of content that will be added later.
    num_pages = num_pages + toc_num_pages

    # If needed, update the CSS file with the correct number of pages
    with open(css_filename) as css_file:
        original_css = css_file.read()
    modified_css = set_css_total_pages(original_css, num_pages)
    if modified_css != original_css:
        with open(css_filename, 'w') as css_file:
            css_file.write(modified_css)
        # We need to compile again with the modified CSS (this won't impact the TOC)
        print('Number of pages has changed, rebuilding PDF...')
        subprocess.run(weasyprint_command, check=True)

    # Insert TOC in the PDF
    source_pdf_filename = output_dir + '/vheliotech-without-bookmarks.pdf'
    subprocess.run(
        ['pdftk', 'A=' + index_pdf_filename, 'B=' + toc_pdf_filename, 'cat']
        + toc_insertion_ranges(insert_toc_after_page)
        + ['output', source_pdf_filename],
        check=True,
    )

    # Restore bookmarks: pdftk cat drops them, so they are read from the
    # original index.pdf, shifted by the TOC length and written back.
    output_filename = output_dir + '/vheliotech.pdf'
    bookmarks_filename = output_dir + '/index.txt'
    # extract PDF metadata into a text file
    subprocess.run(['pdftk', index_pdf_filename, 'dump_data', 'output', bookmarks_filename], check=True)
    with open(bookmarks_filename) as bookmarks_file:
        metadata = bookmarks_file.read()
    # Offset page numbers of bookmarks
    metadata = offset_bookmark_pages(metadata, toc_num_pages, insert_toc_after_page)
    with open(bookmarks_filename, 'w') as bookmarks_file:
        bookmarks_file.write(metadata)
    # generate the output PDF
    subprocess.run(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename], check=True)
    print('Generated file: ' + output_filename)


if __name__ == '__main__':
    main(sys.argv)

View file

@ -1,29 +0,0 @@
import sys
import subprocess
import re
# Number of pages inserted into the document (the table of contents).
toc_num_pages = 2
# Bookmarks pointing at pages up to and including this one are not shifted.
insert_toc_after_page = 1


def bookmarks_path_for(pdf_filename):
    """Return the path of the metadata text file that goes with a PDF.

    Only the trailing ``.pdf`` extension is swapped for ``.txt``; a ``.pdf``
    appearing in a directory name is left alone.

    Raises:
        ValueError: if *pdf_filename* does not end in ``.pdf``. Without that
            extension no distinct text file name can be derived, and the
            metadata dump must never overwrite the PDF itself.
    """
    if not pdf_filename.endswith('.pdf'):
        raise ValueError('expected a file name ending in .pdf: ' + pdf_filename)
    return pdf_filename[:-len('.pdf')] + '.txt'


def offset_bookmark_pages(metadata, offset, after_page):
    """Shift ``BookmarkPageNumber`` entries located after *after_page*.

    Args:
        metadata: text containing ``BookmarkPageNumber: <n>`` entries (here,
            the output of ``pdftk dump_data``).
        offset: number of pages inserted into the document.
        after_page: last page number that is NOT shifted.

    Returns:
        The metadata text with every page number greater than *after_page*
        increased by *offset*.
    """
    def shift(match):
        page = int(match.group(1))
        if page > after_page:
            page += offset
        return 'BookmarkPageNumber: ' + str(page)

    return re.sub(r'BookmarkPageNumber:\s+([0-9]+)', shift, metadata)


def main(argv):
    """Copy the bookmarks of one PDF into another, shifting page numbers.

    Args:
        argv: command line; ``argv[1]`` is the PDF to read bookmarks from,
            ``argv[2]`` the PDF that receives them and ``argv[3]`` the file
            to write.

    Raises:
        subprocess.CalledProcessError: if ``pdftk`` fails.
        ValueError: if ``argv[1]`` does not end in ``.pdf``.
    """
    extract_bookmarks_from = argv[1]
    source_pdf_filename = argv[2]
    output_filename = argv[3]
    bookmarks_filename = bookmarks_path_for(extract_bookmarks_from)

    # extract PDF metadata into a text file; check=True stops here rather
    # than going on with a missing or stale metadata file
    subprocess.run(['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename], check=True)
    with open(bookmarks_filename) as bookmarks_file:
        metadata = bookmarks_file.read()

    # Offset page numbers
    metadata = offset_bookmark_pages(metadata, toc_num_pages, insert_toc_after_page)
    with open(bookmarks_filename, 'w') as bookmarks_file:
        bookmarks_file.write(metadata)

    # generate the output PDF
    subprocess.run(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename], check=True)


if __name__ == '__main__':
    main(sys.argv)