Ajout d'un titre sur la première page, mais qui n'est pas pris en compte dans la hiérarchie globale. Ajout d'un fichier custom.css pour les règles communes à la version PDF et HTML. Limitation de la profondeur de titres affichés dans la table des matières sur la première page de la version HTML.
106 lines
4.5 KiB
Python
106 lines
4.5 KiB
Python
import sys
|
|
import subprocess
|
|
import re
|
|
import os
|
|
|
|
# Command-line arguments: <source_dir> <build_dir>
if len(sys.argv) < 3:
    sys.exit('usage: ' + os.path.basename(sys.argv[0]) + ' <source_dir> <build_dir>')

source_dir = sys.argv[1]
build_dir = sys.argv[2]

# The TOC pages are spliced into the PDF right after this page number
insert_toc_after_page = 2
# Bookmarks deeper than this level are dropped from the final PDF
max_bookmark_level = 2
# Bookmarks up to this level get a "1.2. " style number prefix
numbered_levels = 2

index_pdf_filename = build_dir + '/weasyprint/index.pdf'
css_filename = source_dir + '/css/print-theme.css'

# Resolve to an absolute path first: when __file__ is a bare relative file name,
# dirname() alone returns '' and paths built from script_dir would start at '/'.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Compile PDF
pdf_compile_command = ['weasyprint', build_dir + '/weasyprint/index.html', index_pdf_filename, '-s', source_dir + '/_static/custom.css', '-s', css_filename]
# check=True rather than assert: assert statements are removed under `python -O`,
# which would silently skip the command itself.
subprocess.run(pdf_compile_command, check=True)
# Generate table of content (TOC)
toc_tex_filename = build_dir + '/weasyprint/toc.tex'
# The file names are handed to the shell as positional parameters ($1..$3)
# instead of being pasted into the command text, so quotes, spaces or `$` in a
# path cannot break or alter the command line. The shell is kept only for the
# output redirection into toc.tex.
subprocess.run(
    [
        'sh', '-c', '"$1" "$2" > "$3"', 'sh',
        script_dir + '/../pdftoc-to-latex', index_pdf_filename, toc_tex_filename,
    ],
    check=True,  # not assert: asserts (and the call inside them) vanish under `python -O`
)
# The pdflatex exit status is left unchecked, as in the original script; a
# missing toc.pdf is detected by the pdfinfo call that follows.
subprocess.run(['pdflatex', '-interaction', 'nonstopmode', '-output-directory=' + build_dir + '/weasyprint', toc_tex_filename])
def _count_pdf_pages(pdf_filename):
    """Return the number of pages that ``pdfinfo`` reports for *pdf_filename*.

    Raises:
        subprocess.CalledProcessError: if ``pdfinfo`` exits with a non-zero status.
        ValueError: if the ``pdfinfo`` output contains no ``Pages:`` line.
    """
    pdfinfo = subprocess.run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE, check=True)
    # Raw string: '\s' in a normal string literal is an invalid escape sequence
    pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo.stdout.decode())
    if pages_match is None:
        raise ValueError('no "Pages:" line in pdfinfo output for ' + pdf_filename)
    return int(pages_match.group(1))


# Count TOC pages
toc_num_pages = _count_pdf_pages(build_dir + '/weasyprint/toc.pdf')
print('toc.pdf: ' + str(toc_num_pages) + ' page(s)')

# Count pages in index.pdf
num_pages = _count_pdf_pages(index_pdf_filename)
print('index.pdf: ' + str(num_pages) + ' pages')

num_pages = num_pages + toc_num_pages  # account for table of content that will be added later
# If needed, update the CSS file with the correct number of pages
# NOTE(review): the stylesheet is read and written as UTF-8 instead of the
# locale's default encoding — confirm the file is UTF-8.
with open(css_filename, encoding='utf-8') as css_file:
    original_css = css_file.read()

# Rewrite the total in every `content: counter(page) "/<total>";` declaration.
# Raw string: '\(' in a normal string literal is an invalid escape sequence.
modified_css = re.sub(
    r'content: counter\(page\) "/[0-9]+";',
    'content: counter(page) "/' + str(num_pages) + '";',
    original_css,
)

if modified_css != original_css:
    with open(css_filename, 'w', encoding='utf-8') as css_file:
        css_file.write(modified_css)

    # We need to compile again with the modified CSS (this won't impact the TOC)
    print('Number of pages has changed, rebuilding PDF...')
    # check=True rather than assert: asserts are removed under `python -O`
    subprocess.run(pdf_compile_command, check=True)
# Insert TOC in the PDF
subprocess.run(
    [
        'pdftk', 'A=' + index_pdf_filename, 'B=' + build_dir + '/weasyprint/toc.pdf',
        'cat',
        'A1-' + str(insert_toc_after_page),             # pages of index.pdf before the TOC
        'B',                                            # every page of toc.pdf
        'A' + str(insert_toc_after_page + 1) + '-end',  # remaining pages of index.pdf
        'output', build_dir + '/weasyprint/vheliotech-without-bookmarks.pdf',
    ],
    check=True,  # not assert: asserts (and the call inside them) vanish under `python -O`
)
# Restore bookmarks
extract_bookmarks_from = index_pdf_filename
source_pdf_filename = build_dir + '/weasyprint/vheliotech-without-bookmarks.pdf'
output_filename = build_dir + '/weasyprint/vheliotech.pdf'

# Swap only the file extension: str.replace('.pdf', '.txt') would also rewrite
# a '.pdf' that happens to occur in a directory name of the path.
bookmarks_filename = os.path.splitext(extract_bookmarks_from)[0] + '.txt'
if bookmarks_filename == extract_bookmarks_from:
    # The dump below must never overwrite the PDF it is reading from
    raise ValueError('bookmark dump would overwrite ' + extract_bookmarks_from)

# extract PDF metadata into a text file
subprocess.run(['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename], check=True)

with open(bookmarks_filename) as bookmarks_file:
    metadata = bookmarks_file.read()

# Remove link icon character at the end of each bookmark name (these are added by sphinx but make no sense in a PDF bookmark)
# NOTE(review): the search string below is empty in this copy of the file, which
# makes the call a no-op. The original character (presumably a numeric XML
# entity, since that is how pdftk dump_data writes non-ASCII text) seems to have
# been lost — restore it from version control.
metadata = metadata.replace('', '')

# Remove bookmarks for small titles, adjust page number for remaining ones, and add numbering if level is not above numbered_levels
# One running counter per numbered title level, all starting at zero
title_counters = [0] * numbered_levels
def filterBookmark(match):
    """Rewrite one pdftk bookmark record; meant to be used as a ``re.sub`` callback.

    *match* must expose the bookmark title as group 1, its level as group 2 and
    its page number as group 3.

    Reads the module-level settings ``max_bookmark_level``, ``numbered_levels``,
    ``insert_toc_after_page`` and ``toc_num_pages``, and updates the running
    counters in ``title_counters`` as a side effect.

    Returns:
        An empty string when the level is above ``max_bookmark_level`` (the
        bookmark is dropped); otherwise the record with its page number moved
        past the inserted TOC and, for levels up to ``numbered_levels``, a
        hierarchical number such as ``"1.2. "`` prepended to the title.
    """
    level = int(match.group(2))
    if level > max_bookmark_level:
        return ''

    initial_page = int(match.group(3))
    # Pages located after the insertion point are pushed back by the TOC length
    if initial_page > insert_toc_after_page:
        final_page = initial_page + toc_num_pages
    else:
        final_page = initial_page

    title_number = ''
    # Only numbered levels own a counter. Deeper bookmarks that are kept but not
    # numbered must not index title_counters: doing so raised IndexError
    # whenever max_bookmark_level was greater than numbered_levels.
    if level <= numbered_levels:
        title_counters[level - 1] += 1
        # A new title at this level restarts the numbering of all deeper levels
        for deeper_level in range(level, numbered_levels):
            title_counters[deeper_level] = 0
        title_number = ''.join(str(count) + '.' for count in title_counters[:level]) + ' '

    # NOTE(review): as it appears in this copy of the file, the replace() below
    # maps a space to a space (a no-op); the first argument was presumably a
    # special character that got lost — restore it from version control.
    title = match.group(1).replace(' ', ' ')

    return (
        'BookmarkBegin\nBookmarkTitle: ' + title_number + title
        + '\nBookmarkLevel: ' + match.group(2)
        + '\nBookmarkPageNumber: ' + str(final_page) + '\n'
    )
# Run every bookmark record (title, level and page number lines) of the dump
# through filterBookmark, which drops, renumbers and re-pages it.
metadata = re.sub('BookmarkBegin\nBookmarkTitle: (.*)\nBookmarkLevel: ([0-9]+)\nBookmarkPageNumber: ([0-9]+)\n', filterBookmark, metadata)

with open(bookmarks_filename, 'w') as bookmarks_file:
    bookmarks_file.write(metadata)

# generate the output PDF
# check=True rather than assert: asserts (and the call inside them) are removed under `python -O`
subprocess.run(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename], check=True)

print('Generated file: ' + output_filename)