shithub: riscv

ref: 6184a4ef2eba1d49f0386e9ce94f4877d3d9edb2
dir: /sys/src/cmd/python/Doc/tools/toc2bkm.py/

View raw version
#! /usr/bin/env python

"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.

The output file has an extension of '.bkm' instead of '.out', since hyperref
already uses that extension.
"""

import getopt
import os
import re
import string
import sys


# Each entry in the table of contents is a tuple of:
#
#   Section Type,  Section #,  Title String,  Page #,  List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.

# Verbose-mode pattern matching one \contentsline entry of a .toc file.
# Capture groups: 1 = section type, 2 = section number (None when the entry
# has no \numberline part), 3 = raw title text, 4 = page number.
cline_re = r"""^
\\contentsline\ \{([a-z]*)}             # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})?     # section number
(.*)}                                   # title string
\{(\d+)}$"""                            # page number

# Compiled once at import time; re.VERBOSE is what allows the whitespace and
# inline comments inside the pattern above.
cline_rx = re.compile(cline_re, re.VERBOSE)

# Section types that are kept in the outline, ordered outermost first.
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")

# Marker stored in _transition_map for "descend one level into the last entry".
OUTER_TO_INNER = -1

# Maps (current level, next level) to either OUTER_TO_INNER or the number of
# levels to climb back up.  Any pair missing from the map (for example a jump
# that descends more than one level at once) is an unsupported transition.
_transition_map = {}

# Stepping from a level to the one directly nested inside it.
for _parent, _child in zip(INCLUDED_LEVELS[:-1], INCLUDED_LEVELS[1:]):
    _transition_map[(_parent, _child)] = OUTER_TO_INNER

# Climbing from a nested level back out to any enclosing level.
for _depth, _inner in enumerate(INCLUDED_LEVELS):
    for _up in range(1, _depth + 1):
        _transition_map[(_inner, INCLUDED_LEVELS[_depth - _up])] = _up

# Keep the loop helpers out of the module namespace.
del _parent, _child, _depth, _inner, _up


class BadSectionNesting(Exception):
    """Error for a change of section level that the parser cannot handle.

    The constructor arguments are stored unchanged as attributes:

    level      -- section type that was current before the offending line
    newsection -- section type found on the offending line
    path       -- name of the file being parsed
    lineno     -- number of the offending line
    """

    def __init__(self, level, newsection, path, lineno):
        (self.level, self.newsection,
         self.path, self.lineno) = level, newsection, path, lineno

    def __str__(self):
        # Same four values, in the order the message template expects them.
        details = (self.level, self.newsection, self.path, self.lineno)
        return "illegal transition from %s to %s at %s (line %s)" % details


def parse_toc(fp, bigpart=None):
    """Parse the lines of a LaTeX .toc file into a nested outline.

    fp is an open file (or other iterable of lines).  bigpart names the
    outermost section type expected in the file; it defaults to 'chapter'.

    Returns a list of entries, each a tuple of
    (section type, section number, title, page number, sub-entries), where
    sub-entries is a list of entries of the same shape.

    Lines that do not look like a \\contentsline entry are echoed to stderr
    prefixed with their line number.  Section types outside INCLUDED_LEVELS
    are skipped.

    Raises BadSectionNesting when the section type changes in a way that is
    not listed in _transition_map.
    """
    toc = top = []
    # stack[0] is always the list currently being filled; enclosing lists
    # follow it, ending with the top-level list.
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    for line in fp:
        lineno = lineno + 1
        m = cline_rx.match(line)
        if not m:
            # Report the unparsable line together with its line number.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        try:
            direction = _transition_map[(level, stype)]
        except KeyError:
            # File-like objects without a name still get a usable message.
            path = getattr(fp, "name", "<input>")
            raise BadSectionNesting(level, stype, path, lineno)
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back up `direction` levels.
            for i in range(direction):
                del stack[0]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top


# LaTeX constructs that must not appear literally in a bookmark title.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
# A control word (backslash + letters) followed by whitespace.
title_rx = re.compile(r"\\([a-zA-Z])+\s+")


def clean_title(title):
    """Return title with LaTeX markup reduced to plain bookmark text.

    - every ``\\raisebox{...}`` is removed;
    - every ``\\hackscore{...}`` is replaced by ``\\_``;
    - every control word followed by whitespace is removed together with
      that whitespace, except ``\\textunderscore``, which is kept;
    - all ``{`` and ``}`` characters are removed.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while 1:
        m = title_rx.search(title, pos)
        if not m:
            break
        start = m.start()
        if title[start:start+15] == "\\textunderscore":
            # Keep this macro; resume the search just past its backslash.
            pos = start + 1
        else:
            title = title[:start] + title[m.end():]
            # The text that followed the macro now begins at `start`, so
            # search from there to catch a directly following macro.
            pos = start
    # str.replace behaves the same on Python 2 and Python 3.
    return title.replace("{", "").replace("}", "")


def write_toc(toc, fp):
    """Write every top-level entry of toc, and its sub-entries, to fp."""
    top_layer = 0
    for top_entry in toc:
        write_toc_entry(top_entry, fp, top_layer)

def write_toc_entry(entry, fp, layer):
    """Write one \\pdfoutline line for entry to fp, then recurse into its children.

    entry is a (section type, section number, title, page number,
    sub-entries) tuple.  layer is the nesting depth; it is only passed on
    (incremented) to the recursive calls.
    """
    stype, snum, title, pageno, children = entry
    command = "\\pdfoutline goto name{page%03d}" % pageno
    if children:
        # A negative count is emitted for entries that have sub-entries.
        command = "%s count -%d" % (command, len(children))
    label = title
    if snum:
        label = "%s %s" % (snum, title)
    fp.write("%s {%s}\n" % (command, label))
    for child in children:
        write_toc_entry(child, fp, layer + 1)


def process(ifn, ofn, bigpart=None):
    """Read the .toc file ifn and write the matching bookmark file ofn.

    bigpart is passed through to parse_toc().  Both files are closed before
    returning, also when parsing or writing raises.
    """
    ifp = open(ifn)
    try:
        toc = parse_toc(ifp, bigpart)
    finally:
        ifp.close()
    # The output file is only created once the input parsed successfully.
    ofp = open(ofn, "w")
    try:
        write_toc(toc, ofp)
    finally:
        ofp.close()


def main():
    """Command-line entry point.

    Usage: toc2bkm.py [-c bigpart] file ...

    Each file argument is converted with process(); a missing extension
    defaults to ".toc" and the output is written next to it with a ".bkm"
    extension.  The value of the first option given (-c) is used as the
    bigpart argument.  Exits with status 2 after printing a usage message
    when the options are invalid or no file is named.
    """
    usage_text = ("usage: %s [-c bigpart] file ...\n"
                  % os.path.basename(sys.argv[0]))
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError:
        # sys.exc_info() keeps this valid on both old and new Python syntax.
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        sys.stderr.write(usage_text)
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        sys.stderr.write(usage_text)
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()