% -*- mode: Noweb; noweb-code-mode: icon-mode -*- \documentstyle[noweb]{article} \pagestyle{noweb} \begin{document} @ \section{Converting {\tt noweb} markup to {\tt HTML}} This copyright applies both to the {\tt noweb} source and to the generated code. Thanks to Bill Trost for getting me started with an early version. <<copyright notice>>= # Copyright 1994 by Norman Ramsey. All rights reserved. # See file COPYRIGHT for more information. @ The [[-raw]] option brackets HTML with [[\begin{rawhtml}]] and [[\end{rawhtml}]]; the purpose is to embed HTML in a {\LaTeX} document before converting the document with {\tt latex2html}. [[braw]] and [[eraw]] hold those delimiters (or else empty strings). <<*>>= <<copyright notice>> # Don't try to understand this file! Look at icon/tohtml.nw in the noweb source! global braw, eraw procedure main(args) local delay, raw, where, localindex, noindex, shortxref <<initialization>> every braw | eraw := "" delay := !args == "-delay" noindex := !args == "-noindex" shortxref := if !args == "-longxref" then &null else 1 localindex := (/noindex, !args == "-localindex") # produce local identifier xref? raw := !args == "-raw" if \raw then {braw := "\\begin{rawhtml}"; eraw := "\\end{rawhtml}"} if !args == "-no-gen-comment" then &null else write(braw, "<!-- this file was generated automatically by noweave;", " better not edit it-->", eraw) while inputline := read() do inputline ? { <<scan and convert>> } write() end <<scan and convert>>= <<@text>> <<@nl>> <<code chunks>> <<@defn>> <<docs chunks>> <<@use>> <<@xref>> <<@index>> <<others>> if match("@fatal ") then { # follows last else exit(1) } else if ="@" then warn_unknown(1(tab(upto(' ')|0), pos(0) | move(1))) else fatal("Botched line in noweb pipeline: ", tab(0)) @ [[ecode]] is the marker used at the end of the current code chunk. If there is no cross-reference stuff at the end, we just use [[</pre>]]; otherwise we terminate whatever environment is used for the cross-reference stuff. <<code chunks>>= if ="@begin code " then { code := 1 ; thischunk := &null <<reset cross-reference info>> writes(braw, "<pre>"); ecode := "</pre>" } else if ="@end code " then { <<dump pending cross-reference info (long form)>> code := nil ; previscode := 1 writes(ecode, eraw) } else @ We maintain [[thischunk]] null until we've seen [[@defn]]. We want to try to avoid emitting paragraph elements when the preceding chunk is a code chunk, as tracked by [[previscode]]. Also, if we do slip in a paragraph, we may use the {\LaTeX} style. <<docs chunks>>= if ="@begin docs " then { if \previscode then writes(if /raw then "<p>" else "\\par") previscode := &null text := 0 } else @ Sometimes it happens that a document-chunk anchor is put in a document chunk that contains no text. In that case, we put in a phony anchor at the end of the chunk so we won't lose the cross-reference. <<docs chunks>>= if ="@end docs " then { write(linklabel(\lastxreflabel, "*")) lastxreflabel := &null } else @ Normally, if there's a pending anchor, we put it on the first available text line. There's a bit of a fine point that crops up if the very first piece of text is quoted code. In that case we have to attach both the label for the {\it docs} anchor and the ref for the {\it index} anchor. <<@text>>= if ="@text " then { text +:= *(line := tab(0)) if \code then writes((<<index anchor>>) | escapeSpecials(line)) else if \quoting then writes((<<index and docs anchor>>) | escapeSpecials(line)) else writes((<<docs anchor>>) | line) } else <<index anchor>>= 2(line ? {tab(many(' \t')); not pos(0)}, linkto(\lastindexref, escapeSpecials(line)), lastindexref := &null) @ We anchor on the first nonblank character of the line, unless that's an SGML tag, in which case we have to skip past. None of this crap would be necessary if HTML could anchor to empty text. <<docs anchor>>= 1(line ? skip_white_tags() || linklabel(\lastxreflabel, skip_chars()) || tab(0), insert(defns_above, \lastxreflabel), lastxreflabel := &null) <<old docs anchor>>= 2(line ? {tab(many(' \t')); not pos(0)}, line ? linklabel(\lastxreflabel, (tab(many(' \t')) | "") || (skip_tags_and_char() | "*")) || tab(0), insert(defns_above, \lastxreflabel), lastxreflabel := &null) @ An indexed identifier is not empty and contains no tags, so we don't have to worry. <<index and docs anchor>>= 2(line ? {tab(many(' \t')); not pos(0)}, linklabelto(lastxreflabel, lastindexref, escapeSpecials(line), "bare"), lastxreflabel := lastindexref := &null) @ Skip as many tags as possible. Argument is number of tags already skipped; if nonzero, we're willing to succeed at the end of the string. <<*>>= procedure skip_white_tags() suspend thewhite() || ((="<" || thewhite() || (="/" || thewhite() | "") || (tab(many(&letters)) | ="!--") || tab(upto(">")) || =">" || skip_white_tags() | "") \ 1) end <<*>>= procedure skip_chars() if not any('<') & upto('<') then suspend tab(upto('<')) else if any('&') & upto(';') then suspend (tab(upto(';')) || =";") else if (any('<') & upto('>')) | any(' \t') then (write(&errout, "broken skip_chars, report to nr@eecs.harvard.edu: ", image(tab(0))), &fail) else if any('<') then (write(&errout, "warning: can't find end of HTML tag: ", image(tab(0)), "\n\tquoted code embedded in comment?"), &fail) else if pos(0) then fail else suspend tab(0) end procedure optwhite() suspend tab(many(' \t')) | "" end procedure thewhite() return optwhite() \ 1 end <<*>>= procedure skip_tags_and_char(count) local tag /count := 0 if any('&') & upto(';') then suspend(tab(upto(';')) || =";") else if any('<') then suspend ="<" || (tab(many(' \t')) | "") || (="/" | (tag := tab(many(&letters)))) || tab(upto(">")) || =">" || (if map(tag) == "a" then tab(upto("<")) || ="<" || optwhite() || ="/" || optwhite() || (("A"|"a") == tab(many(&letters))) || optwhite || =">" else "") || skip_tags_and_char(count+1) else suspend (tab(many(' \t')) | "") || ((move(1) | if count > 0 then "")\1) # succeed at end if count > 0 end @ The preceding scheme sometimes wraps an anchor around a tag. If that turns out to be bad, we could try the following alternative: <<funky new docs anchor>>= 2(line ? {tab(many(' \t')); not pos(0)}, line ? scan_initial_tags() || linklabel(\lastxreflabel, (tab(many(' \t')) | "") || (scan_past_char() | "*")) || tab(0), insert(defns_above, \lastxreflabel), lastxreflabel := &null) <<procedures to support new docs anchor>>= procedure scan_initial_tags() suspend (="<" || (tab(many(' \t')) | "") || (="/" | (tag := tab(many(&letters)), map(tag) ~== "a", tag)) || tab(upto(">")) || =">" || scan_initial_tags()) | "" end procedure scan_past_char() if any('&') & upto(';') then suspend(tab(upto(';')) || =";") else suspend (tab(many(' \t')) | "") || tab(any(~'<')) end <<@nl>>= if ="@nl" & pos(0) then { <<show definitions and uses on definition line---once>> write() } else <<@defn>>= if ="@defn " then { writechunk(lastxreflabel, lastxrefref, "dfn", thischunk := tab(0), defns[thischunk] || "=") insert(defns_above, \lastxreflabel) useitems := \useitemstab[thischunk] pendinguses := 1 <<clear [[lastxref*]]>> defns[thischunk] := "+" } else <<initialization>>= useitemstab := table() defns := table("") defns_above := set() # keep track of defining chunks we've seen <<@use>>= if ="@use " then { writechunk(lastxreflabel, lastxrefref, "i", tab(0)) <<clear [[lastxref*]]>> } else @ Writing a chunk involves creating an anchor for it. The anchor is ``bare'' because it's always in an HTML-containing place. <<*>>= procedure writechunk(label, ref, tag, name, suffix) /suffix := "" writes(linklabelto(label, ref, sgmlwrap(tag, "<" || convquotes(name) || ">" || suffix), "bare")) return end @ <<others>>= if ="@quote" & pos(0) then { quoting := 1 ; writes(braw, "<code>") } else if ="@endquote" & pos(0) then { quoting := nil ; writes("</code>", eraw) } else <<others>>= if ="@file " then { filename := tab(0); <<clear [[lastxref*]]>> } else if ="@literal " then { writes(tab(0)) } else if ="@header html " then { <<write HTML header>> } else if ="@trailer html" & pos(0) then { <<write HTML trailer>> } else @ <<write HTML header>>= writes("<html><head><title>", tab(0), "</title></head><body>") <<write HTML trailer>>= write("</body></html>") @ <<@xref>>= if ="@xref " then { if fun := tab(upto(' ')) then {move(1); arg := tab(0)} else { fun := tab(0); arg := &null } case fun of { <<cases for @xref>> default : (\arg|"") ? warn_unknown("xref " || fun) } } else @ [[useitemstab]] seems to exist to enable us to repeat the uses for each chunk, even though the [[noidx]] filter only provides them once. I think it may prove better to provide use information for each instance of a chunk. <<cases for @xref>>= "label" : { <<warn if unused [[lastxreflabel]]>>; lastxreflabel := arg } "ref" : { <<warn if unused [[lastxrefref]]>>; lastxrefref := arg } "prevdef" : pendingprev := arg "nextdef" : pendingnext := arg "beginuses" : useitems := [] "useitem" : put(useitems, arg) "enduses" : useitemstab[\thischunk] := useitems "notused" : notused := arg <<clear [[lastxref*]]>>= every lastxreflabel | lastxrefref := &null <<warn if unused [[lastxreflabel]]>>= warn_unused_xref("label", \lastxreflabel) <<warn if unused [[lastxrefref]]>>= warn_unused_xref("ref", \lastxrefref) <<*>>= procedure warn_unused_xref(tag, label) static warned initial warned := set() if not member(warned, tag) then { insert(warned, tag) write(&errout, "Warning: internal inconsistency in noweb (not urgent)---") write(&errout, "\tnever used @xref ", tag, " ", label) } return end @ <<show definitions and uses on definition line---once>>= if \shortxref & (\pendinguses | \pendingprev | \pendingnext | \notused) then { <<write out definition-line markup>> pendinguses := pendingprev := pendingnext := notused := &null } <<write out definition-line markup>>= if \pendinguses & *\useitems > 0 then { writes(" <b>(") every i := 1 to *\useitems do { usedir := if member(defns_above, useitems[i]) then "<-U" else "U->" writes(if i > 1 then " " else "", linkto(useitems[i], usedir)) } writes(")</b>") } if \pendingprev | \pendingnext then { writes(" <b>[") writes(linkto(\pendingprev, "<-")) writes("D") writes(linkto(\pendingnext, "->")) writes("]</b>") } <<dump pending cross-reference info (long form)>>= if /shortxref then { useitems := useitemstab[\thischunk] if \pendingprev | \pendingnext | (\pendinguses, (*\useitems > 0 | \notused)) then { <<code-to-blockquote>> <<write out uses with links>> useitemstab[\thischunk] if *\useitems > 0 & (\pendingprev | \pendingnext) then writes("; ") p := if *\useitems > 0 then "previous" else "Previous" n := if *\useitems > 0 then "next" else "Next" if \pendingprev then if \pendingnext then writes(linkto(pendingprev, p), " and ", linkto(pendingnext, "next"), " definitions") else writes(linkto(pendingprev, p), " definition") else if \pendingnext then writes(linkto(pendingnext, n), " definition") if \pendingprev | \pendingnext | *\useitems > 0 then writes(".") if \notused then writes("<br>\nThis code is written to a file (or else not used).") write("<p>") pendinguses := pendingprev := pendingnext := useitems := notused := &null } } else &null <<write out uses with links>>= useprefix := "Used " every i := 1 to *\useitems do { usedir := if member(defns_above, useitems[i]) then "above" else "below" usesuffix := if *\useitems > 1 then " (" || i || ")" else "" writes(useprefix, linkto(useitems[i], usedir || usesuffix)) useprefix := ", " } <<reset cross-reference info>>= useitems := [] notused := &null @ The hack here is to put the supplementary information in a blockquote area after the code. <<code-to-blockquote>>= if ecode == "</pre>" then { writes("</pre><blockquote>") ecode := "</blockquote>" } @ The HTML back end ignores [[@xref begindefs]], [[@xref defitem]], and [[@xref enddefs]]; it uses the [[nextdef]] and [[prevdef]] links instead. <<cases for @xref>>= "begindefs" | "defitem" | "enddefs" : &null @ <<cases for @xref>>= "beginchunks" : { write(braw, "<ul>") } "chunkbegin" : { writes("<li>"); comma := ": "; count := 0 arg ? { ref := tab(upto(' ')); =" "; name := tab(0) } writechunk(&null, ref, "i", name) } "chunkuse" : { writes(comma, linkto(arg, "U" || (count +:= 1))); comma := ", " } "chunkdefn" : { writes(comma, linkto(arg, "D" || (count +:= 1))); comma := ", " } "chunkend" : write() "endchunks" : write("</ul>", eraw) <<cases for @index>>= "beginindex" : { write(braw, "<ul>") } "entrybegin" : { writes("<li>"); comma := ": "; count := 0 arg ? { ref := tab(upto(' ')); =" "; name := tab(0) } writes(linklabelto("NWI-" || escapeSpecials(name), ref, name, "b")) } "entryuse" : { writes(comma, linkto(arg, "U" || (count +:= 1))); comma := ", " } "entrydefn" : { writes(comma, linkto(arg, "D" || (count +:= 1))); comma := ", " } "entryend" : write() "endindex" : write("</ul>", eraw) <<@index>>= if ="@index " then { if /noindex then { if fun := tab(upto(' ')) then {move(1); arg := tab(0)} else { fun := tab(0); arg := &null } case fun of { <<cases for @index>> default : (\arg|"") ? warn_unknown("index " || fun) } } # don't get any warnings if not doing indexing } else @ The local identifier cross-reference doesn't show each use; it just shows the identifiers that are defined, with links to the full index. <<cases for @index>>= "use" : { lastindexref := lastxrefref; lastxrefref := &null } "defn" : { <<clear [[lastxref*]]>> } "localdefn" : { <<clear [[lastxref*]]>> } "nl" : &null # do nothing -- no hope of keeping line numbering @ I tried an abbreviated index format at the end of the chunk, but it looks so bad that I've punted on it (for now). Hence the [[\uglyindex]]. <<cases for @index>>= "begindefs" : if \localindex then { <<dump pending cross-reference info (long form)>> if /uglyindex | /shortxref then { <<code-to-blockquote>> writes("Defines ") } else { writes("<hr><b>[</b>") } comma := "" } "isused" : &null "defitem" : if \localindex then { writes(comma, linkto("NWI-" || escapeSpecials(arg), sgmlwrap("code", escapeSpecials(arg)))) comma := if /uglyindex | /shortxref then ", " else "<b>,</b> " } "enddefs" : if \localindex then write(if /uglyindex | /shortxref then " (links are to index).<p>" else "<b>]</b>") "beginuses" | "isdefined" | "useitem" | "enduses" : &null # use local links @ \subsection{Support functions} Here's all our anchor support goo. Unless the anchor is deemed ``bare,'' the beginning and ending tags are protected with [[braw]] and [[eraw]]. This step is necessary to make \verb=noweave -latex+html= work without trapping {\LaTeX} markup in a \texttt{rawhtml} environment. <<*>>= procedure linklabelto(label, ref, contents, bare) local s s := if \bare then "" else braw s ||:= if \label | \ref then "<a" else "" s ||:= " name=" || image(\label) s ||:= " href=" || image("#" || \ref) s ||:= if \label | \ref then ">" else "" s ||:= if \bare then "" else eraw s ||:= contents s ||:= if \bare then "" else braw s ||:= if \label | \ref then "</a>" else "" s ||:= if \bare then "" else eraw return s end @ Here are two abbreviations. <<*>>= procedure linklabel(label, contents, bare) return linklabelto(label, &null, contents, bare) end procedure linkto(ref, contents) return linklabelto(&null, ref, contents, "bare") end @ [[linkto]] is always used ``bare,'' and I've decided that the inconsistency in calling conversions is worth the savings from not having to write a meaningless argument everywhere. Reasonable people could disagree. @ Another support function is used for wrapping tags around text: <<*>>= procedure sgmlwrap(tag, s) return "<" || tag || ">" || s || "</" || tag || ">" end @ Lucky for us, {\tt HTML} has few special characters. Unlucky for us, we have to deal with each one seperately. Nothing much to whine about, really. <<*>>= procedure escapeSpecials (l) static escapes, specials initial {escapes := table(); escapes["&"] := "&" escapes["<"] := "<" escapes[">"] := ">" escapes["\""] := """ specials := '' every specials ++:= key(escapes) } s := "" l ? { while s ||:= tab(upto(specials)) do s ||:= escapes[move(1)] return s || tab(0) } end @ A special function is used to implement {\tt noweb}'s quoting convention within chunk names. <<*>>= procedure convquotes(s) r := "" s ? { # don't call escapeSpecials on s; it destroys internal markup while r ||:= tab(find("[[")) do { ="[[" | fatal("impossible missing [[") r ||:= "<code>" || tab(find("]]")) r ||:= tab(many(']')-2) ="]]" | fatal("impossible missing ]] in ", image(s)) r ||:= "</code>" } return r || tab(0) } end @ <<*>>= procedure warn_unknown(tag) static warned initial warned := set() if not member(warned, tag) then { write(&errout, "Warning: unrecognized escape @", tag, " ", tab(0)) insert(warned, tag) } return end @ <<*>>= procedure fatal(L[]) write!([&errout, "noweb error in tohtml: "] ||| L) exit(1) end @ \end{document} <<*>>= procedure rcsinfo () return "$Id: tohtml.nw,v 1.19 2006/06/12 21:03:54 nr Exp nr $" || "$Name: v2_11b $" end @