% -*- mode: Noweb; noweb-code-mode: icon-mode -*-
\documentstyle[noweb]{article}
\pagestyle{noweb}
\begin{document}
@
\section{Converting {\tt noweb} markup to {\tt HTML}}
This copyright applies both to the {\tt noweb} source and to the
generated code.
Thanks to Bill Trost for getting me started with an early version.
<<copyright notice>>=
# Copyright 1994 by Norman Ramsey.  All rights reserved.
# See file COPYRIGHT for more information.
@
The [[-raw]] option brackets HTML with [[\begin{rawhtml}]] and 
[[\end{rawhtml}]]; the purpose is to embed HTML in a {\LaTeX} 
document before converting the document with {\tt latex2html}.
[[braw]] and [[eraw]] hold those delimiters (or else empty strings).
<<*>>=
<<copyright notice>>
# Don't try to understand this file!  Look at icon/tohtml.nw in the noweb source!

global braw, eraw
procedure main(args)
  local delay, raw, where, localindex, noindex, shortxref
  <<initialization>>
  every braw | eraw := ""
  delay := !args == "-delay"
  noindex := !args == "-noindex"
  shortxref := if !args == "-longxref" then &null else 1
  localindex := (/noindex, !args == "-localindex") # produce local identifier xref?
  raw := !args == "-raw"
  if \raw then {braw := "\\begin{rawhtml}"; eraw := "\\end{rawhtml}"}
  if !args == "-no-gen-comment" then &null
  else 
    write(braw, "<!-- this file was generated automatically by noweave;",
          " better not edit it-->", eraw)
  while inputline := read() do inputline ? {
    <<scan and convert>>
  }
  write()
end
<<scan and convert>>=
<<@text>>
<<@nl>>
<<code chunks>>
<<@defn>>
<<docs chunks>>
<<@use>>
<<@xref>>
<<@index>>
<<others>>
if match("@fatal ") then {				# follows last else
  exit(1)
} else if ="@" then 
  warn_unknown(1(tab(upto(' ')|0), pos(0) | move(1)))
else
  fatal("Botched line in noweb pipeline: ", tab(0))
@ 
[[ecode]] is the marker used at the end of the current code chunk.
If there is no cross-reference stuff at the end, we just use [[</pre>]];
otherwise we terminate whatever environment is used for the cross-reference stuff.
<<code chunks>>=
if ="@begin code " then	{ code := 1   ; thischunk := &null
			  <<reset cross-reference info>>
                          writes(braw, "<pre>"); ecode := "</pre>"               } else
if ="@end code "   then { <<dump pending cross-reference info (long form)>>
                          code := nil ; previscode := 1 
                          writes(ecode, eraw)                                    } else
@ We maintain [[thischunk]] null until we've seen [[@defn]].

We want to try to avoid emitting paragraph elements when the
preceding chunk is a code chunk, as tracked by [[previscode]].
Also, if we do slip in a paragraph, we may use the {\LaTeX} style.
<<docs chunks>>=
if ="@begin docs " then	{ if \previscode then writes(if /raw then "<p>" else "\\par")
                          previscode := &null
                          text := 0                                              } else
@
Sometimes it happens that a document-chunk anchor is put in a document chunk that
contains no text.  In that case, we put in a phony anchor at the end of the chunk so 
we won't lose the cross-reference.
<<docs chunks>>=
if ="@end docs " then  	{ write(linklabel(\lastxreflabel, "*"))
                          lastxreflabel := &null                                 } else
@
Normally, if there's a pending anchor, we put it on the first available text line.
There's a bit of a fine point that crops up if the very first piece of text
is quoted code.  In that case we have to attach both the label for the
{\it docs} anchor and the ref for the {\it index} anchor.
<<@text>>=
if ="@text " then 	{ text +:= *(line := tab(0))
			  if \code then
			    writes((<<index anchor>>)          | escapeSpecials(line))
		          else if \quoting then
			    writes((<<index and docs anchor>>) | escapeSpecials(line))
                          else
                            writes((<<docs anchor>>) | line)                } else
<<index anchor>>=
2(line ? {tab(many(' \t')); not pos(0)}, 
  linkto(\lastindexref, escapeSpecials(line)),
  lastindexref := &null)
@
We anchor on the first nonblank character of the line, unless that's
an SGML tag, in which case we have to
skip past. 
None of this crap would be necessary if HTML could anchor to empty text.
<<docs anchor>>=
1(line ? skip_white_tags() || 
         linklabel(\lastxreflabel, skip_chars()) || tab(0),
  insert(defns_above, \lastxreflabel),
  lastxreflabel := &null)
<<old docs anchor>>=
2(line ? {tab(many(' \t')); not pos(0)},
  line ? linklabel(\lastxreflabel, 
                           (tab(many(' \t')) | "") || 
                            (skip_tags_and_char() | "*")) ||
         tab(0),
  insert(defns_above, \lastxreflabel),
  lastxreflabel := &null)
@ 
An indexed identifier is not empty and contains no tags, so we don't 
have to worry.
<<index and docs anchor>>=
2(line ? {tab(many(' \t')); not pos(0)}, 
  linklabelto(lastxreflabel, lastindexref, escapeSpecials(line), "bare"),
  lastxreflabel := lastindexref := &null)
@
Skip as many tags as possible.
Argument is number of tags already skipped; if nonzero, we're willing to succeed
at the end of the string.
<<*>>=
procedure skip_white_tags()
  suspend thewhite() ||
             ((="<" || thewhite() || (="/" || thewhite() | "") ||
                  (tab(many(&letters)) | ="!--") || 
                   tab(upto(">")) || =">" || skip_white_tags() |
              "") \ 1)
end
<<*>>=
procedure skip_chars()
  if not any('<') & upto('<') then
    suspend tab(upto('<'))
  else if any('&') & upto(';') then
    suspend (tab(upto(';')) || =";")
  else if (any('<') & upto('>')) | any(' \t') then
    (write(&errout, "broken skip_chars, report to nr@eecs.harvard.edu: ", image(tab(0))),
     &fail)
  else if any('<') then
    (write(&errout, "warning: can't find end of HTML tag: ", image(tab(0)),
	   "\n\tquoted code embedded in comment?"), &fail)
  else if pos(0) then 
    fail
  else
    suspend tab(0)
end
procedure optwhite()
  suspend tab(many(' \t')) | ""
end
procedure thewhite()
  return optwhite() \ 1
end
<<*>>=
procedure skip_tags_and_char(count)
  local tag
  /count := 0
  if any('&') & upto(';') then
    suspend(tab(upto(';')) || =";")
  else if any('<') then
    suspend ="<" || (tab(many(' \t')) | "") ||
            (="/" | (tag := tab(many(&letters)))) ||
            tab(upto(">")) || =">" || 
            (if map(tag) == "a" then 
               tab(upto("<")) || ="<" || optwhite() || ="/" || optwhite() || 
               (("A"|"a") == tab(many(&letters))) || optwhite || =">"
             else "") ||
             skip_tags_and_char(count+1)
  else
    suspend (tab(many(' \t')) | "") || 
            ((move(1) | if count > 0 then "")\1)  # succeed at end if count > 0
            
end
@
The preceding scheme sometimes wraps an anchor around a tag.
If that turns out to be bad, we could try the following alternative:
<<funky new docs anchor>>=
2(line ? {tab(many(' \t')); not pos(0)},
  line ? scan_initial_tags() ||
                   linklabel(\lastxreflabel, 
                                (tab(many(' \t')) | "") || (scan_past_char() | "*")) ||
         tab(0),
  insert(defns_above, \lastxreflabel),
  lastxreflabel := &null)
<<procedures to support new docs anchor>>=
procedure scan_initial_tags()
  suspend (="<" || (tab(many(' \t')) | "") ||
              (="/" | (tag := tab(many(&letters)), map(tag) ~== "a", tag)) ||
              tab(upto(">")) || =">" || scan_initial_tags()) | 
          ""
end

procedure scan_past_char()
  if any('&') & upto(';') then
    suspend(tab(upto(';')) || =";")
  else
    suspend (tab(many(' \t')) | "") || tab(any(~'<'))
end
<<@nl>>=
if ="@nl" & pos(0) then { <<show definitions and uses on definition line---once>>
			  write()                                                } else
<<@defn>>=
if ="@defn " then 	{ writechunk(lastxreflabel, lastxrefref, "dfn", 
                                     thischunk := tab(0), defns[thischunk] || "=")
			  insert(defns_above, \lastxreflabel)
			  useitems := \useitemstab[thischunk]
			  pendinguses := 1
			  <<clear [[lastxref*]]>>
                          defns[thischunk] := "+"                                } else
<<initialization>>=
useitemstab := table()
defns := table("")
defns_above := set()	# keep track of defining chunks we've seen
<<@use>>=
if ="@use " then 	{ writechunk(lastxreflabel, lastxrefref, "i", tab(0)) 
       	                  <<clear [[lastxref*]]>>                                } else
@
Writing a chunk involves creating an anchor for it.
The anchor is ``bare'' because it's always in an HTML-containing place.
<<*>>=
procedure writechunk(label, ref, tag, name, suffix)
  /suffix := ""
  writes(linklabelto(label, ref, 
                     sgmlwrap(tag, "&lt;" || convquotes(name) || "&gt;" || suffix),
		     "bare"))
  return
end
@
<<others>>= 
if ="@quote"    & pos(0) then 	{ quoting := 1   ; writes(braw, "<code>")        } else
if ="@endquote" & pos(0) then 	{ quoting := nil ; writes("</code>", eraw)       } else
<<others>>=
if ="@file "                  then { filename := tab(0); <<clear [[lastxref*]]>> } else
if ="@literal "               then { writes(tab(0))                              } else
if ="@header html "           then { <<write HTML header>>		         } else
if ="@trailer html" & pos(0)  then { <<write HTML trailer>>		         } else
@
<<write HTML header>>=
writes("<html><head><title>", tab(0), "</title></head><body>")
<<write HTML trailer>>=
write("</body></html>")
@
<<@xref>>=
if ="@xref " then {
  if fun := tab(upto(' ')) then {move(1); arg := tab(0)}
  else { fun := tab(0); arg := &null }
  case fun of { 
    <<cases for @xref>> 
    default : (\arg|"") ? warn_unknown("xref " || fun)
  }
} else
@
[[useitemstab]] seems to exist to enable us to repeat the
uses for each chunk, even though the [[noidx]] filter only provides them once.
I think it may prove better to provide use information for each instance
of a chunk.
<<cases for @xref>>=
"label"     : { <<warn if unused [[lastxreflabel]]>>; lastxreflabel := arg }
"ref"       : { <<warn if unused [[lastxrefref]]>>;   lastxrefref   := arg }
"prevdef"   : pendingprev   := arg
"nextdef"   : pendingnext   := arg
"beginuses" : useitems := []
"useitem"   : put(useitems, arg)
"enduses"   : useitemstab[\thischunk] := useitems
"notused"   : notused := arg
<<clear [[lastxref*]]>>=
every lastxreflabel | lastxrefref := &null
<<warn if unused [[lastxreflabel]]>>=
warn_unused_xref("label", \lastxreflabel)
<<warn if unused [[lastxrefref]]>>=
warn_unused_xref("ref", \lastxrefref)
<<*>>=
procedure warn_unused_xref(tag, label)
  static warned
  initial warned := set()
  if not member(warned, tag) then {
    insert(warned, tag)
    write(&errout, "Warning: internal inconsistency in noweb (not urgent)---")
    write(&errout, "\tnever used @xref ", tag, " ", label)
  }
  return
end  
@
<<show definitions and uses on definition line---once>>=
if \shortxref & (\pendinguses | \pendingprev | \pendingnext | \notused) then {
  <<write out definition-line markup>>
  pendinguses := pendingprev := pendingnext := notused := &null
}
<<write out definition-line markup>>=
if \pendinguses & *\useitems > 0 then {
  writes(" <b>(")
  every i := 1 to *\useitems do {
    usedir := if member(defns_above, useitems[i]) then "&lt;-U" else "U-&gt;"
    writes(if i > 1 then " " else "", linkto(useitems[i], usedir))
  }
  writes(")</b>")
}
if \pendingprev | \pendingnext then {
  writes(" <b>[")
  writes(linkto(\pendingprev, "&lt;-"))
  writes("D")
  writes(linkto(\pendingnext, "-&gt;"))
  writes("]</b>")
}
<<dump pending cross-reference info (long form)>>=
if /shortxref then {
  useitems := useitemstab[\thischunk]
  if \pendingprev | \pendingnext | (\pendinguses, (*\useitems > 0 | \notused)) then {
    <<code-to-blockquote>>
    <<write out uses with links>>
    useitemstab[\thischunk]
    if *\useitems > 0 & (\pendingprev | \pendingnext) then
      writes("; ")
    p := if *\useitems > 0 then "previous" else "Previous"
    n := if *\useitems > 0 then "next"     else "Next"
    if \pendingprev then
      if \pendingnext then
        writes(linkto(pendingprev, p), " and ",
               linkto(pendingnext, "next"), " definitions")
      else
        writes(linkto(pendingprev, p), " definition")
    else
      if \pendingnext then
        writes(linkto(pendingnext, n), " definition")
    if \pendingprev | \pendingnext | *\useitems > 0 then
      writes(".")
    if \notused then
      writes("<br>\nThis code is written to a file (or else not used).")
    write("<p>")
    pendinguses := pendingprev := pendingnext := useitems := notused := &null
  } 
} else
  &null
<<write out uses with links>>=
useprefix := "Used "
every i := 1 to *\useitems do {
  usedir := if member(defns_above, useitems[i]) then "above" else "below"
  usesuffix := if *\useitems > 1 then " (" || i || ")" else ""
  writes(useprefix, linkto(useitems[i], usedir || usesuffix))
  useprefix := ", "
}
<<reset cross-reference info>>=
useitems := []
notused := &null
@
The hack here is to put the supplementary information in a blockquote area
after the code.
<<code-to-blockquote>>=
if ecode == "</pre>" then {
  writes("</pre><blockquote>")
  ecode := "</blockquote>"
}
@
The HTML back end ignores [[@xref begindefs]], [[@xref defitem]], and
[[@xref enddefs]]; it uses the [[nextdef]] and [[prevdef]] links instead.
<<cases for @xref>>=
"begindefs" | "defitem" | "enddefs" : &null
@
<<cases for @xref>>=
"beginchunks" : { write(braw, "<ul>") }
"chunkbegin"  : { writes("<li>"); comma := ": "; count := 0
                  arg ? { ref := tab(upto(' ')); =" "; name := tab(0) }
                  writechunk(&null, ref, "i", name) }
"chunkuse"    : { writes(comma, linkto(arg, "U" || (count +:= 1)));  comma := ", " }
"chunkdefn"   : { writes(comma, linkto(arg, "D" || (count +:= 1)));  comma := ", " }
"chunkend"    : write()
"endchunks"   : write("</ul>", eraw)
<<cases for @index>>=
"beginindex"  : { write(braw, "<ul>") }
"entrybegin"  : { writes("<li>"); comma := ": "; count := 0
                  arg ? { ref := tab(upto(' ')); =" "; name := tab(0) }
                  writes(linklabelto("NWI-" || escapeSpecials(name), ref, name, "b")) }
"entryuse"    : { writes(comma, linkto(arg, "U" || (count +:= 1)));  comma := ", " }
"entrydefn"   : { writes(comma, linkto(arg, "D" || (count +:= 1)));  comma := ", " }
"entryend"    : write()
"endindex"   : write("</ul>", eraw)
<<@index>>=
if ="@index " then {
  if /noindex then {
    if fun := tab(upto(' ')) then {move(1); arg := tab(0)}
    else { fun := tab(0); arg := &null }
    case fun of { 
      <<cases for @index>> 
      default : (\arg|"") ? warn_unknown("index " || fun)
    }
  }
  # don't get any warnings if not doing indexing
} else
@
The local identifier cross-reference doesn't show each use; it just shows
the identifiers that are defined, with links to the full index.
<<cases for @index>>=
"use" 	    : { lastindexref := lastxrefref; lastxrefref := &null }
"defn"      : { <<clear [[lastxref*]]>> }
"localdefn" : { <<clear [[lastxref*]]>> }
"nl"        : &null # do nothing -- no hope of keeping line numbering
@
I tried an abbreviated index format at the end of the chunk, but it looks
so bad that I've punted on it (for now).  Hence the [[\uglyindex]].
<<cases for @index>>=
"begindefs" : if \localindex then {
	        <<dump pending cross-reference info (long form)>>
                if /uglyindex | /shortxref then {
                  <<code-to-blockquote>>
                  writes("Defines ") 
                } else {
                  writes("<hr><b>[</b>")
                }
                comma := "" 
              }
"isused"    : &null
"defitem"   : if \localindex then { 
                writes(comma, linkto("NWI-" || escapeSpecials(arg), 
                                sgmlwrap("code", escapeSpecials(arg))))
                comma := if /uglyindex | /shortxref then ", " else "<b>,</b> "
              }
"enddefs"   : if \localindex then 
                write(if /uglyindex | /shortxref then " (links are to index).<p>" 
                      else "<b>]</b>")
"beginuses" | "isdefined" | "useitem" | "enduses" : &null  # use local links
@
\subsection{Support functions}
Here's all our anchor support goo.
Unless the anchor is deemed ``bare,'' the beginning and ending tags
are protected with [[braw]] and [[eraw]].
This step is necessary to make \verb=noweave -latex+html= work without
trapping {\LaTeX} markup in a \texttt{rawhtml} environment.
<<*>>=
procedure linklabelto(label, ref, contents, bare)
  local s
  s   := if \bare then "" else braw
  s ||:= if \label | \ref then "<a"   else ""
  s ||:= " name=" || image(\label)
  s ||:= " href=" || image("#" || \ref)
  s ||:= if \label | \ref then ">"    else ""
  s ||:= if \bare then "" else eraw
  s ||:= contents
  s ||:= if \bare then "" else braw
  s ||:= if \label | \ref then "</a>" else ""
  s ||:= if \bare then "" else eraw
  return s
end
@ 
Here are two abbreviations.
<<*>>=
procedure linklabel(label, contents, bare)
  return linklabelto(label, &null, contents, bare)
end

procedure linkto(ref, contents)
  return linklabelto(&null, ref, contents, "bare")
end
@ [[linkto]] is always used ``bare,'' and I've decided that the
inconsistency in calling conversions is worth the savings from not
having to write a meaningless argument everywhere.
Reasonable people could disagree.
@
Another support function is used for wrapping tags around text:
<<*>>=
procedure sgmlwrap(tag, s)
  return "<" || tag || ">" || s || "</" || tag || ">"
end
@
Lucky for us, {\tt HTML} has few special characters.  Unlucky for us,
we have to deal with each one seperately.  Nothing much to whine
about, really.
<<*>>=
procedure escapeSpecials (l)
  static escapes, specials
  initial {escapes := table(); 
           escapes["&"] := "&amp;"
           escapes["<"] := "&lt;"
           escapes[">"] := "&gt;"
           escapes["\""] := "&quot;"
	   specials := ''
           every specials ++:= key(escapes)
          }
  s := ""
  l ? { 
    while s ||:= tab(upto(specials)) do
      s ||:= escapes[move(1)]
    return s || tab(0)
  }
end
@
A special function is used to implement {\tt noweb}'s quoting 
convention within chunk names.
<<*>>=
procedure convquotes(s)
  r := ""
  s ? {  # don't call escapeSpecials on s; it destroys internal markup
    while r ||:= tab(find("[[")) do {
      ="[[" | fatal("impossible missing [[")
      r ||:= "<code>" || tab(find("]]"))
      r ||:= tab(many(']')-2)
      ="]]" | fatal("impossible missing ]] in ", image(s))
      r ||:= "</code>"
    }
    return r || tab(0)
  }
end
@
<<*>>=
procedure warn_unknown(tag)
  static warned
  initial warned := set()
  if not member(warned, tag) then {
    write(&errout, "Warning: unrecognized escape @", tag, " ", tab(0))
    insert(warned, tag)
  }
  return
end
@
<<*>>=
procedure fatal(L[])
  write!([&errout, "noweb error in tohtml: "] ||| L)
  exit(1)
end
@ 
\end{document}
<<*>>=
procedure rcsinfo () 
  return "$Id: tohtml.nw,v 1.19 2006/06/12 21:03:54 nr Exp nr $" ||
         "$Name: v2_11b $"
end
@