>
exit(status);
}
@ %def main argc argv
If the first argument begins with a `[[-]]', the
user is choosing the desired counts and specifying
the order in which they should be displayed.
Each selection is given by the
initial letter of the thing to be counted (`[[l]]' for lines, `[[w]]' for words, or `[[c]]' for characters).
For example, `[[-cl]]' would cause just the
number of characters and the number of lines to
be printed, in that order.
We do not process this string now; we simply remember where it is.
It will be used to control the formatting at output time.
<>=
int file_count;
/* how many files there are; decremented whenever a file cannot be opened */
char *which;
/* which counts to print: a string of selector letters, either "lwc" or
   the text that follows a leading '-' in the first argument */
@ %def file_count which
<>=
/* Default selection: lines, words and characters, i.e. all 3 values. */
which = "lwc";
if (argc > 1 && argv[1][0] == '-') {
    /* Remember the letters after the '-' and drop that argument. */
    which = &argv[1][1];
    argv++;
    argc--;
}
/* Every argument after the program name is taken to be a file name. */
file_count = argc - 1;
@
Now we scan the remaining arguments and try to open a file, if possible.
The file is processed and its statistics are given.
We use a [[do ... while]] loop because we should read from the standard
input if no file name is given.
<>=
/* Discount the program name.  Assuming this chunk follows the option
   set-up, argc now equals file_count -- TODO confirm against the
   tangled order. */
argc--;
/* A do ... while loop: the body runs once even when no file name is
   left, which is how the standard input gets read. */
do {
/* NOTE(review): the six `<>` lines below look like noweb chunk
   references whose names are missing from this copy; they are left
   untouched. */
<>
<>
<>
<>
<>
<>
/* even if there is only one file */
/* argc counts the arguments still to be handled; stop when none remain */
} while (--argc > 0);
@
Here's the code to open the file. A special trick allows us to handle
input from [[stdin]] when no name is given.
Recall that the file descriptor to [[stdin]] is 0; that's what we use
as the default initial value.
<>=
int fd = 0;
/* file descriptor, initialized to stdin (descriptor 0) so that the
   standard input is read when no file is opened */
@ %def fd
<>=
#define READ_ONLY 0
/* read access code for system open; passed as the second argument of
   open() when a file is opened */
@ %def READ_ONLY
<>=
if (file_count > 0) {
    /* Step to the next file name and try to open it for reading. */
    fd = open(*(++argv), READ_ONLY);
    if (fd < 0) {
        /* Report the failure, record it in the exit status, and move
           on to the next argument without counting this file. */
        fprintf(stderr, "%s: cannot open file %s\n", prog_name, *argv);
        status |= cannot_open_file;
        file_count--;
        continue;
    }
}
<>=
/* Release the descriptor of the input just scanned; fd is still 0
   (stdin) when no file was opened. */
close(fd);
@
We will do some homemade buffering in order to speed things up:
Characters will be read into the [[buffer]] array before we process
them.
To do this we set up appropriate pointers and counters.
<>=
#define buf_size BUFSIZ
/* size of the input buffer array; stdio.h BUFSIZ chosen for efficiency */
@ %def buf_size
<>=
char buffer[buf_size];   /* the input is read into this array */
register char *ptr;      /* next character in buffer still to be examined */
register char *buf_end;  /* first position in buffer that holds no input */
register int c;          /* current character, or the count returned by read */
int in_word;             /* nonzero while the scan is inside a word */
long word_count;         /* words seen so far in the current input */
long line_count;         /* lines seen so far in the current input */
long char_count;         /* characters seen so far in the current input */
@ %def buffer ptr buf_end in_word word_count line_count char_count
<>=
/* Start with an empty buffer: ptr == buf_end forces a read before the
   first character is examined. */
buf_end = buffer;
ptr = buffer;
/* Clear the per-file counts. */
char_count = 0;
word_count = 0;
line_count = 0;
/* The scan starts outside any word. */
in_word = 0;
@
The grand totals must be initialized to zero at the beginning of the
program.
If we made these variables local to [[main]], we would have to do this
initialization explicitly; however, C's globals are automatically
zeroed. (Or rather, ``statically zeroed.'') (Get it?)
<>=
/* Grand totals over all inputs; they have no initializer and rely on
   static zero-initialization. */
long tot_word_count;  /* total number of words */
long tot_line_count;  /* total number of lines */
long tot_char_count;  /* total number of chars */
@
The present chunk, which does the counting that is wc's
raison d'etre, was actually one of the simplest to write.
We look at each character and change state if it begins or ends a word.
<>=
/* Examine the input one character at a time, updating word_count,
   line_count and in_word.  The loop has no exit condition of its own. */
while (1) {
/* NOTE(review): presumably this `<>` reference expands to the
   buffer-refill chunk, whose break is what leaves the loop -- confirm;
   the chunk name is missing from this copy. */
<>
c = *ptr++;
if (c > ' ' && c < 0177) {
/* visible ASCII codes */
if (!in_word) {
/* a visible character seen outside a word starts a new word */
word_count++;
in_word = 1;
}
continue;
}
/* Any other character that is not newline, space or tab is skipped
   without changing in_word. */
if (c == '\n') line_count++;
else if (c != ' ' && c != '\t') continue;
in_word = 0;
/* c is newline, space, or tab */
}
@
Buffered I/O allows us to count the number of characters almost for
free.
<>=
if (ptr >= buf_end) {
    /* Everything read so far has been examined: rewind to the start
       of the buffer and fetch the next piece of input. */
    ptr = buffer;
    c = read(fd, buffer, buf_size);
    if (c <= 0)
        break;              /* nothing more was read: leave the enclosing loop */
    buf_end = buffer + c;   /* c characters are now available */
    char_count += c;        /* characters are counted a whole read at a time */
}
@
It's convenient to output the statistics by defining a new function
[[wc_print]]; then the same function can be used for the totals.
Additionally we must decide here if we know the name of the file we have
processed or if it was just [[stdin]].
<>=
/* Print the selected counts, then finish the line: with the file name
   when files were given, bare when the input was stdin. */
wc_print(which, char_count, word_count, line_count);
if (file_count == 0) {
    printf("\n");            /* stdin */
} else {
    printf(" %s\n", *argv);  /* not stdin */
}
@
<>=
/* Fold the counts of the input just scanned into the grand totals. */
tot_char_count += char_count;
tot_word_count += word_count;
tot_line_count += line_count;
@
We might as well improve a bit on Unix's wc by displaying
the number of files too.
<>=
if (file_count >= 2) {
    /* Same layout as a per-file line, fed with the accumulated totals
       and followed by the number of files. */
    wc_print(which, tot_char_count, tot_word_count, tot_line_count);
    printf(" total in %d files\n", file_count);
}
@
Here now is the function that prints the values according to the
specified options.
The calling routine is supposed to supply a newline.
If an invalid option character is found we inform the user about proper
usage of the command.
Counts are printed in 8-digit fields so that they will line up in
columns.
<>=
/* Print n right-justified in a field at least 8 characters wide.  The
   argument is parenthesized and converted to long so that it always
   matches the %ld conversion, whatever integer type the caller passes. */
#define print_count(n) printf("%8ld", (long)(n))
@ %def print_count
<>=
/*
 * wc_print -- print the selected counts for one input or for the totals.
 *
 *   which       string of selector characters, handled left to right:
 *               'l' prints line_count, 'w' prints word_count and
 *               'c' prints char_count, so the order of the letters is
 *               the order of the output
 *   char_count, word_count, line_count
 *               the counts to report
 *
 * Each selected count is written with print_count.  No newline is
 * written; the caller is expected to finish the line.  Any other
 * selector character sets the usage_error bit in status; the usage
 * message goes to stderr only when that bit was still clear, so it is
 * not repeated while the bit stays set.
 *
 * Returns 0 always.  The value carries no information: the definition
 * used to rely on an implicit int return type without returning
 * anything, and the explicit int keeps it compatible with callers that
 * see no prototype.
 */
int
wc_print(char *which, long char_count, long word_count, long line_count)
{
    while (*which) {
        switch (*which++) {
        case 'l':
            print_count(line_count);
            break;
        case 'w':
            print_count(word_count);
            break;
        case 'c':
            print_count(char_count);
            break;
        default:
            /* invalid selector: complain once, keep processing the rest */
            if ((status & usage_error) == 0) {
                fprintf(stderr,
                        "\nUsage: %s [-lwc] [filename ...]\n",
                        prog_name);
                status |= usage_error;
            }
            break;
        }
    }
    return 0;
}
@ %def wc_print
Incidentally, a test of this program against the system wc
command on a SPARCstation showed that the ``official'' wc was
slightly slower.
Furthermore, although that wc gave an appropriate error message
for the options `[[-abc]]', it made no complaints about the options
`[[-labc]]'!
Dare we suggest that the system routine might have been better if its
programmer had used a more literate approach?
Chunks
Identifiers