comparison sbr/m_getfld.c @ 0:bce86c4163a3

Initial revision
author kono
date Mon, 18 Apr 2005 23:46:02 +0900
parents
children 441a2190cfae
comparison
equal deleted inserted replaced
-1:000000000000 0:bce86c4163a3
1 /* m_getfld.c - read/parse a message */
2 #ifndef lint
3 static char ident[] = "@(#)$Id$";
4 #endif /* lint */
5
6 #include "../h/mh.h"
7 #include <stdio.h>
8 #include "../zotnet/mts.h"
9 #include <ctype.h>
10
11
12 /* This module has a long and checkered history. First, it didn't burst
13 maildrops correctly because it considered two CTRL-A:s in a row to be
14 an inter-message delimiter. It really is four CTRL-A:s followed by a
15 newline. Unfortunately, MMDF will convert this delimiter *inside* a
16 message to a CTRL-B followed by three CTRL-A:s and a newline. This
17 caused the old version of m_getfld() to declare eom prematurely. The
18 fix was a lot slower than
19
20 c == '\001' && peekc (iob) == '\001'
21
22 but it worked, and to increase generality, UUCP style maildrops could
23 be parsed as well. Unfortunately the speed issue finally caught up with
24 us since this routine is at the very heart of MH.
25
26 To speed things up considerably, the routine Eom() was made an auxiliary
27 function called by the macro eom(). Unless we are bursting a maildrop,
28 the eom() macro returns FALSE saying we aren't at the end of the
29 message.
30
31 The next thing to do is to read the mtstailor file and initialize
32 delimiter[] and delimlen accordingly...
33
34 After mhl was made a built-in in msh, m_getfld() worked just fine
35 (using m_unknown() at startup). Until one day: a message which was
36 the result of a bursting was shown. Then, since the burst boundaries
37 aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
38 Very sad. The solution: introduce m_eomsbr(). This hook gets called
39 after the end of each line (since testing for eom involves an fseek()).
40 This worked fine, until one day: a message with no body portion arrived.
41 Then the
42
43 while (eom (c = Getc (iob), iob))
44 continue;
45
46 loop caused m_getfld() to return FMTERR. So, that logic was changed to
47 check for (*eom_action) and act accordingly.
48
49 This worked fine, until one day: someone didn't use four CTRL-A:s as
50 their delimiters. So, the bullet got bit and we read mts.h and
51 continue to struggle on. It's not that bad though, since the only time
52 the code gets executed is when inc (or msh) calls it, and both of these
53 have already called mts_init().
54
55 ------------------------
56 (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
57
58 This routine was accounting for 60% of the cpu time used by most mh
59 programs. I spent a bit of time tuning and it now accounts for <10%
60 of the time used. Like any heavily tuned routine, it's a bit
61 complex and you want to be sure you understand everything that it's
62 doing before you start hacking on it. Let me try to emphasize
63 that: every line in this atrocity depends on every other line,
64 sometimes in subtle ways. You should understand it all, in detail,
65 before trying to change any part. If you do change it, test the
66 result thoroughly (I use a hand-constructed test file that exercises
67 all the ways a header name, header body, header continuation,
68 header-body separator, body line and body eom can align themselves
69 with respect to a buffer boundary). "Minor" bugs in this routine
70 result in garbaged or lost mail.
71
72 If you hack on this and slow it down, I, my children and my
73 children's children will curse you.
74
75 This routine gets used on three different types of files: normal,
76 single msg files, "packed" unix or mmdf mailboxes (when used by inc)
77 and packed, directoried bulletin board files (when used by msh).
78 The biggest impact of different file types is in "eom" testing. The
79 code has been carefully organized to test for eom at appropriate
80 times and at no other times (since the check is quite expensive).
81 I have tried to arrange things so that the eom check need only be
82 done on entry to this routine. Since an eom can only occur after a
83 newline, this is easy to manage for header fields. For the msg
84 body, we try to efficiently search the input buffer to see if it
85 contains the eom delimiter. If it does, we take up to the
86 delimiter, otherwise we take everything in the buffer. (The change
87 to the body eom/copy processing produced the most noticeable
88 performance difference, particularly for "inc" and "show".)
89
90 There are three qualitatively different things this routine busts
91 out of a message: field names, field text and msg bodies. Field
92 names are typically short (~8 char) and the loop that extracts them
93 might terminate on a colon, newline or max width. I considered
94 using a Vax "scanc" to locate the end of the field followed by a
95 "bcopy" but the routine call overhead on a Vax is too large for this
96 to work on short names. If Berkeley ever makes "inline" part of the
97 C optimiser (so things like "scanc" turn into inline instructions) a
98 change here would be worthwhile.
99
100 Field text is typically 60 - 100 characters so there's (barely)
101 a win in doing a routine call to something that does a "locc"
102 followed by a "bmove". About 30% of the fields have continuations
103 (usually the 822 "received:" lines) and each continuation generates
104 another routine call. "Inline" would be a big win here, as well.
105
106 Messages, as of this writing, seem to come in two flavors: small
107 (~1K) and long (>2K). Most messages have 400 - 600 bytes of headers
108 so message bodies average at least a few hundred characters.
109 Assuming your system uses reasonably sized stdio buffers (1K or
110 more), this routine should be able to remove the body in large
111 (>500 byte) chunks. This makes the cost of a call to "bcopy"
112 small but there is a premium on checking for the eom in packed
113 maildrops. The eom pattern is always a simple string so we can
114 construct an efficient pattern matcher for it (e.g., a Vax "matchc"
115 instruction). Some thought went into recognizing the start of
116 an eom that has been split across two buffers.
117
118 This routine wants to deal with large chunks of data so, rather
119 than "getc" into a local buffer, it uses stdio's buffer. If
120 you try to use it on a non-buffered file, you'll get what you
121 deserve. This routine "knows" that struct FILEs have a _ptr
122 and a _cnt to describe the current state of the buffer and
123 it knows that _filbuf ignores the _ptr & _cnt and simply fills
124 the buffer. If stdio on your system doesn't work this way, you
125 may have to make small changes in this routine.
126
127 This routine also "knows" that an EOF indication on a stream is
128 "sticky" (i.e., you will keep getting EOF until you reposition the
129 stream). If your system doesn't work this way it is broken and you
130 should complain to the vendor. As a consequence of the sticky
131 EOF, this routine will never return any kind of EOF status when
132 there is data in "name" or "buf".
133 */
134
135
/* Character fetch used throughout this file; simply stdio's getc(). */
#define Getc(iob)       getc(iob)

/*
 * eom(c, iob): non-zero when c, the character just read from iob, marks
 * the end of the current message.  For MS_DEFAULT files the test is
 * short-circuited to false.  Otherwise c must equal the first delimiter
 * character and m_Eom() must confirm the rest of the delimiter, or the
 * hook installed with m_eomsbr() must accept c.
 */
#define eom(c,iob)      (msg_style != MS_DEFAULT \
                            && (((c) == *msg_delim && m_Eom(c,iob)) \
                                || (eom_action && (*eom_action)(c))))

/* Buffer-scanning helpers defined at the end of this file. */
static unsigned char *matchc(), *locc();

/* End-of-message hook; NULL when no hook is installed (see m_eomsbr). */
static int (*eom_action) () = NULL;

/*
 * Table of 256 entries indexed by character value, built by m_unknown()
 * and consulted by the BODY case of m_getfld() to spot a partial
 * delimiter at the end of the input buffer.
 */
static unsigned char **pat_map;

/*
 * Defined in sbr/m_msgdef.c (initially 0).  A hack for "inc": the number
 * of characters stuffed in the buffer on the last call (see the comments
 * in uip/scansbr.c).
 */
extern int msg_count;

/* Defined in sbr/m_msgdef.c (initially MS_DEFAULT). */
extern int msg_style;

/*
 * Delimiter bookkeeping for packed maildrops.
 *
 * The "full" delimiter string is a newline followed by the actual
 * delimiter; for a Unix maildrop that is "\n\nFrom ".
 *
 *   fdelim     start of the full string; searched for in the BODY case
 *              of m_getfld() to find a possible eom.
 *   msg_delim  first character of the actual delimiter (fdelim + 1);
 *              defined in sbr/m_msgdef.c (initially "").
 *   edelim     second character of the actual delimiter; m_Eom() starts
 *              here because the first character has already been read
 *              and matched by the eom() macro.
 *   delimend   msg_delim + edelimlen.
 *   fdelimlen / edelimlen
 *              lengths associated with fdelim and edelim.
 */
extern char *msg_delim;
static unsigned char *fdelim, *delimend, *edelim;
static int fdelimlen, edelimlen;

#ifdef CONTENT_LENGTH
/* Value of a pending Content-Length: header, or -1 when there is none. */
static int content_length = -1;
/* File offset just past the current message body, or -1 when unknown. */
static long end_of_contents = -1;
#endif

/*
 * Map the traditional stdio FILE member names used below onto whatever
 * the local stdio implementation calls them.
 */
#ifdef FILE__PTR
#define _ptr    __ptr
#define _cnt    __cnt
#endif

#ifdef _FSTDIO
#define _ptr    _p              /* Gag */
#define _cnt    _r              /* Retch */
#define _filbuf __srget         /* Puke */
#endif

/*  */
189
190 /* */
191
192 m_getfld (state, name, buf, bufsz, iob)
193 int state;
194 int bufsz;
195 unsigned char *name,
196 *buf;
197 register FILE *iob;
198 {
199 register unsigned char *cp;
200 register unsigned char *bp;
201 register unsigned char *ep;
202 register unsigned char *sp;
203 register int cnt;
204 register int c;
205 register int i;
206 register int j;
207
208 #ifdef CONTENT_LENGTH
209 /*
210 * When starting to read from a new file, we have to reset the state,
211 * but only if the state wasn't reset. That may save us a number of
212 * lseeks.
213 */
214 if (state == FLD &&
215 (content_length != -1 || end_of_contents != -1) &&
216 ftell(iob) == 0)
217 end_of_contents = content_length = -1;
218 #endif
219 if ((c = Getc(iob)) < 0) {
220 msg_count = 0;
221 *buf = 0;
222 return FILEEOF;
223 }
224 if (eom (c, iob)) {
225 if (! eom_action) {
226 /* flush null messages */
227 while ((c = Getc(iob)) >= 0 && eom (c, iob))
228 ;
229 if (c >= 0)
230 (void) ungetc(c, iob);
231 }
232 msg_count = 0;
233 *buf = 0;
234 return FILEEOF;
235 }
236
237 switch (state) {
238 case FLDEOF:
239 case BODYEOF:
240 case FLD:
241 if (c == '\n' || c == '-') {
242 /* we hit the header/body separator */
243 while (c != '\n' && (c = Getc(iob)) >= 0)
244 ;
245
246 #ifdef CONTENT_LENGTH
247 /*
248 * When we've found a content-length header, we're
249 * going to use it to tell where the message boundary
250 * is, if it is a valid mesage boundary.
251 * There can be a number of cases:
252 * - no bytes after <content-length> bytes: the usual format
253 * of a message in an MH folder.
254 * - only a newline - last message in mail drop.
255 * - "\nFrom " - beginning of next message
256 * - other - ignore Content-Length header, but issue warning
257 */
258 if (msg_style == MS_UUCP && content_length != -1) {
259 long here = ftell(iob);
260 static char delim[] = "\nFrom ";
261 char buf[sizeof(delim)-1];
262 int cnt;
263
264 /* compute position of character after file */
265 end_of_contents = here + content_length + 1;
266 content_length = -1;
267 /* And see whether this is a From header or eof. */
268 fseek(iob, end_of_contents - 1, 0);
269 cnt = fread(buf, sizeof(char), sizeof(buf), iob);
270 if (cnt != 0 && (cnt != 1 || buf[0] != '\n') &&
271 (cnt != sizeof(buf) ||
272 strncmp(buf,delim, sizeof(buf)) != 0)) {
273 advise (NULLCP, "invalid Content-Length: header\n");
274 end_of_contents = -1;
275 }
276 fseek(iob, here, 0);
277 }
278 #endif
279 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
280 if (! eom_action) {
281 /* flush null messages */
282 while ((c = Getc(iob)) >= 0 && eom (c, iob))
283 ;
284 if (c >= 0)
285 (void) ungetc(c, iob);
286 }
287 msg_count = 0;
288 *buf = 0;
289 return FILEEOF;
290 }
291 state = BODY;
292 goto body;
293 }
294 /*
295 * get the name of this component. take characters up
296 * to a ':', a newline or NAMESZ-1 characters, whichever
297 * comes first.
298 */
299 cp = name; i = NAMESZ - 1;
300 for (;;) {
301 #ifdef _STDIO_USES_IOSTREAM
302 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
303 j = (cnt = ((long) iob->_IO_read_end - (long) iob->_IO_read_ptr) + 1) < i?
304 cnt: i;
305 #else
306 bp = sp = (unsigned char *) iob->_ptr - 1;
307 j = (cnt = iob->_cnt+1) < i ? cnt : i;
308 #endif
309 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
310 *cp++ = c;
311
312 j = bp - sp;
313 if ((cnt -= j) <= 0) {
314 #ifdef _STDIO_USES_IOSTREAM
315 iob->_IO_read_ptr = iob->_IO_read_end;
316 if (__underflow((struct _IO_FILE *) iob) == EOF) {
317 #else
318 #ifdef FILBUF_ADJ
319 iob -> _ptr += iob -> _cnt;
320 iob -> _cnt = 0;
321 #endif /* FILBUF_ADJ */
322 if (_filbuf(iob) == EOF) {
323 #endif
324 *cp = *buf = 0;
325 advise (NULLCP, "eof encountered in field \"%s\"",
326 name);
327 return FMTERR;
328 }
329 #ifdef _STDIO_USES_IOSTREAM
330 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
331 #endif
332 } else {
333 #ifdef _STDIO_USES_IOSTREAM
334 iob->_IO_read_ptr = bp + 1;
335 #else
336 iob->_ptr = bp + 1;
337 iob->_cnt = cnt - 1;
338 #endif
339 }
340 if (c == ':')
341 break;
342
343 /*
344 * something went wrong. possibilities are:
345 * . hit a newline (error)
346 * . got more than namesz chars. (error)
347 * . hit the end of the buffer. (loop)
348 */
349 if (c == '\n') {
350 *cp = *buf = 0;
351 advise (NULLCP, "eol encountered in field \"%s\"", name);
352 state = FMTERR;
353 goto finish;
354 }
355 if ((i -= j) <= 0) {
356 *cp = *buf = 0;
357 advise (NULLCP, "field name \"%s\" exceeds %d bytes",
358 name, NAMESZ - 1);
359 state = LENERR;
360 goto finish;
361 }
362 }
363
364 while (isspace (*--cp) && cp >= name)
365 ;
366 *++cp = 0;
367 /* fall through */
368
369 case FLDPLUS:
370 /*
371 * get (more of) the text of a field. take
372 * characters up to the end of this field (newline
373 * followed by non-blank) or bufsz-1 characters.
374 */
375 cp = buf; i = bufsz-1;
376 for (;;) {
377 #ifdef _STDIO_USES_IOSTREAM
378 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
379 bp = (unsigned char *) --iob->_IO_read_ptr;
380 #else
381 cnt = iob->_cnt++; bp = (unsigned char *) --iob->_ptr;
382 #endif
383 c = cnt < i ? cnt : i;
384 while (ep = locc( c, bp, '\n' )) {
385 /*
386 * if we hit the end of this field, return.
387 */
388 if ((j = *++ep) != ' ' && j != '\t') {
389 #ifdef _STDIO_USES_IOSTREAM
390 j = ep - (unsigned char *) iob->_IO_read_ptr;
391 (void) bcopy( iob->_IO_read_ptr, cp, j);
392 iob->_IO_read_ptr = ep;
393 #else
394 j = ep - (unsigned char *) iob->_ptr;
395 (void) bcopy( iob->_ptr, cp, j);
396 iob->_ptr = ep; iob->_cnt -= j;
397 #endif
398 cp += j;
399 state = FLD;
400 goto finish;
401 }
402 c -= ep - bp; bp = ep;
403 }
404 /*
405 * end of input or dest buffer - copy what we've found.
406 */
407 #ifdef _STDIO_USES_IOSTREAM
408 c += bp - (unsigned char *) iob->_IO_read_ptr;
409 (void) bcopy( iob->_IO_read_ptr, cp, c);
410 #else
411 c += bp - (unsigned char *) iob->_ptr;
412 (void) bcopy( iob->_ptr, cp, c);
413 #endif
414 i -= c; cp += c;
415 if (i <= 0) {
416 /* the dest buffer is full */
417 #ifdef _STDIO_USES_IOSTREAM
418 iob->_IO_read_ptr += c;
419 #else
420 iob->_cnt -= c; iob->_ptr += c;
421 #endif
422 state = FLDPLUS;
423 break;
424 }
425 /*
426 * There's one character left in the input buffer.
427 * Copy it & fill the buffer. If the last char
428 * was a newline and the next char is not whitespace,
429 * this is the end of the field. Otherwise loop.
430 */
431 --i;
432 #ifdef _STDIO_USES_IOSTREAM
433 *cp++ = j = *(iob->_IO_read_ptr + c);
434 iob->_IO_read_ptr = iob->_IO_read_end;
435 c = __underflow((struct _IO_FILE *) iob);
436 iob->_IO_read_ptr++; /* NOT automatic! */
437 #else
438 *cp++ = j = *(iob->_ptr + c);
439 #ifdef FILBUF_ADJ
440 iob -> _ptr += iob -> _cnt;
441 iob -> _cnt = 0;
442 #endif /* FILBUF_ADJ */
443 c = _filbuf(iob);
444 #endif
445 /* bugfix, 03/1998.
446 * If we encounter EOF halfway through reading the value (ie there is
447 * no trailing \n in the field) then __underflow() above returns EOF.
448 * Previously we didn't check for this, with the result that we then
449 * attempt to read from the stream and wind up segfaulting doing a
450 * bcopy() with length parameter -1. Instead, we just append a newline
451 * to what we've read, so the following conditional will pick it up
452 * and return the field value. Then the EOF is actually dealt with
453 * the next time this function is called.
454 * The reason for appending \n is because the callers are known to
455 * work with 'name: value\n', and setting j to '\n' is known to exit
456 * in the right way, and the less we change the less likely
457 * we are to introduce new bugs. And I'm scared of the curse in the
458 * comments at the top of this file :->
459 * -- PMM (pmaydell@chiark.greenend.org.uk)
460 */
461 if (c == EOF && j != '\0' && j != '\n') {
462 *cp++ = j = '\n';
463 advise (NULLCP, "file missing final eol");
464 }
465 /* bugfix end */
466 if ((j == '\0' || j == '\n') && c != ' ' && c != '\t') {
467 if (c != EOF)
468 #ifdef _STDIO_USES_IOSTREAM
469 --iob->_IO_read_ptr;
470 #else
471 --iob->_ptr, ++iob->_cnt;
472 #endif
473 state = FLD;
474 break;
475 }
476 }
477 break;
478
479 case BODY:
480 body:
481 /*
482 * get the message body up to bufsz characters or the
483 * end of the message. Sleazy hack: if bufsz is negative
484 * we assume that we were called to copy directly into
485 * the output buffer and we don't add an eos.
486 */
487 i = (bufsz < 0) ? -bufsz : bufsz-1;
488 #ifdef _STDIO_USES_IOSTREAM
489 bp = (unsigned char *) --iob->_IO_read_ptr;
490 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
491 #else
492 bp = (unsigned char *) --iob->_ptr; cnt = ++iob->_cnt;
493 #endif
494 c = (cnt < i ? cnt : i);
495 if (msg_style != MS_DEFAULT && c > 1) {
496 /*
497 * packed maildrop - only take up to the (possible)
498 * start of the next message. This "matchc" should
499 * probably be a Boyer-Moore matcher for non-vaxen,
500 * particularly since we have the alignment table
501 * all built for the end-of-buffer test (next).
502 * But our vax timings indicate that the "matchc"
503 * instruction is 50% faster than a carefully coded
504 * B.M. matcher for most strings. (So much for elegant
505 * algorithms vs. brute force.) Since I (currently)
506 * run MH on a vax, we use the matchc instruction. --vj
507 */
508 if (ep = matchc( fdelimlen, fdelim, c, bp ) )
509 c = ep - bp + 1;
510 else {
511 /*
512 * There's no delim in the buffer but there may be
513 * a partial one at the end. If so, we want to leave
514 * it so the "eom" check on the next call picks it up.
515 * Use a modified Boyer-Moore matcher to make this
516 * check relatively cheap. The first "if" figures
517 * out what position in the pattern matches the last
518 * character in the buffer. The inner "while" matches
519 * the pattern against the buffer, backwards starting
520 * at that position. Note that unless the buffer
521 * ends with one of the characters in the pattern
522 * (excluding the first and last), we do only one test.
523 */
524 ep = bp + c - 1;
525 if (sp = pat_map[*ep & 0x00ff]) {
526 do {
527 cp = sp;
528 while (*--ep == *--cp)
529 ;
530 if (cp < fdelim) {
531 if (ep >= bp)
532 /*
533 * ep < bp means that all the buffer
534 * contains is a prefix of delim.
535 * If this prefix is really a delim, the
536 * m_eom call at entry should have found
537 * it. Thus it's not a delim and we can
538 * take all of it.
539 */
540 c = (ep - bp) + 2;
541 break;
542 }
543 /* try matching one less char of delim string */
544 ep = bp + c - 1;
545 } while (--sp > fdelim);
546 }
547 }
548 }
549 (void) bcopy( bp, buf, c );
550 #ifdef _STDIO_USES_IOSTREAM
551 iob->_IO_read_ptr += c;
552 #else
553 iob->_cnt -= c;
554 iob->_ptr += c;
555 #endif
556 if (bufsz < 0) {
557 msg_count = c;
558 return (state);
559 }
560 cp = buf + c;
561 break;
562
563 default:
564 adios (NULLCP, "m_getfld() called with bogus state of %d", state);
565 }
566 finish:;
567 *cp = 0;
568 msg_count = cp - buf;
569
570 #ifdef CONTENT_LENGTH
571 /* Check whether this was a Content-Length header */
572 if (msg_style == MS_UUCP && state == FLD &&
573 uleq((char*)"content-length", (char*) name)) {
574 content_length = atoi(buf);
575 /* This value is computed when end-of-headers is detected */
576 end_of_contents = -1;
577 }
578 #endif
579 return (state);
580 }
581
582 /* */
583
584 #ifdef RPATHS
585 static char unixbuf[BUFSIZ] = "";
586 #endif /* RPATHS */
587
588 void
589 m_unknown(iob)
590 register FILE *iob;
591 {
592 register int c;
593 register long pos;
594 char text[10];
595 register char *cp;
596 register char *delimstr;
597
598 msg_style = MS_UNKNOWN;
599
600 /* Figure out what the message delimitter string is for this
601 * maildrop. (This used to be part of m_Eom but I didn't like
602 * the idea of an "if" statement that could only succeed on the
603 * first call to m_Eom getting executed on each call, i.e., at
604 * every newline in the message).
605 *
606 * If the first line of the maildrop is a Unix "from" line, we say the
607 * style is UUCP and eat the rest of the line. Otherwise we say the style
608 * is MMDF & look for the delimiter string specified when MH was built
609 * (or from the mtstailor file).
610 */
611 pos = ftell (iob);
612 if (fread (text, sizeof *text, 5, iob) == 5
613 && strncmp (text, "From ", 5) == 0) {
614 msg_style = MS_UUCP;
615 delimstr = "\nFrom ";
616 #ifndef RPATHS
617 while ((c = getc (iob)) != '\n' && c >= 0)
618 ;
619 #else /* RPATHS */
620 cp = unixbuf;
621 while ((c = getc (iob)) != '\n')
622 *cp++ = c;
623 *cp = 0;
624 #endif /* RPATHS */
625 } else {
626 /* not a Unix style maildrop */
627 (void) fseek (iob, pos, 0);
628 if (mmdlm2 == NULLCP || *mmdlm2 == 0)
629 mmdlm2 = "\001\001\001\001\n";
630 delimstr = mmdlm2;
631 msg_style = MS_MMDF;
632 }
633 c = strlen (delimstr);
634 fdelim = (unsigned char *)malloc((unsigned)c + 3);
635 *fdelim++ = '\0';
636 *fdelim = '\n';
637 msg_delim = (char *)fdelim+1;
638 edelim = (unsigned char *)msg_delim+1;
639 fdelimlen = c + 1;
640 edelimlen = c - 1;
641 (void)strcpy(msg_delim, delimstr);
642 delimend = (unsigned char *)msg_delim + edelimlen;
643 if (edelimlen <= 1)
644 adios (NULLCP, "maildrop delimiter must be at least 2 bytes");
645 /*
646 * build a Boyer-Moore end-position map for the matcher in m_getfld.
647 * N.B. - we don't match just the first char (since it's the newline
648 * separator) or the last char (since the matchc would have found it
649 * if it was a real delim).
650 */
651 pat_map = (unsigned char **) calloc (256, sizeof (unsigned char *));
652
653 for (cp = (char *)fdelim + 1; cp < (char *)delimend; cp++ )
654 pat_map[*cp] = (unsigned char *)cp;
655
656 if (msg_style == MS_MMDF) {
657 /* flush extra msg hdrs */
658 while ((c = Getc(iob)) >= 0 && eom (c, iob))
659 ;
660 if (c >= 0)
661 (void) ungetc(c, iob);
662 }
663 }
664
665
666 void m_eomsbr (action)
667 int (*action) ();
668 {
669 if (eom_action = action) {
670 msg_style = MS_MSH;
671 *msg_delim = 0;
672 fdelimlen = 1;
673 delimend = fdelim;
674 } else {
675 msg_style = MS_MMDF;
676 msg_delim = (char *)fdelim + 1;
677 fdelimlen = strlen((char *)fdelim);
678 delimend = (unsigned char *)(msg_delim + edelimlen);
679 }
680 }
681
682 /* */
683
684 /* test for msg delimiter string */
685
686 int m_Eom (c, iob)
687 register int c;
688 register FILE *iob;
689 {
690 register long pos = 0L;
691 register int i;
692 char text[10];
693 #ifdef RPATHS
694 register char *cp;
695 #endif /* RPATHS */
696
697 pos = ftell (iob);
698
699 #ifdef CONTENT_LENGTH
700 if (msg_style == MS_UUCP && end_of_contents != -1) {
701 if (end_of_contents == pos) {
702 end_of_contents = -1;
703 if ((fread (text, sizeof *text, edelimlen, iob) == edelimlen)
704 && (strncmp (text, (char *)edelim, edelimlen) == 0)) {
705 #ifndef RPATHS
706 while ((c = getc (iob)) != '\n')
707 if (c < 0)
708 break;
709 #else /* RPATHS */
710 cp = unixbuf;
711 while ((c = getc (iob)) != '\n' && c >= 0)
712 *cp++ = c;
713 *cp = 0;
714 #endif /* RPATHS */
715 }
716 return 1;
717 }
718 /* we've read past the end of a message, this should never happen
719 * because of the other checks we do */
720 if (end_of_contents < pos) {
721 end_of_contents = -1;
722 adios(NULLCP,
723 "Content-Length: header broken, can't read mailbox\n");
724 }
725 return 0;
726 }
727 #endif
728
729 if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
730 || strncmp (text, (char *)edelim, edelimlen)) {
731 if (i == 0 && msg_style == MS_UUCP)
732 /* the final newline in the (brain damaged) unix-format
733 * maildrop is part of the delimitter - delete it.
734 */
735 return 1;
736
737 #ifdef notdef
738 (void) fseek (iob, pos, 0);
739 #else
740 (void) fseek (iob, (long)(pos-1), 0);
741 (void) getc (iob); /* should be OK */
742 #endif /* !notdef */
743 return 0;
744 }
745
746 #ifdef CONTENT_LENGTH
747 /* There's one extra special case to be considered here:
748 * content_length > 0. That we got here is because the
749 * message body starts with "From "
750 */
751 if (msg_style == MS_UUCP && content_length > 0) {
752 (void) fseek (iob, (long)(pos-1), 0);
753 (void) getc (iob); /* should be OK */
754 return 0;
755 }
756 #endif
757
758 if (msg_style == MS_UUCP) {
759 #ifndef RPATHS
760 while ((c = getc (iob)) != '\n')
761 if (c < 0)
762 break;
763 #else /* RPATHS */
764 cp = unixbuf;
765 while ((c = getc (iob)) != '\n' && c >= 0)
766 *cp++ = c;
767 *cp = 0;
768 #endif /* RPATHS */
769 }
770
771 return 1;
772 }
773
774 /* */
775
776 #ifdef RPATHS
777 char *unixline () {
778 register char *cp,
779 *dp,
780 *pp;
781 static char unixfrom[BUFSIZ];
782 int i;
783
784 pp = unixfrom;
785 if (cp = dp = index (unixbuf, ' ')) {
786 while (cp = index (cp + 1, 'r'))
787 if (strncmp (cp, "remote from ", 12) == 0) {
788 *cp = 0;
789 (void) sprintf (pp, "%s!", cp + 12);
790 pp += strlen (pp);
791 break;
792 }
793 if (cp == NULL)
794 cp = unixbuf + strlen (unixbuf);
795 #if 0
796 if ((cp -= 25) >= dp)
797 #else
798 /* On most of BSD systems, the date field length of UNIX From line
799 is 25, but it's not suitable for other systems. We should not
800 use this length. */
801 while (cp > dp && *--cp != ':')
802 ;
803 for (i = 0; i < 4 && cp > dp; i++) {
804 while (!isspace(*--cp))
805 ;
806 while (isspace(*(cp - 1)))
807 --cp;
808 }
809 if (cp >= dp)
810 #endif
811 *cp = 0;
812 }
813
814 (void) sprintf (pp, "%s\n", unixbuf);
815 unixbuf[0] = 0;
816 return unixfrom;
817 }
818 #endif /* RPATHS */
819
820 /* */
821
822 /* matchc: find the first occurrence of string pat in string str.
823 * We can't use the C library routine strstr because the string
824 * won't have a trailing NUL. See also the note about using a
825 * Boyer-Moore search on non-Vaxen (in the only place this fn
826 * is used...)
827 */
828 #if (vax && !lint)
829 asm(".align 1");
830 asm("_matchc: .word 0");
831 asm(" movq 4(ap),r0");
832 asm(" movq 12(ap),r2");
833 asm(" matchc r0,(r1),r2,(r3)");
834 asm(" beql 1f");
835 asm(" movl 4(ap),r3");
836 asm("1: subl3 4(ap),r3,r0");
837 asm(" ret");
838 #else
/*
 * Portable matchc: return the address of the first occurrence of the
 * patln-byte pattern pat within the strln bytes starting at str, or 0 if
 * there is none.  Neither argument needs to be NUL-terminated.
 */
static unsigned char *
matchc( patln, pat, strln, str )
    int patln;
    char *pat;
    int strln;
    register char *str;
{
    /* the pattern cannot start beyond this position and still fit */
    register char *last_start = str + strln - patln;
    register char *pat_end = pat + patln;
    register char first = *pat;
    register char *s;
    register char *p;

    for (; str <= last_start; str++) {
        if (*str != first)
            continue;

        /* first character matched; compare the remainder in place */
        s = str + 1;
        p = pat + 1;
        while (p < pat_end && *s == *p) {
            s++;
            p++;
        }
        if (p >= pat_end)
            return (unsigned char *) str;
    }
    return 0;
}
886 #endif
887
888 /* */
889
890 /*
891 * Locate character "term" in the next "cnt" characters of "src".
892 * If found, return its address, otherwise return 0.
893 */
894 #if (vax && !lint)
895 asm(".align 1");
896 asm("_locc: .word 0");
897 asm(" movq 4(ap),r0");
898 asm(" locc 12(ap),r0,(r1)");
899 asm(" beql 1f");
900 asm(" movl r1,r0");
901 asm("1: ret");
902 #else
/*
 * Portable locc: return the address of the first byte equal to term
 * among the cnt bytes starting at src, or 0 if there is none.  Nothing
 * is read when cnt is zero or negative (the old loop always examined
 * src[0]).
 */
static unsigned char *
locc( cnt, src, term )
    register int cnt;
    register unsigned char *src;
    register unsigned char term;
{
    for (; cnt > 0; cnt--, src++)
        if (*src == term)
            return src;

    return (unsigned char *)0;
}
913 #endif
914
915 /* */
916
917 #if !defined (BSD42) && !defined (bcopy)
/*
 * bcmp - compare the first length bytes of b1 and b2.
 * Returns 0 when they are identical (or length <= 0), 1 otherwise.
 */
int bcmp (b1, b2, length)
    register char *b1,
                  *b2;
    register int length;
{
    register int k;

    for (k = 0; k < length; k++)
        if (b1[k] != b2[k])
            return 1;

    return 0;
}
929
930
/*
 * bcopy - copy length bytes from b1 to b2.
 *
 * The regions may overlap: when the destination starts inside the
 * source the bytes are copied from the end backwards so none are
 * overwritten before they are read.  Always returns 0 (the return type
 * was an implicit int with no value; it is kept as int so existing
 * un-prototyped callers still agree with the definition).
 */
int
bcopy (b1, b2, length)
    register char *b1,
                  *b2;
    register int length;
{
    if (length > 0 && b2 > b1 && b2 < b1 + length) {
        /* destination overlaps the tail of the source */
        b1 += length;
        b2 += length;
        while (length-- > 0)
            *--b2 = *--b1;
    } else {
        while (length-- > 0)
            *b2++ = *b1++;
    }

    return 0;
}
939
940
/*
 * bzero - set length bytes starting at b to zero.
 *
 * Always returns 0 (the return type was an implicit int with no value;
 * it is kept as int so existing un-prototyped callers still agree with
 * the definition).
 */
int
bzero (b, length)
    register char *b;
    register int length;
{
    while (length-- > 0)
        *b++ = 0;

    return 0;
}
948 #endif /* not BSD42 */