Mercurial > hg > Applications > mh
comparison sbr/m_getfld.c @ 0:bce86c4163a3
Initial revision
author | kono |
---|---|
date | Mon, 18 Apr 2005 23:46:02 +0900 |
parents | |
children | 441a2190cfae |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bce86c4163a3 |
---|---|
1 /* m_getfld.c - read/parse a message */ | |
2 #ifndef lint | |
3 static char ident[] = "@(#)$Id$"; | |
4 #endif /* lint */ | |
5 | |
6 #include "../h/mh.h" | |
7 #include <stdio.h> | |
8 #include "../zotnet/mts.h" | |
9 #include <ctype.h> | |
10 | |
11 | |
12 /* This module has a long and checkered history. First, it didn't burst | |
13 maildrops correctly because it considered two CTRL-A:s in a row to be | |
14 an inter-message delimiter. It really is four CTRL-A:s followed by a | |
15 newline. Unfortunately, MMDF will convert this delimiter *inside* a | |
16 message to a CTRL-B followed by three CTRL-A:s and a newline. This | |
17 caused the old version of m_getfld() to declare eom prematurely. The | |
18 fix was a lot slower than | |
19 | |
20 c == '\001' && peekc (iob) == '\001' | |
21 | |
22 but it worked, and to increase generality, UUCP style maildrops could | |
23 be parsed as well. Unfortunately the speed issue finally caught up with | |
24 us since this routine is at the very heart of MH. | |
25 | |
26 To speed things up considerably, the routine Eom() was made an auxiliary | |
27 function called by the macro eom(). Unless we are bursting a maildrop, | |
28 the eom() macro returns FALSE saying we aren't at the end of the | |
29 message. | |
30 | |
31 The next thing to do is to read the mtstailor file and initialize | |
32 delimiter[] and delimlen accordingly... | |
33 | |
34 After mhl was made a built-in in msh, m_getfld() worked just fine | |
35 (using m_unknown() at startup). Until one day: a message which was | |
36 the result of a bursting was shown. Then, since the burst boundaries | |
37 aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary. | |
38 Very sad. The solution: introduce m_eomsbr(). This hook gets called | |
39 after the end of each line (since testing for eom involves an fseek()). | |
40 This worked fine, until one day: a message with no body portion arrived. | |
41 Then the | |
42 | |
43 while (eom (c = Getc (iob), iob)) | |
44 continue; | |
45 | |
46 loop caused m_getfld() to return FMTERR. So, that logic was changed to | |
47 check for (*eom_action) and act accordingly. | |
48 | |
49 This worked fine, until one day: someone didn't use four CTRL-A:s as | |
50 their delimiters. So, the bullet got bit and we read mts.h and | |
51 continue to struggle on. It's not that bad though, since the only time | |
52 the code gets executed is when inc (or msh) calls it, and both of these | |
53 have already called mts_init(). | |
54 | |
55 ------------------------ | |
56 (Written by Van Jacobson for the mh6 m_getfld, January, 1986): | |
57 | |
58 This routine was accounting for 60% of the cpu time used by most mh | |
59 programs. I spent a bit of time tuning and it now accounts for <10% | |
60 of the time used. Like any heavily tuned routine, it's a bit | |
61 complex and you want to be sure you understand everything that it's | |
62 doing before you start hacking on it. Let me try to emphasize | |
63 that: every line in this atrocity depends on every other line, | |
64 sometimes in subtle ways. You should understand it all, in detail, | |
65 before trying to change any part. If you do change it, test the | |
66 result thoroughly (I use a hand-constructed test file that exercises | |
67 all the ways a header name, header body, header continuation, | |
68 header-body separator, body line and body eom can align themselves | |
69 with respect to a buffer boundary). "Minor" bugs in this routine | |
70 result in garbaged or lost mail. | |
71 | |
72 If you hack on this and slow it down, I, my children and my | |
73 children's children will curse you. | |
74 | |
75 This routine gets used on three different types of files: normal, | |
76 single msg files, "packed" unix or mmdf mailboxes (when used by inc) | |
77 and packed, directoried bulletin board files (when used by msh). | |
78 The biggest impact of different file types is in "eom" testing. The | |
79 code has been carefully organized to test for eom at appropriate | |
80 times and at no other times (since the check is quite expensive). | |
81 I have tried to arrange things so that the eom check need only be | |
82 done on entry to this routine. Since an eom can only occur after a | |
83 newline, this is easy to manage for header fields. For the msg | |
84 body, we try to efficiently search the input buffer to see if it | |
85 contains the eom delimiter. If it does, we take up to the | |
86 delimiter, otherwise we take everything in the buffer. (The change | |
87 to the body eom/copy processing produced the most noticeable | |
88 performance difference, particularly for "inc" and "show".) | |
89 | |
90 There are three qualitatively different things this routine busts | |
91 out of a message: field names, field text and msg bodies. Field | |
92 names are typically short (~8 char) and the loop that extracts them | |
93 might terminate on a colon, newline or max width. I considered | |
94 using a Vax "scanc" to locate the end of the field followed by a | |
95 "bcopy" but the routine call overhead on a Vax is too large for this | |
96 to work on short names. If Berkeley ever makes "inline" part of the | |
97 C optimiser (so things like "scanc" turn into inline instructions) a | |
98 change here would be worthwhile. | |
99 | |
100 Field text is typically 60 - 100 characters so there's (barely) | |
101 a win in doing a routine call to something that does a "locc" | |
102 followed by a "bmove". About 30% of the fields have continuations | |
103 (usually the 822 "received:" lines) and each continuation generates | |
104 another routine call. "Inline" would be a big win here, as well. | |
105 | |
106 Messages, as of this writing, seem to come in two flavors: small | |
107 (~1K) and long (>2K). Most messages have 400 - 600 bytes of headers | |
108 so message bodies average at least a few hundred characters. | |
109 Assuming your system uses reasonably sized stdio buffers (1K or | |
110 more), this routine should be able to remove the body in large | |
111 (>500 byte) chunks. This makes the cost of a call to "bcopy" | |
112 small but there is a premium on checking for the eom in packed | |
113 maildrops. The eom pattern is always a simple string so we can | |
114 construct an efficient pattern matcher for it (e.g., a Vax "matchc" | |
115 instruction). Some thought went into recognizing the start of | |
116 an eom that has been split across two buffers. | |
117 | |
118 This routine wants to deal with large chunks of data so, rather | |
119 than "getc" into a local buffer, it uses stdio's buffer. If | |
120 you try to use it on a non-buffered file, you'll get what you | |
121 deserve. This routine "knows" that struct FILEs have a _ptr | |
122 and a _cnt to describe the current state of the buffer and | |
123 it knows that _filbuf ignores the _ptr & _cnt and simply fills | |
124 the buffer. If stdio on your system doesn't work this way, you | |
125 may have to make small changes in this routine. | |
126 | |
127 This routine also "knows" that an EOF indication on a stream is | |
128 "sticky" (i.e., you will keep getting EOF until you reposition the | |
129 stream). If your system doesn't work this way it is broken and you | |
130 should complain to the vendor. As a consequence of the sticky | |
131 EOF, this routine will never return any kind of EOF status when | |
132 there is data in "name" or "buf". | |
133 */ | |
134 | |
135 | |
136 #define Getc(iob) getc(iob) /* every single-character read in this file goes through this alias for getc() */ | |
137 #define eom(c,iob) (msg_style != MS_DEFAULT && \ | |
138 (((c) == *msg_delim && m_Eom(c,iob)) ||\ | |
139 (eom_action && (*eom_action)(c)))) /* always false for MS_DEFAULT; otherwise true when c equals the first delimiter byte and m_Eom() confirms it, or when the eom_action hook returns non-zero */ | |
140 | |
141 static unsigned char *matchc(); /* substring search, defined near the end of this file */ | |
142 static unsigned char *locc(); /* bounded single-byte search, defined near the end of this file */ | |
143 | |
144 static unsigned char **pat_map; /* 256-entry table allocated in m_unknown(): delimiter byte -> a position of that byte inside the delimiter string */ | |
145 | |
146 extern int msg_count; /* defined in sbr/m_msgdef.c = 0 | |
147 * disgusting hack for "inc" so it can | |
148 * know how many characters were stuffed | |
149 * in the buffer on the last call (see | |
150 * comments in uip/scansbr.c) */ | |
151 | |
152 extern int msg_style; /* defined in sbr/m_msgdef.c = MS_DEFAULT */ | |
153 /* | |
154 * The "full" delimiter string for a packed maildrop consists | |
155 * of a newline followed by the actual delimiter. E.g., the | |
156 * full string for a Unix maildrop would be: "\n\nFrom ". | |
157 * "Fdelim" points to the start of the full string and is used | |
158 * in the BODY case of the main routine to search the buffer for | |
159 * a possible eom. Msg_delim points to the first character of | |
160 * the actual delim. string (i.e., fdelim+1). Edelim | |
161 * points to the 2nd character of actual delimiter string. It | |
162 * is used in m_Eom because the first character of the string | |
163 * has been read and matched before m_Eom is called. | |
164 */ | |
165 extern char *msg_delim; /* defined in sbr/m_msgdef.c = "" */ | |
166 static unsigned char *fdelim; /* set by m_unknown(): points at the '\n' that precedes the delimiter; the byte before it is a '\0' sentinel */ | |
167 static unsigned char *delimend; /* msg_delim + edelimlen after m_unknown(); m_eomsbr() may repoint it */ | |
168 static int fdelimlen; /* strlen(delimiter) + 1 after m_unknown(), i.e. including the leading '\n' */ | |
169 static unsigned char *edelim; /* msg_delim + 1 */ | |
170 static int edelimlen; /* strlen(delimiter) - 1 */ | |
171 | |
172 #ifdef CONTENT_LENGTH | |
173 static int content_length = -1; /* value parsed from the most recent Content-Length: header by m_getfld(); -1 when none is pending */ | |
174 static long end_of_contents = -1; /* file offset computed from content_length at the header/body separator; m_Eom() compares it with ftell(); -1 when unset */ | |
175 #endif | |
176 | |
177 static int (*eom_action) () = NULL; /* optional end-of-message hook installed through m_eomsbr() */ | |
178 | |
179 #ifdef FILE__PTR | |
180 #define _ptr __ptr | |
181 #define _cnt __cnt | |
182 #endif | |
183 | |
184 #ifdef _FSTDIO | |
185 #define _ptr _p /* Gag */ | |
186 #define _cnt _r /* Retch */ | |
187 #define _filbuf __srget /* Puke */ | |
188 #endif | |
189 | |
190 /* */ | |
191 | |
192 m_getfld (state, name, buf, bufsz, iob) | |
193 int state; | |
194 int bufsz; | |
195 unsigned char *name, | |
196 *buf; | |
197 register FILE *iob; | |
198 { | |
199 register unsigned char *cp; | |
200 register unsigned char *bp; | |
201 register unsigned char *ep; | |
202 register unsigned char *sp; | |
203 register int cnt; | |
204 register int c; | |
205 register int i; | |
206 register int j; | |
207 | |
208 #ifdef CONTENT_LENGTH | |
209 /* | |
210 * When starting to read from a new file, we have to reset the state, | |
211 * but only if the state wasn't reset. That may save us a number of | |
212 * lseeks. | |
213 */ | |
214 if (state == FLD && | |
215 (content_length != -1 || end_of_contents != -1) && | |
216 ftell(iob) == 0) | |
217 end_of_contents = content_length = -1; | |
218 #endif | |
219 if ((c = Getc(iob)) < 0) { | |
220 msg_count = 0; | |
221 *buf = 0; | |
222 return FILEEOF; | |
223 } | |
224 if (eom (c, iob)) { | |
225 if (! eom_action) { | |
226 /* flush null messages */ | |
227 while ((c = Getc(iob)) >= 0 && eom (c, iob)) | |
228 ; | |
229 if (c >= 0) | |
230 (void) ungetc(c, iob); | |
231 } | |
232 msg_count = 0; | |
233 *buf = 0; | |
234 return FILEEOF; | |
235 } | |
236 | |
237 switch (state) { | |
238 case FLDEOF: | |
239 case BODYEOF: | |
240 case FLD: | |
241 if (c == '\n' || c == '-') { | |
242 /* we hit the header/body separator */ | |
243 while (c != '\n' && (c = Getc(iob)) >= 0) | |
244 ; | |
245 | |
246 #ifdef CONTENT_LENGTH | |
247 /* | |
248 * When we've found a content-length header, we're | |
249 * going to use it to tell where the message boundary | |
250 * is, if it is a valid mesage boundary. | |
251 * There can be a number of cases: | |
252 * - no bytes after <content-length> bytes: the usual format | |
253 * of a message in an MH folder. | |
254 * - only a newline - last message in mail drop. | |
255 * - "\nFrom " - beginning of next message | |
256 * - other - ignore Content-Length header, but issue warning | |
257 */ | |
258 if (msg_style == MS_UUCP && content_length != -1) { | |
259 long here = ftell(iob); | |
260 static char delim[] = "\nFrom "; | |
261 char buf[sizeof(delim)-1]; | |
262 int cnt; | |
263 | |
264 /* compute position of character after file */ | |
265 end_of_contents = here + content_length + 1; | |
266 content_length = -1; | |
267 /* And see whether this is a From header or eof. */ | |
268 fseek(iob, end_of_contents - 1, 0); | |
269 cnt = fread(buf, sizeof(char), sizeof(buf), iob); | |
270 if (cnt != 0 && (cnt != 1 || buf[0] != '\n') && | |
271 (cnt != sizeof(buf) || | |
272 strncmp(buf,delim, sizeof(buf)) != 0)) { | |
273 advise (NULLCP, "invalid Content-Length: header\n"); | |
274 end_of_contents = -1; | |
275 } | |
276 fseek(iob, here, 0); | |
277 } | |
278 #endif | |
279 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) { | |
280 if (! eom_action) { | |
281 /* flush null messages */ | |
282 while ((c = Getc(iob)) >= 0 && eom (c, iob)) | |
283 ; | |
284 if (c >= 0) | |
285 (void) ungetc(c, iob); | |
286 } | |
287 msg_count = 0; | |
288 *buf = 0; | |
289 return FILEEOF; | |
290 } | |
291 state = BODY; | |
292 goto body; | |
293 } | |
294 /* | |
295 * get the name of this component. take characters up | |
296 * to a ':', a newline or NAMESZ-1 characters, whichever | |
297 * comes first. | |
298 */ | |
299 cp = name; i = NAMESZ - 1; | |
300 for (;;) { | |
301 #ifdef _STDIO_USES_IOSTREAM | |
302 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1; | |
303 j = (cnt = ((long) iob->_IO_read_end - (long) iob->_IO_read_ptr) + 1) < i? | |
304 cnt: i; | |
305 #else | |
306 bp = sp = (unsigned char *) iob->_ptr - 1; | |
307 j = (cnt = iob->_cnt+1) < i ? cnt : i; | |
308 #endif | |
309 while (--j >= 0 && (c = *bp++) != ':' && c != '\n') | |
310 *cp++ = c; | |
311 | |
312 j = bp - sp; | |
313 if ((cnt -= j) <= 0) { | |
314 #ifdef _STDIO_USES_IOSTREAM | |
315 iob->_IO_read_ptr = iob->_IO_read_end; | |
316 if (__underflow((struct _IO_FILE *) iob) == EOF) { | |
317 #else | |
318 #ifdef FILBUF_ADJ | |
319 iob -> _ptr += iob -> _cnt; | |
320 iob -> _cnt = 0; | |
321 #endif /* FILBUF_ADJ */ | |
322 if (_filbuf(iob) == EOF) { | |
323 #endif | |
324 *cp = *buf = 0; | |
325 advise (NULLCP, "eof encountered in field \"%s\"", | |
326 name); | |
327 return FMTERR; | |
328 } | |
329 #ifdef _STDIO_USES_IOSTREAM | |
330 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */ | |
331 #endif | |
332 } else { | |
333 #ifdef _STDIO_USES_IOSTREAM | |
334 iob->_IO_read_ptr = bp + 1; | |
335 #else | |
336 iob->_ptr = bp + 1; | |
337 iob->_cnt = cnt - 1; | |
338 #endif | |
339 } | |
340 if (c == ':') | |
341 break; | |
342 | |
343 /* | |
344 * something went wrong. possibilities are: | |
345 * . hit a newline (error) | |
346 * . got more than namesz chars. (error) | |
347 * . hit the end of the buffer. (loop) | |
348 */ | |
349 if (c == '\n') { | |
350 *cp = *buf = 0; | |
351 advise (NULLCP, "eol encountered in field \"%s\"", name); | |
352 state = FMTERR; | |
353 goto finish; | |
354 } | |
355 if ((i -= j) <= 0) { | |
356 *cp = *buf = 0; | |
357 advise (NULLCP, "field name \"%s\" exceeds %d bytes", | |
358 name, NAMESZ - 1); | |
359 state = LENERR; | |
360 goto finish; | |
361 } | |
362 } | |
363 | |
364 while (isspace (*--cp) && cp >= name) | |
365 ; | |
366 *++cp = 0; | |
367 /* fall through */ | |
368 | |
369 case FLDPLUS: | |
370 /* | |
371 * get (more of) the text of a field. take | |
372 * characters up to the end of this field (newline | |
373 * followed by non-blank) or bufsz-1 characters. | |
374 */ | |
375 cp = buf; i = bufsz-1; | |
376 for (;;) { | |
377 #ifdef _STDIO_USES_IOSTREAM | |
378 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr; | |
379 bp = (unsigned char *) --iob->_IO_read_ptr; | |
380 #else | |
381 cnt = iob->_cnt++; bp = (unsigned char *) --iob->_ptr; | |
382 #endif | |
383 c = cnt < i ? cnt : i; | |
384 while (ep = locc( c, bp, '\n' )) { | |
385 /* | |
386 * if we hit the end of this field, return. | |
387 */ | |
388 if ((j = *++ep) != ' ' && j != '\t') { | |
389 #ifdef _STDIO_USES_IOSTREAM | |
390 j = ep - (unsigned char *) iob->_IO_read_ptr; | |
391 (void) bcopy( iob->_IO_read_ptr, cp, j); | |
392 iob->_IO_read_ptr = ep; | |
393 #else | |
394 j = ep - (unsigned char *) iob->_ptr; | |
395 (void) bcopy( iob->_ptr, cp, j); | |
396 iob->_ptr = ep; iob->_cnt -= j; | |
397 #endif | |
398 cp += j; | |
399 state = FLD; | |
400 goto finish; | |
401 } | |
402 c -= ep - bp; bp = ep; | |
403 } | |
404 /* | |
405 * end of input or dest buffer - copy what we've found. | |
406 */ | |
407 #ifdef _STDIO_USES_IOSTREAM | |
408 c += bp - (unsigned char *) iob->_IO_read_ptr; | |
409 (void) bcopy( iob->_IO_read_ptr, cp, c); | |
410 #else | |
411 c += bp - (unsigned char *) iob->_ptr; | |
412 (void) bcopy( iob->_ptr, cp, c); | |
413 #endif | |
414 i -= c; cp += c; | |
415 if (i <= 0) { | |
416 /* the dest buffer is full */ | |
417 #ifdef _STDIO_USES_IOSTREAM | |
418 iob->_IO_read_ptr += c; | |
419 #else | |
420 iob->_cnt -= c; iob->_ptr += c; | |
421 #endif | |
422 state = FLDPLUS; | |
423 break; | |
424 } | |
425 /* | |
426 * There's one character left in the input buffer. | |
427 * Copy it & fill the buffer. If the last char | |
428 * was a newline and the next char is not whitespace, | |
429 * this is the end of the field. Otherwise loop. | |
430 */ | |
431 --i; | |
432 #ifdef _STDIO_USES_IOSTREAM | |
433 *cp++ = j = *(iob->_IO_read_ptr + c); | |
434 iob->_IO_read_ptr = iob->_IO_read_end; | |
435 c = __underflow((struct _IO_FILE *) iob); | |
436 iob->_IO_read_ptr++; /* NOT automatic! */ | |
437 #else | |
438 *cp++ = j = *(iob->_ptr + c); | |
439 #ifdef FILBUF_ADJ | |
440 iob -> _ptr += iob -> _cnt; | |
441 iob -> _cnt = 0; | |
442 #endif /* FILBUF_ADJ */ | |
443 c = _filbuf(iob); | |
444 #endif | |
445 /* bugfix, 03/1998. | |
446 * If we encounter EOF halfway through reading the value (ie there is | |
447 * no trailing \n in the field) then __underflow() above returns EOF. | |
448 * Previously we didn't check for this, with the result that we then | |
449 * attempt to read from the stream and wind up segfaulting doing a | |
450 * bcopy() with length parameter -1. Instead, we just append a newline | |
451 * to what we've read, so the following conditional will pick it up | |
452 * and return the field value. Then the EOF is actually dealt with | |
453 * the next time this function is called. | |
454 * The reason for appending \n is because the callers are known to | |
455 * work with 'name: value\n', and setting j to '\n' is known to exit | |
456 * in the right way, and the less we change the less likely | |
457 * we are to introduce new bugs. And I'm scared of the curse in the | |
458 * comments at the top of this file :-> | |
459 * -- PMM (pmaydell@chiark.greenend.org.uk) | |
460 */ | |
461 if (c == EOF && j != '\0' && j != '\n') { | |
462 *cp++ = j = '\n'; | |
463 advise (NULLCP, "file missing final eol"); | |
464 } | |
465 /* bugfix end */ | |
466 if ((j == '\0' || j == '\n') && c != ' ' && c != '\t') { | |
467 if (c != EOF) | |
468 #ifdef _STDIO_USES_IOSTREAM | |
469 --iob->_IO_read_ptr; | |
470 #else | |
471 --iob->_ptr, ++iob->_cnt; | |
472 #endif | |
473 state = FLD; | |
474 break; | |
475 } | |
476 } | |
477 break; | |
478 | |
479 case BODY: | |
480 body: | |
481 /* | |
482 * get the message body up to bufsz characters or the | |
483 * end of the message. Sleazy hack: if bufsz is negative | |
484 * we assume that we were called to copy directly into | |
485 * the output buffer and we don't add an eos. | |
486 */ | |
487 i = (bufsz < 0) ? -bufsz : bufsz-1; | |
488 #ifdef _STDIO_USES_IOSTREAM | |
489 bp = (unsigned char *) --iob->_IO_read_ptr; | |
490 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr; | |
491 #else | |
492 bp = (unsigned char *) --iob->_ptr; cnt = ++iob->_cnt; | |
493 #endif | |
494 c = (cnt < i ? cnt : i); | |
495 if (msg_style != MS_DEFAULT && c > 1) { | |
496 /* | |
497 * packed maildrop - only take up to the (possible) | |
498 * start of the next message. This "matchc" should | |
499 * probably be a Boyer-Moore matcher for non-vaxen, | |
500 * particularly since we have the alignment table | |
501 * all built for the end-of-buffer test (next). | |
502 * But our vax timings indicate that the "matchc" | |
503 * instruction is 50% faster than a carefully coded | |
504 * B.M. matcher for most strings. (So much for elegant | |
505 * algorithms vs. brute force.) Since I (currently) | |
506 * run MH on a vax, we use the matchc instruction. --vj | |
507 */ | |
508 if (ep = matchc( fdelimlen, fdelim, c, bp ) ) | |
509 c = ep - bp + 1; | |
510 else { | |
511 /* | |
512 * There's no delim in the buffer but there may be | |
513 * a partial one at the end. If so, we want to leave | |
514 * it so the "eom" check on the next call picks it up. | |
515 * Use a modified Boyer-Moore matcher to make this | |
516 * check relatively cheap. The first "if" figures | |
517 * out what position in the pattern matches the last | |
518 * character in the buffer. The inner "while" matches | |
519 * the pattern against the buffer, backwards starting | |
520 * at that position. Note that unless the buffer | |
521 * ends with one of the characters in the pattern | |
522 * (excluding the first and last), we do only one test. | |
523 */ | |
524 ep = bp + c - 1; | |
525 if (sp = pat_map[*ep & 0x00ff]) { | |
526 do { | |
527 cp = sp; | |
528 while (*--ep == *--cp) | |
529 ; | |
530 if (cp < fdelim) { | |
531 if (ep >= bp) | |
532 /* | |
533 * ep < bp means that all the buffer | |
534 * contains is a prefix of delim. | |
535 * If this prefix is really a delim, the | |
536 * m_eom call at entry should have found | |
537 * it. Thus it's not a delim and we can | |
538 * take all of it. | |
539 */ | |
540 c = (ep - bp) + 2; | |
541 break; | |
542 } | |
543 /* try matching one less char of delim string */ | |
544 ep = bp + c - 1; | |
545 } while (--sp > fdelim); | |
546 } | |
547 } | |
548 } | |
549 (void) bcopy( bp, buf, c ); | |
550 #ifdef _STDIO_USES_IOSTREAM | |
551 iob->_IO_read_ptr += c; | |
552 #else | |
553 iob->_cnt -= c; | |
554 iob->_ptr += c; | |
555 #endif | |
556 if (bufsz < 0) { | |
557 msg_count = c; | |
558 return (state); | |
559 } | |
560 cp = buf + c; | |
561 break; | |
562 | |
563 default: | |
564 adios (NULLCP, "m_getfld() called with bogus state of %d", state); | |
565 } | |
566 finish:; | |
567 *cp = 0; | |
568 msg_count = cp - buf; | |
569 | |
570 #ifdef CONTENT_LENGTH | |
571 /* Check whether this was a Content-Length header */ | |
572 if (msg_style == MS_UUCP && state == FLD && | |
573 uleq((char*)"content-length", (char*) name)) { | |
574 content_length = atoi(buf); | |
575 /* This value is computed when end-of-headers is detected */ | |
576 end_of_contents = -1; | |
577 } | |
578 #endif | |
579 return (state); | |
580 } | |
581 | |
582 /* */ | |
583 | |
584 #ifdef RPATHS | |
585 static char unixbuf[BUFSIZ] = ""; /* remainder of the most recent UUCP "From " line (the text after "From "), stored by m_unknown() and m_Eom(); unixline() consumes it and resets it to "" */ | |
586 #endif /* RPATHS */ | |
587 | |
588 void | |
589 m_unknown(iob) | |
590 register FILE *iob; | |
591 { | |
592 register int c; | |
593 register long pos; | |
594 char text[10]; | |
595 register char *cp; | |
596 register char *delimstr; | |
597 | |
598 msg_style = MS_UNKNOWN; | |
599 | |
600 /* Figure out what the message delimitter string is for this | |
601 * maildrop. (This used to be part of m_Eom but I didn't like | |
602 * the idea of an "if" statement that could only succeed on the | |
603 * first call to m_Eom getting executed on each call, i.e., at | |
604 * every newline in the message). | |
605 * | |
606 * If the first line of the maildrop is a Unix "from" line, we say the | |
607 * style is UUCP and eat the rest of the line. Otherwise we say the style | |
608 * is MMDF & look for the delimiter string specified when MH was built | |
609 * (or from the mtstailor file). | |
610 */ | |
611 pos = ftell (iob); | |
612 if (fread (text, sizeof *text, 5, iob) == 5 | |
613 && strncmp (text, "From ", 5) == 0) { | |
614 msg_style = MS_UUCP; | |
615 delimstr = "\nFrom "; | |
616 #ifndef RPATHS | |
617 while ((c = getc (iob)) != '\n' && c >= 0) | |
618 ; | |
619 #else /* RPATHS */ | |
620 cp = unixbuf; | |
621 while ((c = getc (iob)) != '\n') | |
622 *cp++ = c; | |
623 *cp = 0; | |
624 #endif /* RPATHS */ | |
625 } else { | |
626 /* not a Unix style maildrop */ | |
627 (void) fseek (iob, pos, 0); | |
628 if (mmdlm2 == NULLCP || *mmdlm2 == 0) | |
629 mmdlm2 = "\001\001\001\001\n"; | |
630 delimstr = mmdlm2; | |
631 msg_style = MS_MMDF; | |
632 } | |
633 c = strlen (delimstr); | |
634 fdelim = (unsigned char *)malloc((unsigned)c + 3); | |
635 *fdelim++ = '\0'; | |
636 *fdelim = '\n'; | |
637 msg_delim = (char *)fdelim+1; | |
638 edelim = (unsigned char *)msg_delim+1; | |
639 fdelimlen = c + 1; | |
640 edelimlen = c - 1; | |
641 (void)strcpy(msg_delim, delimstr); | |
642 delimend = (unsigned char *)msg_delim + edelimlen; | |
643 if (edelimlen <= 1) | |
644 adios (NULLCP, "maildrop delimiter must be at least 2 bytes"); | |
645 /* | |
646 * build a Boyer-Moore end-position map for the matcher in m_getfld. | |
647 * N.B. - we don't match just the first char (since it's the newline | |
648 * separator) or the last char (since the matchc would have found it | |
649 * if it was a real delim). | |
650 */ | |
651 pat_map = (unsigned char **) calloc (256, sizeof (unsigned char *)); | |
652 | |
653 for (cp = (char *)fdelim + 1; cp < (char *)delimend; cp++ ) | |
654 pat_map[*cp] = (unsigned char *)cp; | |
655 | |
656 if (msg_style == MS_MMDF) { | |
657 /* flush extra msg hdrs */ | |
658 while ((c = Getc(iob)) >= 0 && eom (c, iob)) | |
659 ; | |
660 if (c >= 0) | |
661 (void) ungetc(c, iob); | |
662 } | |
663 } | |
664 | |
665 | |
666 void m_eomsbr (action) | |
667 int (*action) (); | |
668 { | |
669 if (eom_action = action) { | |
670 msg_style = MS_MSH; | |
671 *msg_delim = 0; | |
672 fdelimlen = 1; | |
673 delimend = fdelim; | |
674 } else { | |
675 msg_style = MS_MMDF; | |
676 msg_delim = (char *)fdelim + 1; | |
677 fdelimlen = strlen((char *)fdelim); | |
678 delimend = (unsigned char *)(msg_delim + edelimlen); | |
679 } | |
680 } | |
681 | |
682 /* */ | |
683 | |
684 /* test for msg delimiter string */ | |
685 | |
686 int m_Eom (c, iob) | |
687 register int c; | |
688 register FILE *iob; | |
689 { | |
690 register long pos = 0L; | |
691 register int i; | |
692 char text[10]; | |
693 #ifdef RPATHS | |
694 register char *cp; | |
695 #endif /* RPATHS */ | |
696 | |
697 pos = ftell (iob); | |
698 | |
699 #ifdef CONTENT_LENGTH | |
700 if (msg_style == MS_UUCP && end_of_contents != -1) { | |
701 if (end_of_contents == pos) { | |
702 end_of_contents = -1; | |
703 if ((fread (text, sizeof *text, edelimlen, iob) == edelimlen) | |
704 && (strncmp (text, (char *)edelim, edelimlen) == 0)) { | |
705 #ifndef RPATHS | |
706 while ((c = getc (iob)) != '\n') | |
707 if (c < 0) | |
708 break; | |
709 #else /* RPATHS */ | |
710 cp = unixbuf; | |
711 while ((c = getc (iob)) != '\n' && c >= 0) | |
712 *cp++ = c; | |
713 *cp = 0; | |
714 #endif /* RPATHS */ | |
715 } | |
716 return 1; | |
717 } | |
718 /* we've read past the end of a message, this should never happen | |
719 * because of the other checks we do */ | |
720 if (end_of_contents < pos) { | |
721 end_of_contents = -1; | |
722 adios(NULLCP, | |
723 "Content-Length: header broken, can't read mailbox\n"); | |
724 } | |
725 return 0; | |
726 } | |
727 #endif | |
728 | |
729 if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen | |
730 || strncmp (text, (char *)edelim, edelimlen)) { | |
731 if (i == 0 && msg_style == MS_UUCP) | |
732 /* the final newline in the (brain damaged) unix-format | |
733 * maildrop is part of the delimitter - delete it. | |
734 */ | |
735 return 1; | |
736 | |
737 #ifdef notdef | |
738 (void) fseek (iob, pos, 0); | |
739 #else | |
740 (void) fseek (iob, (long)(pos-1), 0); | |
741 (void) getc (iob); /* should be OK */ | |
742 #endif /* !notdef */ | |
743 return 0; | |
744 } | |
745 | |
746 #ifdef CONTENT_LENGTH | |
747 /* There's one extra special case to be considered here: | |
748 * content_length > 0. That we got here is because the | |
749 * message body starts with "From " | |
750 */ | |
751 if (msg_style == MS_UUCP && content_length > 0) { | |
752 (void) fseek (iob, (long)(pos-1), 0); | |
753 (void) getc (iob); /* should be OK */ | |
754 return 0; | |
755 } | |
756 #endif | |
757 | |
758 if (msg_style == MS_UUCP) { | |
759 #ifndef RPATHS | |
760 while ((c = getc (iob)) != '\n') | |
761 if (c < 0) | |
762 break; | |
763 #else /* RPATHS */ | |
764 cp = unixbuf; | |
765 while ((c = getc (iob)) != '\n' && c >= 0) | |
766 *cp++ = c; | |
767 *cp = 0; | |
768 #endif /* RPATHS */ | |
769 } | |
770 | |
771 return 1; | |
772 } | |
773 | |
774 /* */ | |
775 | |
776 #ifdef RPATHS | |
777 char *unixline () { | |
778 register char *cp, | |
779 *dp, | |
780 *pp; | |
781 static char unixfrom[BUFSIZ]; | |
782 int i; | |
783 | |
784 pp = unixfrom; | |
785 if (cp = dp = index (unixbuf, ' ')) { | |
786 while (cp = index (cp + 1, 'r')) | |
787 if (strncmp (cp, "remote from ", 12) == 0) { | |
788 *cp = 0; | |
789 (void) sprintf (pp, "%s!", cp + 12); | |
790 pp += strlen (pp); | |
791 break; | |
792 } | |
793 if (cp == NULL) | |
794 cp = unixbuf + strlen (unixbuf); | |
795 #if 0 | |
796 if ((cp -= 25) >= dp) | |
797 #else | |
798 /* On most of BSD systems, the date field length of UNIX From line | |
799 is 25, but it's not suitable for other systems. We should not | |
800 use this length. */ | |
801 while (cp > dp && *--cp != ':') | |
802 ; | |
803 for (i = 0; i < 4 && cp > dp; i++) { | |
804 while (!isspace(*--cp)) | |
805 ; | |
806 while (isspace(*(cp - 1))) | |
807 --cp; | |
808 } | |
809 if (cp >= dp) | |
810 #endif | |
811 *cp = 0; | |
812 } | |
813 | |
814 (void) sprintf (pp, "%s\n", unixbuf); | |
815 unixbuf[0] = 0; | |
816 return unixfrom; | |
817 } | |
818 #endif /* RPATHS */ | |
819 | |
820 /* */ | |
821 | |
822 /* matchc: find the first occurrence of string pat in string str. | |
823 * We can't use the C library routine strstr because the string | |
824 * won't have a trailing NUL. See also the note about using a | |
825 * Boyer-Moore search on non-Vaxen (in the only place this fn | |
826 * is used...) | |
827 */ | |
828 #if (vax && !lint) | |
829 asm(".align 1"); | |
830 asm("_matchc: .word 0"); | |
831 asm(" movq 4(ap),r0"); | |
832 asm(" movq 12(ap),r2"); | |
833 asm(" matchc r0,(r1),r2,(r3)"); | |
834 asm(" beql 1f"); | |
835 asm(" movl 4(ap),r3"); | |
836 asm("1: subl3 4(ap),r3,r0"); | |
837 asm(" ret"); | |
838 #else | |
/*
 * matchc: find the first occurrence of the "patln"-byte pattern "pat"
 * inside the "strln"-byte window starting at "str".  Neither needs a
 * trailing NUL.  Returns a pointer to the start of the match, or a null
 * pointer when there is none.  An empty pattern, or one longer than the
 * window, never matches and nothing is read.
 *
 * Kept as an old-style definition so it stays compatible with the
 * unprototyped "static unsigned char *matchc();" declaration earlier in
 * the file.
 */
static unsigned char *
matchc( patln, pat, strln, str )
int patln;
char *pat;
int strln;
register char *str;
{
    register char *last;
    register int k;

    if (patln <= 0 || strln < patln)
        return (unsigned char *)0;

    /* last position at which the whole pattern still fits in the window */
    last = str + (strln - patln);

    for (; str <= last; str++) {
        if (*str != *pat)
            continue;

        /* first byte matches - compare the rest of the pattern */
        for (k = 1; k < patln && str[k] == pat[k]; k++)
            ;
        if (k >= patln)
            return (unsigned char *)str;
    }

    return (unsigned char *)0;
}
886 #endif | |
887 | |
888 /* */ | |
889 | |
890 /* | |
891 * Locate character "term" in the next "cnt" characters of "src". | |
892 * If found, return its address, otherwise return 0. | |
893 */ | |
894 #if (vax && !lint) | |
895 asm(".align 1"); | |
896 asm("_locc: .word 0"); | |
897 asm(" movq 4(ap),r0"); | |
898 asm(" locc 12(ap),r0,(r1)"); | |
899 asm(" beql 1f"); | |
900 asm(" movl r1,r0"); | |
901 asm("1: ret"); | |
902 #else | |
/*
 * locc: look for the byte "term" in the first "cnt" bytes at "src".
 * Returns the address of the first match, or a null pointer when the
 * byte does not occur in that window.  A non-positive "cnt" describes an
 * empty window: nothing is read and a null pointer is returned.
 *
 * Kept as an old-style definition so it stays compatible with the
 * unprototyped "static unsigned char *locc();" declaration earlier in
 * the file.
 */
static unsigned char *
locc( cnt, src, term )
register int cnt;
register unsigned char *src;
register unsigned char term;
{
    for (; cnt > 0; cnt--, src++)
        if (*src == term)
            return src;

    return (unsigned char *)0;
}
913 #endif | |
914 | |
915 /* */ | |
916 | |
917 #if !defined (BSD42) && !defined (bcopy) | |
/*
 * bcmp: compare the first "length" bytes of b1 and b2.
 * Returns 0 when they are all equal (or when length is not positive),
 * 1 as soon as a differing byte is found.
 */
int bcmp (b1, b2, length)
register char *b1,
              *b2;
register int length;
{
    register int k;

    for (k = 0; k < length; k++) {
        if (b1[k] != b2[k])
            return 1;
    }

    return 0;
}
929 | |
930 | |
/*
 * bcopy: copy "length" bytes from b1 to b2.
 * The regions may overlap: when the destination starts inside the
 * source the copy runs from the last byte backwards, so the source is
 * never overwritten before it has been read.  A non-positive length
 * copies nothing.  Always returns 0 (the return type is the "int" the
 * old definition declared implicitly).
 */
int
bcopy (b1, b2, length)
register char *b1,
              *b2;
register int length;
{
    if (length <= 0 || b1 == b2)
        return 0;

    if (b2 < b1 || b2 >= b1 + length) {
        /* destination does not start inside the source: copy forwards */
        while (length-- > 0)
            *b2++ = *b1++;
    } else {
        /* destination starts inside the source: copy backwards */
        b1 += length;
        b2 += length;
        while (length-- > 0)
            *--b2 = *--b1;
    }

    return 0;
}
939 | |
940 | |
/*
 * bzero: set the first "length" bytes at "b" to zero.
 * A non-positive length leaves the memory untouched.  Always returns 0
 * (the return type is the "int" the old definition declared implicitly).
 */
int
bzero (b, length)
register char *b;
register int length;
{
    while (length-- > 0)
        *b++ = 0;

    return 0;
}
948 #endif /* not BSD42 */ |