X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=mailinfo.c;h=5b6c2157ede415e019099098f2d0dc522b1e7a27;hb=fb6a9f93d39e4e5fdb83673a927f71a34e9fb7c0;hp=cb853df993f37d74576e308008379d709361a447;hpb=8fc66df237afce0b4318657f166b3583831949f3;p=git.git diff --git a/mailinfo.c b/mailinfo.c index cb853df9..5b6c2157 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -7,16 +7,16 @@ #include #include #include +#ifndef NO_ICONV #include - -#ifdef NO_STRCASESTR -extern char *gitstrcasestr(const char *haystack, const char *needle); #endif +#include "git-compat-util.h" +#include "cache.h" static FILE *cmitmsg, *patchfile; static int keep_subject = 0; -static int metainfo_utf8 = 0; +static char *metainfo_charset = NULL; static char line[1000]; static char date[1000]; static char name[1000]; @@ -42,13 +42,46 @@ static char *sanity_check(char *name, char *email) return name; } -static int handle_from(char *line) +static int bogus_from(char *line) +{ + /* John Doe */ + char *bra, *ket, *dst, *cp; + + /* This is fallback, so do not bother if we already have an + * e-mail address. + */ + if (*email) + return 0; + + bra = strchr(line, '<'); + if (!bra) + return 0; + ket = strchr(bra, '>'); + if (!ket) + return 0; + + for (dst = email, cp = bra+1; cp < ket; ) + *dst++ = *cp++; + *dst = 0; + for (cp = line; isspace(*cp); cp++) + ; + for (bra--; isspace(*bra); bra--) + *bra = 0; + cp = sanity_check(cp, email); + strcpy(name, cp); + return 1; +} + +static int handle_from(char *in_line) { - char *at = strchr(line, '@'); + char line[1000]; + char *at; char *dst; + strcpy(line, in_line); + at = strchr(line, '@'); if (!at) - return 0; + return bogus_from(line); /* * If we already have one email, don't take any confusing lines @@ -207,38 +240,46 @@ static int eatspace(char *line) #define SEEN_FROM 01 #define SEEN_DATE 02 #define SEEN_SUBJECT 04 +#define SEEN_BOGUS_UNIX_FROM 010 +#define SEEN_PREFIX 020 /* First lines of body can have From:, Date:, and Subject: */ -static int handle_inbody_header(int *seen, char *line) +static void handle_inbody_header(int *seen, char *line) { + if (!memcmp(">From", line, 5) && isspace(line[5])) { + if (!(*seen & SEEN_BOGUS_UNIX_FROM)) { + *seen |= SEEN_BOGUS_UNIX_FROM; + return; + } + } if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM; - return 1; + return; } } if (!memcmp("Date:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_DATE)) { handle_date(line+6); *seen |= SEEN_DATE; - return 1; + return; } } if (!memcmp("Subject:", line, 8) && isspace(line[8])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line+9); *seen |= SEEN_SUBJECT; - return 1; + return; } } if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line); *seen |= SEEN_SUBJECT; - return 1; + return; } } - return 0; + *seen |= SEEN_PREFIX; } static char *cleanup_subject(char *subject) @@ -294,6 +335,7 @@ static void cleanup_space(char *buf) } } +static void decode_header_bq(char *it); typedef int (*header_fn_t)(char *); struct header_def { const char *name; @@ -301,7 +343,7 @@ struct header_def { int namelen; }; -static void check_header(char *line, int len, struct header_def *header) +static void check_header(char *line, struct header_def *header) { int i; @@ -313,13 +355,17 @@ static void check_header(char *line, int len, struct header_def *header) int len = header[i].namelen; if (!strncasecmp(line, header[i].name, len) && line[len] == ':' && isspace(line[len + 1])) { + /* Unwrap inline B and Q encoding, and optionally + * normalize the meta information to utf8. + */ + decode_header_bq(line + len + 2); header[i].func(line + len + 2); break; } } } -static void check_subheader_line(char *line, int len) +static void check_subheader_line(char *line) { static struct header_def header[] = { { "Content-Type", handle_subcontent_type }, @@ -327,9 +373,9 @@ static void check_subheader_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); } -static void check_header_line(char *line, int len) +static void check_header_line(char *line) { static struct header_def header[] = { { "From", handle_from }, @@ -340,7 +386,30 @@ static void check_header_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); +} + +static int is_rfc2822_header(char *line) +{ + /* + * The section that defines the loosest possible + * field name is "3.6.8 Optional fields". + * + * optional-field = field-name ":" unstructured CRLF + * field-name = 1*ftext + * ftext = %d33-57 / %59-126 + */ + int ch; + char *cp = line; + while ((ch = *cp++)) { + if (ch == ':') + return cp != line; + if ((33 <= ch && ch <= 57) || + (59 <= ch && ch <= 126)) + continue; + break; + } + return 0; } static int read_one_header_line(char *line, int sz, FILE *in) @@ -349,18 +418,25 @@ static int read_one_header_line(char *line, int sz, FILE *in) while (ofs < sz) { int peek, len; if (fgets(line + ofs, sz - ofs, in) == NULL) - return ofs; + break; len = eatspace(line + ofs); if (len == 0) - return ofs; - peek = fgetc(in); ungetc(peek, in); - if (peek == ' ' || peek == '\t') { - /* Yuck, 2822 header "folding" */ - ofs += len; - continue; + break; + if (!is_rfc2822_header(line)) { + /* Re-add the newline */ + line[ofs + len] = '\n'; + line[ofs + len + 1] = '\0'; + break; } - return ofs + len; + ofs += len; + /* Yuck, 2822 header "folding" */ + peek = fgetc(in); ungetc(peek, in); + if (peek != ' ' && peek != '\t') + break; } + /* Count mbox From headers as headers */ + if (!ofs && !memcmp(line, "From ", 5)) + ofs = 1; return ofs; } @@ -375,7 +451,7 @@ static unsigned hexval(int c) return ~0; } -static int decode_q_segment(char *in, char *ot, char *ep) +static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) { int c; while ((c = *in++) != 0 && (in <= ep)) { @@ -384,9 +460,11 @@ static int decode_q_segment(char *in, char *ot, char *ep) if (d == '\n' || !d) break; /* drop trailing newline */ *ot++ = ((hexval(d) << 4) | hexval(*in++)); + continue; } - else - *ot++ = c; + if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */ + c = 0x20; + *ot++ = c; } *ot = 0; return 0; @@ -441,29 +519,40 @@ static int decode_b_segment(char *in, char *ot, char *ep) static void convert_to_utf8(char *line, char *charset) { - if (*charset) { - char *in, *out; - size_t insize, outsize, nrc; - char outbuf[4096]; /* cheat */ - iconv_t conv = iconv_open("utf-8", charset); - - if (conv == (iconv_t) -1) { - fprintf(stderr, "cannot convert from %s to utf-8\n", - charset); +#ifndef NO_ICONV + char *in, *out; + size_t insize, outsize, nrc; + char outbuf[4096]; /* cheat */ + static char latin_one[] = "latin1"; + char *input_charset = *charset ? charset : latin_one; + iconv_t conv = iconv_open(metainfo_charset, input_charset); + + if (conv == (iconv_t) -1) { + static int warned_latin1_once = 0; + if (input_charset != latin_one) { + fprintf(stderr, "cannot convert from %s to %s\n", + input_charset, metainfo_charset); *charset = 0; - return; } - in = line; - insize = strlen(in); - out = outbuf; - outsize = sizeof(outbuf); - nrc = iconv(conv, &in, &insize, &out, &outsize); - iconv_close(conv); - if (nrc == (size_t) -1) - return; - *out = 0; - strcpy(line, outbuf); + else if (!warned_latin1_once) { + warned_latin1_once = 1; + fprintf(stderr, "tried to convert from %s to %s, " + "but your iconv does not work with it.\n", + input_charset, metainfo_charset); + } + return; } + in = line; + insize = strlen(in); + out = outbuf; + outsize = sizeof(outbuf); + nrc = iconv(conv, &in, &insize, &out, &outsize); + iconv_close(conv); + if (nrc == (size_t) -1) + return; + *out = 0; + strcpy(line, outbuf); +#endif } static void decode_header_bq(char *it) @@ -506,12 +595,12 @@ static void decode_header_bq(char *it) sz = decode_b_segment(cp + 3, piecebuf, ep); break; case 'q': - sz = decode_q_segment(cp + 3, piecebuf, ep); + sz = decode_q_segment(cp + 3, piecebuf, ep, 1); break; } if (sz < 0) return; - if (metainfo_utf8) + if (metainfo_charset) convert_to_utf8(piecebuf, charset_q); strcpy(out, piecebuf); out += strlen(out); @@ -528,7 +617,7 @@ static void decode_transfer_encoding(char *line) switch (transfer_encoding) { case TE_QP: ep = line + strlen(line); - decode_q_segment(line, line, ep); + decode_q_segment(line, line, ep, 0); break; case TE_BASE64: ep = line + strlen(line); @@ -542,25 +631,13 @@ static void decode_transfer_encoding(char *line) static void handle_info(void) { char *sub; - static int done_info = 0; - - if (done_info) - return; - done_info = 1; sub = cleanup_subject(subject); cleanup_space(name); cleanup_space(date); cleanup_space(email); cleanup_space(sub); - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. - */ - decode_header_bq(name); - decode_header_bq(date); - decode_header_bq(email); - decode_header_bq(sub); printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date); } @@ -568,7 +645,7 @@ static void handle_info(void) /* We are inside message body and have read line[] already. * Spit out the commit log. */ -static int handle_commit_msg(void) +static int handle_commit_msg(int *seen) { if (!cmitmsg) return 0; @@ -590,8 +667,13 @@ static int handle_commit_msg(void) * normalize the log message to UTF-8. */ decode_transfer_encoding(line); - if (metainfo_utf8) + if (metainfo_charset) convert_to_utf8(line, charset); + + handle_inbody_header(seen, line); + if (!(*seen & SEEN_PREFIX)) + continue; + fputs(line, cmitmsg); } while (fgets(line, sizeof(line), stdin) != NULL); fclose(cmitmsg); @@ -623,26 +705,16 @@ static void handle_patch(void) * that the first part to contain commit message and a patch, and * handle other parts as pure patches. */ -static int handle_multipart_one_part(void) +static int handle_multipart_one_part(int *seen) { - int seen = 0; int n = 0; - int len; while (fgets(line, sizeof(line), stdin) != NULL) { again: - len = eatspace(line); n++; - if (!len) - continue; if (is_multipart_boundary(line)) break; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - if (handle_commit_msg()) + if (handle_commit_msg(seen)) goto again; handle_patch(); break; @@ -654,6 +726,7 @@ static int handle_multipart_one_part(void) static void handle_multipart_body(void) { + int seen = 0; int part_num = 0; /* Skip up to the first boundary */ @@ -666,13 +739,16 @@ static void handle_multipart_body(void) return; /* We are on boundary line. Start slurping the subhead. */ while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { - if (handle_multipart_one_part() < 0) + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { + if (handle_multipart_one_part(&seen) < 0) return; + /* Reset per part headers */ + transfer_encoding = TE_DONTCARE; + charset[0] = 0; } else - check_subheader_line(line, len); + check_subheader_line(line); } fclose(patchfile); if (!patch_lines) { @@ -686,18 +762,9 @@ static void handle_body(void) { int seen = 0; - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) - continue; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - handle_commit_msg(); + if (line[0] || fgets(line, sizeof(line), stdin) != NULL) { + handle_commit_msg(&seen); handle_patch(); - break; } fclose(patchfile); if (!patch_lines) { @@ -707,27 +774,29 @@ static void handle_body(void) } static const char mailinfo_usage[] = - "git-mailinfo [-k] [-u] msg patch info"; - -static void usage(void) { - fprintf(stderr, "%s\n", mailinfo_usage); - exit(1); -} + "git-mailinfo [-k] [-u | --encoding=] msg patch info"; int main(int argc, char **argv) { + /* NEEDSWORK: might want to do the optional .git/ directory + * discovery + */ + git_config(git_default_config); + while (1 < argc && argv[1][0] == '-') { if (!strcmp(argv[1], "-k")) keep_subject = 1; else if (!strcmp(argv[1], "-u")) - metainfo_utf8 = 1; + metainfo_charset = git_commit_encoding; + else if (!strncmp(argv[1], "--encoding=", 11)) + metainfo_charset = argv[1] + 11; else - usage(); + usage(mailinfo_usage); argc--; argv++; } if (argc != 3) - usage(); + usage(mailinfo_usage); cmitmsg = fopen(argv[1], "w"); if (!cmitmsg) { perror(argv[1]); @@ -739,15 +808,16 @@ int main(int argc, char **argv) exit(1); } while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { if (multipart_boundary[0]) handle_multipart_body(); else handle_body(); + handle_info(); break; } - check_header_line(line, len); + check_header_line(line); } return 0; }