Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

ship with par

  • Loading branch information...
commit aed2fbc56590cae04a6252d5c711cea251a7df23 1 parent aaf35ac
@martinh martinh authored
View
17 par/Makefile
@@ -0,0 +1,17 @@
+#! /usr/bin/make
+
+# Packaging wrapper: pulls the build rules in from protoMakefile and adds
+# an "install" target that stages the binary, par.doc and the man page
+# under $(DESTDIR).
+
+BIN=$(DESTDIR)/usr/bin
+DOC=$(DESTDIR)/usr/share/doc/par
+MAN=$(DESTDIR)/usr/share/man/man1
+
+include protoMakefile
+
+# Assigned after the include so that this definition is the one in effect.
+CC = cc $(CFLAGS) -c
+
+# "install" names an action, not a file.
+.PHONY: install
+
+# Every destination directory is created before anything is copied into
+# it, and options precede the operands so that install implementations
+# which stop option parsing at the first operand behave the same way.
+# par.1 is a prerequisite because the last command copies it.
+install: par par.doc par.1
+ install -d -o root -g root -m 0755 $(BIN)
+ install -o root -g root -m 0755 par $(BIN)/par
+ install -d -o root -g root -m 0755 $(DOC)
+ install -o root -g root -m 0644 par.doc $(DOC)
+ install -d -o root -g root -m 0755 $(MAN)
+ install -o root -g root -m 0644 par.1 $(MAN)
+
View
215 par/buffer.c
@@ -0,0 +1,215 @@
+/***********************/
+/* buffer.c */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+/* additem(), copyitems(), and nextitem() rely on the fact that */
+/* sizeof (char) is 1. See section A7.4.8 of The C Programming */
+/* Language, Second Edition, by Kernighan and Ritchie. */
+
+
+#include "buffer.h" /* Makes sure we're consistent with the prototypes. */
+ /* Also includes <stddef.h> and "errmsg.h". */
+
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+#ifdef DONTFREE
+#define free(ptr)
+#endif
+
+
+/* A buffer is a chain of blocks plus a read cursor for nextitem(). */
+
+struct buffer {
+  struct block *firstblk;  /* Head of the block chain.                  */
+  struct block *current;   /* The last non-empty block, or firstblk if  */
+                           /* every block is empty.                     */
+  struct block *nextblk;   /* Block holding the item that nextitem()    */
+                           /* will return next, or NULL.                */
+  int nextindex;           /* Index of that item in nextblk->items.     */
+  size_t itemsize;         /* Size in bytes of one item.                */
+};
+
+/* One link of a buffer's chain: a fixed-capacity array of items. */
+
+typedef struct block {
+  struct block *next;  /* The following block, or NULL if this is last. */
+  void *items;         /* Storage for the items held by this block.     */
+  int maxhere;         /* How many items fit in *items.                 */
+  int numprevious;     /* Sum of numhere over all earlier blocks.       */
+  int numhere;         /* The first numhere slots of *items are filled. */
+} block;
+
+
+buffer *newbuffer(size_t itemsize, errmsg_t errmsg)
+
+/* Creates an empty buffer for items of itemsize bytes each, with one */
+/* block already allocated. That block has room for 124 / itemsize    */
+/* items, but never fewer than 4. On success errmsg is cleared and    */
+/* the buffer is returned. If any allocation fails, outofmem is       */
+/* copied into errmsg, whatever was obtained is released, and NULL    */
+/* is returned. itemsize must not be 0 (it is used as a divisor).     */
+{
+  int capacity = 124 / itemsize;
+  buffer *created;
+  block *first;
+  void *storage;
+
+  if (capacity < 4) capacity = 4;
+
+  created = malloc(sizeof (buffer));
+  first = malloc(sizeof (block));
+  storage = malloc(capacity * itemsize);
+
+  if (created && first && storage) {
+    first->items = storage;
+    first->maxhere = capacity;
+    first->numhere = 0;
+    first->numprevious = 0;
+    first->next = NULL;
+
+    created->itemsize = itemsize;
+    created->nextindex = 0;
+    created->firstblk = first;
+    created->current = first;
+    created->nextblk = first;
+
+    *errmsg = '\0';
+    return created;
+  }
+
+  /* At least one allocation failed; free(NULL) is harmless. */
+  wcscpy(errmsg,outofmem);
+  free(created);
+  free(first);
+  free(storage);
+  return NULL;
+}
+
+
+void freebuffer(buffer *buf)
+
+/* Releases every block of *buf together with its item storage, */
+/* and then *buf itself.                                        */
+{
+  block *cur, *following;
+
+  for (cur = buf->firstblk; cur; cur = following) {
+    following = cur->next;   /* Fetch the link before cur is freed. */
+    free(cur->items);
+    free(cur);
+  }
+
+  free(buf);
+}
+
+
+void clearbuffer(buffer *buf)
+
+/* Marks every block of *buf as holding no items and makes the first */
+/* block the insertion point again. No memory is released, and the   */
+/* cursor used by nextitem() is left as it was.                      */
+{
+  block *cur = buf->firstblk;
+
+  while (cur) {
+    cur->numhere = 0;
+    cur = cur->next;
+  }
+
+  buf->current = buf->firstblk;
+}
+
+
+void additem(buffer *buf, const void *item, errmsg_t errmsg)
+
+/* Appends a copy of the itemsize bytes at *item to *buf. When the   */
+/* current block is full, the following block is reused if there is  */
+/* one; otherwise a new block with twice the capacity is allocated   */
+/* and linked in. On success errmsg is cleared. If an allocation     */
+/* fails, outofmem is copied into errmsg and *buf is left unchanged. */
+{
+  block *tail = buf->current, *fresh;
+  void *storage;
+  int capacity;
+  const size_t width = buf->itemsize;
+
+  if (tail->numhere == tail->maxhere) {
+    fresh = tail->next;
+    if (!fresh) {
+      capacity = 2 * tail->maxhere;
+      fresh = malloc(sizeof (block));
+      storage = malloc(capacity * width);
+      if (!fresh || !storage) {
+        wcscpy(errmsg,outofmem);
+        free(fresh);
+        free(storage);
+        return;
+      }
+      fresh->items = storage;
+      fresh->maxhere = capacity;
+      fresh->numhere = 0;
+      fresh->numprevious = tail->numprevious + tail->numhere;
+      fresh->next = NULL;
+      tail->next = fresh;
+    }
+    buf->current = fresh;
+    tail = fresh;
+  }
+
+  /* The char cast makes the offset a byte count (sizeof (char) is 1). */
+  memcpy((char *) tail->items + tail->numhere * width, item, width);
+  tail->numhere++;
+
+  *errmsg = '\0';
+}
+
+
+int numitems(buffer *buf)
+
+/* Returns the item count of *buf: the items in the current block */
+/* plus the running total recorded for the blocks before it.      */
+{
+  const block *last = buf->current;
+  int before = last->numprevious;
+  int here = last->numhere;
+
+  return before + here;
+}
+
+
+void *copyitems(buffer *buf, errmsg_t errmsg)
+
+/* Returns a freshly malloc()ed array holding a copy of every item   */
+/* in *buf, in order; the caller releases it with free(). Returns    */
+/* NULL with errmsg cleared if *buf holds no items, or NULL with     */
+/* outofmem copied into errmsg if the allocation fails. Because NULL */
+/* is returned in both cases, callers must test *errmsg rather than  */
+/* the return value to detect failure.                               */
+{
+  int n;
+  void *r;
+  block *blk, *b;
+  size_t itemsize = buf->itemsize;
+
+  b = buf->current;
+  n = b->numprevious + b->numhere;
+  if (!n) {
+    /* An empty buffer is not an error. Clear errmsg explicitly so */
+    /* that a message left over from an earlier call cannot be     */
+    /* mistaken for a failure of this one.                         */
+    *errmsg = '\0';
+    return NULL;
+  }
+
+  r = malloc(n * itemsize);
+  if (!r) {
+    wcscpy(errmsg,outofmem);
+    return NULL;
+  }
+
+  /* Blocks after the current one hold no items, so stop there. */
+  b = b->next;
+
+  for (blk = buf->firstblk; blk != b; blk = blk->next)
+    memcpy( ((char *) r) + (blk->numprevious * itemsize),
+            blk->items, blk->numhere * itemsize);
+
+  *errmsg = '\0';
+  return r;
+}
+
+
+void rewindbuffer(buffer *buf)
+
+/* Moves the nextitem() cursor back to slot 0 of the first block. */
+{
+  buf->nextindex = 0;
+  buf->nextblk = buf->firstblk;
+}
+
+
+void *nextitem(buffer *buf)
+
+/* Returns a pointer to the item under the cursor and advances the  */
+/* cursor, or returns NULL (leaving the cursor alone) if there is   */
+/* no item there. The pointer refers to storage inside the buffer.  */
+{
+  block *blk = buf->nextblk;
+  char *slot;
+
+  if (!blk) return NULL;
+  if (buf->nextindex >= blk->numhere) return NULL;
+
+  slot = (char *) blk->items + buf->nextindex * buf->itemsize;
+
+  /* Step to the following block once this block's capacity is used up. */
+  buf->nextindex++;
+  if (buf->nextindex >= blk->maxhere) {
+    buf->nextblk = blk->next;
+    buf->nextindex = 0;
+  }
+
+  return slot;
+}
View
78 par/buffer.h
@@ -0,0 +1,78 @@
+/***********************/
+/* buffer.h */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+/* Note: Those functions declared here which do not use errmsg */
+/* always succeed, provided that they are passed valid arguments. */
+
+
+#ifndef BUFFER_H
+#define BUFFER_H
+
+/* The guard keeps a second inclusion from repeating the typedef */
+/* below, which C89 does not permit.                             */
+
+#include "errmsg.h"
+
+#include <stddef.h>
+
+
+typedef struct buffer buffer;
+
+
+buffer *newbuffer(size_t itemsize, errmsg_t errmsg);
+
+  /* newbuffer(itemsize,errmsg) returns a pointer to a    */
+  /* new empty buffer which holds items of size itemsize. */
+  /* itemsize must not be 0. Returns NULL on failure.     */
+
+
+void freebuffer(buffer *buf);
+
+  /* freebuffer(buf) frees the memory associated with */
+  /* *buf. buf may not be used after this call.       */
+
+
+void clearbuffer(buffer *buf);
+
+  /* clearbuffer(buf) removes  */
+  /* all items from *buf, but  */
+  /* does not free any memory. */
+
+
+void additem(buffer *buf, const void *item, errmsg_t errmsg);
+
+  /* additem(buf,item,errmsg) copies *item to the end of    */
+  /* *buf. item must point to an object of the proper size  */
+  /* for *buf. If additem() fails, *buf will be unaffected. */
+
+
+int numitems(buffer *buf);
+
+  /* numitems(buf) returns the number of items in *buf. */
+
+
+void *copyitems(buffer *buf, errmsg_t errmsg);
+
+  /* copyitems(buf,errmsg) returns an array of objects of  */
+  /* the proper size for *buf, one for each item in *buf,  */
+  /* or NULL if there are no items in buf. The elements    */
+  /* of the array are copied from the items in *buf, in    */
+  /* order. The array is allocated with malloc(), so it    */
+  /* may be freed with free(). Returns NULL on failure.    */
+  /* Since NULL is returned both for an empty buffer and   */
+  /* on failure, test errmsg to tell the two cases apart.  */
+
+
+void *nextitem(buffer *buf);
+
+  /* When buf was created by newbuffer, a pointer associated with buf  */
+  /* was initialized to point at the first slot in *buf. If there is   */
+  /* an item in the slot currently pointed at, nextitem(buf) advances  */
+  /* the pointer to the next slot and returns the old value. If there  */
+  /* is no item in the slot, nextitem(buf) leaves the pointer where it */
+  /* is and returns NULL.                                              */
+
+
+void rewindbuffer(buffer *buf);
+
+  /* rewindbuffer(buf) resets the pointer used by   */
+  /* nextitem() to point at the first slot in *buf. */
+
+
+#endif
View
319 par/charset.c
@@ -0,0 +1,319 @@
+/***********************/
+/* charset.c */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/* Modified by */
+/* Jérôme Pouiller */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+/* Because this is ANSI C code, we can't assume that there are only 256 */
+/* characters. Therefore, we can't use bit vectors to represent sets */
+/* without the risk of consuming large amounts of memory. Therefore, */
+/* this code is much more complicated than might be expected. */
+
+
+#include "charset.h" /* Makes sure we're consistent with the. */
+ /* prototypes. Also includes "errmsg.h". */
+#include "buffer.h" /* Also includes <stddef.h>. */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+#ifdef DONTFREE
+#define free(ptr)
+#endif
+
+
+/* The issues regarding char and unsigned char are relevant to the */
+/* use of the ctype.h functions, and the interpretation of the _xhh */
+/* sequence. See the comments near the beginning of par.c. */
+
+
+typedef unsigned char csflag_t;
+
+/* A set of wide characters, described by two explicit lists plus    */
+/* class flags. inlist and outlist must have no characters in        */
+/* common; either may be NULL, which acts like "".                   */
+
+struct charset {
+  wchar_t *inlist;   /* Characters listed here are in the set.       */
+  wchar_t *outlist;  /* Characters listed here are not in the set.   */
+  csflag_t flags;    /* A character in neither list is in the set if */
+                     /* it belongs to any class selected by flags.   */
+};
+
+/* The following may be bitwise-OR'd together */
+/* to set the flags field of a charset: */
+
+static const csflag_t CS_UCASE = 1;  /* Includes all upper case letters. */
+static const csflag_t CS_LCASE = 2;  /* Includes all lower case letters. */
+static const csflag_t CS_DIGIT = 4;  /* Includes all decimal digits.     */
+static const csflag_t CS_NUL   = 8;  /* Includes the NUL character.      */
+
+
+static int appearsin(wchar_t c, const wchar_t *str)
+
+/* Returns 1 if c is a non-NUL character that occurs in the string */
+/* str. Returns 0 if c is '\0', if str is NULL, or if c does not   */
+/* occur in *str.                                                  */
+{
+  if (c == L'\0' || str == NULL) return 0;
+
+  return wcschr(str, c) != NULL;
+}
+
+
+static int hexdigtoint(wchar_t c)
+
+/* Returns the value (0 to 15) of the hexadecimal digit c, in */
+/* either letter case, or -1 if c is not a hexadecimal digit. */
+{
+  static const wchar_t hexdigits[] = L"0123456789ABCDEFabcdef";
+  const wchar_t *hit;
+  int value;
+
+  /* wcschr() would match the terminator, so reject NUL up front. */
+  if (c == L'\0') return -1;
+
+  hit = wcschr(hexdigits, c);
+  if (hit == NULL) return -1;
+
+  /* Positions 16 to 21 are the lower case letters a to f. */
+  value = (int) (hit - hexdigits);
+  return value < 16 ? value : value - 6;
+
+  /* We can't do things like c - 'A' because we can't */
+  /* depend on the order of the characters in ANSI C. */
+  /* Nor can we index a table by c, because we don't  */
+  /* know how large such a table might have to be.    */
+}
+
+
+charset *parsecharset(const char *str, errmsg_t errmsg)
+
+/* Converts the multibyte string str to wide characters and parses  */
+/* it as a charset specification. "_" followed by one of "_sbqQ"    */
+/* adds an underscore, space, backslash, single quote or double     */
+/* quote respectively. "_xhh" adds the character whose code is the  */
+/* two hexadecimal digits hh, except that code 0 sets CS_NUL        */
+/* instead. "_A", "_a" and "_0" set the upper case, lower case and  */
+/* digit class flags. Any other character is added literally.       */
+/* Returns the new charset with errmsg cleared, or NULL with a      */
+/* message written into errmsg on failure.                          */
+{
+  charset *cset = NULL;
+  buffer *cbuf = NULL;
+  const wchar_t *p, * const singleescapes = L"_sbqQx";
+  int hex1, hex2;
+  wchar_t ch;
+  wchar_t *wstr;
+
+  /* One wide character per byte is always enough room. */
+  wstr = malloc((strlen(str) + 1) * sizeof (wchar_t));
+  if (!wstr) {
+    wcscpy(errmsg,outofmem);
+    goto pcserror;
+  }
+  if ((size_t)(-1) == mbstowcs(wstr, str, strlen(str) + 1)) {
+    wcscpy(errmsg,mbserror);
+    goto pcserror;
+  }
+  cset = malloc(sizeof (charset));
+  if (!cset) {
+    wcscpy(errmsg,outofmem);
+    goto pcserror;
+  }
+  cset->inlist = cset->outlist = NULL;
+  cset->flags = 0;
+
+  cbuf = newbuffer(sizeof (wchar_t), errmsg);
+  if (*errmsg) goto pcserror;
+
+  for (p = wstr; *p; ++p)
+    if (*p == L'_') {
+      ++p;
+      if (appearsin(*p, singleescapes)) {
+        if      (*p == L'_') ch = L'_' ;
+        else if (*p == L's') ch = L' ' ;
+        else if (*p == L'b') ch = L'\\';
+        else if (*p == L'q') ch = L'\'';
+        else if (*p == L'Q') ch = L'\"';
+        else /* *p == 'x' */ {
+          /* FIXME _x metacharacter should allow wide characters input.*/
+          hex1 = hexdigtoint(p[1]);
+          /* Look at p[2] only once p[1] is known to be a digit: if */
+          /* p[1] is the terminator, p[2] may lie outside wstr.     */
+          hex2 = hex1 < 0 ? -1 : hexdigtoint(p[2]);
+          if (hex1 < 0 || hex2 < 0) goto pcsbadstr;
+          ch = 16 * hex1 + hex2;
+          p += 2;
+        }
+        if (!ch)
+          cset->flags |= CS_NUL;
+        else {
+          additem(cbuf, &ch, errmsg);
+          if (*errmsg) goto pcserror;
+        }
+      }
+      else {
+        /* A "_" at the very end lands here with *p == '\0' and is */
+        /* rejected as bad syntax; the goto leaves the loop.       */
+        if      (*p == L'A') cset->flags |= CS_UCASE;
+        else if (*p == L'a') cset->flags |= CS_LCASE;
+        else if (*p == L'0') cset->flags |= CS_DIGIT;
+        else goto pcsbadstr;
+      }
+    }
+    else {
+      additem(cbuf, p,errmsg);
+      if (*errmsg) goto pcserror;
+    }
+
+  /* Terminate the collected characters and make them the inlist. */
+  ch = '\0';
+  additem(cbuf, &ch, errmsg);
+  if (*errmsg) goto pcserror;
+  cset->inlist = copyitems(cbuf,errmsg);
+  if (*errmsg) goto pcserror;
+
+pcscleanup:
+
+  if (cbuf) freebuffer(cbuf);
+  if (wstr) free(wstr);
+  return cset;
+
+pcsbadstr:
+
+  swprintf(errmsg, errmsg_size, L"Bad charset syntax: %.*s\n", errmsg_size - 22, str);
+
+pcserror:
+
+  if (cset) freecharset(cset);
+  cset = NULL;
+  goto pcscleanup;
+}
+
+
+void freecharset(charset *cset)
+
+/* Releases both character lists of *cset and then *cset itself. */
+/* A list that is NULL is simply skipped by free().              */
+{
+  free(cset->outlist);
+  free(cset->inlist);
+  free(cset);
+}
+
+
+int csmember(wchar_t c, const charset *cset)
+
+/* Returns 1 if c is a member of *cset, 0 otherwise. c is a member */
+/* if it appears in inlist, or if it does not appear in outlist    */
+/* and belongs to a class selected by flags. The NUL character can */
+/* only be a member through CS_NUL, because appearsin() never      */
+/* matches it.                                                     */
+{
+  /* Convert by value. Reading c through a wint_t pointer would */
+  /* access bytes outside c wherever wint_t is wider than       */
+  /* wchar_t.                                                   */
+  const wint_t wc = (wint_t) c;
+
+  return
+    appearsin(c, cset->inlist) ||
+    ( !appearsin(c, cset->outlist) &&
+      ( (cset->flags & CS_LCASE && iswlower(wc)) ||
+        (cset->flags & CS_UCASE && iswupper(wc)) ||
+        (cset->flags & CS_DIGIT && iswdigit(wc)) ||
+        (cset->flags & CS_NUL   && !c          )    ) );
+}
+
+
+static charset *csud(
+  int u, const charset *cset1, const charset *cset2, errmsg_t errmsg
+)
+/* Returns the union of cset1 and cset2 if u is 1, or the set      */
+/* difference cset1 - cset2 if u is 0. Returns NULL on failure,    */
+/* with a message in errmsg.                                       */
+/*                                                                 */
+/* The result's flags are computed first. Every character listed   */
+/* by either operand is then examined: one that belongs in the     */
+/* result but is not supplied by the flags is recorded in inlist,  */
+/* and one that the flags supply but that does not belong is       */
+/* recorded in outlist.                                            */
+{
+  charset *csu;
+  buffer *inbuf = NULL, *outbuf = NULL;
+  wchar_t *lists[4], **list, *p, nullchar = L'\0';
+
+  csu = malloc(sizeof (charset));
+  if (!csu) {
+    wcscpy(errmsg,outofmem);
+    goto csuderror;
+  }
+
+  /* Give csu a fully defined state before anything else can fail: */
+  /* the error path hands it to freecharset(), which frees both    */
+  /* list pointers.                                                */
+  csu->inlist = csu->outlist = NULL;
+  csu->flags = u ?  cset1->flags |  cset2->flags
+                 :  cset1->flags & ~cset2->flags;
+
+  inbuf = newbuffer(sizeof (wchar_t), errmsg);
+  if (*errmsg) goto csuderror;
+  outbuf = newbuffer(sizeof (wchar_t), errmsg);
+  if (*errmsg) goto csuderror;
+
+  lists[0] = cset1->inlist;
+  lists[1] = cset1->outlist;
+  lists[2] = cset2->inlist;
+  lists[3] = cset2->outlist;
+
+  /* While this loop runs, csu's lists are still NULL, so */
+  /* csmember(*p, csu) reflects csu->flags alone.         */
+  for (list = lists; list < lists + 4; ++list) {
+    if (!*list) continue;
+    for (p = *list; *p; ++p) {
+      if (u ?  csmember(*p, cset1) ||  csmember(*p, cset2)
+            :  csmember(*p, cset1) && !csmember(*p, cset2)) {
+        if (!csmember(*p, csu)) {
+          additem(inbuf,p,errmsg);
+          if (*errmsg) goto csuderror;
+        }
+      }
+      else if (csmember(*p, csu)) {
+        additem(outbuf,p,errmsg);
+        if (*errmsg) goto csuderror;
+      }
+    }
+  }
+
+  additem(inbuf, &nullchar, errmsg);
+  if (*errmsg) goto csuderror;
+  additem(outbuf, &nullchar, errmsg);
+  if (*errmsg) goto csuderror;
+  csu->inlist = copyitems(inbuf,errmsg);
+  if (*errmsg) goto csuderror;
+  csu->outlist = copyitems(outbuf,errmsg);
+  if (*errmsg) goto csuderror;
+
+csudcleanup:
+
+  if (inbuf) freebuffer(inbuf);
+  if (outbuf) freebuffer(outbuf);
+  return csu;
+
+csuderror:
+
+  if (csu) freecharset(csu);
+  csu = NULL;
+  goto csudcleanup;
+}
+
+
+charset *csunion(const charset *cset1, const charset *cset2, errmsg_t errmsg)
+
+/* Thin wrapper: asks csud() for the union (u == 1) of the two sets. */
+{
+  const int wantunion = 1;
+
+  return csud(wantunion, cset1, cset2, errmsg);
+}
+
+
+charset *csdiff(const charset *cset1, const charset *cset2, errmsg_t errmsg)
+
+/* Thin wrapper: asks csud() for the difference (u == 0), */
+/* that is, *cset1 - *cset2.                              */
+{
+  const int wantunion = 0;
+
+  return csud(wantunion, cset1, cset2, errmsg);
+}
+
+
+void csadd(charset *cset1, const charset *cset2, errmsg_t errmsg)
+
+/* Replaces the contents of *cset1 with the union of *cset1 and    */
+/* *cset2. The union is built separately and swapped in, after     */
+/* which the old contents are freed. If building the union fails,  */
+/* *cset1 is left as it was and errmsg holds the message.          */
+{
+  charset *merged = csunion(cset1,cset2,errmsg);
+
+  if (*errmsg == L'\0') {
+    csswap(merged,cset1);
+    freecharset(merged);
+  }
+}
+
+
+void csremove(charset *cset1, const charset *cset2, errmsg_t errmsg)
+
+/* Replaces the contents of *cset1 with *cset1 - *cset2. The       */
+/* difference is built separately and swapped in, after which the  */
+/* old contents are freed. If building the difference fails,       */
+/* *cset1 is left as it was and errmsg holds the message.          */
+{
+  charset *reduced = csdiff(cset1,cset2,errmsg);
+
+  if (*errmsg == L'\0') {
+    csswap(reduced,cset1);
+    freecharset(reduced);
+  }
+}
+
+
+charset *cscopy(const charset *cset, errmsg_t errmsg)
+
+/* Returns a new charset with the same members as *cset, obtained */
+/* as the union of *cset with an empty set. NULL on failure.      */
+{
+  charset nothing;
+
+  nothing.inlist = NULL;
+  nothing.outlist = NULL;
+  nothing.flags = 0;
+
+  return csunion(cset, &nothing, errmsg);
+}
+
+
+void csswap(charset *cset1, charset *cset2)
+
+/* Exchanges the contents (both list pointers and the flags) */
+/* of *cset1 and *cset2 by structure assignment.             */
+{
+  charset held = *cset2;
+
+  *cset2 = *cset1;
+  *cset1 = held;
+}
View
71 par/charset.h
@@ -0,0 +1,71 @@
+/***********************/
+/* charset.h */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/* Modified by */
+/* Jérôme Pouiller */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+/* Note: Those functions declared here which do not use errmsg */
+/* always succeed, provided that they are passed valid arguments. */
+
+#ifndef CHARSET_H
+#define CHARSET_H
+
+/* The guard keeps a second inclusion from repeating the typedef */
+/* below, which C89 does not permit.                             */
+
+#include <wchar.h>
+#include "errmsg.h"
+
+
+typedef struct charset charset;
+
+
+charset *parsecharset(const char *str, errmsg_t errmsg);
+
+  /* parsecharset(str,errmsg) returns the set of characters defined by */
+  /* str using charset syntax (see par.doc). Returns NULL on failure.  */
+
+
+void freecharset(charset *cset);
+
+  /* freecharset(cset) frees any memory associated with */
+  /* *cset. cset may not be used after this call.       */
+
+
+int csmember(wchar_t c, const charset *cset);
+
+  /* csmember(c,cset) returns 1 if c is a member of *cset, 0 otherwise. */
+
+
+charset *csunion(const charset *cset1, const charset *cset2, errmsg_t errmsg);
+
+  /* csunion(cset1,cset2,errmsg) returns a pointer to the */
+  /* union of *cset1 and *cset2, or NULL on failure.      */
+
+
+charset *csdiff(const charset *cset1, const charset *cset2, errmsg_t errmsg);
+
+  /* csdiff(cset1,cset2,errmsg) returns a pointer to the set */
+  /* difference *cset1 - *cset2 , or NULL on failure.        */
+
+
+void csadd(charset *cset1, const charset *cset2, errmsg_t errmsg);
+
+  /* csadd(cset1,cset2,errmsg) adds the members of *cset2 */
+  /* to *cset1. On failure, *cset1 is not changed.        */
+
+
+void csremove(charset *cset1, const charset *cset2, errmsg_t errmsg);
+
+  /* csremove(cset1,cset2,errmsg) removes the members of *cset2 */
+  /* from *cset1. On failure, *cset1 is not changed.            */
+
+
+charset *cscopy(const charset *cset, errmsg_t errmsg);
+
+  /* cscopy(cset,errmsg) returns a copy of cset, or NULL on failure. */
+
+
+void csswap(charset *cset1, charset *cset2);
+
+  /* csswap(cset1,cset2) swaps the contents of *cset1 and *cset2. */
+
+
+#endif
View
23 par/errmsg.c
@@ -0,0 +1,23 @@
+/***********************/
+/* errmsg.c */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/* Modified by */
+/* Jérôme Pouiller */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+#include "errmsg.h" /* Makes sure we're consistent with the declarations. */
+
+
+const wchar_t * const outofmem =
+ L"Out of memory.\n";
+
+const wchar_t * const mbserror =
+ L"Error in input multibyte string.\n";
+
+const wchar_t * const impossibility =
+ L"Impossibility #%d has occurred. Please report it.\n";
View
43 par/errmsg.h
@@ -0,0 +1,43 @@
+/***********************/
+/* errmsg.h */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/* Modified by */
+/* Jérôme Pouiller */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+#ifndef ERRMSG_H
+#define ERRMSG_H
+
+#include <wchar.h>
+
+
+/* errmsg_size is the maximum number of characters that will  */
+/* fit in an errmsg_t, including the terminating '\0'. It     */
+/* will never decrease, but may increase in future versions   */
+/* of this header file.                                       */
+
+#define errmsg_size 163
+
+
+/* Any function which takes the argument errmsg_t errmsg must, */
+/* before returning, either set errmsg[0] to '\0' (indicating  */
+/* success), or write an error message string into errmsg      */
+/* (indicating failure), being careful not to overrun the      */
+/* space.                                                      */
+
+typedef wchar_t errmsg_t[errmsg_size];
+
+
+/* Messages shared between modules: */
+
+extern const wchar_t * const outofmem;
+  /* "Out of memory.\n" */
+
+extern const wchar_t * const mbserror;
+  /* "Error in input multibyte string.\n" */
+
+extern const wchar_t * const impossibility;
+  /* "Impossibility #%d has occurred. Please report it.\n" */
+
+
+#endif
View
936 par/par.c
@@ -0,0 +1,936 @@
+/***********************/
+/* par.c */
+/* for Par 1.52-i18n.3 */
+/* Copyright 2001 by */
+/* Adam M. Costello */
+/* Modified by */
+/* Jérôme Pouiller */
+/***********************/
+
+/* This is ANSI C code (C89). */
+
+
+#include "charset.h" /* Also includes "errmsg.h". */
+#include "buffer.h" /* Also includes <stddef.h>. */
+#include "reformat.h"
+
+#include <ctype.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+#ifdef DONTFREE
+#define free(ptr)
+#endif
+
+
+/*===
+
+Regarding char and unsigned char: ANSI C is a nightmare in this
+respect. Some functions, like puts(), strchr(), and getenv(), use char
+or char*, so they work well with character constants like 'a', which
+are char, and with argv, which is char**. But several other functions,
+like getchar(), putchar(), and isdigit(), use unsigned char (converted
+to/from int). Therefore innocent-looking code can be wrong, for
+example:
+
+ int c = getchar();
+ if (c == 'a') ...
+
+This is wrong because 'a' is char (converted to int) and could be
+negative, but getchar() returns unsigned char (converted to int), so c
+is always nonnegative or EOF. For similar reasons, it is wrong to pass
+a char to a function that expects an unsigned char:
+
+ putchar('\n');
+ if (isdigit(argv[1][0])) ...
+
+Inevitably, we need to convert between char and unsigned char. This can
+be done by integral conversion (casting or assigning a char to unsigned
+char or vice versa), or by aliasing (converting a pointer to char to
+a pointer to unsigned char (or vice versa) and then dereferencing
+it). ANSI C requires that integral conversion alters the bits when the
+unsigned value is not representable in the signed type and the signed
+type does not use two's complement representation. Aliasing, on the
+other hand, preserves the bits. Although the C standard is not at all
+clear about which sort of conversion is appropriate for making the
+standard library functions interoperate, I think preserving the bits
+is what is needed. Under that assumption, here are some examples of
+correct code:
+
+ int c = getchar();
+ char ch;
+
+ if (c != EOF) {
+ *(unsigned char *)&ch = c;
+ if (ch == 'a') ...
+ if (isdigit(c)) ...
+ }
+
+ char *s = ...
+ if (isdigit(*(unsigned char *)s)) ...
+
+===*/
+
+
+/* Option summary text, assembled from adjacent string literals. Each */
+/* output line is written as two literals: the left part describes   */
+/* the general options and integer parameters, the right part the     */
+/* boolean parameters.                                                */
+/* NOTE(review): the code that prints this is outside this view.      */
+
+static const char * const usagemsg =
+"\n"
+"Options for par:\n"
+"\n"
+"help print option summary "
+ " ---------- Boolean parameters: ---------\n"
+"version print version number "
+ " b<body> let non-trailing body chars in\n"
+"B<op><set> as <op> is =/+/-, "
+ " prefix, non-leading in suffix\n"
+" replace/augment/diminish "
+ " c<cap> count all words as capitalized\n"
+" body chars by <set> "
+ " d<div> use indentation as a delimiter\n"
+"P<op><set> ditto for protective chars"
+ " E<Err> send messages to stderr\n"
+"Q<op><set> ditto for quote chars "
+ " e<expel> discard superfluous lines\n"
+"-------- Integer parameters: --------"
+ " f<fit> narrow paragraph for best fit\n"
+"h<hang> skip IP's 1st <hang> lines"
+ " g<guess> preserve wide sentence breaks\n"
+" in scan for common affixes"
+ " i<invis> hide lines inserted by <quote>\n"
+"p<prefix> prefix length "
+ " j<just> justify paragraphs\n"
+"r<repeat> if not 0, force bodiless "
+ " l<last> treat last lines like others\n"
+" lines to length <width> "
+ " q<quote> supply vacant lines between\n"
+"s<suffix> suffix length "
+ " different quote nesting levels\n"
+"T<Tab> tab stops every <Tab> cols"
+ " R<Report> print error for too-long words\n"
+"w<width> max output line length "
+ " t<touch> move suffixes left\n"
+"\n"
+"See par.doc or par.1 (the man page) for more information.\n"
+"\n"
+;
+
+
+/* Structure for recording properties of lines within segments: */
+
+typedef unsigned char lflag_t;
+
+typedef struct lineprop {
+  short p;        /* Prefix length of a bodiless line, or the fallback */
+                  /* prelen of the IP containing a non-bodiless line.  */
+  short s;        /* Suffix length of a bodiless line, or the fallback */
+                  /* suflen of the IP containing a non-bodiless line.  */
+  lflag_t flags;  /* Boolean properties (see below).                   */
+  wchar_t rc;     /* The repeated character of a bodiless line.        */
+} lineprop;
+
+/* Flags for marking boolean properties: */
+
+static const lflag_t L_BODILESS = 1;  /* Bodiless line.             */
+static const lflag_t L_INVIS    = 2;  /* Invisible line.            */
+static const lflag_t L_FIRST    = 4;  /* First line of a paragraph. */
+static const lflag_t L_SUPERF   = 8;  /* Superfluous line.          */
+
+/* Predicates on a pointer to a lineprop. The literal masks 1, 2, 4  */
+/* and 8 are the values of L_BODILESS, L_INVIS, L_FIRST and L_SUPERF */
+/* and must be kept in step with them. isbodiless() yields the       */
+/* masked bit itself (0 or 1, since its mask is 1); the next three   */
+/* compare against 0 and so also yield 0 or 1. isvacant() is true    */
+/* for a bodiless line whose repeated character is a space.          */
+
+#define isbodiless(prop) ( (prop)->flags & 1)
+#define isinvis(prop) (((prop)->flags & 2) != 0)
+#define isfirst(prop) (((prop)->flags & 4) != 0)
+#define issuperf(prop) (((prop)->flags & 8) != 0)
+#define isvacant(prop) (isbodiless(prop) && (prop)->rc == ' ')
+
+
+static int digtoint(char c)
+
+/* Returns the value (0 to 9) of the decimal digit */
+/* c, or -1 if c is not a decimal digit.           */
+{
+  static const char digits[] = "0123456789";
+  const char *hit;
+
+  /* strchr() would match the terminator, so reject NUL up front. */
+  if (c == '\0') return -1;
+
+  hit = strchr(digits, c);
+  if (hit == NULL) return -1;
+
+  return (int) (hit - digits);
+
+  /* We can't simply return c - '0' because this is ANSI C code,   */
+  /* so it has to work for any character set, not just ones which  */
+  /* put the digits together in order. A lookup table indexed by c */
+  /* might be bad too, because there's no upper limit on CHAR_MAX. */
+}
+
+
+static int strtoudec(const char *s, int *pn)
+
+/* Converts the longest prefix of string s consisting of decimal  */
+/* digits to an integer, which is stored in *pn. Normally returns */
+/* 1. If *s is not a digit, then *pn is not changed, but 1 is     */
+/* still returned. If the integer represented is greater than     */
+/* 9999, then *pn is not changed and 0 is returned.               */
+{
+  int total = 0;
+  int digit = digtoint(*s);
+
+  if (digit < 0) return 1;
+
+  while (digit >= 0) {
+    /* Another digit after reaching 1000 would exceed 9999. */
+    if (total >= 1000) return 0;
+    total = 10 * total + digit;
+    digit = digtoint(*++s);
+  }
+
+  *pn = total;
+  return 1;
+}
+
+
+static void parsearg(
+ const char *arg, int *phelp, int *pversion, charset *bodychars, charset
+ *protectchars, charset *quotechars, int *phang, int *pprefix, int *prepeat,
+ int *psuffix, int *pTab, int *pwidth, int *pbody, int *pcap, int *pdiv, int
+ *pErr, int *pexpel, int *pfit, int *pguess, int *pinvis, int *pjust, int
+ *plast, int *pquote, int *pReport, int *ptouch, errmsg_t errmsg
+)
+/* Parses the command line argument in *arg, setting the objects pointed to */
+/* by the other pointers as appropriate. *phelp and *pversion are boolean */
+/* flags indicating whether the help and version options were supplied. */
+/* On success errmsg is cleared. A malformed argument writes "Bad argument" */
+/* into errmsg and also sets *phelp; a charset that fails to parse or to be */
+/* applied leaves the message written by the charset routines in errmsg. */
+{
+ const char *savearg = arg;
+ charset *chars, *change;
+ char oc;
+ int n;
+
+ *errmsg = '\0';
+
+ /* A single leading '-' is optional and ignored. */
+ if (*arg == '-') ++arg;
+
+ if (!strcmp(arg, "help")) {
+ *phelp = 1;
+ return;
+ }
+
+ if (!strcmp(arg, "version")) {
+ *pversion = 1;
+ return;
+ }
+
+ /* Charset options: B, P or Q, then an operator, then the set. */
+ if (*arg == 'B' || *arg == 'P' || *arg == 'Q' ) {
+ chars = *arg == 'B' ? bodychars :
+ *arg == 'P' ? protectchars :
+ /* *arg == 'Q' */ quotechars ;
+ ++arg;
+ if (*arg != '=' && *arg != '+' && *arg != '-') goto badarg;
+ change = parsecharset(arg + 1, errmsg);
+ if (change) {
+ /* With '=', the swap leaves the old contents of *chars in */
+ /* *change, so the freecharset() below disposes of them. */
+ if (*arg == '=') csswap(chars,change);
+ else if (*arg == '+') csadd(chars,change,errmsg);
+ else /* *arg == '-' */ csremove(chars,change,errmsg);
+ freecharset(change);
+ }
+ return;
+ }
+
+ /* A bare leading number is a prefix length if at most 8, */
+ /* otherwise a width. */
+ if (isdigit(*(unsigned char *)arg)) {
+ if (!strtoudec(arg, &n)) goto badarg;
+ if (n <= 8) *pprefix = n;
+ else *pwidth = n;
+ }
+
+ /* The rest is a run of option letters, each optionally followed */
+ /* by a number. n stays -1 when no number follows the letter. */
+ for (;;) {
+ while (isdigit(*(unsigned char *)arg)) ++arg;
+ oc = *arg;
+ if (!oc) break;
+ n = -1;
+ if (!strtoudec(++arg, &n)) goto badarg;
+ if ( oc == 'h' || oc == 'p' || oc == 'r'
+ || oc == 's' || oc == 'T' || oc == 'w') {
+ /* Integer parameters. When the number is omitted, h, r, T and w */
+ /* get the defaults 1, 3, 8 and 79; p and s are stored as -1. */
+ if (oc == 'h') *phang = n >= 0 ? n : 1;
+ else if (oc == 'p') *pprefix = n;
+ else if (oc == 'r') *prepeat = n >= 0 ? n : 3;
+ else if (oc == 's') *psuffix = n;
+ else if (oc == 'T') *pTab = n >= 0 ? n : 8;
+ else /* oc == 'w' */ *pwidth = n >= 0 ? n : 79;
+ }
+ else {
+ /* Boolean parameters: an omitted number means 1, and any */
+ /* value other than 0 or 1 is rejected. */
+ if (n < 0) n = 1;
+ if (n > 1) goto badarg;
+ if (oc == 'b') *pbody = n;
+ else if (oc == 'c') *pcap = n;
+ else if (oc == 'd') *pdiv = n;
+ else if (oc == 'E') *pErr = n;
+ else if (oc == 'e') *pexpel = n;
+ else if (oc == 'f') *pfit = n;
+ else if (oc == 'g') *pguess = n;
+ else if (oc == 'i') *pinvis = n;
+ else if (oc == 'j') *pjust = n;
+ else if (oc == 'l') *plast = n;
+ else if (oc == 'q') *pquote = n;
+ else if (oc == 'R') *pReport = n;
+ else if (oc == 't') *ptouch = n;
+ else goto badarg;
+ }
+ }
+
+ return;
+
+badarg:
+
+ /* The precision keeps the quoted argument within errmsg_size. */
+ swprintf(errmsg, errmsg_size,L"Bad argument: %.*s\n", errmsg_size - 16, savearg);
+ *phelp = 1;
+}
+
+
+static wchar_t **readlines(
+  lineprop **pprops, const charset *protectchars,
+  const charset *quotechars, int Tab, int invis, int quote, errmsg_t errmsg
+)
+/* Reads lines from stdin until EOF, or until a line beginning with a   */
+/* protective character is encountered (in which case the protective    */
+/* character is pushed back onto the input stream), or until a blank    */
+/* line is encountered (in which case the newline is pushed back onto   */
+/* the input stream). Returns a NULL-terminated array of pointers to    */
+/* individual lines, stripped of their newline characters. Every NUL    */
+/* character is stripped, and every white character is changed to a     */
+/* space unless it is a newline. If quote is 1, vacant lines will be    */
+/* supplied as described for the q option in par.doc. *pprops is set    */
+/* to an array of lineprop structures, one for each line, each of whose */
+/* flags field is either 0 or L_INVIS (the other fields are 0). If      */
+/* there are no lines, *pprops is set to NULL. The returned array may   */
+/* be freed with freelines(). *pprops may be freed with free() if       */
+/* it's not NULL. On failure, returns NULL and sets *pprops to NULL.    */
+/*                                                                      */
+/* Each line is gathered as bytes and then converted with mbstowcs().   */
+/* A line that is not a valid multibyte string is a failure, reported   */
+/* with mbserror in errmsg.                                             */
+{
+  buffer *cbuf = NULL, *lbuf = NULL, *lpbuf = NULL;
+  int c, empty, blank, firstline, qsonly, oldqsonly = 0, vlnlen, i;
+  char ch, *ln = NULL, *qpend, *oldln = NULL,
+       *oldqpend = NULL, *p, *op;
+  wchar_t nullchar = L'\0';
+  wchar_t *nullline = NULL, *vln = NULL, **lines = NULL;
+  lineprop vprop = { 0, 0, 0, '\0' }, iprop = { 0, 0, 0, '\0' };
+
+  /* oldqsonly and oldqpend don't really need to be initialized. They    */
+  /* are initialized only to appease compilers that try to be helpful by */
+  /* issuing warnings about uninitialized automatic variables. oldln,    */
+  /* however, must start out NULL: it is tested below and in the cleanup */
+  /* code. When quote is set, oldln and ln normally point at the same    */
+  /* allocation (the most recent complete line).                         */
+
+  iprop.flags = L_INVIS;
+  *errmsg = '\0';
+
+  *pprops = NULL;
+
+  cbuf = newbuffer(sizeof (char), errmsg);
+  if (*errmsg) goto rlcleanup;
+  lbuf = newbuffer(sizeof (wchar_t *), errmsg);
+  if (*errmsg) goto rlcleanup;
+  lpbuf = newbuffer(sizeof (lineprop), errmsg);
+  if (*errmsg) goto rlcleanup;
+
+  for (empty = blank = firstline = 1;  ;  ) {
+    c = getchar();
+    if (c == EOF) break;
+    *(unsigned char *)&ch = c;
+    if (ch == '\n') {
+      if (blank) {
+        ungetc(c,stdin);
+        break;
+      }
+      /* cbuf holds chars, so only the first byte of nullchar is */
+      /* copied; every byte of a zero wchar_t is zero.           */
+      additem(cbuf, &nullchar, errmsg);
+      if (*errmsg) goto rlcleanup;
+      ln = copyitems(cbuf,errmsg);
+      if (*errmsg) goto rlcleanup;
+      if (quote) {
+        for (qpend = ln;  *qpend && csmember(*qpend, quotechars);  ++qpend);
+        for (p = qpend;  *p == ' ' || csmember(*p, quotechars);  ++p);
+        qsonly = (*p == '\0');
+        while (qpend > ln && qpend[-1] == ' ') --qpend;
+        if (!firstline) {
+          for (p = ln, op = oldln;
+               p < qpend && op < oldqpend && *p == *op;
+               ++p, ++op);
+          if (!(p == qpend && op == oldqpend)) {
+            if (!invis && (oldqsonly || qsonly)) {
+              if (oldqsonly) {
+                *op = '\0';
+                oldqpend = op;
+              }
+              if (qsonly) {
+                *p = '\0';
+                qpend = p;
+              }
+            }
+            else {
+              /* NOTE(review): vlnlen counts bytes of ln but is used */
+              /* as a count of wide characters in vln; the two agree */
+              /* only for single-byte text. Left as it was.          */
+              vlnlen = p - ln;
+              vln = malloc((vlnlen + 1) * sizeof (wchar_t));
+              if (!vln) {
+                wcscpy(errmsg,outofmem);
+                goto rlcleanup;
+              }
+              if (mbstowcs(vln,ln,vlnlen + 1) == (size_t)(-1)) {
+                wcscpy(errmsg,mbserror);
+                goto rlcleanup;
+              }
+              vln[vlnlen] = L'\0';
+              additem(lbuf, &vln, errmsg);
+              if (*errmsg) goto rlcleanup;
+              /* lbuf owns the line from here on. Forget it now so that */
+              /* a failure below cannot make the cleanup free it twice. */
+              vln = NULL;
+              additem(lpbuf,  invis ? &iprop : &vprop,  errmsg);
+              if (*errmsg) goto rlcleanup;
+            }
+          }
+        }
+        if (oldln != NULL) {
+          free(oldln);
+          oldln = NULL;
+        }
+        oldln = ln;
+        oldqpend = qpend;
+        oldqsonly = qsonly;
+      }
+      vln = malloc((strlen(ln) + 1) * sizeof (wchar_t));
+      if (!vln) {
+        wcscpy(errmsg, outofmem);
+        goto rlcleanup;
+      }
+      if (mbstowcs(vln,ln,strlen(ln) + 1) == (size_t)(-1)) {
+        wcscpy(errmsg,mbserror);
+        goto rlcleanup;
+      }
+      /* Without quote, the byte form of the line is finished with. */
+      if (oldln == NULL) {
+        free(ln);
+        ln = NULL;
+      }
+      additem(lbuf, &vln, errmsg);
+      if (*errmsg) goto rlcleanup;
+      vln = NULL;
+      additem(lpbuf, &vprop, errmsg);
+      if (*errmsg) goto rlcleanup;
+      clearbuffer(cbuf);
+      empty = blank = 1;
+      firstline = 0;
+    }
+    else {
+      if (empty) {
+        if (csmember(ch, protectchars)) {
+          ungetc(c,stdin);
+          break;
+        }
+        empty = 0;
+      }
+      if (!ch) continue;
+      if (ch == '\t') {
+        ch = ' ';
+        for (i = Tab - numitems(cbuf) % Tab;  i > 0;  --i) {
+          additem(cbuf, &ch, errmsg);
+          if (*errmsg) goto rlcleanup;
+        }
+        continue;
+      }
+      if (isspace(c))
+        ch = ' ';
+      else
+        blank = 0;
+      additem(cbuf, &ch, errmsg);
+      if (*errmsg)
+        goto rlcleanup;
+    }
+  }
+
+  /* Input ended in the middle of a non-blank line: keep that line too. */
+  if (!blank) {
+    /* ln may still alias oldln here; oldln keeps track of that    */
+    /* allocation, and the cleanup code releases it.               */
+    additem(cbuf, &nullchar, errmsg);
+    if (*errmsg) goto rlcleanup;
+    ln = copyitems(cbuf,errmsg);
+    if (*errmsg) goto rlcleanup;
+    vln = malloc((strlen(ln) + 1) * sizeof (wchar_t));
+    if (!vln) {
+      wcscpy(errmsg, outofmem);
+      goto rlcleanup;
+    }
+    if (mbstowcs(vln,ln,strlen(ln) + 1) == (size_t)(-1)) {
+      wcscpy(errmsg,mbserror);
+      goto rlcleanup;
+    }
+    free(ln);
+    ln = NULL;
+    additem(lbuf, &vln, errmsg);
+    if (*errmsg) goto rlcleanup;
+    vln = NULL;
+    additem(lpbuf, &vprop, errmsg);
+    if (*errmsg) goto rlcleanup;
+  }
+
+  additem(lbuf, &nullline, errmsg);
+  if (*errmsg) goto rlcleanup;
+  *pprops = copyitems(lpbuf,errmsg);
+  if (*errmsg) goto rlcleanup;
+  lines = copyitems(lbuf,errmsg);
+
+rlcleanup:
+
+  if (cbuf) freebuffer(cbuf);
+  if (lpbuf) freebuffer(lpbuf);
+  if (lbuf) {
+    /* On failure the lines collected so far are owned by lbuf only. */
+    if (!lines)
+      for (;;) {
+        lines = nextitem(lbuf);
+        if (!lines) break;
+        free(*lines);
+      }
+    freebuffer(lbuf);
+  }
+  /* oldln is either NULL, the same allocation as ln, or an earlier */
+  /* line that nothing else refers to any more.                     */
+  if (oldln && oldln != ln) free(oldln);
+  if (ln) free(ln);
+  if (vln) free(vln);
+  /* lines is NULL here exactly when the call failed; honour the */
+  /* promise that *pprops is NULL in that case.                  */
+  if (!lines && *pprops) {
+    free(*pprops);
+    *pprops = NULL;
+  }
+
+  return lines;
+}
+
+
+static void compresuflen(
+ const wchar_t * const *lines, const wchar_t * const *endline,
+ const charset *bodychars, int body, int pre, int suf, int *ppre, int *psuf
+)
+/* lines is an array of strings, up to but not including endline. */
+/* Writes into *ppre and *psuf the comprelen and comsuflen of the */
+/* lines in lines. Assumes that they have already been determined */
+/* to be at least pre and suf. endline must not equal lines. */
+/* All measurements are taken on the first line (*lines); the */
+/* other lines are only compared against it. */
+{
+ const wchar_t *start, *end, *knownstart, * const *line, *p1, *p2, *knownend,
+ *knownstart2;
+
+ /* Phase 1: common prefix. The first pre characters are taken as */
+ /* already common, so comparison starts at knownstart. */
+ start = *lines;
+ end = knownstart = start + pre;
+ /* Furthest position the prefix could reach in the first line: */
+ /* end of string if body is set, otherwise the first body character. */
+ if (body)
+ while (*end) ++end;
+ else
+ while (*end && !csmember(*end, bodychars)) ++end;
+ /* Pull end back to the point where some other line first differs. */
+ for (line = lines + 1; line < endline; ++line) {
+ for (p1 = knownstart, p2 = *line + pre;
+ p1 < end && *p1 == *p2;
+ ++p1, ++p2);
+ end = p1;
+ }
+ /* With body set, walk backwards from end: spaces are skipped, each */
+ /* body character moves end back onto itself, and the first */
+ /* character that is neither stops the walk. */
+ if (body)
+ for (p1 = end; p1 > knownstart; )
+ if (*--p1 != L' ') {
+ if (csmember(*p1, bodychars))
+ end = p1;
+ else
+ break;
+ }
+ *ppre = end - start;
+
+ /* Phase 2: common suffix, measured on what remains of the first */
+ /* line after its prefix. knownend marks where the already-known */
+ /* suf characters begin. */
+ knownstart = *lines + *ppre;
+ for (end = knownstart; *end; ++end);
+ knownend = end - suf;
+ /* Earliest position the suffix could start at: right after the */
+ /* prefix if body is set, otherwise just after the last body */
+ /* character that precedes knownend. */
+ if (body)
+ start = knownstart;
+ else
+ for (start = knownend;
+ start > knownstart && !csmember(start[-1], bodychars);
+ --start);
+ /* Compare every other line with the first one, right to left, and */
+ /* move start forward to where the match stops. */
+ for (line = lines + 1; line < endline; ++line) {
+ knownstart2 = *line + *ppre;
+ for (p2 = knownstart2; *p2; ++p2);
+ for (p1 = knownend, p2 -= suf;
+ p1 > start && p2 > knownstart2 && p1[-1] == p2[-1];
+ --p1, --p2);
+ start = p1;
+ }
+ if (body) {
+ /* Skip forward over spaces and body characters (never past */
+ /* knownend), then give one space back if the last character */
+ /* skipped was a space. */
+ for (p1 = start;
+ start < knownend && (*start == L' ' || csmember(*start, bodychars));
+ ++start);
+ if (start > p1 && start[-1] == L' ') --start;
+ }
+ else
+ /* Drop leading spaces from the suffix, keeping the last one. */
+ while (end - start >= 2 && *start == L' ' && start[1] == L' ') ++start;
+ *psuf = end - start;
+}
+
+
+static void delimit(
+ const wchar_t * const *lines, const wchar_t * const *endline,
+ const charset *bodychars, int repeat, int body, int div,
+ int pre, int suf, lineprop *props
+)
+/* lines is an array of strings, up to but not including */
+/* endline. Sets fields in each lineprop in the parallel */
+/* array props as appropriate, except for the L_SUPERF flag, */
+/* which is never set. It is assumed that the comprelen */
+/* and comsuflen of the lines in lines have already been */
+/* determined to be at least pre and suf, respectively. */
+/* The function calls itself on each run of lines lying */
+/* between the bodiless lines it finds. */
+{
+ const wchar_t * const *line, *end, *p, * const *nextline;
+ wchar_t rc;
+ lineprop *prop, *nextprop;
+ int anybodiless = 0, status;
+
+ /* No lines: nothing to mark. */
+ if (endline == lines) return;
+
+ /* A single line starts its own paragraph and keeps the prefix and */
+ /* suffix lengths handed in by the caller. */
+ if (endline == lines + 1) {
+ props->flags |= L_FIRST;
+ props->p = pre, props->s = suf;
+ return;
+ }
+
+ /* Two or more lines: refine pre and suf to the comprelen and */
+ /* comsuflen of this group (the local copies are overwritten). */
+ compresuflen(lines, endline, bodychars, body, pre, suf, &pre, &suf);
+
+ /* Pass 1: record p and s on every line and decide which lines are */
+ /* bodiless. Each line is assumed bodiless until shown otherwise. */
+ line = lines, prop = props;
+ do {
+ prop->flags |= L_BODILESS;
+ prop->p = pre, prop->s = suf;
+ for (end = *line; *end; ++end);
+ end -= suf;
+ p = *line + pre;
+ /* rc is the first character of the middle part, or a space when */
+ /* the middle part is empty. */
+ rc = p < end ? *p : L' ';
+ /* A non-space candidate only counts when repeat is non-zero and */
+ /* the middle part is at least repeat characters long. */
+ if (rc != L' ' && (!repeat || end - p < repeat))
+ prop->flags &= ~L_BODILESS;
+ else
+ /* The middle part must consist of rc and nothing else. */
+ while (p < end) {
+ if (*p != rc) {
+ prop->flags &= ~L_BODILESS;
+ break;
+ }
+ ++p;
+ }
+ if (isbodiless(prop)) {
+ anybodiless = 1;
+ prop->rc = rc;
+ }
+ ++line, ++prop;
+ } while (line < endline);
+
+ /* Pass 2: if bodiless lines were found they split the group; */
+ /* process each maximal run of non-bodiless lines on its own, */
+ /* passing the refined pre and suf as the new lower bounds. */
+ if (anybodiless) {
+ line = lines, prop = props;
+ do {
+ if (isbodiless(prop)) {
+ ++line, ++prop;
+ continue;
+ }
+
+ for (nextline = line + 1, nextprop = prop + 1;
+ nextline < endline && !isbodiless(nextprop);
+ ++nextline, ++nextprop);
+
+ delimit(line,nextline,bodychars,repeat,body,div,pre,suf,prop);
+
+ line = nextline, prop = nextprop;
+ } while (line < endline);
+
+ return;
+ }
+
+ /* No bodiless lines here. Without div, only the first line is */
+ /* marked L_FIRST. */
+ if (!div) {
+ props->flags |= L_FIRST;
+ return;
+ }
+
+ /* With div, a line's status is whether the character right after */
+ /* the prefix is a space; every line whose status equals that of */
+ /* the first line is marked L_FIRST. */
+ line = lines, prop = props;
+ status = ((*lines)[pre] == L' ');
+ do {
+ if (((*line)[pre] == L' ') == status)
+ prop->flags |= L_FIRST;
+ ++line, ++prop;
+ } while (line < endline);
+}
+
+
+static void marksuperf(
+ const wchar_t * const * lines, const wchar_t * const * endline, lineprop *props
+)
+/* lines points to the first line of a segment, and endline to one */
+/* line beyond the last line in the segment. Sets L_SUPERF bits in */
+/* the flags fields of the props array whenever the corresponding */
+/* line is superfluous. L_BODILESS bits must already be set. */
+{
+ const wchar_t * const *line, *p;
+ lineprop *prop, *mprop, dummy;
+ int inbody, num, mnum;
+
+ /* Pass 1: provisionally mark every vacant line superfluous. */
+ for (line = lines, prop = props; line < endline; ++line, ++prop)
+ if (isvacant(prop))
+ prop->flags |= L_SUPERF;
+
+ /* Pass 2: for each run of vacant lines that is followed by a */
+ /* non-vacant line, un-mark the one line of the run that has the */
+ /* fewest non-space characters (mprop; the strict < keeps the */
+ /* first line on a tie). */
+ /* mprop starts out pointing at dummy, so a vacant run at the very */
+ /* start of the segment never takes over mprop (num < 0 is never */
+ /* true) and the clearing below lands on dummy instead. A run at */
+ /* the end is never followed by a non-vacant line, so it stays */
+ /* marked. */
+ /* NOTE(review): dummy.flags is not initialised before that */
+ /* read-modify-write; the result is discarded, but confirm this is */
+ /* acceptable to the toolchain in use. */
+ inbody = mnum = 0;
+ mprop = &dummy;
+ for (line = lines, prop = props; line < endline; ++line, ++prop)
+ if (isvacant(prop)) {
+ /* Count the non-space characters on this vacant line. */
+ for (num = 0, p = *line; *p; ++p)
+ if (*p != L' ') ++num;
+ /* The first vacant line after a non-vacant one starts a new run; */
+ /* later lines replace it only if they have strictly fewer */
+ /* non-spaces. */
+ if (inbody || num < mnum)
+ mnum = num, mprop = prop;
+ inbody = 0;
+ } else {
+ /* First non-vacant line after a vacant run: keep that run's */
+ /* chosen line. */
+ if (!inbody) mprop->flags &= ~L_SUPERF;
+ inbody = 1;
+ }
+}
+
+
+static void setaffixes(
+  const wchar_t * const *inlines, const wchar_t * const *endline,
+  const lineprop *props, const charset *bodychars,
+  const charset *quotechars, int hang, int body, int quote,
+  int *pafp, int *pfs, int *pprefix, int *psuffix
+)
+/* inlines .. endline (exclusive) is a non-empty array of strings    */
+/* holding one IP, and props is the parallel array of lineprop       */
+/* structures.  Stores the augmented fallback prelen of the IP in    */
+/* *pafp and its fallback suflen in *pfs.  Whichever of *pprefix and */
+/* *psuffix is negative on entry is replaced by its default value as */
+/* specified in "par.doc"; non-negative values are left untouched.   */
+{
+  int count, compre, comsuf, beyondhang;
+  const wchar_t *cursor;
+
+  count = endline - inlines;
+  beyondhang = count > hang + 1;
+
+  /* The comprelen/comsuflen of the lines after the first hang lines */
+  /* are only needed when a default is wanted and there are more     */
+  /* than hang + 1 lines.                                            */
+  if (beyondhang && (*pprefix < 0 || *psuffix < 0))
+    compresuflen(inlines + hang, endline, bodychars, body, 0, 0,
+                 &compre, &comsuf);
+
+  /* Start right after the first line's recorded prefix; for a */
+  /* one-line IP with quote set, also step over the quote      */
+  /* characters that follow it.                                */
+  cursor = *inlines + props->p;
+  if (quote && count == 1) {
+    for (; *cursor && csmember (*cursor, quotechars); ++cursor)
+      ;
+  }
+  *pafp = cursor - *inlines;
+  *pfs = props->s;
+
+  if (*pprefix < 0) {
+    if (beyondhang)
+      *pprefix = compre;
+    else
+      *pprefix = *pafp;
+  }
+
+  if (*psuffix < 0) {
+    if (beyondhang)
+      *psuffix = comsuf;
+    else
+      *psuffix = *pfs;
+  }
+}
+
+
+static void freelines(wchar_t **lines)
+/* lines is a NULL-terminated array of strings.  Releases */
+/* each string in turn and then the array itself.         */
+{
+  wchar_t **cursor = lines;
+
+  while (*cursor) {
+    free(*cursor);
+    ++cursor;
+  }
+
+  free(lines);
+}
+
+/* Program entry point. Builds the body, protective and quote */
+/* character sets from the PARBODY, PARPROTECT and PARQUOTE */
+/* environment variables, applies the options found in PARINIT and */
+/* then those on the command line, and finally filters stdin to */
+/* stdout one segment at a time. Returns EXIT_FAILURE if errmsg is */
+/* non-empty at exit and EXIT_SUCCESS otherwise. argc is not used; */
+/* the argument list is walked until its terminating null pointer. */
+int main(int argc, const char * const *argv)
+{
+ int help = 0, version = 0, hang = 0, prefix = -1, repeat = 0, suffix = -1,
+ Tab = 1, width = 72, body = 0, cap = 0, div = 0, Err = 0, expel = 0,
+ fit = 0, guess = 0, invis = 0, just = 0, last = 0, quote = 0, Report = 0,
+ touch = -1;
+ int prefixbak, suffixbak, sawnonblank, oweblank, n, i, afp, fs;
+ charset *bodychars = NULL, *protectchars = NULL, *quotechars = NULL;
+ char ch;
+ wint_t c;
+ char *arg, *parinit = NULL;
+ wchar_t *end, **nextline, **inlines = NULL, **endline, **firstline,
+ **outlines = NULL, **line;
+ const char *env;
+ const char * const whitechars = " \f\n\r\t\v";
+ errmsg_t errmsg = { '\0' };
+ lineprop *props = NULL, *firstprop, *nextprop;
+ FILE *errout;
+
+/* Set the current locale from the environment: */
+
+ setlocale(LC_ALL,"");
+
+/* Process environment variables: */
+
+ /* A charset that fails to parse also requests the usage message. */
+ env = getenv("PARBODY");
+ if (!env) env = "";
+ bodychars = parsecharset(env,errmsg);
+ if (*errmsg) {
+ help = 1;
+ goto parcleanup;
+ }
+
+ env = getenv("PARPROTECT");
+ if (!env) env = "";
+ protectchars = parsecharset(env,errmsg);
+ if (*errmsg) {
+ help = 1;
+ goto parcleanup;
+ }
+
+ /* Unlike the other two sets, the quote set defaults to "> ". */
+ env = getenv("PARQUOTE");
+ if (!env) env = "> ";
+ quotechars = parsecharset(env,errmsg);
+ if (*errmsg) {
+ help = 1;
+ goto parcleanup;
+ }
+
+ /* PARINIT is split on white characters and each piece is handled */
+ /* like a command line argument. strtok() writes into its input, */
+ /* so it is run on a private copy of the value. */
+ env = getenv("PARINIT");
+ if (env) {
+ parinit = malloc((strlen(env) + 1) * sizeof (char));
+ if (!parinit) {
+ wcscpy(errmsg,outofmem);
+ goto parcleanup;
+ }
+ strcpy(parinit,env);
+ arg = strtok(parinit,whitechars);
+ while (arg) {
+ parsearg(arg, &help, &version, bodychars, protectchars,
+ quotechars, &hang, &prefix, &repeat, &suffix, &Tab,
+ &width, &body, &cap, &div, &Err, &expel, &fit, &guess,
+ &invis, &just, &last, &quote, &Report, &touch, errmsg );
+ if (*errmsg || help || version) goto parcleanup;
+ arg = strtok(NULL,whitechars);
+ }
+ free(parinit);
+ parinit = NULL;
+ }
+
+/* Process command line arguments: */
+
+ /* The pre-increment skips argv[0]. */
+ while (*++argv) {
+ parsearg(*argv, &help, &version, bodychars, protectchars,
+ quotechars, &hang, &prefix, &repeat, &suffix, &Tab,
+ &width, &body, &cap, &div, &Err, &expel, &fit, &guess,
+ &invis, &just, &last, &quote, &Report, &touch, errmsg );
+ if (*errmsg || help || version) goto parcleanup;
+ }
+
+ if (Tab == 0) {
+ wcscpy(errmsg, L"<Tab> must not be 0.\n");
+ goto parcleanup;
+ }
+
+ /* touch left unset (-1) defaults to the logical OR of fit and last. */
+ /* The prefix/suffix values chosen by the user (or -1 for unset) are */
+ /* saved so that they can be restored before every paragraph. */
+ if (touch < 0) touch = fit || last;
+ prefixbak = prefix;
+ suffixbak = suffix;
+
+ /* Main loop: */
+ /* Each iteration first passes blank and protected lines through, */
+ /* then reads and reformats one segment. sawnonblank records that */
+ /* something other than a blank line has been seen; oweblank */
+ /* records that a newline withheld under expel must be written */
+ /* before the next non-blank output. */
+ /* NOTE(review): stdout is written here with byte functions */
+ /* (putchar) and with wide functions (fputwc, fwprintf); ISO C */
+ /* gives a stream a single orientation, so confirm that this */
+ /* mixture behaves as intended on the target C library. */
+ for (sawnonblank = oweblank = 0; ; ) {
+ for (;;) {
+ c = getchar();
+ if (c == EOF) break;
+ *(unsigned char *)&ch = c;
+ /* With expel, a newline met here is swallowed; one is owed */
+ /* later only if non-blank input has already been seen. */
+ if (expel && ch == '\n') {
+ oweblank = sawnonblank;
+ continue;
+ }
+ /* A line starting with a protective character is copied up to, */
+ /* but not including, its newline. */
+ if (csmember(ch, protectchars)) {
+ sawnonblank = 1;
+ if (oweblank) {
+ fputwc('\n', stdout);
+ oweblank = 0;
+ }
+ while (ch != '\n') {
+ putchar(c);
+ c = getchar();
+ if (c == EOF) break;
+ *(unsigned char *)&ch = c;
+ }
+ }
+ if (ch != '\n') break; /* subsumes the case that c == EOF */
+ putchar(c);
+ }
+ if (c == EOF) break;
+ /* c is the first character of a line that is neither empty nor */
+ /* protected; push it back so that readlines() sees the whole line. */
+ ungetc(c,stdin);
+
+ inlines =
+ readlines(&props, protectchars, quotechars, Tab, invis, quote, errmsg);
+ if (*errmsg) goto parcleanup;
+ for (endline = inlines; *endline; ++endline) ;
+ /* readlines() produced no lines: nothing to format this round. */
+ if (endline == inlines) {
+ free(inlines);
+ inlines = NULL;
+ continue;
+ }
+
+ sawnonblank = 1;
+ if (oweblank) {
+ fputwc('\n', stdout);
+ oweblank = 0;
+ }
+
+ /* Fill in props: prefix/suffix lengths, bodiless lines and the */
+ /* first line of each paragraph. */
+ delimit((const wchar_t * const *) inlines,
+ (const wchar_t * const *) endline,
+ bodychars, repeat, body, div, 0, 0, props);
+
+ if (expel)
+ marksuperf((const wchar_t * const *) inlines,
+ (const wchar_t * const *) endline, props);
+
+ firstline = inlines, firstprop = props;
+
+ /* Walk the segment: bodiless lines are written directly, and */
+ /* every other stretch of lines is reformatted as one paragraph. */
+ do {
+ if (isbodiless(firstprop)) {
+ /* Invisible lines are never written, and neither are */
+ /* superfluous ones when expel is set. */
+ if (!isinvis(firstprop) && !(expel && issuperf(firstprop))) {
+ for (end = *firstline; *end; ++end);
+ if (!repeat || (firstprop->rc == ' ' && !firstprop->s)) {
+ /* Written as is, minus trailing spaces. */
+ while (end > *firstline && end[-1] == ' ') --end;
+ *end = '\0';
+ fwprintf(stdout, L"%ls\n", *firstline);
+ }
+ else {
+ /* Rebuilt as prefix, then n copies of the repeat character, */
+ /* then suffix, so that the line is width characters long. */
+ n = width - firstprop->p - firstprop->s;
+ if (n < 0) {
+ swprintf(errmsg,errmsg_size,impossibility,5);
+ goto parcleanup;
+ }
+ fwprintf(stdout, L"%.*ls", firstprop->p, *firstline);
+ for (i = n; i; --i)
+ fputwc(firstprop->rc, stdout);
+ fwprintf(stdout, L"%ls\n", end - firstprop->s);
+ }
+ }
+ ++firstline, ++firstprop;
+ continue;
+ }
+
+ /* The paragraph extends up to the next bodiless line, the next */
+ /* line flagged as a first line, or the end of the segment. */
+ for (nextline = firstline + 1, nextprop = firstprop + 1;
+ nextline < endline && !isbodiless(nextprop) && !isfirst(nextprop);
+ ++nextline, ++nextprop);
+
+ /* Restore the user's settings so that unset values are */
+ /* recomputed for this paragraph by setaffixes(). */
+ prefix = prefixbak, suffix = suffixbak;
+ setaffixes((const wchar_t * const *) firstline,
+ (const wchar_t * const *) nextline, firstprop, bodychars,
+ quotechars, hang, body, quote, &afp, &fs, &prefix, &suffix);
+ if (width <= prefix + suffix) {
+ swprintf(errmsg,errmsg_size,
+ L"<width> (%d) <= <prefix> (%d) + <suffix> (%d)\n",
+ width, prefix, suffix);
+ goto parcleanup;
+ }
+
+ outlines =
+ reformat((const wchar_t * const *) firstline,
+ (const wchar_t * const *) nextline,
+ afp, fs, hang, prefix, suffix, width, cap,
+ fit, guess, just, last, Report, touch, errmsg);
+ if (*errmsg) goto parcleanup;
+ for (line = outlines; *line; ++line)
+ fwprintf(stdout, L"%ls\n", *line);
+ freelines(outlines);
+ outlines = NULL;
+
+ firstline = nextline, firstprop = nextprop;
+ } while (firstline < endline);
+
+ freelines(inlines);
+ inlines = NULL;
+
+ free(props);
+ props = NULL;
+ }
+
+parcleanup:
+
+ /* Reached on normal completion and on every early exit; each */
+ /* pointer is reset to NULL once its storage has been released, so */
+ /* only what is still owned gets freed here. */
+ if (bodychars) freecharset(bodychars);
+ if (protectchars) freecharset(protectchars);
+ if (quotechars) freecharset(quotechars);
+ if (parinit) free(parinit);
+ if (inlines) freelines(inlines);
+ if (props) free(props);
+ if (outlines) freelines(outlines);
+
+ /* Messages go to stderr when Err is set, otherwise to stdout. */
+ /* NOTE(review): fputs() below is a byte function while the other */
+ /* messages use wide functions on the same stream; confirm against */
+ /* the declaration of usagemsg. */
+ errout = Err ? stderr : stdout;
+ if (*errmsg) fwprintf(errout, L"par error:\n%.*ls", errmsg_size, errmsg);
+#ifdef NOWIDTH
+ if (version) fputws(L"par 1.52-i18n.3 (without wcwidth() support)\n",errout);
+#else
+ if (version) fputws(L"par 1.52-i18n.3\n",errout);
+#endif
+ if (help) fputs(usagemsg,errout);
+
+ return *errmsg ? EXIT_FAILURE : EXIT_SUCCESS;
+}
View
1,345 par/par.doc
@@ -0,0 +1,1345 @@
+ *********************
+ * par.doc *
+ * for Par 1.52 i18n *
+ * Copyright 2001 by *
+ * Adam M. Costello *
+ *********************
+
+
+ Par 1.52 is a package containing:
+
+ + This doc file.
+ + A man page based on this doc file.
+ + The ANSI C source for the filter "par".
+
+
+Contents
+
+ Contents
+ File List
+ Rights and Responsibilities
+ Compilation
+ Synopsis
+ Description
+ *Quick Start
+ Terminology
+ Options
+ Environment
+ Details
+ Diagnostics
+ Examples
+ Limitations
+ Apologies
+ Bugs
+
+
+File List
+
+ The Par 1.52 package is always distributed with at least the
+ following files:
+
+ buffer.c
+ buffer.h
+ charset.c
+ charset.h
+ errmsg.c
+ errmsg.h
+ par.1
+ par.c
+ par.doc
+ protoMakefile
+ reformat.c
+ reformat.h
+ releasenotes
+
+ Each file is a text file which identifies itself on the second line,
+ and identifies the version of Par to which it belongs on the third
+ line, so you can always tell which file is which, even if the files
+ have been renamed.
+
+ The file "par.1" is a man page for the filter par (not to be
+ confused with the package Par, which contains the source code for
+ par). "par.1" is based on this doc file, and conveys much (not
+ all) of the same information, but "par.doc" is the definitive
+ documentation for both par and Par.
+
+
+Rights and Responsibilities
+
+ The files listed in the File List section above are each Copyright
+ 2001 by Adam M. Costello (henceforth "I", "me").
+
+ I grant everyone ("you") permission to do whatever you like with
+ these files, provided that if you modify them you take reasonable
+ steps to avoid confusing or misleading people about who wrote the
+ modified files (both you and I) or what version they are. All
+ official versions of Par will have version numbers consisting of
+ only digits and periods.
+
+ I encourage you to send me copies of your modifications in case I
+ wish to incorporate them into future versions of Par. See the Bugs
+ section for my address.
+
+ Though I have tried to make sure that Par is free of bugs, I make no
+ guarantees about its soundness. Therefore, I am not responsible for
+ any damage resulting from the use of these files.
+
+
+Compilation
+
+ To compile par, you need an ANSI C compiler. Follow the
+ instructions in the comments in protoMakefile.
+
+ If your compiler generates any warnings that you think are
+ legitimate, please tell me about them (see the Bugs section).
+
+ Note that all variables in par are either constant or automatic
+ (or both), which means that par can be made reentrant (if your
+ compiler supports it). Given the right operating system, it should
+ be possible for several par processes to share the same code space
+ and the same data space (but not the same stack, of course) in
+ memory.
+
+
+Synopsis
+ par [help] [version] [B<op><set>] [P<op><set>] [Q<op><set>]
+ [h[<hang>]] [p[<prefix>]] [r[<repeat>]] [s[<suffix>]] [T[<Tab>]]
+ [w[<width>]] [b[<body>]] [c[<cap>]] [d[<div>]] [E[<Err>]]
+ [e[<expel>]] [f[<fit>]] [g[<guess>]] [i[<invis>]] [j[<just>]]
+ [l[<last>]] [q[<quote>]] [R[<Report>]] [t[<touch>]]
+
+ Things enclosed in [square brackets] are optional. Things enclosed
+ in <angle brackets> are parameters.
+
+
+Description
+
+ par is a filter which copies its input to its output, changing all
+ white characters (except newlines) to spaces, and reformatting
+ each paragraph. Paragraphs are separated by protected, blank, and
+ bodiless lines (see the Terminology section for definitions), and
+ optionally delimited by indentation (see the d option in the Options
+ section).
+
+ Each output paragraph is generated from the corresponding input
+ paragraph as follows:
+
+ 1) An optional prefix and/or suffix is removed from each input
+ line.
+ 2) The remainder is divided into words (separated by spaces).
+ 3) The words are joined into lines to make an eye-pleasing
+ paragraph.
+ 4) The prefixes and suffixes are reattached.
+
+ If there are suffixes, spaces are inserted before them so that they
+ all end in the same column.
+
+
+Quick Start
+
+ par is necessarily complex. For those who wish to use it
+ immediately and understand it later, assign to the PARINIT
+ environment variable the following value:
+
+ rTbgqR B=.,?_A_a Q=_s>|
+
+ The spaces, question mark, greater-than sign, and vertical bar will
+ probably have to be escaped or quoted to prevent your shell from
+ interpreting them.
+
+ The documentation, though precise, is unfortunately not well-written
+ for the end-user. Your best bet is probably to read quickly the
+ Description, Terminology, Options, and Environment sections, then
+ read carefully the Examples section, referring back to the Options
+ and Terminology sections as needed.
+
+ For the "power user", a full understanding of par will require
+ multiple readings of the Terminology, Options, Details, and Examples
+ sections.
+
+
+Terminology
+
+ Miscellaneous terms:
+
+ charset syntax
+ A way of representing a set of characters as a string.
+ The set includes exactly those characters which appear in
+ the string, except that the underscore (_) is an escape
+ character. Whenever it appears, it must begin one of the
+ following escape sequences:
+
+ __ = an underscore
+ _s = a space
+ _b = a backslash (\)
+ _q = a single quote (')
+ _Q = a double quote (")
+ _A = all upper case letters
+ _a = all lower case letters
+ _0 = all decimal digits
+ _xhh = the character represented by the two hexadecimal
+ digits hh (which may be upper or lower case)
+
+ The NUL character must not appear in the string but it may
+ be included in the set with the _x00 sequence.
+
+ error
+ A condition which causes par to abort. See the Diagnostics
+ section.
+
+ IP Input paragraph.
+
+ OP Output paragraph.
+
+ parameter
+ A symbol which may take on unsigned integral values. There
+ are several parameters whose values affect the behavior of
+ par. Parameters can be assigned values using command line
+ options.
+
+
+ Types of characters:
+
+ alphanumeric character
+ An upper case letter, lower case letter, or decimal digit.
+
+ body character
+ A member of the set of characters defined by the PARBODY
+ environment variable (see the Environment section) and/or
+ the B option (see the Options section).
+
+ protective character
+ A member of the set of characters defined by the PARPROTECT
+ environment variable and/or the P option.
+
+ quote character
+ A member of the set of characters defined by the PARQUOTE
+ environment variable and/or the Q option.
+
+ terminal character
+ A period, question mark, exclamation point, or colon.
+
+ white character
+ A space, formfeed, newline, carriage return, tab, or
+ vertical tab.
+
+ Functions:
+
+ comprelen
+ Given a non-empty sequence <S> of lines, let <c> be their
+ longest common prefix. If the parameter <body> is 0, place
+ a divider just after the leading non-body characters in <c>
+ (at the beginning if there are none). If <body> is 1, place
+ the divider just after the last non-space non-body character
+ in <c> (at the beginning if there is none), then advance
+ the divider over any immediately following spaces. The
+ comprelen of <S> is the number of characters preceding the
+ divider.
+
+ comsuflen
+ Given a non-empty sequence <S> of lines, let <p> be the
+ comprelen of <S>. Let <T> be the set of lines which results
+ from stripping the first <p> characters from each line in
+ <S>. Let <c> be the longest common suffix of the lines
+ in <T>. If <body> is 0, place a divider just before the
+ trailing non-body characters in <c> (at the end if there are
+ none), then advance the divider over all but the last of any
+ immediately following spaces. If <body> is 1, place the
+ divider just before the first non-space non-body character,
+ then back up the divider over one immediately preceding
+ space if there is one. The comsuflen of <S> is the number
+ of characters following the divider.
+
+ fallback prelen (suflen)
+ The fallback prelen (suflen) of an IP is: the comprelen
+ (comsuflen) of the IP, if the IP contains at least two
+ lines; otherwise, the comprelen (comsuflen) of the block
+ containing the IP, if the block contains at least two
+ lines; otherwise, the length of the longer of the prefixes
+ (suffixes) of the bodiless lines just above and below the
+ block, if the segment containing the block has any bodiless
+ lines; otherwise, 0. (See below for the definitions of
+ block, segment, and bodiless line.)
+
+ augmented fallback prelen
+ Let <fp> be the fallback prelen of an IP. If the IP
+ contains more than one line, or if <quote> is 0, then
+ the augmented fallback prelen of the IP is simply <fp>.
+ Otherwise, it is <fp> plus the number of quote characters
+ immediately following the first <fp> characters of the line.
+
+ quoteprefix
+ The quoteprefix of a line is the longest string of quote
+ characters appearing at the beginning of the line, after
+ this string has been stripped of any trailing spaces.
+
+ Types of lines:
+
+ blank line
+ An empty line, or a line whose first character is not
+ protective and which contains only spaces.
+
+ protected line
+ An input line whose first character is protective.
+
+ bodiless line
+ A line which is order <k> bodiless for some <k>.
+
+ order <k> bodiless line
+ There is no such thing as an order 0 bodiless line. Suppose
+ <S> is a contiguous subsequence of a segment (see below)
+ containing at least two lines, containing no order <k>-1
+ bodiless lines, bounded above and below by order <k>-1
+ bodiless lines and/or the beginning/end of the segment.
+ Let <p> and <s> be the comprelen and comsuflen of <S>.
+ Any member of <S> which, if stripped of its first <p> and
+ last <s> characters, would be blank (or, if the parameter
+ <repeat> is non-zero, would consist of the same character
+ repeated at least <repeat> times), is order <k> bodiless.
+ The first <p> characters of the bodiless line comprise its
+ prefix; the last <s> characters comprise its suffix. The
+ character which repeats in the middle is called its repeat
+ character. If the middle is empty, the space is taken to be
+ its repeat character.
+
+ vacant line
+ A bodiless line whose repeat character is the space.
+
+ superfluous line
+ Only blank and vacant lines may be superfluous. If
+ contiguous vacant lines lie at the beginning or end of
+ a segment, they are all superfluous. But if they lie
+ between two non-vacant lines within a segment, then all are
+ superfluous except one--the one which contains the fewest
+ non-spaces. In case of a tie, the first of the tied lines
+ is chosen. Similarly, if contiguous blank lines lie outside
+ of any segments at the beginning or end of the input, they
+ are all superfluous. But if they lie between two segments
+ and/or protected lines, then all are superfluous except the
+ first.
+
+ Groups of lines:
+
+ segment
+ A contiguous sequence of input lines containing no protected
+ or blank lines, bounded above and below by protected lines,
+ blank lines, and/or the beginning/end of the input.
+
+ block
+ A contiguous subsequence of a segment containing no bodiless
+ lines, bounded above and below by bodiless lines and/or the
+ beginning/end of the segment.
+
+ Types of words:
+
+ capitalized word
+ If the parameter <cap> is 0, a capitalized word is one which
+ contains at least one alphanumeric character, whose first
+ alphanumeric character is not a lower case letter. If <cap>
+ is 1, every word is considered a capitalized word. (See the
+ c option in the Options section.)
+
+ curious word
+ A word which contains a terminal character <c> such that
+ there are no alphanumeric characters in the word after <c>,
+ but there is at least one alphanumeric character in the word
+ before <c>.
+
+
+Options
+
+ Any command line argument may begin with one minus sign (-) which
+ is ignored. Generally, more than one option may appear in a single
+ command line argument, but there are exceptions: The help, version,
+ B, P, and Q options must have whole arguments all to themselves.
+
+ help Causes all remaining arguments to be ignored. No input
+ is read. A usage message is printed on the output
+ briefly describing the options used by par.
+
+ version Causes all remaining arguments to be ignored. No input
+ is read. "par 1.52" is printed on the output. Of
+ course, this will change in future releases of Par.
+
+ B<op><set> <op> is a single character, either an equal sign (=),
+ a plus sign (+), or a minus sign (-), and <set> is a
+ string using charset syntax. If <op> is an equal sign,
+ the set of body characters is set to the character set
+ defined by <set>. If <op> is a plus/minus sign, the
+ characters in the set defined by <set> are added/removed
+ to/from the existing set of body characters defined by
+ the PARBODY environment variable and any previous B
+ options. It is okay to add characters that are already
+ in the set or to remove characters that are not in the
+ set.
+
+ P<op><set> Just like the B option, except that it applies to the
+ set of protective characters.
+
+ Q<op><set> Just like the B option, except that it applies to the
+ set of quote characters.
+
+ All remaining options are used to set values of parameters. Values
+ set by command line options hold for all paragraphs. Unset
+ parameters are given default values. Any parameters whose default
+ values depend on the IP (namely <prefix> and <suffix>), if left
+ unset, are recomputed separately for each paragraph.
+
+ The approximate role of each parameter is described here. See the
+ Details section for the rest of the story.
+
+ The first six parameters, <hang>, <prefix>, <repeat>, <suffix>,
+ <Tab>, and <width>, may be set to any unsigned decimal integer less
+ than 10000.
+
+ h[<hang>] Mainly affects the default values of <prefix> and
+ <suffix>. Defaults to 0. If the h option is given
+ without a number, the value 1 is inferred. (See also
+ the p and s options.)
+
+ p[<prefix>] The first <prefix> characters of each line of the OP
+ are copied from the first <prefix> characters of the
+ corresponding line of the IP. If there are more than
+ <hang>+1 lines in the IP, the default value is the
+ comprelen of all the lines in the IP except the first
+ <hang> of them. Otherwise, the default value is the
+ augmented fallback prelen of the IP. If the p option is
+ given without a number, <prefix> is unset, even if it
+ had been set earlier. (See also the h and q options.)
+
+ r[<repeat>] If <repeat> is non-zero, bodiless lines have the number
+ of instances of their repeat characters increased or
+ decreased until the length of the line is <width>.
+ The exact value of <repeat> affects the definition of
+ bodiless line. Defaults to 0. If the r option is given
+ without a number, the value 3 is inferred. (See also
+ the w option.)
+
+ s[<suffix>] The last <suffix> characters of each line of the OP
+ are copied from the last <suffix> characters of the
+ corresponding line of the IP. If there are more than
+ <hang>+1 lines in the IP, the default value is the
+ comsuflen of all the lines in the IP except the first
+ <hang> of them. Otherwise, the default value is the
+ fallback suflen of the IP. If the s option is given
+ without a number, <suffix> is unset, even if it had been
+ set earlier. (See also the h option.)
+
+ T[<Tab>] Tab characters in the input are expanded to spaces,
+ assuming tab stops every <Tab> columns. Must not be
+ 0. Defaults to 1. If the T option is given without a
+ number, the value 8 is inferred.
+
+ w[<width>] No line in the OP may contain more than <width>
+ characters, not including the trailing newlines.
+ Defaults to 72. If the w option is given without a
+ number, the value 79 is inferred.
+
+ The remaining thirteen parameters, <body>, <cap>, <div>, <Err>,
+ <expel>, <fit>, <guess>, <invis>, <just>, <last>, <quote>, <Report>,
+ and <touch>, may be set to either 0 or 1. If the number is absent
+ in the option, the value 1 is inferred.
+
+ b[<body>] If <body> is 1, prefixes may not contain any trailing
+ body characters, and suffixes may not contain any
+ leading body characters. (Actually, the situation
+ is complicated by space characters. See comprelen
+ and comsuflen in the Terminology section.) If <body>
+ is 0, prefixes and suffixes may not contain any body
+ characters at all. Defaults to 0.
+
+ c[<cap>] If <cap> is 1, all words are considered capitalized.
+ This currently affects only the application of the g
+ option. Defaults to 0.
+
+ d[<div>] If <div> is 0, each block becomes an IP. If <div> is 1,
+ each block is subdivided into IPs as follows: Let <p>
+ be the comprelen of the block. Let a line's status be
+ 1 if its (<p>+1)st character is a space, 0 otherwise.
+ Every line in the block whose status is the same as the
+ status of the first line will begin a new paragraph.
+ Defaults to 0.
+
+ E[<Err>] If <Err> is 1, messages to the user (caused by the help
+ and version options, or by errors) are sent to the error
+ stream instead of the output stream. Defaults to 0.
+
+ e[<expel>] If <expel> is 1, superfluous lines are withheld from the
+ output. Defaults to 0.
+
+ f[<fit>] If <fit> is 1 and <just> is 0, par tries to make the
+ lines in the OP as nearly the same length as possible,
+ even if it means making the OP narrower. Defaults to 0.
+ (See also the j option.)
+
+ g[<guess>] If <guess> is 1, then when par is choosing line breaks,
+ whenever it encounters a curious word followed by a
+ capitalized word, it takes one of two special actions.
+ If the two words are separated by a single space in
+ the input, they will be merged into one word with an
+ embedded non-breaking space. If the two words are
+ separated by more than one space, or by a line break,
+ par will ensure that they are separated by two spaces,
+ or by a line break, in the output. Defaults to 0.
+
+ i[<invis>] If <invis> is 1, then vacant lines inserted because
+ <quote> is 1 are invisible; that is, they are not
+ output. If <quote> is 0, <invis> has no effect.
+ Defaults to 0. (See also the q option.)
+
+ j[<just>] If <just> is 1, par justifies the OP, inserting spaces
+ between words so that all lines in the OP have length
+ <width> (except the last, if <last> is 0). Defaults to
+ 0. (See also the w, l, and f options.)
+
+ l[<last>] If <last> is 1, par tries to make the last line of the
+ OP about the same length as the others. Defaults to 0.
+
+ q[<quote>] If <quote> is 1, then before each segment is scanned
+ for bodiless lines, par supplies vacant lines between
+ different quotation nesting levels as follows: For each
+ pair of adjacent lines in the segment (scanned from the
+ top down) which have different quoteprefixes, one of
+ two actions is taken. If <invis> is 0, and either line
+ consists entirely of quote characters and spaces (or is
+ empty), that line is truncated to the longest common
+ prefix of the two lines (both are truncated if both
+ qualify). Otherwise, a line consisting of the longest
+ common prefix of the two lines is inserted between them.
+ <quote> also affects the default value of <prefix>.
+ Defaults to 0. (See also the p and i options.)
+
+ R[<Report>] If <Report> is 1, it is considered an error for an input
+ word to contain more than <L> = (<width> - <prefix> -
+ <suffix>) characters. Otherwise, such words are chopped
+ after each <L>th character into shorter words. Defaults
+ to 0.
+
+ t[<touch>] Has no effect if <suffix> is 0 or <just> is 1.
+ Otherwise, if <touch> is 0, all lines in the OP have
+ length <width>. If <touch> is 1, the length of the
+ lines is decreased until the suffixes touch the body of
+ the OP. Defaults to the logical OR of <fit> and <last>.
+ (See also the s, j, w, f, and l options.)
+
+ If an argument begins with a number, that number is assumed
+ to belong to a p option if it is 8 or less, and to a w option
+ otherwise.
+
+ If the value of any parameter is set more than once, the last value
+ is used. When unset parameters are assigned default values, <hang>
+ and <quote> are assigned before <prefix>, and <fit> and <last> are
+ assigned before <touch> (because of the dependencies).
+
+ It is an error if <width> <= <prefix> + <suffix>.
+
+
+Environment
+
+ PARBODY Determines the initial set of body characters (which are
+ used for determining comprelens and comsuflens), using
+ charset syntax. If PARBODY is not set, the set of body
+ characters is initially empty.
+
+ PARINIT If set, par will read command line arguments from PARINIT
+ before it reads them from the command line. Within
+ the value of PARINIT, arguments are separated by white
+ characters.
+
+ PARPROTECT Determines the set of protective characters, using charset
+ syntax. If PARPROTECT is not set, the set of protective
+ characters is initially empty.
+
+ PARQUOTE Determines the set of quote characters, using charset
+ syntax. If PARQUOTE is not set, the set of quote characters
+ initially contains only the greater-than sign (>) and the
+ space.
+
+ If a NUL character appears in the value of an environment variable, it
+ and the rest of the string will not be seen by par.
+
+ Note that the PARINIT variable, together with the B, P, and Q
+ options, renders the other environment variables unnecessary. They
+ are included for backward compatibility.
+
+Details
+
+ Lines are terminated by newline characters, but the newlines are not
+ considered to be included in the lines. If the last character of
+ the input is a non-newline, a newline will be inferred immediately
+ after it (but if the input is empty, no newline will be inferred;
+ the number of input lines will be 0). Thus, the input can always be
+ viewed as a sequence of lines.
+
+ Protected lines are copied unchanged from the input to the output.
+ All other input lines, as they are read, have any NUL characters
+ removed, and every white character (except newlines) turned into a
+ space. Actually, each tab character is turned into <Tab> - (<n> %
+ <Tab>) spaces, where <n> is the number of characters preceding the
+ tab character on the line (evaluated after earlier tab characters
+ have been expanded).
+
+ Blank lines in the input are transformed into empty lines in the
+ output.
+
+ If <repeat> is 0, all bodiless lines are vacant, and they are all
+ simply stripped of trailing spaces before being output. If <repeat>
+ is not 0, only vacant lines whose suffixes have length 0 are treated
+ that way; other bodiless lines have the number of instances of their
+ repeat characters increased or decreased until the length of the
+ line is <width>.
+
+ If <expel> is 1, superfluous lines are not output. If <quote> and
+ <invis> are both 1, there may be invisible lines; they are not
+ output.
+
+ The input is divided into segments, which are divided into blocks,
+ which are divided into IPs. The exact process depends on the values
+ of <quote> and <div> (see q and d in the Options section). The
+ remainder of this section describes the process which is applied
+ independently to each IP to construct the corresponding OP.
+
+ After the values of the parameters are determined (see the Options
+ section), the first <prefix> characters and the last <suffix>
+ characters of each input line are removed and remembered. It is
+ an error for any line to contain fewer than <prefix> + <suffix>
+ characters.
+
+ The remaining text is treated as a sequence of characters, not
+ lines. The text is broken into words, which are separated by
+ spaces. That is, a word is a maximal sub-sequence of non-spaces.
+ If <guess> is 1, some words might be merged (see g in the Options
+ section). The first word includes any spaces that precede it on the
+ same line.
+
+ Let <L> = <width> - <prefix> - <suffix>.
+
+ If <Report> is 0, some words may get chopped up at this point (see R
+ in the Options section).
+
+ The words are reassembled, preserving their order, into lines. If
+ <just> is 0, adjacent words within a line are separated by a single
+ space (or sometimes two if <guess> is 1), and line breaks are chosen
+ so that the paragraph satisfies the following properties:
+
+ 1) No line contains more than <L> characters.
+
+ 2) If <fit> is 1, the difference between the lengths of the
+ shortest and longest lines is as small as possible.
+
+ 3) The shortest line is as long as possible, subject to
+ properties 1 and 2.
+
+ 4) Let <target> be <L> if <fit> is 0, or the length of the
+ longest line if <fit> is 1. The sum of the squares of the
+ differences between <target> and the lengths of the lines is
+ as small as possible, subject to properties 1, 2, and 3.
+
+ If <last> is 0, the last line does not count as a line for the
+ purposes of properties 2, 3, and 4 above.
+
+ If all the words fit on a single line, the properties as worded
+ above don't make much sense. In that case, no line breaks are
+ inserted.
+
+ If <just> is 1, adjacent words within a line are separated by one
+ space (or sometimes two if <guess> is 1) plus zero or more extra
+ spaces. The value of <fit> is disregarded, and line breaks are
+ chosen so that the paragraph satisfies the following properties:
+
+ 1) Every line contains exactly <L> characters.
+
+ 2) The largest inter-word gap is as small as possible, subject
+ to property 1. (An inter-word gap consists only of the
+ extra spaces, not the regular spaces.)
+
+ 3) The sum of the squares of the lengths of the inter-word gaps
+ is as small as possible, subject to properties 1 and 2.
+
+ If <last> is 0, the last line does not count as a line for the
+ purposes of property 1, and it does not require or contain any
+ extra spaces.
+
+ Extra spaces are distributed as uniformly as possible among the
+ inter-word gaps in each line.
+
+ In a justified paragraph, every line must contain at least two
+ words, but that's not always possible to accomplish. If the
+ paragraph cannot be justified, it is considered an error.
+
+ If the number of lines in the resulting paragraph is less than
+ <hang>, empty lines are added at the end to bring the number of
+ lines up to <hang>.
+
+ If <just> is 0 and <touch> is 1, <L> is changed to be the length of
+ the longest line.
+
+ If <suffix> is not 0, each line is padded at the end with spaces to
+ bring its length up to <L>.
+
+ To each line is prepended <prefix> characters. Let <n> be the
+ number of lines in the IP, let <afp> be the augmented fallback
+ prelen of the IP, and let <fs> be the fallback suflen of the IP.
+ The characters which are prepended to the <i>th line are chosen as
+ follows:
+
+ 1) If <i> <= <n>, the characters are copied from the ones that were
+ removed from the beginning of the <i>th input line.
+
+ 2) If <i> > <n> > <hang>, the characters are copied from the ones
+ that were removed from the beginning of the last input line.
+
+ 3) If <i> > <n> and <n> <= <hang>, the first min(<afp>,<prefix>)
+ of the characters are copied from the ones that were removed
+ from the beginning of the last input line, and the rest are all
+ spaces.
+
+ Then to each line is appended <suffix> characters. The characters
+ which are appended to the <i>th line are chosen as follows:
+
+ 1) If <i> <= <n>, the characters are copied from the ones that were
+ removed from the end of the <i>th input line.
+
+ 2) If <i> > <n> > <hang>, the characters are copied from the ones
+ that were removed from the end of the last input line.
+
+ 3) If <i> > <n> and <n> <= <hang>, the first min(<fs>,<suffix>)
+ of the characters are copied from the ones that were removed
+ from the end of the last input line, and the rest are all
+ spaces.
+
+ Finally, the lines are printed to the output as the OP.
+
+
+Diagnostics
+
+ If there are no errors, par returns EXIT_SUCCESS (see <stdlib.h>).
+
+ If there is an error, an error message will be printed to the
+ output, and par will return EXIT_FAILURE. If the error is local
+ to a single paragraph, the preceding paragraphs will have been
+ output before the error was detected. Line numbers in error
+ messages are local to the IP in which the error occurred. All
+ error messages begin with "par error:" on a line by itself. Error
+ messages concerning command line or environment variable syntax are
+ accompanied by the same usage message that the help option produces.
+
+ Of course, trying to print an error message would be futile if an
+ error resulted from an output function, so par doesn't bother doing
+ any error checking on output functions.
+
+
+Examples
+
+ The superiority of par's dynamic programming algorithm over a greedy
+ algorithm (such as the one used by fmt) can be seen in the following
+ example:
+
+ Original paragraph (note that each line begins with 8 spaces):
+
+ We the people of the United States,
+ in order to form a more perfect union,
+ establish justice,
+ insure domestic tranquility,
+ provide for the common defense,
+ promote the general welfare,
+ and secure the blessing of liberty
+ to ourselves and our posterity,
+ do ordain and establish the Constitution
+ of the United States of America.
+
+ After a greedy algorithm with width = 39:
+
+ We the people of the United
+ States, in order to form a more
+ perfect union, establish
+ justice, insure domestic
+ tranquility, provide for the
+ common defense, promote the
+ general welfare, and secure the
+ blessing of liberty to
+ ourselves and our posterity, do
+ ordain and establish the
+ Constitution of the United
+ States of America.
+
+ After "par 39":
+
+ We the people of the United
+ States, in order to form a
+ more perfect union, establish
+ justice, insure domestic
+ tranquility, provide for the
+ common defense, promote the
+ general welfare, and secure
+ the blessing of liberty to
+ ourselves and our posterity,
+ do ordain and establish the
+ Constitution of the United
+ States of America.
+
+ The line breaks chosen by par are clearly more eye-pleasing.
+
+ par is most useful in conjunction with the text-filtering features
+ of an editor, such as the ! commands of vi. You may wish to add the
+ following lines to your .exrc file:
+
+ " use Bourne shell for speed:
+ set shell=/bin/sh
+ "
+ " reformat paragraph with no arguments:
+ map ** {!}par^M}
+ "
+ " reformat paragraph with arguments:
+ map *^V {!}par
+
+ Note that the leading spaces must be removed, and that what is shown
+ as ^M and ^V really need to be ctrl-M and ctrl-V. Also note that
+ the last map command contains two spaces following the ctrl-V, plus
+ one at the end of the line.
+
+ To reformat a simple paragraph delimited by blank lines in vi, you
+ can put the cursor anywhere in it and type "**" (star star). If
+ you need to supply arguments to par, you can type "* " (star space)
+ instead, then type the arguments.
+
+ The rest of this section is a series of before-and-after pictures
+ showing some typical uses of par. In all cases, no environment
+ variables are set.
+
+ Before:
+
+ /* We the people of the United States, */
+ /* in order to form a more perfect union, */
+ /* establish justice, */
+ /* insure domestic tranquility, */
+ /* provide for the common defense, */
+ /* promote the general welfare, */
+ /* and secure the blessing of liberty */
+ /* to ourselves and our posterity, */
+ /* do ordain and establish the Constitution */
+ /* of the United States of America. */
+
+ After "par 59":
+
+ /* We the people of the United States, in */
+ /* order to form a more perfect union, establish */
+ /* justice, insure domestic tranquility, provide */
+ /* for the common defense, promote the general */
+ /* welfare, and secure the blessing of liberty */
+ /* to ourselves and our posterity, do ordain */
+ /* and establish the Constitution of the United */
+ /* States of America. */
+
+ Or after "par 59f":
+
+ /* We the people of the United States, */
+ /* in order to form a more perfect union, */
+ /* establish justice, insure domestic */
+ /* tranquility, provide for the common */
+ /* defense, promote the general welfare, */
+ /* and secure the blessing of liberty to */
+ /* ourselves and our posterity, do ordain */
+ /* and establish the Constitution of the */
+ /* United States of America. */
+
+ Or after "par 59l":
+
+ /* We the people of the United States, in */
+ /* order to form a more perfect union, establish */
+ /* justice, insure domestic tranquility, */
+ /* provide for the common defense, promote */
+ /* the general welfare, and secure the */
+ /* blessing of liberty to ourselves and our */
+ /* posterity, do ordain and establish the */
+ /* Constitution of the United States of America. */
+
+ Or after "par 59lf":
+
+ /* We the people of the United States, */
+ /* in order to form a more perfect union, */
+ /* establish justice, insure domestic */
+ /* tranquility, provide for the common */
+ /* defense, promote the general welfare, */
+ /* and secure the blessing of liberty */
+ /* to ourselves and our posterity, do */
+ /* ordain and establish the Constitution */
+ /* of the United States of America. */
+
+ Or after "par 59lft0":
+
+ /* We the people of the United States, */
+ /* in order to form a more perfect union, */
+ /* establish justice, insure domestic */
+ /* tranquility, provide for the common */
+ /* defense, promote the general welfare, */
+ /* and secure the blessing of liberty */
+ /* to ourselves and our posterity, do */
+ /* ordain and establish the Constitution */
+ /* of the United States of America. */
+
+ Or after "par 59j":
+
+ /* We the people of the United States, in */
+ /* order to form a more perfect union, establish */
+ /* justice, insure domestic tranquility, provide */