1 files changed, 405 insertions, 0 deletions
diff --git a/conduits/docconduit/makedoc9.cc b/conduits/docconduit/makedoc9.cc
new file mode 100644
index 0000000..1f1c56f
--- /dev/null
+++ b/conduits/docconduit/makedoc9.cc
@@ -0,0 +1,405 @@
+// based on: MakeDoc, version 2
+// I only took the tBuf class from there and adapted it.
+//
+// Compresses text files into a format that is ready to export to a Pilot
+// and work with Rick Bram's PilotDOC reader.
+// Copyright (C) Reinhold Kainhofer, 2002
+// Copyrigth (C) Pat Beirne, 2000
+//
+// Original file (makedoc9.cpp) copyright by:
+// Copyright (C) Pat Beirne, 2000.
+// Distributable under the GNU General Public License Version 2 or later.
+//
+// ver 0.6 enforce 31 char limit on database names
+// ver 0.7 change header and record0 to structs
+// ver 2.0 added category control on the command line
+//              changed extensions from .prc to .pdb
+
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program in a file called COPYING; if not, write to
+** the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+** MA 02110-1301, USA.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iostream>
+
+
+#include "makedoc9.h"
+
+
+
+//
+// Issue()
+//
+// action: handle the details of writing a single
+//              character to the compressed stream
+//
+unsigned
+ tBuf::Issue(byte src, int &bSpace)
+{
+	unsigned int iDest = len;
+	byte *dest = buf;
+
+	// TODO: which of the if parts should really be included???
+#if 0
+	// modified version of issue
+	// just issue the char
+	if (src >= 0x80 || src <= 8)
+		dest[iDest++] = 1;
+	dest[iDest++] = src;
+
+#else
+	// if there is an outstanding space char, see if
+	// we can squeeze it in with an ASCII char
+	if (bSpace)
+	{
+		if (src >= 0x40 && src <= 0x7F)
+			dest[iDest++] = src ^ 0x80;
+		else
+		{
+			// couldn't squeeze it in, so issue the space char by itself
+			// most chars go out simple, except the range 1...8,0x80...0xFF
+			dest[iDest++] = ' ';
+			if (src < 0x80 && (src == 0 || src > 8))
+				dest[iDest++] = src;
+			else
+				dest[iDest++] = 1, dest[iDest++] = src;
+		}
+		// knock down the space flag
+		bSpace = 0;
+	}
+	else
+	{
+		// check for a space char
+		if (src == ' ')
+			bSpace = 1;
+		else
+		{
+			if (src < 0x80 && (src == 0 || src > 8))
+				dest[iDest++] = src;
+			else
+				dest[iDest++] = 1, dest[iDest++] = src;
+
+		}
+	}
+#endif
+	len = iDest;
+	return iDest;
+}
+
+//
+// Compress
+//
+// params:      none
+//
+// action:      takes the given buffer,
+//                                      and compresses
+//                                      the original data down into a second buffer
+//
+// comment:     This version make heavy use of walking pointers.
+//
+unsigned tBuf::Compress()
+{
+	if (!buf)
+		return 0;
+	if (isCompressed) {
+//		cout<<"Buffer is already compressed!"<<endl;
+		return len;
+//	} else {
+//		cout<<" Compressing buffer!!!"<<endl;
+	}
+
+	unsigned int i;
+
+	// run through the input buffer
+	byte *pBuffer;					  // points to the input buffer
+	byte *pHit;						  // points to a walking test hit; works upwards on successive matches
+	byte *pPrevHit;				  // previous value of pHit; also, start of next test
+	byte *pTestHead;				  // current test string
+	byte *pTestTail;				  // current walking pointer; one past the current test buffer
+	byte *pEnd;						  // 1 past the end of the input buffer
+
+	pHit = pPrevHit = pTestHead = pBuffer = buf;
+	pTestTail = pTestHead + 1;
+	pEnd = buf + len;				  // should point to a 0!
+
+	// make a dest buffer and reassign the local buffer
+	buf = new byte[6000];
+	len = 0;							  // used to walk through the output buffer
+
+	// loop, absorbing one more char from the input buffer on each pass
+	for (; pTestHead != pEnd; pTestTail++)
+	{
+		// if we already have 10 char match, don't bother scanning again for the 11th (wasted time)
+		if (pTestTail - pTestHead != (1 << COUNT_BITS) + 3)
+		{
+			// scan in the previous data for a match
+			// terminate the test string (and the matcher string, as well!) in a 0
+			byte tmp = *pTestTail;
+
+			*pTestTail = 0;
+			pHit = (byte *) strstr((const char *) pPrevHit,
+				(const char *) pTestHead);
+			*pTestTail = tmp;		  // restore the char
+		}
+
+		// on a mismatch or end of buffer, issued codes
+		if (pHit == pTestHead
+			|| pTestTail - pTestHead > (1 << COUNT_BITS) + 2
+			|| pTestTail == pEnd)
+		{
+			// issue the codes
+			// first, check for short runs
+			if (pTestTail - pTestHead < 4)
+			{
+				if (pTestHead[0] > 0x7F || pTestHead[0] <= 8)
+					buf[len++] = 1;
+				buf[len++] = pTestHead[0];
+				pTestHead++;
+			}
+			// for longer runs, issue a run-code
+			else
+			{
+				unsigned int dist = pTestHead - pPrevHit;
+				unsigned int compound =
+					(dist << COUNT_BITS) + pTestTail - pTestHead - 4;
+
+//if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow");
+//if (pTestTail-pTestHead-4>7) printf("\n!! error len overflow");
+
+				buf[len++] = 0x80 + (compound >> 8);
+				buf[len++] = compound & 0xFF;
+//printf("\nissuing code for sequence len %d <%c%c%c>",pTestTail-pTestHead-1,pTestHead[0],pTestHead[1],pTestHead[2]);
+//printf("\n          <%x%x>",pOut[-2],pOut[-1]);
+				// and start again
+				pTestHead = pTestTail - 1;
+			}
+			// start the search again
+			pPrevHit = pBuffer;
+			// within range
+			if (pTestHead - pPrevHit > ((1 << DISP_BITS) - 1))
+				pPrevHit = pTestHead - ((1 << DISP_BITS) - 1);
+		}
+		// got a match
+		else
+		{
+			pPrevHit = pHit;
+		}
+		// when we get to the end of the buffer, don't inc past the end
+		// this forces the residue chars out one at a time
+		if (pTestTail == pEnd)
+			pTestTail--;
+	}
+
+
+	// final scan to merge consecutive high chars together
+	// and merge space chars
+	unsigned int k;
+
+	for (i = k = 0; i < len; i++, k++)
+	{
+		buf[k] = buf[i];
+		// skip the run-length codes
+		if (buf[k] >= 0x80 && buf[k] < 0xC0)
+			buf[++k] = buf[++i];
+		// if we hit a high char marker, look ahead for another
+		// and merge multiples together
+		else if (buf[k] == 1)
+		{
+			buf[k + 1] = buf[i + 1];
+			while (i + 2 < len && buf[i + 2] == 1 && buf[k] < 8)
+			{
+				buf[k]++;
+				buf[k + buf[k]] = buf[i + 3];
+				i += 2;
+			}
+			k += buf[k];
+			i++;
+		}
+		else if (buf[k] == ' ' && i < len - 1 && buf[i + 1] <= 0x7F
+			&& buf[i + 1] >= 0x40)
+			buf[k] = 0x80 | buf[++i];
+	}
+
+	// delete original buffer
+	delete[]pBuffer;
+	len = k;
+
+	isCompressed = true;
+	return k;
+}
+
+/*
+	Decompress
+
+	params:	none
+
+	action: make a new buffer
+					run through the source data
+					check the 4 cases:
+						0,9...7F represent self
+						1...8		escape n chars
+						80...bf reference earlier run
+						c0...ff	space+ASCII
+
+*/
+unsigned tBuf::Decompress()
+{
+	if (!buf)
+		return 0;
+	if (!isCompressed) {
+//		cout<<"Buffer already uncompressed. Doing nothing"<<endl;
+		return len;
+//	} else {
+//		cout<<"Decompressing buffer"<<endl;
+	}
+
+	// we "know" that all decompresses fit within 4096, right?
+	byte *pOut = new byte[6000];
+	byte *in_buf = buf;
+	byte *out_buf = pOut;
+
+	unsigned int i, j;
+
+	for (j = i = 0; j < len;)
+	{
+		unsigned int c;
+
+		// take a char from the input buffer
+		c = in_buf[j++];
+
+		// separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF
+
+		// codes 1...8 mean copy that many bytes; for accented chars & binary
+		if (c > 0 && c < 9)
+			while (c--)
+				out_buf[i++] = in_buf[j++];
+
+		// codes 0, 9...0x7F represent themselves
+		else if (c < 0x80)
+			out_buf[i++] = c;
+
+		// codes 0xC0...0xFF represent "space + ascii char"
+		else if (c >= 0xC0)
+			out_buf[i++] = ' ', out_buf[i++] = c ^ 0x80;
+
+		// codes 0x80...0xBf represent sequences
+		else
+		{
+			int m, n;
+
+			c <<= 8;
+			c += in_buf[j++];
+			m = (c & 0x3FFF) >> COUNT_BITS;
+			n = c & ((1 << COUNT_BITS) - 1);
+			n += 3;
+			while (n--)
+			{
+				out_buf[i] = out_buf[i - m];
+				i++;
+			}
+		}
+	}
+	out_buf[i++]='\0';
+	out_buf[i++]='\0';
+	delete[]buf;
+	buf = pOut;
+	len = i;
+
+	isCompressed = false;
+	return i;
+}
+
+unsigned tBuf::DuplicateCR()
+{
+	if (!buf)
+		return 0;
+	byte *pBuf = new byte[2 * len];
+
+	unsigned int k, j;
+
+	for (j = k = 0; j < len; j++, k++)
+	{
+		pBuf[k] = buf[j];
+		if (pBuf[k] == 0x0A)
+			pBuf[k++] = 0x0D, pBuf[k] = 0x0A;
+	}
+	delete[]buf;
+	buf = pBuf;
+	len = k;
+	return k;
+}
+
+
+
+// this nasty little beast removes really low ASCII and 0's
+// and handles the CR problem
+//
+// if a cr appears before a lf, then remove the cr
+// if a cr appears in isolation, change to a lf
+unsigned tBuf::RemoveBinary()
+{
+	if (!buf)
+		return 0;
+	byte *in_buf = buf;
+	byte *out_buf = new byte[len];
+
+	unsigned int k, j;
+
+	for (j = k = 0; j < len; j++, k++)
+	{
+		// copy each byte
+		out_buf[k] = in_buf[j];
+
+		// throw away really low ASCII
+		if (( /*out_buf[k]>=0 && */ out_buf[k] < 9))
+			k--;
+
+		// for CR
+		if (out_buf[k] == 0x0D)
+		{
+			// if next is LF, then drop it
+			if (j < len - 1 && in_buf[j + 1] == 0x0A)
+				k--;
+			else						  // turn it into a LF
+				out_buf[k] = 0x0A;
+		}
+	}
+	delete[]buf;
+	buf = out_buf;
+	len = k;
+	return k;
+}
+
+void tBuf::setText(const byte * text, unsigned txtlen, bool txtcomp)
+{
+	if (buf)
+		delete[]buf;
+	buf = 0L;
+
+	if (txtlen <= 0)
+		txtlen = strlen((const char *) text);
+	len = txtlen;
+	buf = new byte[len];
+
+	memcpy(buf, text, len*sizeof(char));
+//	strncpy((char *) buf, (const char *) text, len);
+	isCompressed = txtcomp;
+//	cout<<"Setting text, compressed="<<txtcomp<<endl;
+}