summary refs log tree commit diff stats
path: root/conduits/docconduit/makedoc9.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'conduits/docconduit/makedoc9.cpp')
-rw-r--r-- conduits/docconduit/makedoc9.cpp 405
1 files changed, 405 insertions, 0 deletions
diff --git a/conduits/docconduit/makedoc9.cpp b/conduits/docconduit/makedoc9.cpp
new file mode 100644
index 0000000..1f1c56f
--- /dev/null
+++ b/conduits/docconduit/makedoc9.cpp
@@ -0,0 +1,405 @@
+// based on: MakeDoc, version 2
+// I only took the tBuf class from there and adapted it.
+//
+// Compresses text files into a format that is ready to export to a Pilot
+// and work with Rick Bram's PilotDOC reader.
+// Copyright (C) Reinhold Kainhofer, 2002
+// Copyright (C) Pat Beirne, 2000
+//
+// Original file (makedoc9.cpp) copyright by:
+// Copyright (C) Pat Beirne, 2000.
+// Distributable under the GNU General Public License Version 2 or later.
+//
+// ver 0.6 enforce 31 char limit on database names
+// ver 0.7 change header and record0 to structs
+// ver 2.0 added category control on the command line
+// changed extensions from .prc to .pdb
+
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program in a file called COPYING; if not, write to
+** the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+** MA 02110-1301, USA.
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iostream>
+
+
+#include "makedoc9.h"
+
+
+
+//
+// Issue()
+//
+// action: append a single source character to the compressed stream
+//         held in buf, advancing len
+//
+// params: src    - the character to write
+//         bSpace - in/out flag; non-zero while a space character is
+//                  being held back so it can be merged with the
+//                  character that follows it
+//
+// returns: the new value of len
+//
+// Rules applied:
+//   - a held-back space followed by 0x40...0x7F goes out as the single
+//     byte (src ^ 0x80)
+//   - 1...8 and 0x80...0xFF go out as the byte 1 followed by src
+//   - every other character goes out as itself
+//
+// No capacity check is made here: up to three bytes (space, escape
+// marker, character) are stored at buf[len...] per call.
+//
+// TODO: the original source also carried a disabled (#if 0) variant that
+// never held spaces back and only applied the escape rule; which of the
+// two should really be used???
+//
+unsigned
+    tBuf::Issue(byte src, int &bSpace)
+{
+    byte *out = buf;
+    unsigned int pos = len;
+
+    // 1...8 and 0x80...0xFF cannot be stored bare; they need the marker byte
+    const bool needsEscape = (src >= 0x80) || (src != 0 && src <= 8);
+
+    if (!bSpace && src == ' ')
+    {
+        // hold the space back; it may merge with the next character
+        bSpace = 1;
+    }
+    else if (bSpace && src >= 0x40 && src <= 0x7F)
+    {
+        // space + ASCII squeezed into one byte
+        out[pos++] = src ^ 0x80;
+        bSpace = 0;
+    }
+    else
+    {
+        if (bSpace)
+        {
+            // could not squeeze it in, so the space goes out by itself
+            out[pos++] = ' ';
+            bSpace = 0;
+        }
+        if (needsEscape)
+            out[pos++] = 1;
+        out[pos++] = src;
+    }
+
+    len = pos;
+    return pos;
+}
+
+//
+// Compress
+//
+// params: none
+//
+// action: takes the given buffer,
+// and compresses
+// the original data down into a second buffer,
+// which then replaces the original one
+//
+// returns: 0 if there is no buffer; the unchanged len if the buffer is
+// already marked compressed; otherwise the compressed length
+//
+// comment: This version makes heavy use of walking pointers.
+//
+// NOTE(review): the output buffer is a fixed 6000 bytes and nothing below
+// checks writes against that size.
+// NOTE(review): the matcher reads and temporarily overwrites the byte at
+// buf[len] of the input buffer, so the input needs one valid byte past len.
+// NOTE(review): matching is done with strstr(), so a 0 byte inside the
+// data presumably ends a search early - confirm whether binary input is
+// expected here.
+//
+unsigned tBuf::Compress()
+{
+ if (!buf)
+ return 0;
+ if (isCompressed) {
+// cout<<"Buffer is already compressed!"<<endl;
+ return len;
+// } else {
+// cout<<" Compressing buffer!!!"<<endl;
+ }
+
+ unsigned int i;
+
+ // run through the input buffer
+ byte *pBuffer; // points to the input buffer
+ byte *pHit; // points to a walking test hit; works upwards on successive matches
+ byte *pPrevHit; // previous value of pHit; also, start of next test
+ byte *pTestHead; // current test string
+ byte *pTestTail; // current walking pointer; one past the current test buffer
+ byte *pEnd; // 1 past the end of the input buffer
+
+ pHit = pPrevHit = pTestHead = pBuffer = buf;
+ pTestTail = pTestHead + 1;
+ pEnd = buf + len; // should point to a 0!
+
+ // make a dest buffer and reassign the local buffer
+ // (the input stays reachable through pBuffer and is freed at the end)
+ buf = new byte[6000];
+ len = 0; // used to walk through the output buffer
+
+ // loop, absorbing one more char from the input buffer on each pass
+ for (; pTestHead != pEnd; pTestTail++)
+ {
+ // if we already have 10 char match, don't bother scanning again for the 11th (wasted time)
+ if (pTestTail - pTestHead != (1 << COUNT_BITS) + 3)
+ {
+ // scan in the previous data for a match
+ // terminate the test string (and the matcher string, as well!) in a 0
+ // (pTestTail can be equal to pEnd here, which is why the byte at
+ // pEnd has to be accessible)
+ byte tmp = *pTestTail;
+
+ *pTestTail = 0;
+ pHit = (byte *) strstr((const char *) pPrevHit,
+ (const char *) pTestHead);
+ *pTestTail = tmp; // restore the char
+ }
+
+ // on a mismatch or end of buffer, issued codes
+ // (pHit == pTestHead means the only occurrence found is the test
+ // string itself, i.e. no earlier copy exists)
+ if (pHit == pTestHead
+ || pTestTail - pTestHead > (1 << COUNT_BITS) + 2
+ || pTestTail == pEnd)
+ {
+ // issue the codes
+ // first, check for short runs
+ if (pTestTail - pTestHead < 4)
+ {
+ // too short for a run-code: emit one char, preceded by the
+ // marker byte 1 when it is above 0x7F or not above 8
+ if (pTestHead[0] > 0x7F || pTestHead[0] <= 8)
+ buf[len++] = 1;
+ buf[len++] = pTestHead[0];
+ pTestHead++;
+ }
+ // for longer runs, issue a run-code
+ else
+ {
+ // two-byte code: distance back to the earlier copy in the upper
+ // bits, count in the low COUNT_BITS bits
+ unsigned int dist = pTestHead - pPrevHit;
+ unsigned int compound =
+ (dist << COUNT_BITS) + pTestTail - pTestHead - 4;
+
+//if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow");
+//if (pTestTail-pTestHead-4>7) printf("\n!! error len overflow");
+
+ buf[len++] = 0x80 + (compound >> 8);
+ buf[len++] = compound & 0xFF;
+//printf("\nissuing code for sequence len %d <%c%c%c>",pTestTail-pTestHead-1,pTestHead[0],pTestHead[1],pTestHead[2]);
+//printf("\n <%x%x>",pOut[-2],pOut[-1]);
+ // and start again
+ // (at the last absorbed char, which was not part of the run)
+ pTestHead = pTestTail - 1;
+ }
+ // start the search again
+ pPrevHit = pBuffer;
+ // within range
+ // (never search further back than (1 << DISP_BITS) - 1 bytes)
+ if (pTestHead - pPrevHit > ((1 << DISP_BITS) - 1))
+ pPrevHit = pTestHead - ((1 << DISP_BITS) - 1);
+ }
+ // got a match
+ else
+ {
+ pPrevHit = pHit;
+ }
+ // when we get to the end of the buffer, don't inc past the end
+ // this forces the residue chars out one at a time
+ if (pTestTail == pEnd)
+ pTestTail--;
+ }
+
+
+ // final scan to merge consecutive high chars together
+ // and merge space chars
+ // (done in place: i reads, k writes, and k never runs ahead of i)
+ unsigned int k;
+
+ for (i = k = 0; i < len; i++, k++)
+ {
+ buf[k] = buf[i];
+ // skip the run-length codes
+ // (copy the second byte of the code untouched)
+ if (buf[k] >= 0x80 && buf[k] < 0xC0)
+ buf[++k] = buf[++i];
+ // if we hit a high char marker, look ahead for another
+ // and merge multiples together
+ // (buf[k] becomes the count of escaped chars that follow, at most 8)
+ else if (buf[k] == 1)
+ {
+ buf[k + 1] = buf[i + 1];
+ while (i + 2 < len && buf[i + 2] == 1 && buf[k] < 8)
+ {
+ buf[k]++;
+ buf[k + buf[k]] = buf[i + 3];
+ i += 2;
+ }
+ k += buf[k];
+ i++;
+ }
+ // a space followed by 0x40...0x7F collapses into one byte
+ else if (buf[k] == ' ' && i < len - 1 && buf[i + 1] <= 0x7F
+ && buf[i + 1] >= 0x40)
+ buf[k] = 0x80 | buf[++i];
+ }
+
+ // delete original buffer
+ delete[]pBuffer;
+ len = k;
+
+ isCompressed = true;
+ return k;
+}
+
+/*
+ Decompress
+
+ params: none
+
+ action: make a new buffer
+ run through the source data
+ check the 4 cases:
+ 0,9...7F represent self
+ 1...8 escape n chars
+ 80...bf reference earlier run
+ c0...ff space+ASCII
+
+ returns: 0 if there is no buffer; the unchanged len if the buffer is
+ not marked compressed; otherwise the new len
+
+ notes: two 0 bytes are appended to the output and ARE counted in the
+ new len.
+ The output buffer is sized from the input so it cannot overflow.
+ Decoding stops at the first truncated or invalid code (a code that
+ needs more input than is left, or a back-reference that points
+ before the start of the output); what was decoded so far is kept.
+
+*/
+unsigned tBuf::Decompress()
+{
+    if (!buf)
+        return 0;
+    if (!isCompressed)
+        return len;
+
+    // longest run a single sequence code can expand to
+    const unsigned int maxRun = (1 << COUNT_BITS) + 2;
+
+    // No code yields more than maxRun output bytes per input byte it
+    // consumes, so len * maxRun bounds the decoded size.  Add 2 for the
+    // trailing 0 bytes and 1 spare byte past the new len, which
+    // Compress() uses as scratch space.  Never go below the historical
+    // 6000 byte size.
+    unsigned int capacity = len * maxRun + 3;
+    if (capacity < 6000)
+        capacity = 6000;
+
+    const byte *in_buf = buf;
+    byte *out_buf = new byte[capacity];
+
+    unsigned int i = 0;         // write position in out_buf
+    unsigned int j = 0;         // read position in in_buf
+
+    while (j < len)
+    {
+        // take a char from the input buffer
+        unsigned int c = in_buf[j++];
+
+        // separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF
+
+        if (c > 0 && c < 9)
+        {
+            // codes 1...8 mean copy that many bytes; for accented chars & binary
+            // (but never read past the end of the input)
+            while (c-- && j < len)
+                out_buf[i++] = in_buf[j++];
+        }
+        else if (c < 0x80)
+        {
+            // codes 0, 9...0x7F represent themselves
+            out_buf[i++] = c;
+        }
+        else if (c >= 0xC0)
+        {
+            // codes 0xC0...0xFF represent "space + ascii char"
+            out_buf[i++] = ' ';
+            out_buf[i++] = c ^ 0x80;
+        }
+        else
+        {
+            // codes 0x80...0xBF represent sequences; they are two bytes long
+            if (j >= len)
+                break;          // second byte is missing
+
+            c = (c << 8) + in_buf[j++];
+            const unsigned int m = (c & 0x3FFF) >> COUNT_BITS;      // distance back
+            unsigned int n = (c & ((1 << COUNT_BITS) - 1)) + 3;     // bytes to copy
+
+            if (m == 0 || m > i)
+                break;          // refers to data that was never produced
+
+            // byte-by-byte on purpose: source and destination may overlap
+            while (n--)
+            {
+                out_buf[i] = out_buf[i - m];
+                i++;
+            }
+        }
+    }
+
+    out_buf[i++] = '\0';
+    out_buf[i++] = '\0';
+
+    delete[] buf;
+    buf = out_buf;
+    len = i;
+
+    isCompressed = false;
+    return i;
+}
+
+//
+// DuplicateCR
+//
+// action: replace the buffer with a copy in which every LF (0x0A) is
+//         preceded by a CR (0x0D); all other bytes are copied unchanged.
+//         The bytes are processed as they are; isCompressed is neither
+//         checked nor changed.
+//
+// returns: 0 if there is no buffer, otherwise the new len
+//
+unsigned tBuf::DuplicateCR()
+{
+    if (!buf)
+        return 0;
+
+    // Worst case every byte is a LF and doubles.  One more byte is
+    // allocated and set to 0 because Compress() reads and temporarily
+    // overwrites the byte at buf[len].
+    byte *expanded = new byte[2 * len + 1];
+    unsigned int out = 0;
+
+    for (unsigned int in = 0; in < len; in++)
+    {
+        if (buf[in] == 0x0A)
+            expanded[out++] = 0x0D;
+        expanded[out++] = buf[in];
+    }
+    expanded[out] = 0;
+
+    delete[] buf;
+    buf = expanded;
+    len = out;
+    return out;
+}
+
+
+
+// this nasty little beast removes really low ASCII (0...8)
+// and handles the CR problem
+//
+// if a cr appears before a lf, then remove the cr
+// if a cr appears in isolation, change to a lf
+//
+// returns: 0 if there is no buffer, otherwise the new len
+unsigned tBuf::RemoveBinary()
+{
+    if (!buf)
+        return 0;
+
+    const byte *in_buf = buf;
+    // The result is never longer than the input.  One more byte is
+    // allocated and set to 0 because Compress() reads and temporarily
+    // overwrites the byte at buf[len].
+    byte *out_buf = new byte[len + 1];
+    unsigned int k = 0;
+
+    for (unsigned int j = 0; j < len; j++)
+    {
+        byte c = in_buf[j];
+
+        // throw away really low ASCII
+        // (skipping, rather than backing up the unsigned output index,
+        // keeps a leading low byte from indexing outside out_buf)
+        if (c < 9)
+            continue;
+
+        // for CR
+        if (c == 0x0D)
+        {
+            // if next is LF, then drop the CR
+            if (j + 1 < len && in_buf[j + 1] == 0x0A)
+                continue;
+            // otherwise turn it into a LF
+            c = 0x0A;
+        }
+
+        out_buf[k++] = c;
+    }
+    out_buf[k] = 0;
+
+    delete[] buf;
+    buf = out_buf;
+    len = k;
+    return k;
+}
+
+//
+// setText
+//
+// action: replace the buffer with a private copy of the given bytes
+//
+// params: text    - bytes to copy; a null pointer is treated as empty text
+//         txtlen  - number of bytes to copy; 0 means text is a
+//                   0-terminated string whose length is measured here
+//         txtcomp - whether the bytes are already compressed; stored in
+//                   isCompressed
+//
+void tBuf::setText(const byte * text, unsigned txtlen, bool txtcomp)
+{
+    if (!text)
+        txtlen = 0;
+    else if (txtlen == 0)
+        txtlen = strlen((const char *) text);
+
+    // One byte more than needed, set to 0: Compress() reads and
+    // temporarily overwrites the byte at buf[len].
+    byte *fresh = new byte[txtlen + 1];
+    if (txtlen > 0)
+        memcpy(fresh, text, txtlen);
+    fresh[txtlen] = 0;
+
+    // Free the old buffer only after copying, so text may point into it.
+    delete[] buf;
+    buf = fresh;
+    len = txtlen;
+    isCompressed = txtcomp;
+}