diff options
Diffstat (limited to 'src/kernel/qscriptengine.cpp')
-rw-r--r-- | src/kernel/qscriptengine.cpp | 1624 |
1 files changed, 1624 insertions, 0 deletions
diff --git a/src/kernel/qscriptengine.cpp b/src/kernel/qscriptengine.cpp new file mode 100644 index 0000000..66aa07b --- /dev/null +++ b/src/kernel/qscriptengine.cpp @@ -0,0 +1,1624 @@ +/**************************************************************************** +** +** Copyright (C) 2003-2008 Trolltech ASA. All rights reserved. +** +** This file is part of the kernel module of the Qt GUI Toolkit. +** +** This file may be used under the terms of the GNU General +** Public License versions 2.0 or 3.0 as published by the Free +** Software Foundation and appearing in the files LICENSE.GPL2 +** and LICENSE.GPL3 included in the packaging of this file. +** Alternatively you may (at your option) use any later version +** of the GNU General Public License if such license has been +** publicly approved by Trolltech ASA (or its successors, if any) +** and the KDE Free Qt Foundation. +** +** Please review the following information to ensure GNU General +** Public Licensing requirements will be met: +** http://trolltech.com/products/qt/licenses/licensing/opensource/. +** If you are unsure which license is appropriate for your use, please +** review the following information: +** http://trolltech.com/products/qt/licenses/licensing/licensingoverview +** or contact the sales department at sales@trolltech.com. +** +** This file may be used under the terms of the Q Public License as +** defined by Trolltech ASA and appearing in the file LICENSE.QPL +** included in the packaging of this file. Licensees holding valid Qt +** Commercial licenses may use this file in accordance with the Qt +** Commercial License Agreement provided with the Software. +** +** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, +** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR +** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted +** herein. +** +**********************************************************************/ + +#include "qscriptengine_p.h" + +#include "qstring.h" +#include "qrect.h" +#include "qfont.h" +#include <private/qunicodetables_p.h> +#include "qtextengine_p.h" +#include "qfontengine_p.h" +#include <stdlib.h> + +#undef None +#undef Pre +#undef Above +#undef Below + +const int Prealloc = 256; +template<class T> +class QVarLengthArray +{ +public: + inline explicit QVarLengthArray(int size = 0); + inline ~QVarLengthArray() { + if (ptr != reinterpret_cast<T *>(array)) + free(ptr); + } + + inline int size() const { return s; } + inline int count() const { return s; } + inline bool isEmpty() const { return (s == 0); } + inline void resize(int size); + inline void clear() { resize(0); } + + inline int capacity() const { return a; } + inline void reserve(int size); + + inline T &operator[](int idx) { + Q_ASSERT(idx >= 0 && idx < s); + return ptr[idx]; + } + inline const T &operator[](int idx) const { + Q_ASSERT(idx >= 0 && idx < s); + return ptr[idx]; + } + + inline void append(const T &t) { + const int idx = s; + resize(idx + 1); + ptr[idx] = t; + } + + inline T *data() { return ptr; } + inline const T *data() const { return ptr; } + inline const T * constData() const { return ptr; } + +private: + void realloc(int size, int alloc); + + int a; + int s; + T *ptr; + Q_UINT64 array[((Prealloc * sizeof(T)) / sizeof(Q_UINT64)) + 1]; +}; + +template <class T> +Q_INLINE_TEMPLATES QVarLengthArray<T>::QVarLengthArray(int asize) + : s(asize) { + if (s > Prealloc) { + ptr = reinterpret_cast<T *>(malloc(s * sizeof(T))); + a = s; + } else { + ptr = reinterpret_cast<T *>(array); + a = Prealloc; + } +} + + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Basic processing +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +static inline void positionCluster(QShaperItem *item, int gfrom, int glast) +{ + int nmarks = glast - gfrom; + if (nmarks <= 0) { + qWarning("positionCluster: no marks to position!"); + return; + } + + QFontEngine *f = item->font; + + glyph_metrics_t baseInfo = f->boundingBox(item->glyphs[gfrom]); + + if (item->script == QFont::Hebrew) + // we need to attach below the baseline, because of the hebrew iud. + baseInfo.height = QMAX(baseInfo.height, -baseInfo.y); + + QRect baseRect(baseInfo.x, baseInfo.y, baseInfo.width, baseInfo.height); + +// qDebug("---> positionCluster: cluster from %d to %d", gfrom, glast); +// qDebug("baseInfo: %f/%f (%f/%f) off=%f/%f", baseInfo.x, baseInfo.y, baseInfo.width, baseInfo.height, baseInfo.xoff, baseInfo.yoff); + + int size = (f->ascent()/10); + int offsetBase = (size - 4) / 4 + QMIN(size, 4) + 1; +// qDebug("offset = %f", offsetBase); + + bool rightToLeft = item->flags & QTextEngine::RightToLeft; + + int i; + unsigned char lastCmb = 0; + QRect attachmentRect; + + for(i = 1; i <= nmarks; i++) { + glyph_t mark = item->glyphs[gfrom+i]; + QPoint p; + glyph_metrics_t markInfo = f->boundingBox(mark); + QRect markRect(markInfo.x, markInfo.y, markInfo.width, markInfo.height); +// qDebug("markInfo: %f/%f (%f/%f) off=%f/%f", markInfo.x, markInfo.y, markInfo.width, markInfo.height, markInfo.xoff, markInfo.yoff); + + int offset = offsetBase; + unsigned char cmb = item->attributes[gfrom+i].combiningClass; + + // ### maybe the whole position determination should move down to heuristicSetGlyphAttributes. Would save some + // bits in the glyphAttributes structure. + if (cmb < 200) { + // fixed position classes. We approximate by mapping to one of the others. + // currently I added only the ones for arabic, hebrew, lao and thai. + + // for Lao and Thai marks with class 0, see below (heuristicSetGlyphAttributes) + + // add a bit more offset to arabic, a bit hacky + if (cmb >= 27 && cmb <= 36 && offset < 3) + offset +=1; + // below + if ((cmb >= 10 && cmb <= 18) || + cmb == 20 || cmb == 22 || + cmb == 29 || cmb == 32) + cmb = QChar::Combining_Below; + // above + else if (cmb == 23 || cmb == 27 || cmb == 28 || + cmb == 30 || cmb == 31 || (cmb >= 33 && cmb <= 36)) + cmb = QChar::Combining_Above; + //below-right + else if (cmb == 9 || cmb == 103 || cmb == 118) + cmb = QChar::Combining_BelowRight; + // above-right + else if (cmb == 24 || cmb == 107 || cmb == 122) + cmb = QChar::Combining_AboveRight; + else if (cmb == 25) + cmb = QChar::Combining_AboveLeft; + // fixed: + // 19 21 + + } + + // combining marks of different class don't interact. Reset the rectangle. + if (cmb != lastCmb) { + //qDebug("resetting rect"); + attachmentRect = baseRect; + } + + switch(cmb) { + case QChar::Combining_DoubleBelow: + // ### wrong in rtl context! + case QChar::Combining_BelowLeft: + p += QPoint(0, offset); + case QChar::Combining_BelowLeftAttached: + p += attachmentRect.bottomLeft() - markRect.topLeft(); + break; + case QChar::Combining_Below: + p += QPoint(0, offset); + case QChar::Combining_BelowAttached: + p += attachmentRect.bottomLeft() - markRect.topLeft(); + p += QPoint((attachmentRect.width() - markRect.width())/2 , 0); + break; + case QChar::Combining_BelowRight: + p += QPoint(0, offset); + case QChar::Combining_BelowRightAttached: + p += attachmentRect.bottomRight() - markRect.topRight(); + break; + case QChar::Combining_Left: + p += QPoint(-offset, 0); + case QChar::Combining_LeftAttached: + break; + case QChar::Combining_Right: + p += QPoint(offset, 0); + case QChar::Combining_RightAttached: + break; + case QChar::Combining_DoubleAbove: + // ### wrong in RTL context! + case QChar::Combining_AboveLeft: + p += QPoint(0, -offset); + case QChar::Combining_AboveLeftAttached: + p += attachmentRect.topLeft() - markRect.bottomLeft(); + break; + case QChar::Combining_Above: + p += QPoint(0, -offset); + case QChar::Combining_AboveAttached: + p += attachmentRect.topLeft() - markRect.bottomLeft(); + p += QPoint((attachmentRect.width() - markRect.width())/2 , 0); + break; + case QChar::Combining_AboveRight: + p += QPoint(0, -offset); + case QChar::Combining_AboveRightAttached: + p += attachmentRect.topRight() - markRect.bottomRight(); + break; + + case QChar::Combining_IotaSubscript: + default: + break; + } +// qDebug("char=%x combiningClass = %d offset=%d/%d", mark, cmb, p.x(), p.y()); + markRect.moveBy(p.x(), p.y()); + attachmentRect |= markRect; + lastCmb = cmb; + if (rightToLeft) { + item->offsets[gfrom+i].x = p.x(); + item->offsets[gfrom+i].y = p.y(); + } else { + item->offsets[gfrom+i].x = p.x() - baseInfo.xoff; + item->offsets[gfrom+i].y = p.y() - baseInfo.yoff; + } + item->advances[gfrom+i] = 0; + } + item->has_positioning = TRUE; +} + + +void qt_heuristicPosition(QShaperItem *item) +{ + int cEnd = -1; + int i = item->num_glyphs; + while (i--) { + if (cEnd == -1 && item->attributes[i].mark) { + cEnd = i; + } else if (cEnd != -1 && !item->attributes[i].mark) { + positionCluster(item, i, cEnd); + cEnd = -1; + } + } +} + + + +// set the glyph attributes heuristically. Assumes a 1 to 1 relationship between chars and glyphs +// and no reordering. +// also computes logClusters heuristically +static void heuristicSetGlyphAttributes(QShaperItem *item, const QChar *uc, int length) +{ + // justification is missing here!!!!! + + if ( item->num_glyphs != length ) + qWarning("QScriptEngine::heuristicSetGlyphAttributes: char length and num glyphs disagree" ); + + unsigned short *logClusters = item->log_clusters; + + int i; + for (i = 0; i < length; ++i) + logClusters[i] = i; + + // first char in a run is never (treated as) a mark + int cStart = 0; + item->attributes[0].mark = FALSE; + item->attributes[0].clusterStart = TRUE; + item->attributes[0].combiningClass = 0; + if (qIsZeroWidthChar(uc[0].unicode())) { + item->attributes[0].zeroWidth = TRUE; + item->advances[0] = 0; + item->has_positioning = TRUE; + } else { + item->attributes[0].zeroWidth = FALSE; + } + + int lastCat = ::category(uc[0]); + for (i = 1; i < length; ++i) { + int cat = ::category(uc[i]); + if (qIsZeroWidthChar(uc[i].unicode())) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = TRUE; + item->attributes[i].zeroWidth = TRUE; + item->attributes[i].combiningClass = 0; + cStart = i; + item->advances[i] = 0; + item->has_positioning = TRUE; + } else if (cat != QChar::Mark_NonSpacing) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = TRUE; + item->attributes[i].combiningClass = 0; + cStart = i; + } else { + int cmb = ::combiningClass(uc[i]); + + if (cmb == 0) { + // Fix 0 combining classes + if ((uc[i].unicode() & 0xff00) == 0x0e00) { + // thai or lao + unsigned char col = uc[i].cell(); + if (col == 0x31 || + col == 0x34 || + col == 0x35 || + col == 0x36 || + col == 0x37 || + col == 0x47 || + col == 0x4c || + col == 0x4d || + col == 0x4e) { + cmb = QChar::Combining_AboveRight; + } else if (col == 0xb1 || + col == 0xb4 || + col == 0xb5 || + col == 0xb6 || + col == 0xb7 || + col == 0xbb || + col == 0xcc || + col == 0xcd) { + cmb = QChar::Combining_Above; + } else if (col == 0xbc) { + cmb = QChar::Combining_Below; + } + } + } + + item->attributes[i].mark = TRUE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].combiningClass = cmb; + logClusters[i] = cStart; + item->advances[i] = 0; + item->has_positioning = TRUE; + } + + if (lastCat == QChar::Separator_Space) + item->attributes[i-1].justification = GlyphAttributes::Space; + else if (cat != QChar::Mark_NonSpacing) + item->attributes[i-1].justification = GlyphAttributes::Character; + else + item->attributes[i-1].justification = GlyphAttributes::NoJustification; + + lastCat = cat; + } +} + +static void heuristicSetGlyphAttributes(QShaperItem *item) +{ + heuristicSetGlyphAttributes(item, item->string->unicode() + item->from, item->length); +} + + +static bool basic_shape(QShaperItem *item) +{ + if (item->font->stringToCMap(item->string->unicode()+item->from, item->length, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + heuristicSetGlyphAttributes(item); + qt_heuristicPosition(item); + return TRUE; +} + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Middle eastern languages +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +// Uniscribe also defines dlig for Hebrew, but we leave this out for now, as it's mostly +// ligatures one does not want in modern Hebrew (as lam-alef ligatures). +enum { + CcmpProperty = 0x1 +}; +#if defined(Q_WS_X11) && !defined(QT_NO_XFTFREETYPE) +static const QOpenType::Features hebrew_features[] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + {0, 0} +}; +#endif +/* Hebrew shaping. In the non opentype case we try to use the + presentation forms specified for Hebrew. Especially for the + ligatures with Dagesh this gives much better results than we could + achieve manually. +*/ +static bool hebrew_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Hebrew); + +#if defined(Q_WS_X11) && !defined(QT_NO_XFTFREETYPE) + QOpenType *openType = item->font->openType(); + + if (openType && openType->supportsScript(item->script)) { + openType->selectScript(item->script, hebrew_features); + + if (item->font->stringToCMap(item->string->unicode()+item->from, item->length, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + heuristicSetGlyphAttributes(item); + openType->shape(item); + return openType->positionAndAdd(item); + } +#endif + + enum { + Dagesh = 0x5bc, + ShinDot = 0x5c1, + SinDot = 0x5c2, + Patah = 0x5b7, + Qamats = 0x5b8, + Holam = 0x5b9, + Rafe = 0x5bf + }; + unsigned short chars[512]; + QChar *shapedChars = item->length > 256 ? (QChar *)::malloc(2*item->length * sizeof(QChar)) : (QChar *)chars; + + const QChar *uc = item->string->unicode() + item->from; + unsigned short *logClusters = item->log_clusters; + + *shapedChars = *uc; + logClusters[0] = 0; + int slen = 1; + int cluster_start = 0; + int i; + for (i = 1; i < item->length; ++i) { + ushort base = shapedChars[slen-1].unicode(); + ushort shaped = 0; + bool invalid = FALSE; + if (uc[i].unicode() == Dagesh) { + if (base >= 0x5d0 + && base <= 0x5ea + && base != 0x5d7 + && base != 0x5dd + && base != 0x5df + && base != 0x5e2 + && base != 0x5e5) { + shaped = base - 0x5d0 + 0xfb30; + } else if (base == 0xfb2a || base == 0xfb2b /* Shin with Shin or Sin dot */) { + shaped = base + 2; + } else { + invalid = TRUE; + } + } else if (uc[i].unicode() == ShinDot) { + if (base == 0x05e9) + shaped = 0xfb2a; + else if (base == 0xfb49) + shaped = 0xfb2c; + else + invalid = TRUE; + } else if (uc[i].unicode() == SinDot) { + if (base == 0x05e9) + shaped = 0xfb2b; + else if (base == 0xfb49) + shaped = 0xfb2d; + else + invalid = TRUE; + } else if (uc[i].unicode() == Patah) { + if (base == 0x5d0) + shaped = 0xfb2e; + } else if (uc[i].unicode() == Qamats) { + if (base == 0x5d0) + shaped = 0xfb2f; + } else if (uc[i].unicode() == Holam) { + if (base == 0x5d5) + shaped = 0xfb4b; + } else if (uc[i].unicode() == Rafe) { + if (base == 0x5d1) + shaped = 0xfb4c; + else if (base == 0x5db) + shaped = 0xfb4d; + else if (base == 0x5e4) + shaped = 0xfb4e; + } + + if (invalid) { + shapedChars[slen] = 0x25cc; + item->attributes[slen].clusterStart = TRUE; + item->attributes[slen].mark = FALSE; + item->attributes[slen].combiningClass = 0; + cluster_start = slen; + ++slen; + } + if (shaped) { + if (item->font->canRender((QChar *)&shaped, 1)) { + shapedChars[slen-1] = QChar(shaped); + } else + shaped = 0; + } + if (!shaped) { + shapedChars[slen] = uc[i]; + if (::category(uc[i]) != QChar::Mark_NonSpacing) { + item->attributes[slen].clusterStart = TRUE; + item->attributes[slen].mark = FALSE; + item->attributes[slen].combiningClass = 0; + cluster_start = slen; + } else { + item->attributes[slen].clusterStart = FALSE; + item->attributes[slen].mark = TRUE; + item->attributes[slen].combiningClass = ::combiningClass(uc[i]); + } + ++slen; + } + logClusters[i] = cluster_start; + } + + if (item->font->stringToCMap(shapedChars, slen, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + for (i = 0; i < item->num_glyphs; ++i) { + if (item->attributes[i].mark) + item->advances[i] = 0; + } + qt_heuristicPosition(item); + + if (item->length > 256) + ::free(shapedChars); + return TRUE; +} + +// these groups correspond to the groups defined in the Unicode standard. +// Some of these groups are equal whith regards to both joining and line breaking behaviour, +// and thus have the same enum value +// +// I'm not sure the mapping of syriac to arabic enums is correct with regards to justification, but as +// I couldn't find any better document I'll hope for the best. +enum ArabicGroup { + // NonJoining + ArabicNone, + ArabicSpace, + // Transparent + Transparent, + // Causing + Center, + Kashida, + + // Arabic + // Dual + Beh, + Noon, + Meem = Noon, + Heh = Noon, + KnottedHeh = Noon, + HehGoal = Noon, + SwashKaf = Noon, + Yeh, + Hah, + Seen, + Sad = Seen, + Tah, + Kaf = Tah, + Gaf = Tah, + Lam = Tah, + Ain, + Feh = Ain, + Qaf = Ain, + // Right + Alef, + Waw, + Dal, + TehMarbuta = Dal, + Reh, + HamzaOnHehGoal, + YehWithTail = HamzaOnHehGoal, + YehBarre = HamzaOnHehGoal, + + // Syriac + // Dual + Beth = Beh, + Gamal = Ain, + Heth = Noon, + Teth = Hah, + Yudh = Noon, + Kaph = Noon, + Lamadh = Lam, + Mim = Noon, + Nun = Noon, + Semakh = Noon, + FinalSemakh = Noon, + SyriacE = Ain, + Pe = Ain, + ReversedPe = Hah, + Qaph = Noon, + Shin = Noon, + Fe = Ain, + + // Right + Alaph = Alef, + Dalath = Dal, + He = Dal, + SyriacWaw = Waw, + Zain = Alef, + YudhHe = Waw, + Sadhe = HamzaOnHehGoal, + Taw = Dal, + + // Compiler bug? Otherwise ArabicGroupsEnd would be equal to Dal + 1. + Dummy = HamzaOnHehGoal, + ArabicGroupsEnd +}; + +static const unsigned char arabic_group[0x150] = { + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + ArabicNone, ArabicNone, Alef, Alef, + Waw, Alef, Yeh, Alef, + Beh, TehMarbuta, Beh, Beh, + Hah, Hah, Hah, Dal, + + Dal, Reh, Reh, Seen, + Seen, Sad, Sad, Tah, + Tah, Ain, Ain, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + // 0x640 + Kashida, Feh, Qaf, Kaf, + Lam, Meem, Noon, Heh, + Waw, Yeh, Yeh, Transparent, + Transparent, Transparent, Transparent, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, Beh, Qaf, + + Transparent, Alef, Alef, Alef, + ArabicNone, Alef, Waw, Waw, + Yeh, Beh, Beh, Beh, + Beh, Beh, Beh, Beh, + + // 0x680 + Beh, Hah, Hah, Hah, + Hah, Hah, Hah, Hah, + Dal, Dal, Dal, Dal, + Dal, Dal, Dal, Dal, + + Dal, Reh, Reh, Reh, + Reh, Reh, Reh, Reh, + Reh, Reh, Seen, Seen, + Seen, Sad, Sad, Tah, + + Ain, Feh, Feh, Feh, + Feh, Feh, Feh, Qaf, + Qaf, Gaf, SwashKaf, Gaf, + Kaf, Kaf, Kaf, Gaf, + + Gaf, Gaf, Gaf, Gaf, + Gaf, Lam, Lam, Lam, + Lam, Noon, Noon, Noon, + Noon, Noon, KnottedHeh, Hah, + + // 0x6c0 + TehMarbuta, HehGoal, HamzaOnHehGoal, HamzaOnHehGoal, + Waw, Waw, Waw, Waw, + Waw, Waw, Waw, Waw, + Yeh, YehWithTail, Yeh, Waw, + + Yeh, Yeh, YehBarre, YehBarre, + ArabicNone, TehMarbuta, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, Transparent, + Transparent, ArabicNone, Transparent, Transparent, + Transparent, Transparent, Dal, Reh, + + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, Seen, Sad, + Ain, ArabicNone, ArabicNone, KnottedHeh, + + // 0x700 + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + Alaph, Transparent, Beth, Gamal, + Gamal, Dalath, Dalath, He, + SyriacWaw, Zain, Heth, Teth, + Teth, Yudh, YudhHe, Kaph, + + Lamadh, Mim, Nun, Semakh, + FinalSemakh, SyriacE, Pe, ReversedPe, + Sadhe, Qaph, Dalath, Shin, + Taw, Beth, Gamal, Dalath, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, ArabicNone, + ArabicNone, Zain, Kaph, Fe, +}; + +static inline ArabicGroup arabicGroup(unsigned short uc) +{ + if (uc >= 0x0600 && uc < 0x750) + return (ArabicGroup) arabic_group[uc-0x600]; + else if (uc == 0x200d) + return Center; + else if (::category(uc) == QChar::Separator_Space) + return ArabicSpace; + else + return ArabicNone; +} + + +/* + Arabic shaping obeys a number of rules according to the joining classes (see Unicode book, section on + arabic). + + Each unicode char has a joining class (right, dual (left&right), center (joincausing) or transparent). + transparent joining is not encoded in QChar::joining(), but applies to all combining marks and format marks. + + Right join-causing: dual + center + Left join-causing: dual + right + center + + Rules are as follows (for a string already in visual order, as we have it here): + + R1 Transparent characters do not affect joining behaviour. + R2 A right joining character, that has a right join-causing char on the right will get form XRight + (R3 A left joining character, that has a left join-causing char on the left will get form XLeft) + Note: the above rule is meaningless, as there are no pure left joining characters defined in Unicode + R4 A dual joining character, that has a left join-causing char on the left and a right join-causing char on + the right will get form XMedial + R5 A dual joining character, that has a right join causing char on the right, and no left join causing char on the left + will get form XRight + R6 A dual joining character, that has a left join causing char on the left, and no right join causing char on the right + will get form XLeft + R7 Otherwise the character will get form XIsolated + + Additionally we have to do the minimal ligature support for lam-alef ligatures: + + L1 Transparent characters do not affect ligature behaviour. + L2 Any sequence of Alef(XRight) + Lam(XMedial) will form the ligature Alef.Lam(XLeft) + L3 Any sequence of Alef(XRight) + Lam(XLeft) will form the ligature Alef.Lam(XIsolated) + + The state table below handles rules R1-R7. +*/ + +enum Shape { + XIsolated, + XFinal, + XInitial, + XMedial, + // intermediate state + XCausing +}; + + +enum Joining { + JNone, + JCausing, + JDual, + JRight, + JTransparent +}; + + +static const Joining joining_for_group[ArabicGroupsEnd] = { + // NonJoining + JNone, // ArabicNone + JNone, // ArabicSpace + // Transparent + JTransparent, // Transparent + // Causing + JCausing, // Center + JCausing, // Kashida + // Dual + JDual, // Beh + JDual, // Noon + JDual, // Yeh + JDual, // Hah + JDual, // Seen + JDual, // Tah + JDual, // Ain + // Right + JRight, // Alef + JRight, // Waw + JRight, // Dal + JRight, // Reh + JRight // HamzaOnHehGoal +}; + + +struct JoiningPair { + Shape form1; + Shape form2; +}; + +static const JoiningPair joining_table[5][4] = +// None, Causing, Dual, Right +{ + { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XInitial }, { XIsolated, XIsolated } }, // XIsolated + { { XFinal, XIsolated }, { XFinal, XCausing }, { XFinal, XInitial }, { XFinal, XIsolated } }, // XFinal + { { XIsolated, XIsolated }, { XInitial, XCausing }, { XInitial, XMedial }, { XInitial, XFinal } }, // XInitial + { { XFinal, XIsolated }, { XMedial, XCausing }, { XMedial, XMedial }, { XMedial, XFinal } }, // XMedial + { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XMedial }, { XIsolated, XFinal } }, // XCausing +}; + + +/* +According to http://www.microsoft.com/middleeast/Arabicdev/IE6/KBase.asp + +1. Find the priority of the connecting opportunities in each word +2. Add expansion at the highest priority connection opportunity +3. If more than one connection opportunity have the same highest value, + use the opportunity closest to the end of the word. + +Following is a chart that provides the priority for connection +opportunities and where expansion occurs. The character group names +are those in table 6.6 of the UNICODE 2.0 book. + + +PrioritY Glyph Condition Kashida Location + +Arabic_Kashida User inserted Kashida The user entered a Kashida in a position. After the user + (Shift+j or Shift+[E with hat]) Thus, it is the highest priority to insert an inserted kashida + automatic kashida. + +Arabic_Seen Seen, Sad Connecting to the next character. After the character. + (Initial or medial form). + +Arabic_HaaDal Teh Marbutah, Haa, Dal Connecting to previous character. Before the final form + of these characters. + +Arabic_Alef Alef, Tah, Lam, Connecting to previous character. Before the final form + Kaf and Gaf of these characters. + +Arabic_BaRa Reh, Yeh Connected to medial Beh Before preceding medial Baa + +Arabic_Waw Waw, Ain, Qaf, Feh Connecting to previous character. Before the final form of + these characters. + +Arabic_Normal Other connecting Connecting to previous character. Before the final form + characters of these characters. + + + +This seems to imply that we have at most one kashida point per arabic word. + +*/ + +struct QArabicProperties { + unsigned char shape; + unsigned char justification; +}; + + +static void getArabicProperties(const unsigned short *chars, int len, QArabicProperties *properties) +{ +// qDebug("arabicSyriacOpenTypeShape: properties:"); + int lastPos = 0; + int lastGroup = ArabicNone; + + ArabicGroup group = arabicGroup(chars[0]); + Joining j = joining_for_group[group]; + Shape shape = joining_table[XIsolated][j].form2; + properties[0].justification = GlyphAttributes::NoJustification; + + for (int i = 1; i < len; ++i) { + // #### fix handling for spaces and punktuation + properties[i].justification = GlyphAttributes::NoJustification; + + group = arabicGroup(chars[i]); + j = joining_for_group[group]; + + if (j == JTransparent) { + properties[i].shape = XIsolated; + continue; + } + + properties[lastPos].shape = joining_table[shape][j].form1; + shape = joining_table[shape][j].form2; + + switch(lastGroup) { + case Seen: + if (properties[lastPos].shape == XInitial || properties[lastPos].shape == XMedial) + properties[i-1].justification = GlyphAttributes::Arabic_Seen; + break; + case Hah: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = GlyphAttributes::Arabic_HaaDal; + break; + case Alef: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = GlyphAttributes::Arabic_Alef; + break; + case Ain: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = GlyphAttributes::Arabic_Waw; + break; + case Noon: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = GlyphAttributes::Arabic_Normal; + break; + case ArabicNone: + break; + + default: + Q_ASSERT(FALSE); + } + + lastGroup = ArabicNone; + + switch(group) { + case ArabicNone: + case Transparent: + // ### Center should probably be treated as transparent when it comes to justification. + case Center: + break; + case ArabicSpace: + properties[i].justification = GlyphAttributes::Arabic_Space; + break; + case Kashida: + properties[i].justification = GlyphAttributes::Arabic_Kashida; + break; + case Seen: + lastGroup = Seen; + break; + + case Hah: + case Dal: + lastGroup = Hah; + break; + + case Alef: + case Tah: + lastGroup = Alef; + break; + + case Yeh: + case Reh: + if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh) + properties[lastPos-1].justification = GlyphAttributes::Arabic_BaRa; + break; + + case Ain: + case Waw: + lastGroup = Ain; + break; + + case Noon: + case Beh: + case HamzaOnHehGoal: + lastGroup = Noon; + break; + case ArabicGroupsEnd: + Q_ASSERT(FALSE); + } + + lastPos = i; + } + properties[lastPos].shape = joining_table[shape][JNone].form1; + + +// for (int i = 0; i < len; ++i) +// qDebug("arabic properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification); +} + + + + + + +// The unicode to unicode shaping codec. +// does only presentation forms B at the moment, but that should be enough for +// simple display +static const ushort arabicUnicodeMapping[256][2] = { + // base of shaped forms, and number-1 of them (0 for non shaping, + // 1 for right binding and 3 for dual binding + + // These are just the glyphs available in Unicode, + // some characters are in R class, but have no glyphs in Unicode. + + { 0x0600, 0 }, // 0x0600 + { 0x0601, 0 }, // 0x0601 + { 0x0602, 0 }, // 0x0602 + { 0x0603, 0 }, // 0x0603 + { 0x0604, 0 }, // 0x0604 + { 0x0605, 0 }, // 0x0605 + { 0x0606, 0 }, // 0x0606 + { 0x0607, 0 }, // 0x0607 + { 0x0608, 0 }, // 0x0608 + { 0x0609, 0 }, // 0x0609 + { 0x060A, 0 }, // 0x060A + { 0x060B, 0 }, // 0x060B + { 0x060C, 0 }, // 0x060C + { 0x060D, 0 }, // 0x060D + { 0x060E, 0 }, // 0x060E + { 0x060F, 0 }, // 0x060F + + { 0x0610, 0 }, // 0x0610 + { 0x0611, 0 }, // 0x0611 + { 0x0612, 0 }, // 0x0612 + { 0x0613, 0 }, // 0x0613 + { 0x0614, 0 }, // 0x0614 + { 0x0615, 0 }, // 0x0615 + { 0x0616, 0 }, // 0x0616 + { 0x0617, 0 }, // 0x0617 + { 0x0618, 0 }, // 0x0618 + { 0x0619, 0 }, // 0x0619 + { 0x061A, 0 }, // 0x061A + { 0x061B, 0 }, // 0x061B + { 0x061C, 0 }, // 0x061C + { 0x061D, 0 }, // 0x061D + { 0x061E, 0 }, // 0x061E + { 0x061F, 0 }, // 0x061F + + { 0x0620, 0 }, // 0x0620 + { 0xFE80, 0 }, // 0x0621 HAMZA + { 0xFE81, 1 }, // 0x0622 R ALEF WITH MADDA ABOVE + { 0xFE83, 1 }, // 0x0623 R ALEF WITH HAMZA ABOVE + { 0xFE85, 1 }, // 0x0624 R WAW WITH HAMZA ABOVE + { 0xFE87, 1 }, // 0x0625 R ALEF WITH HAMZA BELOW + { 0xFE89, 3 }, // 0x0626 D YEH WITH HAMZA ABOVE + { 0xFE8D, 1 }, // 0x0627 R ALEF + { 0xFE8F, 3 }, // 0x0628 D BEH + { 0xFE93, 1 }, // 0x0629 R TEH MARBUTA + { 0xFE95, 3 }, // 0x062A D TEH + { 0xFE99, 3 }, // 0x062B D THEH + { 0xFE9D, 3 }, // 0x062C D JEEM + { 0xFEA1, 3 }, // 0x062D D HAH + { 0xFEA5, 3 }, // 0x062E D KHAH + { 0xFEA9, 1 }, // 0x062F R DAL + + { 0xFEAB, 1 }, // 0x0630 R THAL + { 0xFEAD, 1 }, // 0x0631 R REH + { 0xFEAF, 1 }, // 0x0632 R ZAIN + { 0xFEB1, 3 }, // 0x0633 D SEEN + { 0xFEB5, 3 }, // 0x0634 D SHEEN + { 0xFEB9, 3 }, // 0x0635 D SAD + { 0xFEBD, 3 }, // 0x0636 D DAD + { 0xFEC1, 3 }, // 0x0637 D TAH + { 0xFEC5, 3 }, // 0x0638 D ZAH + { 0xFEC9, 3 }, // 0x0639 D AIN + { 0xFECD, 3 }, // 0x063A D GHAIN + { 0x063B, 0 }, // 0x063B + { 0x063C, 0 }, // 0x063C + { 0x063D, 0 }, // 0x063D + { 0x063E, 0 }, // 0x063E + { 0x063F, 0 }, // 0x063F + + { 0x0640, 0 }, // 0x0640 C TATWEEL // ### Join Causing, only one glyph + { 0xFED1, 3 }, // 0x0641 D FEH + { 0xFED5, 3 }, // 0x0642 D QAF + { 0xFED9, 3 }, // 0x0643 D KAF + { 0xFEDD, 3 }, // 0x0644 D LAM + { 0xFEE1, 3 }, // 0x0645 D MEEM + { 0xFEE5, 3 }, // 0x0646 D NOON + { 0xFEE9, 3 }, // 0x0647 D HEH + { 0xFEED, 1 }, // 0x0648 R WAW + { 0x0649, 3 }, // 0x0649 ALEF MAKSURA // ### Dual, glyphs not consecutive, handle in code. + { 0xFEF1, 3 }, // 0x064A D YEH + { 0x064B, 0 }, // 0x064B + { 0x064C, 0 }, // 0x064C + { 0x064D, 0 }, // 0x064D + { 0x064E, 0 }, // 0x064E + { 0x064F, 0 }, // 0x064F + + { 0x0650, 0 }, // 0x0650 + { 0x0651, 0 }, // 0x0651 + { 0x0652, 0 }, // 0x0652 + { 0x0653, 0 }, // 0x0653 + { 0x0654, 0 }, // 0x0654 + { 0x0655, 0 }, // 0x0655 + { 0x0656, 0 }, // 0x0656 + { 0x0657, 0 }, // 0x0657 + { 0x0658, 0 }, // 0x0658 + { 0x0659, 0 }, // 0x0659 + { 0x065A, 0 }, // 0x065A + { 0x065B, 0 }, // 0x065B + { 0x065C, 0 }, // 0x065C + { 0x065D, 0 }, // 0x065D + { 0x065E, 0 }, // 0x065E + { 0x065F, 0 }, // 0x065F + + { 0x0660, 0 }, // 0x0660 + { 0x0661, 0 }, // 0x0661 + { 0x0662, 0 }, // 0x0662 + { 0x0663, 0 }, // 0x0663 + { 0x0664, 0 }, // 0x0664 + { 0x0665, 0 }, // 0x0665 + { 0x0666, 0 }, // 0x0666 + { 0x0667, 0 }, // 0x0667 + { 0x0668, 0 }, // 0x0668 + { 0x0669, 0 }, // 0x0669 + { 0x066A, 0 }, // 0x066A + { 0x066B, 0 }, // 0x066B + { 0x066C, 0 }, // 0x066C + { 0x066D, 0 }, // 0x066D + { 0x066E, 0 }, // 0x066E + { 0x066F, 0 }, // 0x066F + + { 0x0670, 0 }, // 0x0670 + { 0xFB50, 1 }, // 0x0671 R ALEF WASLA + { 0x0672, 0 }, // 0x0672 + { 0x0673, 0 }, // 0x0673 + { 0x0674, 0 }, // 0x0674 + { 0x0675, 0 }, // 0x0675 + { 0x0676, 0 }, // 0x0676 + { 0x0677, 0 }, // 0x0677 + { 0x0678, 0 }, // 0x0678 + { 0xFB66, 3 }, // 0x0679 D TTEH + { 0xFB5E, 3 }, // 0x067A D TTEHEH + { 0xFB52, 3 }, // 0x067B D BEEH + { 0x067C, 0 }, // 0x067C + { 0x067D, 0 }, // 0x067D + { 0xFB56, 3 }, // 0x067E D PEH + { 0xFB62, 3 }, // 0x067F D TEHEH + + { 0xFB5A, 3 }, // 0x0680 D BEHEH + { 0x0681, 0 }, // 0x0681 + { 0x0682, 0 }, // 0x0682 + { 0xFB76, 3 }, // 0x0683 D NYEH + { 0xFB72, 3 }, // 0x0684 D DYEH + { 0x0685, 0 }, // 0x0685 + { 0xFB7A, 3 }, // 0x0686 D TCHEH + { 0xFB7E, 3 }, // 0x0687 D TCHEHEH + { 0xFB88, 1 }, // 0x0688 R DDAL + { 0x0689, 0 }, // 0x0689 + { 0x068A, 0 }, // 0x068A + { 0x068B, 0 }, // 0x068B + { 0xFB84, 1 }, // 0x068C R DAHAL + { 0xFB82, 1 }, // 0x068D R DDAHAL + { 0xFB86, 1 }, // 0x068E R DUL + { 0x068F, 0 }, // 0x068F + + { 0x0690, 0 }, // 0x0690 + { 0xFB8C, 1 }, // 0x0691 R RREH + { 0x0692, 0 }, // 0x0692 + { 0x0693, 0 }, // 0x0693 + { 0x0694, 0 }, // 0x0694 + { 0x0695, 0 }, // 0x0695 + { 0x0696, 0 }, // 0x0696 + { 0x0697, 0 }, // 0x0697 + { 0xFB8A, 1 }, // 0x0698 R JEH + { 0x0699, 0 }, // 0x0699 + { 0x069A, 0 }, // 0x069A + { 0x069B, 0 }, // 0x069B + { 0x069C, 0 }, // 0x069C + { 0x069D, 0 }, // 0x069D + { 0x069E, 0 }, // 0x069E + { 0x069F, 0 }, // 0x069F + + { 0x06A0, 0 }, // 0x06A0 + { 0x06A1, 0 }, // 0x06A1 + { 0x06A2, 0 }, // 0x06A2 + { 0x06A3, 0 }, // 0x06A3 + { 0xFB6A, 3 }, // 0x06A4 D VEH + { 0x06A5, 0 }, // 0x06A5 + { 0xFB6E, 3 }, // 0x06A6 D PEHEH + { 0x06A7, 0 }, // 0x06A7 + { 0x06A8, 0 }, // 0x06A8 + { 0xFB8E, 3 }, // 0x06A9 D KEHEH + { 0x06AA, 0 }, // 0x06AA + { 0x06AB, 0 }, // 0x06AB + { 0x06AC, 0 }, // 0x06AC + { 0xFBD3, 3 }, // 0x06AD D NG + { 0x06AE, 0 }, // 0x06AE + { 0xFB92, 3 }, // 0x06AF D GAF + + { 0x06B0, 0 }, // 0x06B0 + { 0xFB9A, 3 }, // 0x06B1 D NGOEH + { 0x06B2, 0 }, // 0x06B2 + { 0xFB96, 3 }, // 0x06B3 D GUEH + { 0x06B4, 0 }, // 0x06B4 + { 0x06B5, 0 }, // 0x06B5 + { 0x06B6, 0 }, // 0x06B6 + { 0x06B7, 0 }, // 0x06B7 + { 0x06B8, 0 }, // 0x06B8 + { 0x06B9, 0 }, // 0x06B9 + { 0xFB9E, 1 }, // 0x06BA R NOON GHUNNA + { 0xFBA0, 3 }, // 0x06BB D RNOON + { 0x06BC, 0 }, // 0x06BC + { 0x06BD, 0 }, // 0x06BD + { 0xFBAA, 3 }, // 0x06BE D HEH DOACHASHMEE + { 0x06BF, 0 }, // 0x06BF + + { 0xFBA4, 1 }, // 0x06C0 R HEH WITH YEH ABOVE + { 0xFBA6, 3 }, // 0x06C1 D HEH GOAL + { 0x06C2, 0 }, // 0x06C2 + { 0x06C3, 0 }, // 0x06C3 + { 0x06C4, 0 }, // 0x06C4 + { 0xFBE0, 1 }, // 0x06C5 R KIRGHIZ OE + { 0xFBD9, 1 }, // 0x06C6 R OE + { 0xFBD7, 1 }, // 0x06C7 R U + { 0xFBDB, 1 }, // 0x06C8 R YU + { 0xFBE2, 1 }, // 0x06C9 R KIRGHIZ YU + { 0x06CA, 0 }, // 0x06CA + { 0xFBDE, 1 }, // 0x06CB R VE + { 0xFBFC, 3 }, // 0x06CC D FARSI YEH + { 0x06CD, 0 }, // 0x06CD + { 0x06CE, 0 }, // 0x06CE + { 0x06CF, 0 }, // 0x06CF + + { 0xFBE4, 3 }, // 0x06D0 D E + { 0x06D1, 0 }, // 0x06D1 + { 0xFBAE, 1 }, // 0x06D2 R YEH BARREE + { 0xFBB0, 1 }, // 0x06D3 R YEH BARREE WITH HAMZA ABOVE + { 0x06D4, 0 }, // 0x06D4 + { 0x06D5, 0 }, // 0x06D5 + { 0x06D6, 0 }, // 0x06D6 + { 0x06D7, 0 }, // 0x06D7 + { 0x06D8, 0 }, // 0x06D8 + { 0x06D9, 0 }, // 0x06D9 + { 0x06DA, 0 }, // 0x06DA + { 0x06DB, 0 }, // 0x06DB + { 0x06DC, 0 }, // 0x06DC + { 0x06DD, 0 }, // 0x06DD + { 0x06DE, 0 }, // 0x06DE + { 0x06DF, 0 }, // 0x06DF + + { 0x06E0, 0 }, // 0x06E0 + { 0x06E1, 0 }, // 0x06E1 + { 0x06E2, 0 }, // 0x06E2 + { 0x06E3, 0 }, // 0x06E3 + { 0x06E4, 0 }, // 0x06E4 + { 0x06E5, 0 }, // 0x06E5 + { 0x06E6, 0 }, // 0x06E6 + { 0x06E7, 0 }, // 0x06E7 + { 0x06E8, 0 }, // 0x06E8 + { 0x06E9, 0 }, // 0x06E9 + { 0x06EA, 0 }, // 0x06EA + { 0x06EB, 0 }, // 0x06EB + { 0x06EC, 0 }, // 0x06EC + { 0x06ED, 0 }, // 0x06ED + { 0x06EE, 0 }, // 0x06EE + { 0x06EF, 0 }, // 0x06EF + + { 0x06F0, 0 }, // 0x06F0 + { 0x06F1, 0 }, // 0x06F1 + { 0x06F2, 0 }, // 0x06F2 + { 0x06F3, 0 }, // 0x06F3 + { 0x06F4, 0 }, // 0x06F4 + { 0x06F5, 0 }, // 0x06F5 + { 0x06F6, 0 }, // 0x06F6 + { 0x06F7, 0 }, // 0x06F7 + { 0x06F8, 0 }, // 0x06F8 + { 0x06F9, 0 }, // 0x06F9 + { 0x06FA, 0 }, // 0x06FA + { 0x06FB, 0 }, // 0x06FB + { 0x06FC, 0 }, // 0x06FC + { 0x06FD, 0 }, // 0x06FD + { 0x06FE, 0 }, // 0x06FE + { 0x06FF, 0 } // 0x06FF +}; + +// the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, this table does +static const ushort alefMaksura[4] = {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}; + +// this is a bit tricky. Alef always binds to the right, so the second parameter descibing the shape +// of the lam can be either initial of medial. So initial maps to the isolated form of the ligature, +// medial to the final form +static const ushort arabicUnicodeLamAlefMapping[6][4] = { + { 0xfffd, 0xfffd, 0xfef5, 0xfef6 }, // 0x622 R Alef with Madda above + { 0xfffd, 0xfffd, 0xfef7, 0xfef8 }, // 0x623 R Alef with Hamza above + { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, // 0x624 // Just to fill the table ;-) + { 0xfffd, 0xfffd, 0xfef9, 0xfefa }, // 0x625 R Alef with Hamza below + { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, // 0x626 // Just to fill the table ;-) + { 0xfffd, 0xfffd, 0xfefb, 0xfefc } // 0x627 R Alef +}; + +static inline int getShape(uchar cell, int shape) +{ + // the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, handle this here + uint ch = (cell != 0x49) + ? (shape ? arabicUnicodeMapping[cell][0] + shape : 0x600+cell) + : alefMaksura[shape] ; + return ch; +} + + +/* + Two small helper functions for arabic shaping. +*/ +static inline const QChar prevChar(const QString *str, int pos) +{ + //qDebug("leftChar: pos=%d", pos); + pos--; + const QChar *ch = str->unicode() + pos; + while(pos > -1) { + if(::category(*ch) != QChar::Mark_NonSpacing) + return *ch; + pos--; + ch--; + } + return QChar::replacement; +} + +static inline const QChar nextChar(const QString *str, int pos) +{ + pos++; + int len = str->length(); + const QChar *ch = str->unicode() + pos; + while(pos < len) { + //qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); + if(::category(*ch) != QChar::Mark_NonSpacing) + return *ch; + // assume it's a transparent char, this might not be 100% correct + pos++; + ch++; + } + return QChar::replacement; +} + + +static void shapedString(const QString *uc, int from, int len, QChar *shapeBuffer, int *shapedLength, + bool reverse, GlyphAttributes *attributes, unsigned short *logClusters) +{ + Q_ASSERT((int)uc->length() >= from + len); + + if(len == 0) { + *shapedLength = 0; + return; + } + + QVarLengthArray<QArabicProperties> props(len + 2); + QArabicProperties *properties = props.data(); + int f = from; + int l = len; + if (from > 0) { + --f; + ++l; + ++properties; + } + if (f + l < (int)uc->length()) { + ++l; + } + getArabicProperties((const unsigned short *)(uc->unicode()+f), l, props.data()); + + const QChar *ch = uc->unicode() + from; + QChar *data = shapeBuffer; + int clusterStart = 0; + + for (int i = 0; i < len; i++) { + uchar r = ch->row(); + int gpos = data - shapeBuffer; + + if (r != 0x06) { + if (r == 0x20) { + uchar c = ch->cell(); + if (c == 0x0c || c == 0x0d) + // remove ZWJ and ZWNJ + goto skip; + } + if (reverse) + *data = mirroredChar(*ch); + else + *data = *ch; + } else { + uchar c = ch->cell(); + int pos = i + from; + int shape = properties[i].shape; +// qDebug("mapping U+%x to shape %d glyph=0x%x", ch->unicode(), shape, getShape(c, shape)); + // take care of lam-alef ligatures (lam right of alef) + ushort map; + switch (c) { + case 0x44: { // lam + const QChar pch = nextChar(uc, pos); + if (pch.row() == 0x06) { + switch (pch.cell()) { + case 0x22: + case 0x23: + case 0x25: + case 0x27: +// qDebug(" lam of lam-alef ligature"); + map = arabicUnicodeLamAlefMapping[pch.cell() - 0x22][shape]; + goto next; + default: + break; + } + } + break; + } + case 0x22: // alef with madda + case 0x23: // alef with hamza above + case 0x25: // alef with hamza below + case 0x27: // alef + if (prevChar(uc, pos).unicode() == 0x0644) { + // have a lam alef ligature + //qDebug(" alef of lam-alef ligature"); + goto skip; + } + default: + break; + } + map = getShape(c, shape); + next: + *data = map; + } + // ##### Fixme + //attributes[gpos].zeroWidth = zeroWidth; + if (::category(*ch) == QChar::Mark_NonSpacing) { + attributes[gpos].mark = TRUE; +// qDebug("glyph %d (char %d) is mark!", gpos, i); + } else { + attributes[gpos].mark = FALSE; + clusterStart = data - shapeBuffer; + } + attributes[gpos].clusterStart = !attributes[gpos].mark; + attributes[gpos].combiningClass = combiningClass(*ch); + attributes[gpos].justification = properties[i].justification; +// qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode()); + data++; + skip: + ch++; + logClusters[i] = clusterStart; + } + *shapedLength = data - shapeBuffer; +} + +#if defined(Q_WS_X11) && !defined(QT_NO_XFTFREETYPE) + +enum { + InitProperty = 0x2, + IsolProperty = 0x4, + FinaProperty = 0x8, + MediProperty = 0x10, + RligProperty = 0x20, + CaltProperty = 0x40, + LigaProperty = 0x80, + DligProperty = 0x100, + CswhProperty = 0x200, + MsetProperty = 0x400 +}; + +static const QOpenType::Features arabic_features[] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { FT_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty }, + { FT_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty }, + { FT_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty }, + { FT_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, + { FT_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty }, + { FT_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty }, + { FT_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty }, + { FT_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty }, + { FT_MAKE_TAG('c', 's', 'w', 'h'), CswhProperty }, + // mset is used in old Win95 fonts that don't have a 'mark' positioning table. + { FT_MAKE_TAG('m', 's', 'e', 't'), MsetProperty }, + {0, 0} +}; + +static const QOpenType::Features syriac_features[] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { FT_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty }, + { FT_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty }, + { FT_MAKE_TAG('f', 'i', 'n', '2'), FinaProperty }, + { FT_MAKE_TAG('f', 'i', 'n', '3'), FinaProperty }, + { FT_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty }, + { FT_MAKE_TAG('m', 'e', 'd', '2'), MediProperty }, + { FT_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, + { FT_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty }, + { FT_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty }, + { FT_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty }, + { FT_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty }, + {0, 0} +}; + +static bool arabicSyriacOpenTypeShape(QOpenType *openType, QShaperItem *item, bool *ot_ok) +{ + *ot_ok = true; + + openType->selectScript(item->script, item->script == QFont::Arabic ? arabic_features : syriac_features); + int nglyphs = item->num_glyphs; + if (item->font->stringToCMap(item->string->unicode()+item->from, item->length, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + heuristicSetGlyphAttributes(item); + + unsigned short *logClusters = item->log_clusters; + const unsigned short *uc = (const unsigned short *)item->string->unicode() + item->from; + + QVarLengthArray<QArabicProperties> props(item->length+2); + QArabicProperties *properties = props.data(); + int f = 0; + int l = item->length; + if (item->from > 0) { + --f; + ++l; + ++properties; + } + if (f + l < (int)item->string->length()) { + ++l; + } + getArabicProperties((const unsigned short *)(uc+f), l, props.data()); + + QVarLengthArray<uint> apply(item->num_glyphs); + + + // Hack to remove ZWJ and ZWNJ from rendered output. + int j = 0; + for (int i = 0; i < item->num_glyphs; i++) { + if (uc[i] == 0x200c || uc[i] == 0x200d) + continue; + item->glyphs[j] = item->glyphs[i]; + item->attributes[j] = item->attributes[i]; + item->advances[j] = item->advances[i]; + item->offsets[j] = item->offsets[i]; + properties[j] = properties[i]; + item->attributes[j].justification = properties[i].justification; + logClusters[i] = logClusters[j]; + ++j; + } + item->num_glyphs = j; + + for (int i = 0; i < item->num_glyphs; i++) { + apply[i] = 0; + + if (properties[i].shape == XIsolated) + apply[i] |= MediProperty|FinaProperty|InitProperty; + else if (properties[i].shape == XMedial) + apply[i] |= IsolProperty|FinaProperty|InitProperty; + else if (properties[i].shape == XFinal) + apply[i] |= IsolProperty|MediProperty|InitProperty; + else if (properties[i].shape == XInitial) + apply[i] |= IsolProperty|MediProperty|FinaProperty; + } + + if (!openType->shape(item, apply.data())) { + *ot_ok = false; + return false; + } + item->num_glyphs = nglyphs; + return openType->positionAndAdd(item); +} + +#endif + +// #### stil missing: identify invalid character combinations +static bool arabic_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Arabic); + +#if defined(Q_WS_X11) && !defined(QT_NO_XFTFREETYPE) + QOpenType *openType = item->font->openType(); + + if (openType && openType->supportsScript(QFont::Arabic)) { + bool ot_ok; + if (arabicSyriacOpenTypeShape(openType, item, &ot_ok)) + return true; + if (ot_ok) + return false; + // fall through to the non OT code + } +#endif + + QVarLengthArray<ushort> shapedChars(item->length); + + int slen; + shapedString(item->string, item->from, item->length, (QChar *)shapedChars.data(), &slen, + item->flags & QTextEngine::RightToLeft, + item->attributes, item->log_clusters); + + if (item->font->stringToCMap((QChar *)shapedChars.data(), slen, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + for (int i = 0; i < slen; ++i) + if (item->attributes[i].mark) + item->advances[i] = 0; + qt_heuristicPosition(item); + return TRUE; +} + +#if defined(Q_WS_X11) +# include "qscriptengine_x11.cpp" +#elif defined(Q_WS_WIN) +# include "qscriptengine_win.cpp" +#elif defined(Q_WS_MAC) +# include "qscriptengine_mac.cpp" +#elif defined(Q_WS_QWS) +# include "qscriptengine_qws.cpp" +#endif |