Browse files

working dynamic runtime, changes to the process start chain, to accom…

…modate multiple runtimes, changes to build scripts CUZ ORDER MATTERS FOR LINKER INVOCATIONS grrr
  • Loading branch information...
1 parent 725bad6 commit 47e1a7fc264b0434a4380f029b999bfb288e9df1 @wolfwood wolfwood committed Oct 8, 2011
View
507 app/d/dynhello/console.d
@@ -0,0 +1,507 @@
+module console;
+
+public import user.console;
+import SysConsole = libos.console;
+
+alias char[] string;
+
+// A console cursor position as returned by Console.position().
+// NOTE(review): x/y are filled by SysConsole.Console.getPosition(x, y);
+// presumably column and row respectively -- confirm against libos.console.
+struct Point {
+ uint x;
+ uint y;
+}
+
+// Application-facing console facade. Every member is static and forwards to
+// the matching member of SysConsole.Console (libos.console); the only logic
+// added here is reset() and the Point-returning position() getter.
+class Console {
+static:
+
+ // Hands the video memory pointer to the system console.
+ void initialize(ubyte* vidmem) {
+ SysConsole.Console.initialize(vidmem);
+ }
+
+ // Restores the system console's default colors.
+ void resetColor() {
+ SysConsole.Console.resetColor();
+ }
+
+ // Property setter: foreground color.
+ void forecolor(Color clr) {
+ SysConsole.Console.forecolor = clr;
+ }
+
+ // Property getter: foreground color.
+ Color forecolor() {
+ return SysConsole.Console.forecolor();
+ }
+
+ // Property setter: background color.
+ void backcolor(Color clr) {
+ SysConsole.Console.backcolor = clr;
+ }
+
+ // Property getter: background color.
+ Color backcolor() {
+ return SysConsole.Console.backcolor();
+ }
+
+ // Writes a string through the system console.
+ void putString(string foo) {
+ return SysConsole.Console.putString(foo);
+ }
+
+ // Writes a single character through the system console.
+ void putChar(char foo) {
+ return SysConsole.Console.putChar(foo);
+ }
+
+ // Console width as reported by the system console.
+ uint width() {
+ return SysConsole.Console.width();
+ }
+
+ // Console height as reported by the system console.
+ uint height() {
+ return SysConsole.Console.height();
+ }
+
+ // Resets the colors first, then clears the screen.
+ void reset() {
+ resetColor();
+ clear();
+ }
+
+ // Clears the screen via the system console.
+ void clear() {
+ SysConsole.Console.clear();
+ }
+
+ // Property getter: current cursor position, packed into a Point.
+ Point position() {
+ Point ret;
+ SysConsole.Console.getPosition(ret.x, ret.y);
+ return ret;
+ }
+
+ // Moves the cursor to (x, y).
+ void position(uint x, uint y) {
+ SysConsole.Console.setPosition(x,y);
+ }
+
+ // Scrolls the console by numLines lines.
+ void scroll(uint numLines) {
+ SysConsole.Console.scroll(numLines);
+ }
+}
+
+private:
+// Converts a signed integer to its textual form in the given base.
+//
+// val:  the value to convert; negative values get a leading '-'.
+// base: radix in 2..36, digits above 9 are rendered as 'a'..'z'. Any other
+//       radix cannot be rendered (0 and 1 would divide by zero or never
+//       terminate, >36 has no digit characters), so it falls back to 10.
+// Returns a freshly allocated string.
+string itoa(long val, uint base = 10) {
+    if (base < 2 || base > 36) {
+        base = 10;
+    }
+
+    bool negative = (val < 0);
+
+    // Work on the magnitude as an unsigned value: negating long.min as a
+    // long overflows, while the unsigned negation is well defined.
+    ulong magnitude = negative ? -cast(ulong)val : cast(ulong)val;
+
+    // One character per digit, plus one for the sign.
+    uint length = negative ? 2 : 1;
+    for (ulong rest = magnitude; rest >= base; rest /= base) {
+        length++;
+    }
+
+    string ret = new char[length];
+
+    // Fill the digits from the least significant end backwards.
+    uint pos = length;
+    do {
+        uint digit = cast(uint)(magnitude % base);
+        pos--;
+        if (digit < 10) {
+            ret[pos] = cast(char)('0' + digit);
+        }
+        else {
+            ret[pos] = cast(char)('a' + (digit - 10));
+        }
+        magnitude /= base;
+    } while (magnitude != 0);
+
+    if (negative) {
+        ret[0] = '-';
+    }
+
+    return ret;
+}
+
+// Converts an unsigned integer to its textual form in the given base.
+//
+// val:  the value to convert.
+// base: radix in 2..36, digits above 9 are rendered as 'a'..'z'. Any other
+//       radix cannot be rendered (0 and 1 would divide by zero or never
+//       terminate, >36 has no digit characters), so it falls back to 10.
+// Returns a freshly allocated string.
+string utoa(ulong val, uint base = 10) {
+    if (base < 2 || base > 36) {
+        base = 10;
+    }
+
+    // Count the digits so the result can be allocated in one go.
+    uint length = 1;
+    for (ulong rest = val; rest >= base; rest /= base) {
+        length++;
+    }
+
+    string ret = new char[length];
+
+    // Fill the digits from the least significant end backwards.
+    uint pos = length;
+    do {
+        uint digit = cast(uint)(val % base);
+        pos--;
+        if (digit < 10) {
+            ret[pos] = cast(char)('0' + digit);
+        }
+        else {
+            ret[pos] = cast(char)('a' + (digit - 10));
+        }
+        val /= base;
+    } while (val != 0);
+
+    return ret;
+}
+
+// Overlays a float with a 32-bit integer so ftoa can read the raw bit
+// pattern (sign, exponent and fraction fields) of the value.
+private union intFloat {
+ int l;
+ float f;
+}
+
+// Overlays a double with a 64-bit integer so dtoa can read the raw bit
+// pattern (sign, exponent and fraction fields) of the value.
+private union longDouble {
+ long l;
+ double f;
+}
+
+// Overlays an 80-bit extended-precision real with its two fields.
+//
+// The x87 extended format stores the 64-bit significand in bytes 0-7 and the
+// sign plus 15-bit exponent in bytes 8-9, so `frac` has to come first. With
+// `exp` declared first it would alias the low 16 bits of the significand and
+// `frac` (aligned to offset 8) would alias the exponent.
+private union longReal {
+    struct inner {
+        long frac;  // significand, including the explicit integer bit (bit 63)
+        short exp;  // bit 15 = sign, bits 0-14 = biased exponent (bias 16383)
+    }
+
+    inner l;
+    real f;
+}
+
+// Formats a single-precision complex number as "<re> + <im>i".
+//
+// Returns "inf" when val compares identical to cfloat.infinity and "nan"
+// when both components are unordered (NaN); otherwise each component is
+// formatted with ftoa using the given base.
+string ctoa(cfloat val, uint base = 10) {
+    if (val is cfloat.infinity) {
+        return "inf";
+    }
+
+    bool realIsNan = (val.re !<>= 0.0);
+    bool imagIsNan = (val.im !<>= 0.0);
+    if (realIsNan && imagIsNan) {
+        return "nan";
+    }
+
+    string realText = ftoa(val.re, base);
+    string imagText = ftoa(val.im, base);
+    return realText ~ " + " ~ imagText ~ "i";
+}
+
+// Formats a double-precision complex number as "<re> + <im>i".
+//
+// Returns "inf" when val compares identical to cdouble.infinity and "nan"
+// when both components are unordered (NaN). Both components are formatted
+// with dtoa; sending the imaginary part through ftoa would silently
+// truncate it to single precision.
+string ctoa(cdouble val, uint base = 10) {
+    if (val is cdouble.infinity) {
+        return "inf";
+    }
+    else if (val.re !<>= 0.0 && val.im !<>= 0.0) {
+        return "nan";
+    }
+
+    return dtoa(val.re, base) ~ " + " ~ dtoa(val.im, base) ~ "i";
+}
+
+// Formats an extended-precision complex number as "<re> + <im>i".
+//
+// Returns "inf" when val compares identical to creal.infinity and "nan"
+// when both components are unordered (NaN). The NaN test uses the same
+// unordered comparison as the cfloat/cdouble overloads: an equality-style
+// test against creal.nan cannot be relied on because NaN never compares
+// equal to anything. Both components are formatted with rtoa; sending the
+// imaginary part through ftoa would truncate it to single precision.
+string ctoa(creal val, uint base = 10) {
+    if (val is creal.infinity) {
+        return "inf";
+    }
+    else if (val.re !<>= 0.0 && val.im !<>= 0.0) {
+        return "nan";
+    }
+
+    return rtoa(val.re, base) ~ " + " ~ rtoa(val.im, base) ~ "i";
+}
+
+// Formats a float with up to six fractional decimal digits, rounded on the
+// seventh, with trailing zeroes (and a bare decimal point) removed.
+//
+// val:  the value to format.
+// base: radix handed to itoa for the integer part. The fractional digits
+//       are always generated in decimal.
+//       NOTE(review): a non-decimal base therefore yields a mixed-radix
+//       string; confirm whether any caller passes base != 10.
+// Returns "inf"/"-inf" for infinities, "nan" for NaN, and "0" for zero,
+// denormals and anything that rounds to zero.
+//
+// Limitation: the integer part is held in a long, so magnitudes of 2^63 and
+// above cannot be rendered and still yield "0".
+string ftoa(float val, uint base = 10) {
+    if (val == float.infinity) {
+        return "inf";
+    }
+    else if (val == -float.infinity) {
+        return "-inf";
+    }
+    else if (val !<>= 0.0) {
+        return "nan";
+    }
+    else if (val == 0.0) {
+        return "0";
+    }
+
+    intFloat iF;
+    iF.f = val;
+
+    // IEEE 754 single precision: 8 exponent bits biased by 127 and 23
+    // fraction bits with an implicit leading one.
+    int exp = ((iF.l >> 23) & 0xff) - 127;
+    long mantissa = (iF.l & 0x7fffff) | 0x800000;
+    long intPart = 0;
+    long fracPart = 0; // fraction as 24-bit fixed point
+
+    if (exp > 62) {
+        // mantissa << (exp - 23) would no longer fit in a long.
+        return "0";
+    }
+    else if (exp < -23) {
+        // Denormals and values far below the six printed digits.
+        return "0";
+    }
+    else if (exp >= 23) {
+        intPart = mantissa << (exp - 23);
+    }
+    else if (exp >= 0) {
+        intPart = mantissa >> (23 - exp);
+        fracPart = (mantissa << (exp + 1)) & 0xffffff;
+    }
+    else { // exp < 0
+        fracPart = (mantissa & 0xffffff) >> (-(exp + 1));
+    }
+
+    // Produce seven decimal digits: six are kept, the seventh only decides
+    // the rounding direction.
+    char[6] digits;
+    uint roundDigit = 0;
+    for (uint k = 0; k < 7; k++) {
+        fracPart *= 10;
+        uint digit = cast(uint)(fracPart >> 24);
+        fracPart &= 0xffffff;
+        if (k < 6) {
+            digits[k] = cast(char)('0' + digit);
+        }
+        else {
+            roundDigit = digit;
+        }
+    }
+
+    // Round half up, carrying numerically. Editing the finished string
+    // instead would drop integer digits (or hit the sign) whenever the
+    // carry runs through a 9 in the integer part.
+    if (roundDigit >= 5) {
+        int i = 5;
+        while (i >= 0 && digits[i] == '9') {
+            digits[i] = '0';
+            i--;
+        }
+        if (i >= 0) {
+            digits[i]++;
+        }
+        else {
+            intPart++;
+        }
+    }
+
+    // Drop trailing zeroes of the fraction.
+    uint used = 6;
+    while (used > 0 && digits[used - 1] == '0') {
+        used--;
+    }
+
+    // Everything rounded away: print a plain zero without a sign.
+    if (intPart == 0 && used == 0) {
+        return "0";
+    }
+
+    string ret;
+    if (iF.l < 0) {
+        ret ~= "-";
+    }
+
+    ret ~= itoa(intPart, base);
+    if (used > 0) {
+        ret ~= ".";
+        ret ~= digits[0 .. used];
+    }
+
+    return ret;
+}
+
+// Formats a double with up to six fractional decimal digits, rounded on the
+// seventh, with trailing zeroes (and a bare decimal point) removed.
+//
+// val:       the value to format.
+// base:      radix handed to itoa for the integer part. The fractional
+//            digits are always generated in decimal.
+//            NOTE(review): a non-decimal base therefore yields a
+//            mixed-radix string; confirm whether any caller relies on it.
+// doIntPart: when false, the integer part and the decimal point are left
+//            out and only the (signed) fractional digits are returned; a
+//            fraction that is zero or rounds up to one yields "0".
+// Returns "inf"/"-inf" for infinities, "nan" for NaN, and "0" for zero,
+// denormals and anything that rounds to zero.
+//
+// Limitation: the integer part is held in a long, so magnitudes of 2^63 and
+// above cannot be rendered and yield "0".
+string dtoa(double val, uint base = 10, bool doIntPart = true) {
+    if (val == double.infinity) {
+        return "inf";
+    }
+    else if (val == -double.infinity) {
+        return "-inf";
+    }
+    else if (val !<>= 0.0) {
+        return "nan";
+    }
+    else if (val == 0.0) {
+        return "0";
+    }
+
+    longDouble iF;
+    iF.f = val;
+
+    // IEEE 754 double precision: 11 exponent bits biased by 1023 and 52
+    // fraction bits with an implicit leading one.
+    long exp = ((iF.l >> 52) & 0x7ff);
+    if (exp == 0) {
+        // Denormals are far below the six printed digits.
+        return "0";
+    }
+    exp -= 1023;
+
+    long mantissa = (iF.l & 0xfffffffffffff) | 0x10000000000000;
+    long intPart = 0;
+    long fracPart = 0; // fraction as 53-bit fixed point
+
+    if (exp > 62) {
+        // mantissa << (exp - 52) would no longer fit in a long.
+        return "0";
+    }
+    else if (exp < -52) {
+        return "0";
+    }
+    else if (exp >= 52) {
+        intPart = mantissa << (exp - 52);
+    }
+    else if (exp >= 0) {
+        intPart = mantissa >> (52 - exp);
+        fracPart = (mantissa << (exp + 1)) & 0x1fffffffffffff;
+    }
+    else { // exp < 0
+        fracPart = (mantissa & 0x1fffffffffffff) >> (-(exp + 1));
+    }
+
+    // Produce seven decimal digits: six are kept, the seventh only decides
+    // the rounding direction.
+    char[6] digits;
+    uint roundDigit = 0;
+    for (uint k = 0; k < 7; k++) {
+        fracPart *= 10;
+        uint digit = cast(uint)(fracPart >> 53);
+        fracPart &= 0x1fffffffffffff;
+        if (k < 6) {
+            digits[k] = cast(char)('0' + digit);
+        }
+        else {
+            roundDigit = digit;
+        }
+    }
+
+    // Round half up, carrying numerically. Editing the finished string
+    // instead drops integer digits when the carry runs through a 9
+    // (19.99999999 came out as "2", 9.99999999 as "0").
+    if (roundDigit >= 5) {
+        int i = 5;
+        while (i >= 0 && digits[i] == '9') {
+            digits[i] = '0';
+            i--;
+        }
+        if (i >= 0) {
+            digits[i]++;
+        }
+        else {
+            intPart++;
+        }
+    }
+
+    // Drop trailing zeroes of the fraction.
+    uint used = 6;
+    while (used > 0 && digits[used - 1] == '0') {
+        used--;
+    }
+
+    // Nothing printable is left: print a plain zero without a sign.
+    if (used == 0 && (!doIntPart || intPart == 0)) {
+        return "0";
+    }
+
+    string ret;
+    if (iF.l < 0) {
+        ret ~= "-";
+    }
+
+    if (doIntPart) {
+        ret ~= itoa(intPart, base);
+        if (used > 0) {
+            ret ~= ".";
+        }
+    }
+
+    ret ~= digits[0 .. used];
+
+    return ret;
+}
+
+// Formats a real by converting it to double and delegating to dtoa.
+//
+// val:  the value to format.
+// base: radix forwarded to dtoa.
+//
+// Why no dedicated 80-bit path: the previous one shifted a 64-bit long by
+// 64 bits (an invalid shift count), treated the unsigned significand as a
+// signed long and rejected every exponent >= 31, so it could not produce
+// correct digits. Only six fractional digits are printed, so double
+// precision is sufficient; the cost is that magnitudes outside the double
+// range are formatted as dtoa formats infinity or zero.
+//
+// The fallback used to cast to double and then call ftoa, which narrowed
+// the value to float; it now goes through dtoa.
+string rtoa(real val, uint base = 10) {
+    return dtoa(cast(double)val, base);
+}
View
38 app/d/dynhello/dsss.conf
@@ -0,0 +1,38 @@
+name = xomb
+
+CC = x86_64-pc-elf-gcc
+CFLAGS = -nostdlib -nodefaultlibs -g -DUSE_ASSERT -mcmodel=kernel
+
+[*]
+buildflags=-dc=ldc-xomb
+
+[hello.d]
+
+buildflags=-dc=ldc-xomb -I../../.. -I../../../runtimes -I../../../runtimes/mindrt
+
+prebuild= \
+\
+mkdir -p dsss_imports;\
+mkdir -p dsss_objs;\
+mkdir -p dsss_objs/G;\
+mkdir -p dsss_objs/O;\
+
+target = hello
+
+# we will need some post build foo to link to the runtime
+
+postbuild = \
+\
+echo ; \
+echo Creating Application Executable; \
+echo '--> hello';\
+ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o hello `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/libd.a ../../../runtimes/djehuty/djrt.a;\
+\
+echo ;\
+echo Creating App Symbol File; \
+echo '--> hello.sym';\
+../../../build/mkldsym.sh hello hello.sym; \
+\
+echo ;\
+echo Copying;\
+cp hello ../../../build/root/binaries/dynhello
View
46 app/d/dynhello/hello.d
@@ -0,0 +1,46 @@
+/* hello.d
+
+ XOmB hello-world test application for the dynamic runtime
+
+*/
+
+module hello;
+
+import console;
+
+// required by entry.
+import libos.keyboard;
+import libos.libdeepmajik.threadscheduler;
+
+// Entry point: prints a welcome banner followed by every command-line
+// argument, then fills a small associative array and prints each key/value
+// pair on its own line.
+void main(char[][] argv) {
+    Console.backcolor = Color.Black;
+    Console.forecolor = Color.Green;
+
+    // Assemble the whole banner first so it goes out in a single write.
+    char[] banner = "\nHello, and Welcome to XOmB\n";
+    foreach (arg; argv) {
+        banner ~= arg;
+        banner ~= "\n";
+    }
+    banner ~= "-=-=-=-=-=-=-=-\n\n";
+
+    Console.backcolor = Color.Black;
+    Console.forecolor = Color.LightGray;
+
+    Console.putString(banner ~ "\n");
+
+    // Associative array with string keys and string values.
+    char[][char[]] dictionary;
+    dictionary["foo"] = "bar";
+    dictionary["zig"] = "zag";
+    dictionary["a"] = "b";
+    dictionary["c"] = "d";
+
+    char[][] words = dictionary.keys;
+    foreach (word; words) {
+        char[] line = word ~ " " ~ dictionary[word] ~ "\n";
+        Console.putString(line);
+    }
+}
View
2 app/d/hello/dsss.conf
@@ -26,7 +26,7 @@ postbuild = \
echo ; \
echo Creating Application Executable; \
echo '--> hello';\
-ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o hello `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/libd.a;\
+ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o hello `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/libd.a;\
\
echo ;\
echo Creating App Symbol File; \
View
4 app/d/init/dsss.conf
@@ -26,8 +26,8 @@ postbuild = \
echo ; \
echo Creating Application Executable; \
echo '--> init';\
-ld -nostdlib -nodefaultlibs -T../../build/flat.ld -o init `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/drt0.a;\
-ld -nostdlib -nodefaultlibs -T../../build/skinny-elf.ld -o init-elf `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/drt0.a;\
+ld -nostdlib -nodefaultlibs -T../../build/flat.ld -o init `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/mindrt.a;\
+ld -nostdlib -nodefaultlibs -T../../build/skinny-elf.ld -o init-elf `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/mindrt.a;\
\
echo ;\
echo Copying;\
View
2 app/d/posix/dsss.conf
@@ -27,7 +27,7 @@ postbuild = \
echo ; \
echo Creating Application Executable; \
echo '--> posix';\
-ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o posix `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/drt0.a;\
+ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o posix `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/mindrt.a;\
\
echo ;\
echo Creating App Symbol File; \
View
2 app/d/xsh/dsss.conf
@@ -27,7 +27,7 @@ postbuild = \
echo ; \
echo Creating Application Executable; \
echo '--> xsh';\
-ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o xsh `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/libd.a;\
+ld -nostdlib -nodefaultlibs -T../../build/elf.ld -o xsh `ls dsss_objs/O/*.o` ../../../runtimes/mindrt/drt0.a ../../../runtimes/mindrt/mindrt.a ../../../runtimes/mindrt/libd.a;\
\
echo ;\
echo Creating App Symbol File; \
View
12 runtimes/djehuty/Makefile
@@ -0,0 +1,12 @@
+# Flags shared by every ldc invocation below. -od=dsss_objs/O puts the object
+# files into the directory the archive step reads from.
+DFLAGS = -I../. -I../../. -O2 -release -od=dsss_objs/O -oq -d-version=PlatformXOmB
+
+# Builds the runtime archive: creates the object directories, compiles the
+# sources in three ldc batches, then archives every object in dsss_objs/O.
+djrt.a: *.d typeinfos/*.d binding/*.d core/*.d data/*.d synch/*.d
+ mkdir -p dsss_objs/G;
+ mkdir -p dsss_objs/O;
+ ldc -nodefaultlib ${DFLAGS} -c *.d
+ ldc -nodefaultlib ${DFLAGS} -c typeinfos/*.d
+ ldc -nodefaultlib ${DFLAGS} -c binding/*.d core/*.d data/*.d synch/*.d
+ ar rcs djrt.a dsss_objs/O/*.o
+
+# Removes the archive and the objects it is built from. The archive step
+# globs dsss_objs/O/*.o, so leftover objects of deleted sources would be
+# archived again; -f keeps the target from failing on a clean tree.
+.PHONY: clean
+clean:
+	rm -f djrt.a dsss_objs/O/*.o
View
14 runtimes/djehuty/apply.d
@@ -12,7 +12,7 @@ import runtime.common;
import core.unicode;
-import io.console;
+//import io.console;
extern(D) typedef int delegate(void*) apply_dg_t;
extern(D) typedef int delegate(size_t*, void*) apply_dg2_t;
@@ -27,7 +27,7 @@ private {
foreach(size_t idx, ref chr; array) {
// Call the loop body of the foreach, passing the pointer to the character
result = loopBody(&array[idx]);
-
+
// It will return nonzero when it breaks out of the loop early
if (result) {
return result;
@@ -45,7 +45,7 @@ private {
foreach(size_t idx, ref chr; array) {
// Call the loop body of the foreach, passing the pointer to the character and index
result = loopBody(&idx, &array[idx]);
-
+
// It will return nonzero when it breaks out of the loop early
if (result) {
return result;
@@ -62,7 +62,7 @@ private {
foreach_reverse(size_t idx, ref chr; array) {
// Call the loop body of the foreach, passing the pointer to the character and index
result = loopBody(&array[idx]);
-
+
// It will return nonzero when it breaks out of the loop early
if (result) {
return result;
@@ -78,7 +78,7 @@ private {
foreach_reverse(size_t idx, ref chr; array) {
// Call the loop body of the foreach, passing the pointer to the character and index
result = loopBody(&idx, &array[idx]);
-
+
// It will return nonzero when it breaks out of the loop early
if (result) {
return result;
@@ -250,14 +250,14 @@ int _aApplyRwd2(wchar[] input, apply_dg2_t loopBody) {
}
// Description: This runtime function will decode a UTF32 string into char
-// elements. Used with a foreach_reverse loop of the form:
+// elements. Used with a foreach_reverse loop of the form:
// foreach_reverse(i, char ; dchar[]).
int _aApplyRdc2(dchar[] input, apply_dg2_t loopBody) {
mixin(_apply!(char, indexedApplyReverseCode));
}
// Description: This runtime function will decode a UTF32 string into wchar
-// elements. Used with a foreach_reverse loop of the form:
+// elements. Used with a foreach_reverse loop of the form:
// foreach_reverse(i, wchar ; dchar[]).
int _aApplyRdw2(dchar[] input, apply_dg2_t loopBody) {
mixin(_apply!(wchar, indexedApplyReverseCode));
View
20 runtimes/djehuty/array.d
@@ -13,9 +13,9 @@ import core.unicode;
import data.iterable;
import runtime.common;
-import math.random;
+//import math.random;
-import core.util;
+//import core.util;
// Arrays in D are represented as such:
@@ -132,10 +132,10 @@ ubyte[] _adSort(ubyte[] array, TypeInfo ti) {
}
// Special quicksort implementation
-private void _qsort(ubyte[] array, size_t size, TypeInfo ti, Random rnd = null) {
- if (rnd is null) {
+private void _qsort(ubyte[] array, size_t size, TypeInfo ti/*, Random rnd = null*/) {
+ /*if (rnd is null) {
rnd = new Random();
- }
+ }*/
// Base case
if ((array.length/size) < 2) {
@@ -144,7 +144,7 @@ private void _qsort(ubyte[] array, size_t size, TypeInfo ti, Random rnd = null)
// Selecting a pivot
size_t length = array.length / size;
- size_t element = cast(size_t)rnd.nextLong(length);
+ size_t element = 0; //cast(size_t)rnd.nextLong(length);
element *= size;
//Console.putln("pivot: ", element/size, " array.length: ", array.length/size);
@@ -205,8 +205,8 @@ private void _qsort(ubyte[] array, size_t size, TypeInfo ti, Random rnd = null)
}
}
- _qsort(array[0..element], size, ti, rnd);
- _qsort(array[element+size..$], size, ti, rnd);
+ _qsort(array[0..element], size, ti/*, rnd*/);
+ _qsort(array[element+size..$], size, ti/*, rnd*/);
}//*/
// Description: This runtime function sorts a char array and is invoked with
@@ -311,7 +311,7 @@ ubyte[] _d_arraycast(size_t toElementSize, size_t fromElementSize, ubyte[] array
// Error
throw new Exception("Array cast misalignment");
}
-
+
size_t newLength = numbytes / toElementSize;
// Return the updated array length
@@ -346,7 +346,7 @@ void _d_array_slice_copy(ubyte* dst, size_t dstLength, ubyte* src, size_t srcLen
if (dstLength != srcLength) {
throw new Exception("Length mismatch for array copy");
}
-
+
if (dst + dstLength > src && src + srcLength > dst) {
// Overlapping copy
throw new Exception("Array copy overlaps");
View
10 runtimes/djehuty/assocarray.d
@@ -12,8 +12,8 @@ import runtime.gc;
import synch.atomic;
-import binding.c;
-import io.console;
+//import binding.c;
+//import io.console;
extern(C):
@@ -242,7 +242,7 @@ ubyte[] _aaKeys(ref AssocArray aa, size_t keysize) {
ubyte[] ret;
foreach(bucket; aa.buckets) {
if (bucket.usedCount == 0) {
- continue;
+ continue;
}
foreach(entry; bucket.entries) {
@@ -266,7 +266,7 @@ ubyte[] _aaValues(ref AssocArray aa, size_t keysize, size_t valuesize) {
ubyte[] ret;
foreach(bucket; aa.buckets) {
if (bucket.usedCount == 0) {
- continue;
+ continue;
}
foreach(entry; bucket.entries) {
if (entry.key !is null) {
@@ -302,7 +302,7 @@ AssocArray* _aaRehash(ref AssocArray* aa, TypeInfo keyti) {
foreach(ref newElement; aa.buckets[newBucketIndex].entries) {
if (newElement.key is null) {
newElement = element;
-
+
// We found one, break
// Otherwise, we end up moving element twice :P
break;
View
283 runtimes/djehuty/binding/c.d
@@ -0,0 +1,283 @@
+/*
+ * c.d
+ *
+ * This module binds the C language to D.
+ *
+ * Author: Dave Wilkinson
+ * Originated: July 7th, 2009
+ *
+ */
+
+module binding.c;
+
+/* C long types */
+// Request linking against the C library on every platform except builds
+// that set version PlatformWindows.
+version(PlatformWindows) {
+}
+else {
+ pragma(lib, `"c"`);
+}
+
+// Clong_t / Culong_t stand in for C's `long` / `unsigned long`: taken from
+// the GDC builtins when compiling with GNU, otherwise 64-bit on X86_64 and
+// 32-bit everywhere else.
+version(GNU) {
+ import gcc.builtins;
+ alias __builtin_Clong Clong_t;
+ alias __builtin_Culong Culong_t;
+}
+else version(X86_64) {
+ alias long Clong_t;
+ alias ulong Culong_t;
+}
+else {
+ alias int Clong_t;
+ alias uint Culong_t;
+}
+
+/* stdarg */
+
+version(GNU) {
+ private import std.c.stdarg;
+}
+else version(LDC) {
+ private import ldc.cstdarg;
+}
+else {
+ private import dmd.cstdarg;
+}
+
+alias va_list Cva_list;
+alias va_start Cva_start;
+alias va_end Cva_end;
+
+/* stdout */
+
+align(1) struct _iobuf {
+ version( Win32 ) {
+ char* _ptr;
+ int _cnt;
+ char* _base;
+ int _flag;
+ int _file;
+ int _charbuf;
+ int _bufsiz;
+ int __tmpnum;
+ }
+ else version( linux ) {
+ char* _read_ptr;
+ char* _read_end;
+ char* _read_base;
+ char* _write_base;
+ char* _write_ptr;
+ char* _write_end;
+ char* _buf_base;
+ char* _buf_end;
+ char* _save_base;
+ char* _backup_base;
+ char* _save_end;
+ void* _markers;
+ _iobuf* _chain;
+ int _fileno;
+ int _blksize;
+ int _old_offset;
+ ushort _cur_column;
+ byte _vtable_offset;
+ char[1] _shortbuf;
+ void* _lock;
+ }
+ else version( darwin ) {
+ ubyte* _p;
+ int _r;
+ int _w;
+ short _flags;
+ short _file;
+ __sbuf _bf;
+ int _lbfsize;
+
+ int* function(void*) _close;
+ int* function(void*, char*, int) _read;
+ fpos_t* function(void*, fpos_t, int) _seek;
+ int* function(void*, char *, int) _write;
+
+ __sbuf _ub;
+ __sFILEX* _extra;
+ int _ur;
+
+ ubyte[3] _ubuf;
+ ubyte[1] _nbuf;
+
+ __sbuf _lb;
+
+ int _blksize;
+ fpos_t _offset;
+ }
+ else version( freebsd ) {
+ ubyte* _p;
+ int _r;
+ int _w;
+ short _flags;
+ short _file;
+ __sbuf _bf;
+ int _lbfsize;
+
+ void* function() _cookie;
+ int* function(void*) _close;
+ int* function(void*, char*, int) _read;
+ fpos_t* function(void*, fpos_t, int) _seek;
+ int* function(void*, char *, int) _write;
+
+ __sbuf _ub;
+ __sFILEX* _extra;
+ int _ur;
+
+ ubyte[3] _ubuf;
+ ubyte[1] _nbuf;
+
+ __sbuf _lb;
+
+ int _blksize;
+ fpos_t _offset;
+ }
+ else version( solaris ) {
+ // From OpenSolaris <ast/sfio_s.h>
+ ubyte* _next; /* next position to read/write from */
+ ubyte* _endw; /* end of write buffer */
+ ubyte* _endr; /* end of read buffer */
+ ubyte* _endb; /* end of buffer */
+ _iobuf* _push; /* the stream that was pushed on */
+ ushort _flags; /* type of stream */
+ short _file; /* file descriptor */
+ ubyte* _data; /* base of data buffer */
+ ptrdiff_t _size; /* buffer size */
+ ptrdiff_t _val; /* values or string lengths */
+
+ // #ifdef _SFIO_PRIVATE
+ // .. I don't think we really need this in D
+ // #endif
+ }
+ else {
+ static assert( false, "Platform not supported." );
+ }
+}
+
+const int _NFILE = 60;
+alias _iobuf FILE;
+alias int fpos_t;
+
+version(Win32) {
+ extern(C) extern FILE[_NFILE] _iob;
+ FILE* stdin = &_iob[0];
+ FILE* stdout = &_iob[1];
+ FILE* stderr = &_iob[2];
+}
+else version(linux) {
+ extern(C) extern FILE* stdin;
+ extern(C) extern FILE* stdout;
+ extern(C) extern FILE* stderr;
+}
+else version(darwin) {
+ extern(C) extern FILE* __stdinp;
+ extern(C) extern FILE* __stdoutp;
+ extern(C) extern FILE* __stderrp;
+
+ alias __stdinp stdin;
+ alias __stdoutp stdout;
+ alias __stderrp stderr;
+}
+else version(freebsd) {
+ extern(C) extern FILE[3] __sF;
+
+ FILE* stdin = &__sF[0];
+ FILE* stdout = &__sF[1];
+ FILE* stderr = &__sF[2];
+}
+else version(solaris) {
+ extern(C) extern FILE[_NFILE] __iob;
+
+ FILE* stdin = &__iob[0];
+ FILE* stdout = &__iob[1];
+ FILE* stderr = &__iob[2];
+}
+else {
+ static assert(false, "Platform not supported.");
+}
+
+// wchar_t
+version(Win32) {
+ alias ushort wchar_t;
+}
+else {
+ alias uint wchar_t;
+}
+
+extern(C) FILE[_NFILE]* _imp__iob;
+
+ //public import std.c.stdarg;
+ //public import std.c.stdio;
+
+extern(C) int printf(char *,...);
+
+const int EOF = -1;
+const int FOPEN_MAX = 16;
+const int FILENAME_MAX = 4095;
+const int TMP_MAX = 238328;
+const int L_tmpnam = 20;
+
+enum { SEEK_SET, SEEK_CUR, SEEK_END }
+
+extern(C):
+
+// Standard C
+
+void exit(int);
+
+int system(char*);
+
+char * tmpnam(char *); ///
+FILE * fopen(char *,char *); ///
+FILE * _fsopen(char *,char *,int ); ///
+FILE * freopen(char *,char *,FILE *); ///
+int fseek(FILE *,Clong_t,int); ///
+Clong_t ftell(FILE *); ///
+char * fgets(char *,int,FILE *); ///
+int fgetc(FILE *); ///
+int _fgetchar(); ///
+int fflush(FILE *); ///
+int fclose(FILE *); ///
+int fputs(char *,FILE *); ///
+char * gets(char *); ///
+int fputc(int,FILE *); ///
+int _fputchar(int); ///
+int puts(char *); ///
+int ungetc(int,FILE *); ///
+size_t fread(void *,size_t,size_t,FILE *); ///
+size_t fwrite(void *,size_t,size_t,FILE *); ///
+int fprintf(FILE *,char *,...); ///
+int vfprintf(FILE *,char *,Cva_list); ///
+int vprintf(char *,Cva_list); ///
+int sprintf(char *,char *,...); ///
+int vsprintf(char *,char *,Cva_list); ///
+int scanf(char *,...); ///
+int fscanf(FILE *,char *,...); ///
+int sscanf(char *,char *,...); ///
+void setbuf(FILE *,char *); ///
+int setvbuf(FILE *,char *,int,size_t); ///
+int remove(char *); ///
+int rename(char *,char *); ///
+void perror(char *); ///
+int fgetpos(FILE *,fpos_t *); ///
+int fsetpos(FILE *,fpos_t *); ///
+FILE * tmpfile(); ///
+int _rmtmp();
+int _fillbuf(FILE *);
+int _flushbu(int, FILE *);
+
+int getw(FILE *FHdl); ///
+int putw(int Word, FILE *FilePtr); ///
+
+int getchar(); ///
+int putchar(int c); ///
+int getc(FILE *fp); ///
+int putc(int c,FILE *fp); ///
+
+// C heap allocation functions from <stdlib.h>.
+void* malloc(size_t len);
+void* realloc(void* ptr, size_t len);
+// calloc takes an element count and an element size. Declaring it with a
+// single parameter makes every call pass an undefined second argument to
+// the C function.
+void* calloc(size_t count, size_t size);
+void free(void* ptr);
View
58 runtimes/djehuty/core/error.d
@@ -0,0 +1,58 @@
+/*
+ * error.d
+ *
+ * This module implements the Error objects useable by the system.
+ * These objects are for irrecoverable failures.
+ *
+ * Originated: May 8th, 2010
+ *
+ */
+
+module core.error;
+
+import core.exception;
+
+// Description: Base class for irrecoverable failures. It only forwards the
+// message, file and line to Exception.
+class Error : Exception {
+ this(string msg, string file = "", ulong line = 0) {
+ super(msg, file, line);
+ }
+}
+
+// Description: Abstract base for errors raised by the runtime itself. The
+// concrete errors are nested classes, so they are referred to as
+// RuntimeError.Assert, RuntimeError.NoDefaultCase and so on.
+abstract class RuntimeError : Error {
+ this(string msg, string file, ulong line){
+ super(msg,file,line);
+ }
+
+static:
+
+ // Description: This Error is thrown when assertions fail.
+ class Assert : RuntimeError {
+ // With a message: reports "Assertion `msg` failed".
+ this(string msg, string file, ulong line) {
+ super("Assertion `" ~ msg ~ "` failed", file, line);
+ }
+
+ // Without a message: reports "Assertion failed".
+ this(string file, ulong line) {
+ super("Assertion failed",file,line);
+ }
+ }
+
+ // Description: This Error reports a cyclic dependency between the two
+ // named modules. No file or line is recorded.
+ class CyclicDependency : RuntimeError {
+ this(string moduleNameA, string moduleNameB) {
+ super("Cyclic Dependency detected between " ~ moduleNameA ~ " and " ~ moduleNameB, "", 0);
+ }
+ }
+
+ // Description: This Error is thrown when a switch statement does not have a default and there is no case available.
+ class NoDefaultCase : RuntimeError {
+ this(string file, ulong line) {
+ super("Switch has no default",file,line);
+ }
+ }
+
+ // Description: This Error reports that the named class needs an opCmp.
+ // No file or line is recorded.
+ class NoCompare : RuntimeError {
+ this(string className) {
+ super("Class " ~ className ~ " needs an opCmp.", "", 0);
+ }
+ }
+}
View
112 runtimes/djehuty/core/exception.d
@@ -0,0 +1,112 @@
+/*
+ * exception.d
+ *
+ * This module defines common exceptions.
+ *
+ * Author: Dave Wilkinson
+ * Originated: August 20th, 2009
+ *
+ */
+
+module core.exception;
+
+// Description: Root of the exception hierarchy. Stores private copies of
+// the message and source file plus the line number, and hands out copies
+// so callers cannot modify the stored text.
+class Exception : Object {
+private:
+    char[] _msg;
+    char[] _file;
+    ulong _line;
+
+    // Renders a line number in decimal. Kept local so this module does not
+    // need a formatting import.
+    static char[] _lineText(ulong value) {
+        char[20] buffer; // ulong.max has 20 decimal digits
+        size_t pos = buffer.length;
+        do {
+            pos--;
+            buffer[pos] = cast(char)('0' + cast(uint)(value % 10));
+            value /= 10;
+        } while (value != 0);
+        return buffer[pos .. $].dup;
+    }
+
+public:
+    // msg:  human-readable description of the failure.
+    // file: source file the failure is attributed to (may be empty).
+    // line: source line the failure is attributed to (0 when unknown).
+    this(string msg, string file = "", ulong line = 0) {
+        _msg = msg.dup;
+        _file = file.dup;
+        _line = line;
+    }
+
+    // Name of the dynamic class of this exception.
+    string name() {
+        return this.classinfo.name.dup;
+    }
+
+    // Copy of the message.
+    string msg() {
+        return _msg.dup;
+    }
+
+    // Copy of the file name, matching msg() and name() so the stored
+    // buffer is never exposed.
+    string file() {
+        return _file.dup;
+    }
+
+    ulong line() {
+        return _line;
+    }
+
+    // "<class> caught at <file>@<line>: <message>". The line number used to
+    // be missing, leaving a dangling "@" in the text.
+    string toString() {
+        return this.name() ~ " caught at " ~ _file ~ "@" ~ _lineText(_line) ~ ": " ~ _msg;
+    }
+}
+
+// Exceptions for IO
+// Description: Abstract base for I/O failures. The concrete exceptions are
+// nested classes that take the offending file name and build the message
+// from it; no file/line information is recorded.
+abstract class IOException : Exception {
+ this(string msg) {
+ super(msg);
+ }
+
+static:
+
+ // Message: "<filename> could not be created."
+ class CreationFailure : IOException {
+ this(string filename) {
+ super(filename ~ " could not be created.");
+ }
+ }
+
+ // Message: "<filename> not found."
+ class ExistenceFailure : IOException {
+ this(string filename) {
+ super(filename ~ " not found.");
+ }
+ }
+
+ // Message: "<filename> has the wrong permissions for the operation."
+ class PermissionFailure : IOException {
+ this(string filename) {
+ super(filename ~ " has the wrong permissions for the operation.");
+ }
+ }
+}
+
+// Exceptions for data structures
+// Description: Abstract base for failures in data structures. The concrete
+// exceptions are nested classes that take the name of the object involved.
+abstract class DataException : Exception {
+ this(string msg, string file = "", ulong line = 0) {
+ super(msg, file, line);
+ }
+
+static:
+
+ // Message: "Out of items in <objectName>"
+ class OutOfElements : DataException {
+ this(string objectName) {
+ super("Out of items in " ~ objectName);
+ }
+ }
+
+ // Message: "Index out of bounds in <objectName>". This is the only
+ // nested exception here that also forwards a file and line.
+ class OutOfBounds : DataException {
+ this(string objectName, string file = "", ulong line = 0) {
+ super("Index out of bounds in " ~ objectName, file, line);
+ }
+ }
+
+ // Message: "Element does not exist in <objectName>"
+ class ElementNotFound : DataException {
+ this(string objectName) {
+ super("Element does not exist in " ~ objectName);
+ }
+ }
+}
+
+// Description: Abstract base for memory-related failures.
+abstract class MemoryException : Exception {
+ this(string msg) {
+ super(msg);
+ }
+
+static:
+ // Takes no arguments; the message is always "Out of memory".
+ class OutOfMemory : MemoryException {
+ this() {
+ super("Out of memory");
+ }
+ }
+}
View
1,622 runtimes/djehuty/core/unicode.d
@@ -0,0 +1,1622 @@
+/*
+ * unicode.d
+ *
+ * This module implements unicode functions that were badly needed.
+ *
+ * Author: Dave Wilkinson
+ *
+ */
+
+module core.unicode;
+
+//import core.definitions;
+
+// Surrogate-pair arithmetic used by the UTF-16 converters below: a code point
+// above the BMP has halfBase subtracted, the result is shifted right by
+// halfShift to form the high surrogate, and masked with halfMask to form the
+// low surrogate.
+private static const uint halfShift = 10;
+private static const uint halfBase = 0x0010000;
+private static const uint halfMask = 0x3FF;
+
+// Inclusive bounds of the UTF-16 high and low surrogate ranges.
+private const auto UNI_SUR_HIGH_START = 0xD800;
+private const auto UNI_SUR_HIGH_END = 0xDBFF;
+private const auto UNI_SUR_LOW_START = 0xDC00;
+private const auto UNI_SUR_LOW_END = 0xDFFF;
+
+// UNI_REPLACEMENT_CHAR is what the converters emit in place of a value they
+// reject; the UNI_MAX_* values are the upper limits they compare against.
+private const auto UNI_REPLACEMENT_CHAR = cast(dchar)0x0000FFFD;
+private const auto UNI_MAX_BMP = cast(dchar)0x0000FFFF;
+private const auto UNI_MAX_UTF16 = cast(dchar)0x0010FFFF;
+private const auto UNI_MAX_UTF32 = cast(dchar)0x7FFFFFFF;
+private const auto UNI_MAX_LEGAL_UTF32 = cast(dchar)0x0010FFFF;
+
+// Indexed by the total number of bytes in a UTF-8 sequence; the entry is OR'ed
+// into the first byte of that sequence by the toUtf8 encoders.
+private static const ubyte firstByteMark[7] = [ 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC ];
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes (and
+ * isLegalUTF8 rejects them). The entries are left as-is for anyone who may
+ * want to do such conversion, which was allowed in earlier algorithms.
+ * Bytes 0x00-0xBF map to 0, 0xC0-0xDF to 1, 0xE0-0xEF to 2, 0xF0-0xF7 to 3,
+ * 0xF8-0xFB to 4 and 0xFC-0xFF to 5.
+ */
+static const char trailingBytesForUTF8[256] = [
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+];
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence, and is indexed by that trailing-byte count.
+ *
+ * The decoders add the raw bytes together (shifting left by 6 between
+ * bytes), so the marker bits of every byte end up in the sum; the entry
+ * removes them again. For example, with one trailing byte the entry is
+ * (0xC0 << 6) + 0x80 = 0x3080.
+ */
+static const uint offsetsFromUTF8[6] = [ 0x00000000, 0x00003080, 0x000E2080,
+ 0x03C82080, 0xFA082080, 0x82082080 ];
+
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns false. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+// Returns true when the `length` bytes starting at `source` form one legal
+// UTF-8 sequence. `length` is the total sequence length (first byte included);
+// any value outside 1..4 is rejected. The caller must guarantee that `length`
+// bytes are readable at `source`.
+private bool isLegalUTF8(char* source, int length) {
+ char a;
+ // Start one past the last byte and walk backwards through the trailing bytes.
+ char *srcptr = source+length;
+ switch (length) {
+ default: return false;
+ /* Everything else falls through when "true"... */
+ // Fourth and third bytes must be continuation bytes (0x80..0xBF).
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ // Second byte: upper bound here, lower bound depends on the first byte.
+ case 2: if ((a = (*--srcptr)) > 0xBF) return false;
+
+ // `a` now holds the second byte; certain first bytes narrow its range.
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ case 0xE0: if (a < 0xA0) return false; break;
+ case 0xED: if (a > 0x9F) return false; break;
+ case 0xF0: if (a < 0x90) return false; break;
+ case 0xF4: if (a > 0x8F) return false; break;
+ default: if (a < 0x80) return false;
+ }
+
+ // First byte may not be a continuation byte or the lead bytes 0xC0/0xC1.
+ case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+ }
+ // No legal sequence starts with a byte above 0xF4.
+ if (*source > 0xF4) return false;
+ return true;
+}
+
+// For efficiency, we have full
+// control of the buffer length.
+
+struct Unicode {
+static:
+private:
+ // Codepage Encodings
+
+ // Code points for the upper half (bytes 0x80..0xFF) of code page 866.
+ // The table holds 128 entries and is indexed with (byte - 128) by fromCP866.
+ dchar CP866_to_UTF32[] = [
+
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
+ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
+
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044F,
+ 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0,
+
+ ];
+
+public:
+
+ // UTF-8 in, UTF-8 out: no conversion is needed, so return a duplicate of
+ // the input.
+ string toUtf8(string src) {
+     char[] copy = src.dup;
+     return cast(string)copy;
+ }
+
+ /*
+  * Converts UTF-16 text to UTF-8.
+  *
+  * Surrogate pairs are combined into one code point before encoding.
+  * Conversion stops at the first unpaired surrogate (including a high
+  * surrogate that is the last unit of the input) and the text converted
+  * up to that point is returned.
+  */
+ string toUtf8(wstring src) {
+     if (src.length == 0) {
+         return cast(string)"";
+     }
+
+     // Four bytes per input unit is more than any unit can expand to.
+     char[] container = new char[src.length*4];
+
+     const auto byteMask = 0xBF;
+     const auto byteMark = 0x80;
+
+     wchar* source = src.ptr;
+     wchar* sourceEnd = src.ptr + src.length;
+
+     char* target = container.ptr;
+
+     uint bytesToWrite;
+
+     dchar ch;
+
+     while (source < sourceEnd) {
+
+         ch = *source++;
+
+         // If we have a surrogate pair, we convert to UTF-32
+         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+             // The low half must lie inside the buffer; never read the
+             // unit at sourceEnd.
+             if (source >= sourceEnd) {
+                 // unpaired high surrogate at the end of the input
+                 break;
+             }
+
+             dchar ch2 = cast(dchar)*source;
+
+             /* If it's a low surrogate, convert to UTF32. */
+             if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                 ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                 source++;
+             }
+             else {
+                 // unpaired high surrogate: illegal
+
+                 // TODO: do not break, just add a character and continue to produce valid string
+                 break;
+             }
+         }
+         else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+             // low surrogate without a preceding high surrogate: illegal
+
+             // TODO: do not break, just add a character and continue to produce valid string
+             break;
+         }
+
+         /* Figure out how many bytes the result will require */
+         if (ch < cast(dchar)0x80) {
+             bytesToWrite = 1;
+         }
+         else if (ch < cast(dchar)0x800) {
+             bytesToWrite = 2;
+         }
+         else if (ch < cast(dchar)0x10000) {
+             bytesToWrite = 3;
+         }
+         else if (ch < cast(dchar)0x110000) {
+             bytesToWrite = 4;
+         }
+         else {
+             bytesToWrite = 3;
+             ch = UNI_REPLACEMENT_CHAR;
+         }
+
+         // The bytes are produced last-to-first, so jump to the end of the
+         // sequence and write backwards.
+         target += bytesToWrite;
+
+         switch (bytesToWrite) { /* note: everything falls through. */
+             case 4: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 3: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 2: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 1: *--target = cast(char)(ch | firstByteMark[bytesToWrite]);
+
+             default: break;
+         }
+         target += bytesToWrite;
+     }
+
+     return container[0..target - container.ptr];
+ }
+
+ /*
+  * Converts UTF-32 text to UTF-8.
+  *
+  * Values that are not legal code points (the surrogate range
+  * 0xD800..0xDFFF and anything above 0x10FFFF) are written as
+  * UNI_REPLACEMENT_CHAR so the result is always well-formed, matching what
+  * toUtf16(dstring) does with the same input.
+  */
+ string toUtf8(dstring src) {
+     if (src is null || src.length == 0) {
+         return cast(string)"";
+     }
+
+     // A code point needs at most four UTF-8 bytes.
+     char[] container = new char[src.length*4];
+
+     const auto byteMask = 0xBF;
+     const auto byteMark = 0x80;
+
+     dchar* source = src.ptr;
+     dchar* sourceEnd = src.ptr + src.length;
+
+     char* target = container.ptr;
+
+     uint bytesToWrite;
+
+     dchar ch;
+
+     while (source < sourceEnd) {
+
+         bytesToWrite = 0;
+         ch = *source++;
+
+         // Surrogates and values past Plane 17 cannot be encoded.
+         if ((ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) || ch > UNI_MAX_LEGAL_UTF32) {
+             ch = UNI_REPLACEMENT_CHAR;
+         }
+
+         /* Figure out how many bytes the result will require. */
+         if (ch < cast(dchar)0x80) {
+             bytesToWrite = 1;
+         }
+         else if (ch < cast(dchar)0x800) {
+             bytesToWrite = 2;
+         }
+         else if (ch < cast(dchar)0x10000) {
+             bytesToWrite = 3;
+         }
+         else {
+             bytesToWrite = 4;
+         }
+
+         // The bytes are produced last-to-first, so jump to the end of the
+         // sequence and write backwards.
+         target += bytesToWrite;
+
+         switch (bytesToWrite) { /* note: everything falls through. */
+             case 4: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 3: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 2: *--target = cast(char)((ch | byteMark) & byteMask); ch >>= 6;
+             case 1: *--target = cast(char) (ch | firstByteMark[bytesToWrite]);
+
+             default: break;
+         }
+         target += bytesToWrite;
+     }
+
+     uint targetLen = target - container.ptr;
+
+     string ret = cast(string)container[0..targetLen];
+     return ret;
+ }
+
+ /*
+  * Converts UTF-8 text to UTF-16.
+  *
+  * Decoding stops at the first sequence that is cut off by the end of the
+  * input or that isLegalUTF8 rejects; whatever was converted before that
+  * point is returned. Decoded values that cannot be represented are
+  * written as UNI_REPLACEMENT_CHAR.
+  */
+ wstring toUtf16(string src) {
+     if (src.length == 0) {
+         return cast(wstring)"";
+     }
+
+     wchar[] units = new wchar[src.length];
+     wchar* outPtr = units.ptr;
+
+     char* cursor = src.ptr;
+     char* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         ushort trailing = trailingBytesForUTF8[*cursor];
+
+         // Source exhausted, or source illegal.
+         if (cursor + trailing >= limit || !isLegalUTF8(cursor, trailing+1)) {
+             break;
+         }
+
+         // Sum the raw bytes, shifting by six bits between them, then
+         // strip the accumulated marker bits.
+         dchar code = 0;
+         for (uint k = 0; k < trailing; k++) {
+             code += *cursor++;
+             code <<= 6;
+         }
+         code += *cursor++;
+         code -= offsetsFromUTF8[trailing];
+
+         if (code > UNI_MAX_UTF16) {
+             // beyond what UTF-16 can express
+             *outPtr++ = UNI_REPLACEMENT_CHAR;
+         }
+         else if (code > UNI_MAX_BMP) {
+             // supplementary plane: emit a surrogate pair
+             code -= halfBase;
+             *outPtr++ = cast(wchar)((code >> halfShift) + UNI_SUR_HIGH_START);
+             *outPtr++ = cast(wchar)((code & halfMask) + UNI_SUR_LOW_START);
+         }
+         else if (code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END) {
+             // surrogate values are not characters
+             *outPtr++ = UNI_REPLACEMENT_CHAR;
+         }
+         else {
+             *outPtr++ = cast(wchar)code;
+         }
+     }
+
+     return cast(wstring)units[0..outPtr - units.ptr];
+ }
+
+ // UTF-16 in, UTF-16 out: return a duplicate of the input.
+ wstring toUtf16(wstring src) {
+     wchar[] copy = src.dup;
+     return cast(wstring)copy;
+ }
+
+ /*
+  * Converts UTF-32 text to UTF-16.
+  *
+  * Code points above the BMP become a surrogate pair. Surrogate values and
+  * values above 0x10FFFF are written as UNI_REPLACEMENT_CHAR.
+  */
+ wstring toUtf16(dstring src) {
+     if (src.length == 0) {
+         return cast(wstring)"";
+     }
+
+     // Every code point above the BMP expands to two units, so the output
+     // can be twice as long as the input.
+     wchar[] container = new wchar[src.length*2];
+
+     dchar* source = src.ptr;
+     dchar* sourceEnd = src.ptr + src.length;
+
+     wchar* target = container.ptr;
+
+     dchar ch;
+
+     while (source < sourceEnd) {
+         ch = *source++;
+         if (ch <= UNI_MAX_BMP) {
+             /* Target is a character <= 0xFFFF */
+
+             /* UTF-16 surrogate values are illegal in UTF-32 */
+             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                 *target++ = UNI_REPLACEMENT_CHAR;
+             }
+             else {
+                 *target++ = cast(wchar)ch; /* normal case */
+             }
+         }
+         else if (ch > UNI_MAX_LEGAL_UTF32) {
+             *target++ = UNI_REPLACEMENT_CHAR;
+         }
+         else {
+             /* target is a character in range 0x10000 - 0x10FFFF. */
+             ch -= halfBase;
+             *target++ = cast(wchar)((ch >> halfShift) + UNI_SUR_HIGH_START);
+             *target++ = cast(wchar)((ch & halfMask) + UNI_SUR_LOW_START);
+         }
+     }
+
+     return cast(wstring)container[0..target - container.ptr];
+ }
+
+ /*
+  * Converts UTF-8 text to UTF-32.
+  *
+  * Decoding stops at the first sequence that is cut off by the end of the
+  * input or that isLegalUTF8 rejects; whatever was converted before that
+  * point is returned. A decoded surrogate value or a value above
+  * UNI_MAX_LEGAL_UTF32 is written as UNI_REPLACEMENT_CHAR.
+  */
+ dstring toUtf32(string src) {
+     if (src.length == 0) {
+         return cast(dstring)"";
+     }
+
+     dchar[] points = new dchar[src.length];
+     dchar* outPtr = points.ptr;
+
+     char* cursor = src.ptr;
+     char* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         ushort trailing = trailingBytesForUTF8[*cursor];
+
+         // Source exhausted, or source illegal.
+         if (cursor + trailing >= limit || !isLegalUTF8(cursor, trailing+1)) {
+             break;
+         }
+
+         // Sum the raw bytes, shifting by six bits between them, then
+         // strip the accumulated marker bits.
+         dchar code = 0;
+         for (uint k = 0; k < trailing; k++) {
+             code += *cursor++;
+             code <<= 6;
+         }
+         code += *cursor++;
+         code -= offsetsFromUTF8[trailing];
+
+         bool isSurrogate = code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END;
+
+         if (code > UNI_MAX_LEGAL_UTF32 || isSurrogate) {
+             *outPtr++ = UNI_REPLACEMENT_CHAR;
+         }
+         else {
+             *outPtr++ = code;
+         }
+     }
+
+     return cast(dstring)points[0..outPtr - points.ptr];
+ }
+
+ /*
+  * Converts UTF-16 text to UTF-32.
+  *
+  * A high surrogate followed by a low surrogate becomes one code point. A
+  * high surrogate followed by anything else is copied through unchanged.
+  * A high surrogate that is the last unit of the input ends the conversion
+  * without being written.
+  */
+ dstring toUtf32(wstring src) {
+     if (src.length == 0) {
+         return cast(dstring)"";
+     }
+
+     dchar[] points = new dchar[src.length];
+     dchar* outPtr = points.ptr;
+
+     wchar* cursor = src.ptr;
+     wchar* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         dchar lead = *cursor++;
+
+         if (lead >= UNI_SUR_HIGH_START && lead <= UNI_SUR_HIGH_END) {
+             if (cursor >= limit) {
+                 // the second half of the pair is missing: source exhausted
+                 break;
+             }
+
+             dchar trail = *cursor;
+             if (trail >= UNI_SUR_LOW_START && trail <= UNI_SUR_LOW_END) {
+                 lead = ((lead - UNI_SUR_HIGH_START) << halfShift) + (trail - UNI_SUR_LOW_START) + halfBase;
+                 cursor++;
+             }
+         }
+
+         *outPtr++ = lead;
+     }
+
+     return cast(dstring)points[0..outPtr - points.ptr];
+ }
+
+ // UTF-32 in, UTF-32 out: return a duplicate of the input.
+ dstring toUtf32(dstring src) {
+     dchar[] copy = src.dup;
+     return cast(dstring)copy;
+ }
+
+ // character conversions
+ // Returns the first code point of UTF-8 text. When nothing can be decoded
+ // (empty input, or an illegal or truncated first sequence) the result is
+ // UNI_REPLACEMENT_CHAR rather than an out-of-bounds read.
+ dchar toUtf32Char(string src) {
+     dstring decoded = toUtf32(src);
+     if (decoded.length == 0) {
+         return UNI_REPLACEMENT_CHAR;
+     }
+     return decoded[0];
+ }
+
+ // Returns the first code point of UTF-16 text. When nothing can be decoded
+ // (empty input, or a lone high surrogate as the only unit) the result is
+ // UNI_REPLACEMENT_CHAR rather than an out-of-bounds read.
+ dchar toUtf32Char(wstring src) {
+     dstring decoded = toUtf32(src);
+     if (decoded.length == 0) {
+         return UNI_REPLACEMENT_CHAR;
+     }
+     return decoded[0];
+ }
+
+ // Returns the first code point of UTF-32 text, or UNI_REPLACEMENT_CHAR when
+ // the input is empty. Exists so all three encodings share one interface.
+ dchar toUtf32Char(dstring src) {
+     if (src.length == 0) {
+         return UNI_REPLACEMENT_CHAR;
+     }
+     return src[0];
+ }
+
+ // True when the first code point of the UTF-8 text is a combining mark
+ // (as defined by isDeadChar(dchar)).
+ bool isDeadChar(char[] chr) {
+     return isDeadChar(toUtf32Char(chr));
+ }
+
+ // True when the first code point of the UTF-16 text is a combining mark
+ // (as defined by isDeadChar(dchar)).
+ bool isDeadChar(wchar[] chr) {
+     return isDeadChar(toUtf32Char(chr));
+ }
+
+ // True when the first element of the UTF-32 text is a combining mark
+ // (as defined by isDeadChar(dchar)). The input must not be empty.
+ bool isDeadChar(dchar[] chr) {
+     dchar first = chr[0];
+     return isDeadChar(first);
+ }
+
+ // True when chr lies in one of the combining-mark blocks listed below.
+ // Such a code point attaches to the character before it, so the length and
+ // index routines do not count it as a character of its own.
+ bool isDeadChar(dchar chr) {
+     // Combining Diacritical Marks
+     if (chr >= 0x300 && chr <= 0x36F) {
+         return true;
+     }
+     // Combining Diacritical Marks Supplement
+     if (chr >= 0x1DC0 && chr <= 0x1DFF) {
+         return true;
+     }
+     // Combining Diacritical Marks for Symbols
+     if (chr >= 0x20D0 && chr <= 0x20FF) {
+         return true;
+     }
+     // Combining Half Marks
+     return chr >= 0xFE20 && chr <= 0xFE2F;
+ }
+
+ // character conversions
+ /*
+  * Returns the first character of UTF-8 text as UTF-32: the first code
+  * point together with every combining mark (see isDeadChar) that directly
+  * follows it.
+  *
+  * Scanning ends at the first sequence that is truncated, illegal, decodes
+  * to a surrogate, or exceeds UNI_MAX_LEGAL_UTF32. If nothing was collected
+  * before that point, the result is a single UNI_REPLACEMENT_CHAR.
+  */
+ dchar[] toUtf32Chars(string src) {
+     dchar[] cluster;
+
+     if (src.length == 0) {
+         return [];
+     }
+
+     char* cursor = src.ptr;
+     char* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         ushort trailing = trailingBytesForUTF8[*cursor];
+         dchar code = 0;
+
+         // Source exhausted, or source illegal.
+         bool unusable = cursor + trailing >= limit || !isLegalUTF8(cursor, trailing+1);
+
+         if (!unusable) {
+             // Sum the raw bytes, shifting by six bits between them, then
+             // strip the accumulated marker bits.
+             for (uint k = 0; k < trailing; k++) {
+                 code += *cursor++;
+                 code <<= 6;
+             }
+             code += *cursor++;
+             code -= offsetsFromUTF8[trailing];
+
+             // Surrogate values and anything over Plane 17 are illegal.
+             unusable = code > UNI_MAX_LEGAL_UTF32
+                 || (code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END);
+         }
+
+         if (unusable) {
+             if (cluster.length == 0) {
+                 cluster ~= UNI_REPLACEMENT_CHAR;
+             }
+             return cluster;
+         }
+
+         // After the first code point only combining marks are accepted.
+         if (cluster.length > 0 && !isDeadChar(code)) {
+             break;
+         }
+
+         cluster ~= code;
+     }
+
+     return cluster;
+ }
+
+ /*
+  * Returns the first character of UTF-16 text as UTF-32: the first code
+  * point together with every combining mark (see isDeadChar) that directly
+  * follows it.
+  *
+  * Scanning ends at the first high surrogate that is not followed by a low
+  * surrogate. If nothing was collected before that point, the result is a
+  * single UNI_REPLACEMENT_CHAR, the same rule toUtf32Chars(string) applies.
+  */
+ dchar[] toUtf32Chars(wstring src) {
+     dchar[] container;
+
+     if (src.length == 0) {
+         return [];
+     }
+
+     wchar* source = src.ptr;
+     wchar* sourceEnd = src.ptr + src.length;
+
+     dchar ch, ch2;
+
+     while (source < sourceEnd) {
+         ch = *source++;
+
+         /* If we have a surrogate pair, convert to UTF32 first. */
+         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+             bool paired = false;
+
+             /* If the 16 bits following the high surrogate are in the source buffer... */
+             if (source < sourceEnd) {
+                 ch2 = *source;
+                 paired = ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END;
+             }
+
+             if (!paired) {
+                 // unpaired high surrogate, or source exhausted
+                 if (container.length == 0) {
+                     container ~= UNI_REPLACEMENT_CHAR;
+                 }
+                 return container;
+             }
+
+             ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase;
+
+             // Step over the low surrogate so the next iteration starts on
+             // the following character instead of on the pair's second half.
+             source++;
+         }
+
+         // After the first code point only combining marks are accepted.
+         if (container.length > 0 && !isDeadChar(ch)) {
+             break;
+         }
+
+         container ~= ch;
+     }
+
+     return container;
+ }
+
+ // Returns a copy of the first character of UTF-32 text: the first element
+ // together with every combining mark (see isDeadChar) that directly
+ // follows it.
+ dchar[] toUtf32Chars(dstring src) {
+     if (src.length == 0) {
+         return [];
+     }
+
+     // Find where the run of combining marks after the first element ends.
+     uint end = 1;
+     while (end < src.length && isDeadChar(src[end])) {
+         end++;
+     }
+
+     dchar[] cluster = src[0..end].dup;
+     return cast(dchar[])cluster;
+ }
+
+ /*
+  * Returns the first character of UTF-32 text as UTF-16: the first code
+  * point together with every combining mark (see isDeadChar) that directly
+  * follows it.
+  *
+  * Scanning ends at the first surrogate value or value above
+  * UNI_MAX_LEGAL_UTF32. If nothing was collected before that point, the
+  * result is a single UNI_REPLACEMENT_CHAR.
+  */
+ wchar[] toUtf16Chars(dstring src) {
+     wchar[] container;
+
+     if (src.length == 0) {
+         return cast(wchar[])container;
+     }
+
+     dchar* source = src.ptr;
+     dchar* sourceEnd = src.ptr + src.length;
+
+     dchar ch;
+
+     while (source < sourceEnd) {
+         ch = *source++;
+
+         /* UTF-16 surrogate values and values over Plane 17 are illegal in UTF-32 */
+         if ((ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) || ch > UNI_MAX_LEGAL_UTF32) {
+             if (container.length == 0) {
+                 container ~= UNI_REPLACEMENT_CHAR;
+             }
+             return cast(wchar[])container;
+         }
+
+         // After the first code point only combining marks are accepted.
+         // Test the code point itself, before any surrogate arithmetic
+         // changes its value.
+         if (container.length > 0 && !isDeadChar(ch)) {
+             break;
+         }
+
+         if (ch <= UNI_MAX_BMP) {
+             container ~= cast(wchar)ch; /* normal case */
+         }
+         else {
+             /* target is a character in range 0x10000 - 0x10FFFF. */
+             ch -= halfBase;
+             container ~= cast(wchar)((ch >> halfShift) + UNI_SUR_HIGH_START);
+             container ~= cast(wchar)((ch & halfMask) + UNI_SUR_LOW_START);
+         }
+     }
+
+     return cast(wchar[])container;
+ }
+
+ /*
+  * Returns the first character of UTF-32 text as UTF-8: the first code
+  * point together with every combining mark (see isDeadChar) that directly
+  * follows it.
+  *
+  * Scanning ends at the first surrogate value or value above
+  * UNI_MAX_LEGAL_UTF32. If nothing was collected before that point, the
+  * result is the encoding of UNI_REPLACEMENT_CHAR. These are the same rules
+  * toUtf16Chars(dstring) follows.
+  */
+ char[] toUtf8Chars(dstring src) {
+     char[] container;
+
+     if (src.length == 0) {
+         return [];
+     }
+
+     const auto byteMask = 0xBF;
+     const auto byteMark = 0x80;
+
+     foreach (dchar unit; src) {
+         dchar ch = unit;
+
+         bool illegal = (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
+             || ch > UNI_MAX_LEGAL_UTF32;
+
+         if (illegal) {
+             if (container.length > 0) {
+                 break;
+             }
+             ch = UNI_REPLACEMENT_CHAR;
+         }
+         else if (container.length > 0 && !isDeadChar(ch)) {
+             // After the first code point only combining marks are accepted.
+             break;
+         }
+
+         /* Figure out how many bytes the result will require */
+         uint bytesToWrite;
+         if (ch < cast(dchar)0x80) {
+             bytesToWrite = 1;
+         }
+         else if (ch < cast(dchar)0x800) {
+             bytesToWrite = 2;
+         }
+         else if (ch < cast(dchar)0x10000) {
+             bytesToWrite = 3;
+         }
+         else {
+             bytesToWrite = 4;
+         }
+
+         // Fill the sequence from its last byte to its first: continuation
+         // bytes carry six bits each, the first byte carries the rest.
+         char[4] encoded;
+         for (uint pos = bytesToWrite - 1; pos > 0; pos--) {
+             encoded[pos] = cast(char)((ch | byteMark) & byteMask);
+             ch >>= 6;
+         }
+         encoded[0] = cast(char)(ch | firstByteMark[bytesToWrite]);
+
+         container ~= encoded[0..bytesToWrite];
+
+         if (illegal) {
+             // the replacement character stands alone
+             break;
+         }
+     }
+
+     return cast(char[])container;
+ }
+
+ // string length stuffs
+ /*
+  * Counts the characters in UTF-8 text; combining marks (see isDeadChar)
+  * are not counted.
+  *
+  * Counting stops at the first sequence that is cut off by the end of the
+  * input or that isLegalUTF8 rejects. A decoded surrogate value or a value
+  * above UNI_MAX_LEGAL_UTF32 is treated as UNI_REPLACEMENT_CHAR.
+  */
+ uint utflen(string src) {
+     if (src.length == 0) {
+         return 0;
+     }
+
+     char* cursor = src.ptr;
+     char* limit = src.ptr + src.length;
+
+     uint count;
+
+     while (cursor < limit) {
+         ushort trailing = trailingBytesForUTF8[*cursor];
+
+         // Source exhausted, or source illegal.
+         if (cursor + trailing >= limit || !isLegalUTF8(cursor, trailing+1)) {
+             break;
+         }
+
+         // Sum the raw bytes, shifting by six bits between them, then
+         // strip the accumulated marker bits.
+         dchar code = 0;
+         for (uint k = 0; k < trailing; k++) {
+             code += *cursor++;
+             code <<= 6;
+         }
+         code += *cursor++;
+         code -= offsetsFromUTF8[trailing];
+
+         bool isSurrogate = code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END;
+         if (code > UNI_MAX_LEGAL_UTF32 || isSurrogate) {
+             code = UNI_REPLACEMENT_CHAR;
+         }
+
+         if (!isDeadChar(code)) {
+             count++;
+         }
+     }
+
+     return count;
+ }
+
+ /*
+  * Counts the characters in UTF-16 text; combining marks (see isDeadChar)
+  * are not counted.
+  *
+  * A surrogate pair counts as one character. A high surrogate that is not
+  * followed by a low surrogate counts as one (replacement) character, and
+  * the unit after it is then examined on its own. A high surrogate that is
+  * the last unit of the input ends the count without being counted. This
+  * is the same walk calcIndices(wstring) performs.
+  */
+ uint utflen(wstring src) {
+     if (src.length == 0) {
+         return 0;
+     }
+
+     wchar* source = src.ptr;
+     wchar* sourceEnd = src.ptr + src.length;
+
+     uint len = 0;
+
+     dchar ch, ch2;
+
+     while (source < sourceEnd) {
+         ch = *source++;
+
+         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+             if (source >= sourceEnd) {
+                 break;
+             }
+
+             ch2 = *source;
+             if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                 ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                 // consume the low surrogate so it is not counted again
+                 source++;
+             }
+             else {
+                 // invalid surrogate: the cursor already points at the unit
+                 // after the high surrogate, so leave it there; stepping
+                 // back would re-read the same unit forever
+                 ch = UNI_REPLACEMENT_CHAR;
+             }
+         }
+
+         // if it is not a dead character
+         if (!isDeadChar(ch)) {
+             // it is a valid character
+             len++;
+         }
+     }
+
+     return len;
+ }
+
+ // Counts the characters in UTF-32 text; combining marks (see isDeadChar)
+ // are not counted.
+ uint utflen(dstring src) {
+     uint count;
+
+     foreach (dchar point; src) {
+         if (!isDeadChar(point)) {
+             count++;
+         }
+     }
+
+     return count;
+ }
+
+ // Unicode Indices
+
+ /*
+  * Returns, for each character of UTF-8 text, the byte offset at which it
+  * starts. Combining marks (see isDeadChar) get no entry of their own.
+  *
+  * Scanning stops at the first sequence that is cut off by the end of the
+  * input or that isLegalUTF8 rejects. A decoded surrogate value or a value
+  * above UNI_MAX_LEGAL_UTF32 is treated as UNI_REPLACEMENT_CHAR.
+  */
+ uint[] calcIndices(string src) {
+     if (src is null || src == "") {
+         return [];
+     }
+
+     uint[] starts = new uint[src.length];
+     uint found;
+     uint offset;
+
+     char* cursor = src.ptr;
+     char* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         ushort trailing = trailingBytesForUTF8[*cursor];
+
+         // Source exhausted, or source illegal.
+         if (cursor + trailing >= limit || !isLegalUTF8(cursor, trailing+1)) {
+             break;
+         }
+
+         // Sum the raw bytes, shifting by six bits between them, then
+         // strip the accumulated marker bits.
+         dchar code = 0;
+         for (uint k = 0; k < trailing; k++) {
+             code += *cursor++;
+             code <<= 6;
+         }
+         code += *cursor++;
+         code -= offsetsFromUTF8[trailing];
+
+         bool isSurrogate = code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END;
+         if (code > UNI_MAX_LEGAL_UTF32 || isSurrogate) {
+             code = UNI_REPLACEMENT_CHAR;
+         }
+
+         if (!isDeadChar(code)) {
+             starts[found] = offset;
+             found++;
+         }
+
+         // advance past the whole sequence
+         offset += trailing+1;
+     }
+
+     return starts[0..found];
+ }
+
+ /*
+  * Returns, for each character of UTF-16 text, the unit offset at which it
+  * starts. Combining marks (see isDeadChar) get no entry of their own.
+  *
+  * A surrogate pair is one character. A high surrogate that is not
+  * followed by a low surrogate is treated as UNI_REPLACEMENT_CHAR, and a
+  * high surrogate that is the last unit of the input ends the scan.
+  */
+ uint[] calcIndices(wstring src) {
+     if (src is null || src == "") {
+         return [];
+     }
+
+     uint[] starts = new uint[src.length];
+     uint found;
+     uint offset;
+
+     wchar* cursor = src.ptr;
+     wchar* limit = src.ptr + src.length;
+
+     while (cursor < limit) {
+         uint consumed = 1;
+         dchar code = *cursor++;
+
+         if (code >= UNI_SUR_HIGH_START && code <= UNI_SUR_HIGH_END) {
+             if (cursor >= limit) {
+                 break;
+             }
+
+             dchar trail = *cursor;
+             if (trail >= UNI_SUR_LOW_START && trail <= UNI_SUR_LOW_END) {
+                 code = ((code - UNI_SUR_HIGH_START) << halfShift) + (trail - UNI_SUR_LOW_START) + halfBase;
+                 cursor++;
+                 consumed = 2;
+             }
+             else {
+                 // invalid surrogate
+                 code = UNI_REPLACEMENT_CHAR;
+             }
+         }
+
+         if (!isDeadChar(code)) {
+             starts[found] = offset;
+             found++;
+         }
+
+         offset += consumed;
+     }
+
+     return starts[0..found];
+ }
+
+ // Returns, for each character of UTF-32 text, the index at which it starts.
+ // Combining marks (see isDeadChar) get no entry of their own, so the result
+ // holds exactly one index per counted character.
+ uint[] calcIndices(dstring src) {
+     if (src is null || src == "") {
+         return [];
+     }
+
+     uint[] ret = new uint[src.length];
+
+     uint len;
+
+     for (uint i = 0; i < src.length; i++) {
+         // if it is not a dead character
+         if (!isDeadChar(src[i])) {
+             // it is a valid character
+             ret[len] = i;
+             len++;
+         }
+     }
+
+     // Trim the slots that were reserved for combining marks; returning the
+     // whole buffer would add trailing zero indices.
+     return ret[0..len];
+ }
+
+ // True unless chr is a UTF-8 follow-up byte, i.e. unless its top two bits
+ // are 10.
+ bool isStartChar(char chr) {
+     return (chr & 0b11000000) != 0b10000000;
+ }
+
+ // True unless chr is a UTF-16 low surrogate, which can only be the second
+ // unit of a pair.
+ bool isStartChar(wchar chr) {
+     return chr < UNI_SUR_LOW_START || chr > UNI_SUR_LOW_END;
+ }
+
+ // Always true: this overload performs no check on chr. It mirrors the char
+ // and wchar overloads so callers can use one name for all three types.
+ bool isStartChar(dchar chr) {
+ // Obvious
+ return true;
+ }
+
+ // Maps one code page 866 byte to its code point. Bytes below 0x80 are
+ // returned unchanged; the rest are looked up in CP866_to_UTF32, which is
+ // indexed from byte 0x80.
+ dchar fromCP866(char chr) {
+     return (chr < 0x80) ? cast(dchar)chr : CP866_to_UTF32[chr-128];
+ }
+
+ // Convenience overload: wraps the single code point in a one-element array
+ // and forwards to combine(dchar[], dchar).
+ dchar[] combine(dchar chr, dchar combiningMark) {
+ return combine([chr], combiningMark);
+ }
+
+ dchar[] combine(dchar[] chr, dchar combiningMark) {
+ switch(combiningMark) {
+ case '\u0300': // grave
+ switch(chr[0]) {
+ case 'a':
+ return "\u00e0";
+ case 'e':