Sunday, June 12, 2016

A better way to split a node in a trie

This is the previous function for splitting a branch in the trie.


static typeNode* SplitBranch(typeNode* parent,unsigned long index)
  {
  typeNode*     working;
  char*         data;
  unsigned long I;

  if ((index < 1) || (index >= parent->Length))
    return 0;

  working = CreateBranch(0,parent->Data + index,parent->Length - index);
  if (working == 0)
    return 0;

  working->Child = parent->Child;
  parent->Child = working;

  data = (char*)malloc(index);
  if (data == 0)
    return 0;

  for (I = 0;I < index;I++)
    data[I] = parent->Data[I];

  free(parent->Data);

  parent->Length = index;
  parent->Data   = data;

  return working;
  }

This is a bit complicated. We first create a new node for the back end of the split point, that part is fine. But then we go and allocate a whole new buffer for the front end, fill it up with the front part of the split buffer, and then replace the node string with the new buffer. The problem with this besides too much code is, what if we want to improve the create string function later.


static typeNode* SplitBranch(typeNode* parent,unsigned long index)
  {
  typeNode*     node1;
  typeNode*     node2;
  char*         data;
  unsigned long I;

  if ((index < 1) || (index >= parent->Length))
    return 0;

  node1 = CreateBranch(0,parent->Data,index);
  if (node1 == 0)
    return 0;

  node2 = CreateBranch(0,parent->Data + index,parent->Length - index);
  if (node2 == 0)
    return 0;
 
  free(parent->Data);

  parent->Length = node1->Length;
  parent->Data   = node1->Data;

  free(node1)

  node2->Child = parent->Child;
  parent->Child = node2;

  return working;
  }

This is the improved version of the code. We have not really changed much, except that it is a lot easier to read. What we have done here is made the code simpler and more robust by pushing all the hard work to earlier functions. Instead of creating the first part of the split buffer int a second function, it is now done in the one function that is dedicated to that one thing. If we want to change something later, we can just change create branch and not have to worry about too many other things.

  node1 = CreateBranch(0,parent->Data,index);
  if (node1 == 0)
    return 0;

  node2 = CreateBranch(0,parent->Data + index,parent->Length - index);
  if (node2 == 0)
    return 0;
In this part of the code we just create two new nodes that contain the two parts of the string we are splitting. Because of the way a trie works, we have to keep the original node and discard the first node.

  free(parent->Data);

  parent->Length = node1->Length;
  parent->Data   = node1->Data;

  free(node1)

  node2->Child = parent->Child;
  parent->Child = node2;
In this part, we free the original string buffer of the parent node, then replace it with the buffer of the first node we have just created. Then we free the first node. We only created it a in order to get a new buffer. Doing that by calling create branch is a lot easier and safer than doing it manually inside a new function. As I said before, it is a good habit to keep memory operations isolated in a few places. In fact, even freeing the node and the data here would be a mistake, but it is fine because all the operations are isolated inside the CreateBranch and SplitBranch functions.

The next two line do something a bit complicated and not really obvious. The nodes inside a trie form strings with their children. The parent is the first part of the string, and each child is part of a separate string. When we split a node, we still need to have a single string. This is why all the children that the split node had before must be moved to the new node we have created, because the parent node and the new node are a single prefix for all the nodes that the parent had before. This means that after the split, nothing has changed. The next to last line in the code above gives all the children of the parent to the new node, and the last line sets the new node as the parent's only child. Later, after the split, the parent will receive new children, that is why we need to split the node.

This is not quite clear yet, but I will try to make a visual explanation later.

Simple JavaScript Canvas Animation with source code

A quick and simple canvas example. I am going to do some canvas animation later, I am going to use this to set up the basics.


<html>
<head>
  <title>Bouncing balls with gravity</title>
</head>
<body>
<canvas id="canvas" width="640" height="480">
<script>
  var canvas  = document.getElementById("canvas");
  var context = canvas.getContext("2d");
  var width   = canvas.width;
  var height  = canvas.height;
  var maxspeed = 5;
  var particlecount = 20;
  var gravity = -1;
  var density = 0.5;

  function particle(x, y, radius, color)
    {
this.tx = x;
    this.ty = y;
    this.vx = 0;
    this.vy = 0;
this.color = color;
this.radius = radius
this.mass = radius * density;
};

  particle.prototype.draw = function()
    {  
    context.beginPath();
    context.arc(this.tx, this.ty, this.radius, 0, 2 * Math.PI, false);
    context.fillStyle = this.color;
    context.fill();
    };

  particle.prototype.move = function()
    {
    this.tx += this.vx;
this.ty += this.vy;
    };

  particle.prototype.limit = function(maxspeed)
    {
    if (this.vx > +maxspeed)
      this.vx = +maxspeed;
    if (this.vx < -maxspeed)
      this.vx = -maxspeed;
    if (this.vy > +maxspeed)
      this.vy = +maxspeed;
    if (this.vy < -maxspeed)
      this.vy = -maxspeed;
    };

  particle.prototype.clip = function(left, top, right, bottom)
    {
    if (this.tx < left + this.radius)
      {
      this.tx = left + this.radius;
      this.vx = 0 - this.vx;
      }
    if (this.ty < top + this.radius)
      {
      this.ty = top + this.radius;
      this.vy = 0 - this.vy;
      }
    if (this.tx > right - this.radius)
      {
      this.tx = right - this.radius;
      this.vx = 0 - this.vx;
      }
    if (this.ty > bottom - this.radius)
      {
      this.ty = bottom - this.radius;
      this.vy = 0 - this.vy;
      }
    };

  particle.prototype.gravitate = function(target)
    {
    var dx = target.tx - this.tx;
    var dy = target.ty - this.ty;
    var distance = Math.sqrt(dx * dx + dy * dy);
if (distance > (this.radius + target.radius))
 {
      var force = (gravity * target.mass * this.mass) / (distance * distance);

      this.vx += (dx / distance * force) / this.mass;
      this.vy += (dy / distance * force) / this.mass;

      target.vx -= (dx / distance * force) / target.mass;
      target.vy -= (dy / distance * force) / target.mass;
      }
    };

  particle.prototype.interact = function(target)
    {
    var dx = target.tx - this.tx;
    var dy = target.ty - this.ty;
    var distance = Math.sqrt(dx * dx + dy * dy);
if (distance < (this.radius + target.radius))
 {
 var intersect = ((this.radius + target.radius) - distance);

      this.vx += (-dx / distance * intersect * density) / this.mass;
      this.vy += (-dy / distance * intersect * density) / this.mass;

      target.vx += (dx / distance * intersect * density) / target.mass;
      target.vy += (dy / distance * intersect * density) / target.mass;
 }
}

  var particles = [];

  function init()
    {
    for (var i = 0; i < particlecount; i++)
      {
 var radius = Math.random() * 50 + 5;
 var color = Math.random() * 5;

 if (color < 1)
        color = "yellow";
 else
 if (color < 2)
        color = "cyan";
 else
 if (color < 3)
        color = "red";
 else
 if (color < 4)
        color = "magenta";
 else
 if (color < 5)
        color = "green";
 else
        color = "blue";
   
      particles[i] = new particle(Math.random() * width, Math.random() * height,radius,color);
      }
    };

  function drawframe()
    {
    requestAnimationFrame(drawframe);

    context.fillStyle = "black";
    context.fillRect(0, 0, width, height);

    for (i = 0;i < particlecount;i++)
 {
      for (j = i+1;j < particlecount;j++)
        {
particles[i].gravitate(particles[j]);
        particles[i].interact(particles[j]);
   }
 }

for (i = 0;i < particlecount;i++)
 {
      particles[i].clip(0,0,width,height);
      particles[i].limit(maxspeed);
      particles[i].move();
      particles[i].draw();
 }
    };

  init();
  drawframe();

</script>
</body>
</html>

Saturday, June 11, 2016

Converting a sixty four bit number into a string

This is one of the functions that I had the most trouble with, even though it is quite simple. The point here is to translate a sixty four bit number into a string using any base from two to sixty four.


long stringAppendInt64(typeString* str,unsigned long long value,long base)
  {
  static const char digits[] = "0123456789ABCDEFGHIGKLMNOPQRSTUVWXYZabcdefghigklmnopqrstuvwxyz-_";
  unsigned long     start,length;

  if ((base < 2) || (base > 64))
    return 0;

  start = str->Length;

  do
    {
    stringAppendCharacter(str,digits[value % base]);

    value = value/base;
    }
  while (value != 0);

  length = str->Length - start;

  stringReverseRange(str,start,length);

  return 1;
  }

long stringAppendInt64(typeString* str,unsigned long long value,long base)
This function takes a sixty four bit number, and appends a string with the text equivalent. The parameters are the string object, the number to be converted as a sixty four bit integer, and the base that we use to represent the number. The return value is just a Boolean value. I call it long because for some reason, my compiler does not like bool as value.

static const char digits[] = "0123456789ABCDEFGHIGKLMNOPQRSTUVWXYZabcdefghigklmnopqrstuvwxyz-_";
The first thing to notice after that is the digits array. In the video below, Tom Scott Explains how the YouTube video ID's use a sixty four bit number system with eleven digits. This code does not limit itself to eleven digits, it just converts any number you give it into a string, and the string is as long as it needs to be. Anyway, the characters used to represent the resulting number are, zero to nine, followed by uppercase A through Z, followed by lowercase a through z followed by two other characters. As it says in the video, the characters that YouTube uses are dash and underscore because that is what works best in URL's. If you want to change what symbols are used to build your number, just change the symbols, just make sure that there are enough of them. This function limits itself to sixty four, but you can just use any base you want up to infinity, or until you run out of symbols.

 if ((base < 2) || (base > 64))
    return 0;
The first thing the function does is check that the base is more than two and less than sixty four. A base of zero or one would not make any sense, and any base of more than sixty four would not work simply because we don't have enough symbols. If you don't understand what we mean by base with numbers, read this article on Wikipedia about Positional Notation to get the idea.

 start = str->Length;
 length = str->Length - start;
 stringReverseRange(str,start,length);
These three lines of code are important because of the way the string object is structured, they really have nothing to do with building the number string. We are appending the numbers to the end of an existing string. We also need to reverse the resulting string because of the way the math works. We need the start and length variables in order to know where the resulting numbers string is located inside the main string. The first line saves the current length of the string before we start adding digits, the second lines calculates how far we have gone into after we have added our digits. After that, the third line reverses that portion of the string. The reason we reverse is because we are subtracting digits out of the number starting with the least significant digits at the right, but we are adding characters to the string by starting at the left and going right. This results in the number being inverted. There is a way we could do this from left to right, but it would be slow and complicated.

 do
    {
    stringAppendCharacter(str,digits[value % base]);

    value = value/base;
    }
  while (value != 0);
This is the part were we actually convert the number into a string. First, the reason why it is a do while loop is for when the number we are converting is zero. In that case, we want to run through the loop at least once, otherwise nothing would be displayed. We just loop as long as the result is not zero. This method would work for negative numbers, but there is the problem of what to do with the minus sign. I don't want to include that here because with the various bases, that is best left with the client function to decide how to represent extra symbols like plus or minus signs as well as hexadecimal or bit filed markers. The math is, divide the number by the base, and use the remainder as in index into the list of symbols. Here we do the division twice, first with the modulo operator to get the remainder, and then again to set the value to the result of the division. I am wondering if there is a way to do this in one operation, I don't think there is. Each time we loop, the value last digit of the number is taken out. For example, with the number one hundred and twenty three in base ten, we would go through the loop three times. After the first time the digit to be printed would be three, and the value would become twelve. After the second loop the digit to be printed will be two, and the value will become one. And after the third loop , the digit we print will be one, and the value is now zero and the loop will not run again because it will hit the while part of the while loop. After all that we reverse the string to get it printed correctly and we are done. Note here that there is no way to know how long the final string will be. A number that is three digits long is bases sixty four may be dozens of digits long is base two, so make sure that there is enough space allocated before trying to print large numbers.

Main file for testing the Trie

This is the main file that I use to test my string builder and Trie classes.

#include "typestring.h"
#include "typetrie.h"

typeString   str;
typeNode     trie;
char         buffer[4096];
const char*  strings[] =  {
  "remove",
  "rename",
  "renumber",
  "repack",
  "repaint",
  "repay",
  "replace",
  "replay",
  "reread",
  "rerun",
  "resale",
  "reshape",
  "retell",
  "rethink",
  "retrace",
  "reword",
  "rewrite",
  "rethinking",
  "retracing",
  "rewording",
  "impatience",
  "imperfect",
  "impolite",
  "impossible",
  "impure",
  "inactive",
  "incorrect",
  "indefinite",
  "incident",
  "ant",
  "apple",
  "zebra",
  "cross",
  "rock",
  "cloud",
  "turtle",
  "rodent",
  "solar",
  "volume",
  "purple",
  "romeo",
  "overlord",
  "sequence",
  "virtue",
  "parent",
  "starlight",
  "source",
  "theory",
  "underworld" };

int main()
  {
  stringCreate(&str,4096);

  stringAppendCString(&str,"Hello World = ");
  stringAppendInt64(&str,92233736854775807,2);
  stringGetCString(&str,buffer,4096);
  printf("%s\n",buffer);

  stringSetLength(&str,0);
  stringAppendInt64(&str,92233720364775807,16);
  stringGetCString(&str,buffer,4096);
  printf("%s\n",buffer);

  stringSetLength(&str,0);
  stringAppendInt64(&str,92233720364775807,64);
  stringGetCString(&str,buffer,4096);
  printf("%s\n",buffer);

  trieCreate(&trie);
  int length = sizeof(strings) / sizeof(strings[0]);
  for (int I = 0;I < length;I++)
    trieInsertCString(&trie,strings[I]);
  trieDraw(&trie,0);

  return 1;
  }

Source code for a string builder class

This one is a string class. I should really call it string builder, but that name is just too long. At some point I am going to explain how each part of the code works, but I really don't feel like writing right now. I am including the full source code here just so I can refer to it later. None of these code files are really that good, they are just some quick stuff that I made without much thought.

#ifndef TYPESTRING_H
#define TYPESTRING_H

#include <stdlib.h>

#define STRING_MIN     1
#define STRING_MAX     4096

typedef struct
  {
  unsigned long Capacity;
  unsigned long Length;
  char*         Data;
  } typeString;

long stringCreate(typeString* str,unsigned long capacity);
long stringGetCString(typeString* str,char* buffer,unsigned long buffersize);
long stringSetLength(typeString* str,unsigned long length);
long stringSetCapacity(typeString* str,unsigned long capacity);
long stringReverse(typeString* str);
long stringReverseRange(typeString* str,unsigned long start,unsigned long length);
long stringAppendCharacter(typeString* str,char value);
long stringAppendCString(typeString* str,const char* value);
long stringAppendString(typeString* str,typeString* value);
long stringAppendInt64(typeString* str,unsigned long long value,long base);
long stringDestroy(typeString* str);

long stringCreate(typeString* str,unsigned long capacity)
  {
  char*  buffer;

  if (capacity < STRING_MIN)
    capacity = STRING_MIN;

  buffer = (char*)malloc(capacity);

  if (buffer == 0)
    return 0;

  str->Capacity = capacity;
  str->Length   = 0;
  str->Data     = buffer;

  return 1;
  }

long stringGetCString(typeString* str,char* buffer,unsigned long buffersize)
  {
  unsigned long I;

  I = 0;
  while ((I < str->Length) && (I < buffersize-1))
    {
    buffer[I] = str->Data[I];
    buffer[I+1] = 0;
    I++;
    }

  return 1;
  }

long stringSetLength(typeString* str,unsigned long length)
  {
  if (length > str->Capacity)
    return 0;

  str->Length = length;

  return 1;
  }

long stringSetCapacity(typeString* str,unsigned long capacity)
  {
  unsigned long I;
  char*         buffer;

  if (capacity == str->Capacity)
    return 1;

  if (capacity < STRING_MIN)
    capacity = STRING_MIN;

  buffer = (char*)malloc(capacity);

  if (buffer == 0)
    return 0;

  I = 0;
  while (I < capacity)
    {
    if (I < str->Length)
      buffer[I] = str->Data[I];
    else
      buffer[I] = 0;
    }

  free(str->Data);

  if (str->Length > capacity)
    str->Length = capacity;

  str->Capacity = capacity;
  str->Data     = buffer;

  return 1;
  }

long stringReverse(typeString* str)
  {
  unsigned long I,J;
  char          temp;
  I = 0;
  J = str->Length - 1;

  while (I < J)
    {
    temp = str->Data[I];
    str->Data[I] = str->Data[J];
    str->Data[J] = temp;

    I++;
    J--;
    }

  return 1;
  }

long stringReverseRange(typeString* str,unsigned long start,unsigned long length)
  {
  unsigned long I,J;
  char          temp;

  I = start;
  J = start + length - 1;

  if ((I < 0) || (J > str->Length))
    return 0;

  while (I < J)
    {
    temp = str->Data[I];
    str->Data[I] = str->Data[J];
    str->Data[J] = temp;

    I++;
    J--;
    }

  return 1;
  }

long stringAppendCharacter(typeString* str,char value)
  {
  if (str->Length >= str->Capacity)
    return 0;

  str->Data[str->Length] = value;

  str->Length++;

  return 1;
  }

long stringAppendCString(typeString* str,const char* value)
  {
  unsigned long I,J;

  I = str->Length;
  J = 0;
  while ((value[J] != 0) &&
         (J < STRING_MAX) &&
         (I < str->Capacity))
    {
    str->Data[I] = value[J];

    I++;
    J++;
    }

  str->Length = I;

  return 1;
  }

long stringAppendString(typeString* str,typeString* value)
  {
  unsigned long I,J;

  I = str->Length;
  J = 0;
  while ((I < str->Capacity) && (J < value->Length))
    {
    str->Data[I] = value->Data[J];

    I++;
    J++;
    }

  str->Length = I;

  return 1;
  }

long stringAppendInt64(typeString* str,unsigned long long value,long base)
  {
  static const char digits[] = "0123456789ABCDEFGHIGKLMNOPQRSTUVWXYZabcdefghigklmnopqrstuvwxyz-_";
  unsigned long     start,length;

  if ((base < 2) || (base > 64))
    return 0;

  start = str->Length;

  do
    {
    stringAppendCharacter(str,digits[value % base]);

    value = value/base;
    }
  while (value != 0);

  length = str->Length - start;

  stringReverseRange(str,start,length);

  return 1;
  }

long stringDestroy(typeString* str)
  {
  free(str->Data);

  return 1;
  }

#endif // TYPESTRING_H

Source code for a Trie Dictionary

This is a piece of code for inserting a string into a Trie dictionary. This is a bit complicated but it works.


#ifndef TYPETRIE_H
#define TYPETRIE_H

#include <stdio.h>
#include <stdlib.h>

#define TRIE_ROOT     01
#define TRIE_BRANCH   02
#define TRIE_LEAF     03

typedef struct tagNode
  {
  long          Type;
  unsigned long Length;
  char*         Data;
  tagNode*      Child;
  tagNode*      Next;
  } typeNode;

long trieCreate(typeNode* working)
  {
  working->Type   = TRIE_ROOT;
  working->Length = 0;
  working->Data   = 0;
  working->Child  = 0;
  working->Next   = 0;

  return 1;
  }

static typeNode* CreateNode(unsigned long type)
  {
  typeNode*   working;

  working = (typeNode*)malloc(sizeof(typeNode));
  if (working == 0)
    return 0;

  working->Type   = type;
  working->Length = 0;
  working->Data   = 0;
  working->Child  = 0;
  working->Next   = 0;

  return working;
  }

static typeNode* CreateBranch(typeNode* parent,char* buffer,unsigned long buffersize)
  {
  typeNode*     working;
  char*         data;
  unsigned long I;

  working = CreateNode(TRIE_BRANCH);
  if (working == 0)
    return 0;

  data = (char*)malloc(buffersize);
  if (data == 0)
    return 0;

  for (I = 0;I < buffersize;I++)
    data[I] = buffer[I];

  working->Length = buffersize;
  working->Data   = data;

  if (parent)
    {
    working->Next   = parent->Child;
    parent->Child   = working;
    }

  return working;
  }

static typeNode* SplitBranch(typeNode* parent,unsigned long index)
  {
  typeNode*     working;
  char*         data;
  unsigned long I;

  if ((index < 1) || (index >= parent->Length))
    return 0;

  working = CreateBranch(0,parent->Data + index,parent->Length - index);
  if (working == 0)
    return 0;

  working->Child = parent->Child;
  parent->Child = working;

  data = (char*)malloc(index);
  if (data == 0)
    return 0;

  for (I = 0;I < index;I++)
    data[I] = parent->Data[I];

  free(parent->Data);

  parent->Length = index;
  parent->Data   = data;

  return working;
  }

static typeNode* FindBranch(typeNode* parent,char value)
  {
  typeNode*      working;

  working = parent->Child;

  while (working)
    {
    if (working->Type == TRIE_BRANCH)
      {
      if (working->Data[0] == value)
        return working;
      }

    working = working->Next;
    }

  return 0;
  }

static typeNode* CreateLeaf(typeNode* parent)
  {
  typeNode*      working;

  working = CreateNode(TRIE_LEAF);
  if (working == 0)
    return 0;

  working->Next = parent->Child;
  parent->Child = working;

  return working;
  }

static long FindLeaf(typeNode* parent)
  {
  typeNode*      working;

  working = parent->Child;

  while (working)
    {
    if (working->Type == TRIE_LEAF)
      return 1;

    working = working->Next;
    }

  return 0;
  }

long trieFind(typeNode* root,char* buffer,unsigned long buffersize)
  {
  typeNode*     working;
  typeNode*     result;
  unsigned long I,J;

  working = root;
  if (working == 0)
    return 0;

  I = 0;
  while (I < buffersize)
    {
    result = FindBranch(working,buffer[I]);

    if (result == 0)
      return 0;
    else
      working = result;

    J = 0;
    while (J < working->Length)
      {
      if (buffer[I] != working->Data[J])
        return 0;

      if (I >= buffersize)
        return 0;

      I++;
      J++;
      }
    }

  return FindLeaf(working);
  }

long trieInsert(typeNode* root,char* buffer,unsigned long buffersize)
  {
  typeNode*     working;
  typeNode*     result;
  unsigned long I,J;

  working = root;
  if (working == 0)
    return 0;

  I = 0;
  while (I < buffersize)
    {
    result = FindBranch(working,buffer[I]);

    if (result == 0)
      {
      working = CreateBranch(working,buffer + I,buffersize - I);
      break;
      }
    else
      working = result;

    J = 0;
    while (J < working->Length)
      {
      if (buffer[I] != working->Data[J])
        {
        SplitBranch(working,J);
        working = CreateBranch(working,buffer + I,buffersize - I);
        I = buffersize;

        break;
        }

      if (I >= buffersize)
        {
        SplitBranch(working,J);

        break;
        }

      I++;
      J++;
      }
    }

  if (working == 0)
    return 0;

  if (FindLeaf(working) == 0)
    CreateLeaf(working);

  return 1;
  }

long trieInsertCString(typeNode* root,const char* buffer)
  {
  unsigned long I;

  I = 0;
  while ((buffer[I] != 0) && (I < STRING_MAX))
    I++;

  return trieInsert(root,(char*)buffer,I);
  }

int trieDraw(typeNode* root,int indent)
  {
  typeNode*   working;

  if (root == 0)
    return 0;

  if (root->Type == TRIE_ROOT)
    printf("\nRoot\n");
  else
  if (root->Type == TRIE_BRANCH)
    {
    printf("%*s\"",indent," ");
    fwrite(root->Data,1,root->Length,stdout);
    printf("\"\n");
    }

  working = root->Child;

  while (working)
    {
    trieDraw(working,indent + 2);

    working = working->Next;
    }

  return 1;
  }

long trieDestroy(typeNode* parent)
  {
  typeNode* working;
  typeNode* temp;

  if (parent->Type == TRIE_BRANCH)
    free(parent->Data;
 
  working = (parent->Child);

  while (working)
    {
    temp = working->Next;

    trieDestroy(working);
    free(working);

    working = temp;
    }

  return 1;
  }

#endif // TYPETRIE_H