Re: Pattern Matching

Jetzt unterstützt das ganze Ding Steuerzeichen, wie 'n', 'r', ich habe für alle Zustände, ausser der Zeichen negative Werte eingeführt, und statt char verwende ich int. In Zukunft soll alles Hex-Values unterstützen

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define     X                           1024
#define     END                         -1
#define     IF_BEGIN                    -2
#define     IF_ELSE                     -3
#define     IF_END                      -4
#define     REPEAT_BEGIN                -5
#define     ROUND_BRACKET_OPEN          -6
#define     ROUND_BRACKET_CLOSE         -7
#define     STACKLASTWASLABEL           0
#define     STACKXREALLOOPLABEL         1
#define     STACKXREALIFBEGIN           2
#define     STACKXREALELSEBEGIN         3
#define     REALEND                     -8
#define     CONTROLSTATE                -9
#define     ASCIIMIN                    32
#define     ASCIIMAX                    255
#define     LEX_ROUND_BRACKET_OPEN      -10
#define     LEX_ROUND_BRACKET_CLOSE     -11
#define     LEX_SQUARE_BRACKET_OPEN     -12
#define     LEX_SQUARE_BRACKET_CLOSE    -13
#define     LEX_COMMA                   -14
#define     LEX_STAR                    -15
#define     LEX_DOT                     -16
#define     LEX_END                     -17
#define     LEX_EMPTY                   -18


int statechar [X];
int x;

int initstates () {
    int l;

    for (l = 0;  l < X;  l++) {
        statechar [l] = LEX_EMPTY;
    }

}

int jx = 0;

char text [] = "ab cdpkcdpkpkpkcdpkfhpkpkfhpkpkpkfhpkpkpkcdpkpkpkcdpkefghqqqqaber\nnnedb\nsuper";

//char expr [] = "abc*de[fasd,asddsr]qdsda*ghijk";
//char expr [] = "[*([[[([a,[[[p,q*],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "[*([[[([a,[[[p,*(q)],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "*(mmm(nnn)mm)";
//char expr [] = "a(b*(*(cd)aaa)(efg))";
char expr [] = "*ab *([cd,fh]*(pk))efgh*(.)*aber\\*nedb\\\nsuper";
//char expr [] = "abcd";
int i = 0;

int gettoken () {
    if ((expr [i] == 0))
      return LEX_END;
    else if (expr [i] == '(') {
      i++;
      return LEX_ROUND_BRACKET_OPEN;
    }
    else if (expr [i] == ')') {
      i++;
      return LEX_ROUND_BRACKET_CLOSE;
    }
    else if (expr [i] == '[') {
      i++;
      return LEX_SQUARE_BRACKET_OPEN;
    }
    else if (expr [i] == ']') {
      i++;
      return LEX_SQUARE_BRACKET_CLOSE;
    }
    else if (expr [i] == '*') {
      i++;
      return LEX_STAR;
    }
    else if (expr [i] == ',') {
      i++;
      return LEX_COMMA;
    }
    else if (expr [i] == '.') {
      i++;
      return LEX_DOT;
    }
    else if (expr [i] == '\') {
       i++;
       if (expr [i] == 'a') {
          i++;
          return 'a';
       }
       else if (expr [i] == 'b') {
          i++;
          return 'b';
       }
       else if (expr [i] == 'f') {
          i++;
          return 'f';
       }
       else if (expr [i] == 'n') {
          i++;
          return 'n';
       }
       else if (expr [i] == 'r') {
          i++;
          return 'r';
       }
       else if (expr [i] == 't') {
          i++;
          return 't';
        }
        else if (expr [i] == 'v') {
          i++;
          return 'v';
        }

       return expr [i++];
    }
    else if ((expr [i] >= ASCIIMIN) \&amp;\&amp; (expr [i] <= ASCIIMAX)) {
      return expr [i++];
    }
}

int tokenback_rec_mask1(int);
int tokenback_rec_mask2(int);

int tokenback_rec_mask1 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask2 (j-1);
  else
    return 1;
}
int tokenback_rec_mask2 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask1 (j-1);
  else
    return 0;
}

void tokenback () {
  i-=(2-tokenback_rec_mask1(i-2));
}

/*
aaaa
aaaaaa
aaaaaaa
aaaaaaaa()
*/

int stream ();
int followed ();
int compound ();
int or_operator ();
int repeat_operator ();

int or_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_SQUARE_BRACKET_OPEN) {
        statechar [x] = IF_BEGIN;
        x++;
        or_operator ();
        if (gettoken () != LEX_COMMA) {
            fprintf (stderr, "Komma vergessen");
            exit (1);
        }
        statechar [x] = IF_ELSE;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_SQUARE_BRACKET_CLOSE) {
            fprintf (stderr, "Klammer vergessen ]");
            exit (1);
        }
        statechar [x] = IF_END;
        x++;
        repeat_operator ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        repeat_operator ();
    }
}


int repeat_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_STAR) {
        statechar [x] = REPEAT_BEGIN;
        x++;
        stream ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        stream ();
    }
}

int stream () {
    int r = 0;
    r = compound ();
    r |= followed ();
    if (r) {
        or_operator();
    }
}

int followed () {
    int ch = gettoken ();
    int st, xtmp;
    if ((ch >= ASCIIMIN) \&amp;\&amp; (ch <= ASCIIMAX)) {
        statechar [x] = ch;
        x = x+1;
        or_operator ();
        return 1;
    }
    else if (ch == LEX_DOT) {
        statechar [x] = ch;
        x = x+1;
        or_operator ();
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int compound () {
    int ch;
    if ((ch = gettoken ()) == LEX_ROUND_BRACKET_OPEN) {
        statechar [x] = ROUND_BRACKET_OPEN;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_ROUND_BRACKET_CLOSE) {
            fprintf (stderr, "fehler klammer vergessen %c %in", expr [i], i);
            exit (1);
        }
        statechar [x] = ROUND_BRACKET_CLOSE;
        x++;
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int realstates1 [1024];
int realstates2 [1024];
int realstateschar [1024];


int stacks [32][1024];
int stacksp [32];

void push (int stackid, int state) {
  stacks [stackid][stacksp[stackid]++] = state;
}

int pop (int stackid) {
  stacksp [stackid]--;
  return stacks [stackid][stacksp[stackid]];
}

int emptystack (int stackid) {
    return (stacksp[stackid] == 0);
}

void automatgen ( ) {
    int xi = 0;
    int xreal = 0;
    int itwas = 0;
    int tmp;

    for (xi = 0; xi <= x;  ) {

        if (((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) || (statechar [xi] == LEX_DOT)) {
            realstates1 [xreal] = xreal+1;
            realstates2 [xreal] = xreal+1;
            realstateschar [xreal] = statechar [xi];
            xreal++;
        }
        else if (statechar [xi] == ROUND_BRACKET_OPEN) {
            //push (STACKLASTWASLABEL, ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == REPEAT_BEGIN) {
            xi++;
            if (statechar [xi] == ROUND_BRACKET_OPEN) {
                push (STACKXREALLOOPLABEL, xreal);
                push (STACKLASTWASLABEL, REPEAT_BEGIN);
            }
            else if ((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) {
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal+1;
                realstateschar [xreal] = statechar [xi];
                xreal++;
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal-1;
                realstateschar [xreal] = CONTROLSTATE;
                xreal++;
            }
        }
        else if (statechar [xi] == ROUND_BRACKET_CLOSE) {
            if ((itwas = pop (STACKLASTWASLABEL)) == REPEAT_BEGIN) {
                realstateschar [xreal] = CONTROLSTATE;
                realstates1 [xreal] = pop (STACKXREALLOOPLABEL);
                realstates2 [xreal] = xreal+1;
                xreal++;
            }
            else if (itwas = ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == IF_BEGIN) {
            realstateschar [xreal] = CONTROLSTATE;
            realstates1 [xreal] = xreal+1;
            push (STACKXREALIFBEGIN, xreal);
            xreal++;
        }
        else if (statechar [xi] == IF_ELSE) {
            realstates2 [pop (STACKXREALIFBEGIN)] = xreal;
            push (STACKXREALELSEBEGIN, xreal-1);
        }
        else if (statechar [xi] == IF_END) {
            realstates2 [tmp = pop (STACKXREALELSEBEGIN)] = xreal;
            realstates1 [tmp] = xreal;
        }
        xi++;
    }

    realstateschar [xreal] = REALEND;
    realstates1 [xreal] = REALEND;
    realstates2 [xreal] = REALEND;


    int k;
    for (k = 0;  k <= xreal;  k++) {
        printf ("(%i, ", realstateschar [k]);
        printf ("%i, ", realstates1 [k]);
        printf ("%i)n", realstates2 [k]);
    }

}

int xtext = 0;

int automat (int xreal) {
    int xtext2;
    int flag = 0;


    for (;  (realstates1 [xreal] != REALEND) \&amp;\&amp; (realstates2 [xreal] != REALEND) \&amp;\&amp; (xtext < strlen(text));  xreal = realstates1 [xreal], xtext++) {
        if (realstateschar [xreal] == LEX_DOT)
            printf ("succeded: %c %sn", '.', text + xtext);
        else if (realstateschar [xreal] == text [xtext])
            printf ("succeded: %c %sn", text [xtext], text + xtext);
        else if ((realstateschar [xreal] == CONTROLSTATE) \&amp;\&amp; (realstates1 [xreal] != realstates2 [xreal])) {
            xtext2 = xtext;
            flag = (automat (realstates1 [xreal]) == 1);
            xtext = xtext2;
            flag |= (automat (realstates2 [xreal]) == 1);

            if (flag == 0) return -1;
            else return 1;
        }
        else if (realstates1 [xreal] == REALEND ) return 1;
        else {return -1;}
    }
    if (realstates1 [xreal] == REALEND) {
        return 1;
    }
    else {

        return -1;
    }
}

int main (void) {
    int k, l;
    initstates ();
    or_operator (0);

    for (l = 0;  l <= x;  l++) {
        if (statechar [l] == ROUND_BRACKET_OPEN)
            printf ("(ROUND_BRACKET_OPEN, %2i)n", l);
        else if (statechar [l] == ROUND_BRACKET_CLOSE)
            printf ("(ROUND_BRACKET_CLOSE, %2i)n", l);
        else if (statechar [l] == REPEAT_BEGIN)
            printf ("(REPEAT_BEGIN, %2i)n", l);
        else
            printf ("(%2i %2i)n", statechar [l], l);
    }

    automatgen ();
    printf ("%in", automat (0));
}