Re: Pattern Matching

Ok, ich habe eine relativ elegante Lösung gefunden

int tokenback_rec_mask1(int);
int tokenback_rec_mask2(int);

int tokenback_rec_mask1 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask2 (j-1);
  else
    return 1;
}
int tokenback_rec_mask2 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask1 (j-1);
  else
    return 0;
}

void tokenback () {
  i-=(2-tokenback_rec_mask1(i-2));
}

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define     X                           1024
#define     END                         -1
#define     IF_BEGIN                    0
#define     IF_ELSE                     1
#define     IF_END                      2
#define     REPEAT_BEGIN                3
#define     ROUND_BRACKET_OPEN          4
#define     ROUND_BRACKET_CLOSE         5
#define     STACKLASTWASLABEL           0
#define     STACKXREALLOOPLABEL         1
#define     STACKXREALIFBEGIN           2
#define     STACKXREALELSEBEGIN         3
#define     REALEND                     -1
#define     CONTROLSTATE                31
#define     ASCIIMIN                    32
#define     ASCIIMAX                    255
#define     LEX_ROUND_BRACKET_OPEN      1
#define     LEX_ROUND_BRACKET_CLOSE     2
#define     LEX_SQUARE_BRACKET_OPEN     3
#define     LEX_SQUARE_BRACKET_CLOSE    4
#define     LEX_COMMA                   5
#define     LEX_STAR                    6
#define     LEX_DOT                     7
#define     LEX_END                     0
#define     LEX_EMPTY                   8


char statechar [X];
int x;

int initstates () {
    int l;

    for (l = 0;  l < X;  l++) {
        statechar [l] = LEX_EMPTY;
    }

}

int jx = 0;

char text [] = "ab cdpkcdpkpkpkcdpkfhpkpkfhpkpkpkfhpkpkpkcdpkpkpkcdpkef*gh";

//char expr [] = "abc*de[fasd,asddsr]qdsda*ghijk";
//char expr [] = "[*([[[([a,[[[p,q*],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "[*([[[([a,[[[p,*(q)],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "*(mmm(nnn)mm)";
//char expr [] = "a(b*(*(cd)aaa)(efg))";
char expr [] = "*ab *([cd,fh]*(pk))ef\\\\\\*gh";
//char expr [] = "abcd";
int i = 0;

char gettoken () {
    if ((expr [i] == 0))
      return LEX_END;
    else if (expr [i] == '(') {
      i++;
      return LEX_ROUND_BRACKET_OPEN;
    }
    else if (expr [i] == ')') {
      i++;
      return LEX_ROUND_BRACKET_CLOSE;
    }
    else if (expr [i] == '[') {
      i++;
      return LEX_SQUARE_BRACKET_OPEN;
    }
    else if (expr [i] == ']') {
      i++;
      return LEX_SQUARE_BRACKET_CLOSE;
    }
    else if (expr [i] == '*') {
      i++;
      return LEX_STAR;
    }
    else if (expr [i] == ',') {
      i++;
      return LEX_COMMA;
    }
    else if (expr [i] == '\') {
       i++;
       return expr [i++];
    }
    else if ((expr [i] >= ASCIIMIN) \&amp;\&amp; (expr [i] <= ASCIIMAX)) {
      return expr [i++];
    }
}

int tokenback_rec_mask1(int);
int tokenback_rec_mask2(int);

int tokenback_rec_mask1 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask2 (j-1);
  else
    return 1;
}
int tokenback_rec_mask2 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask1 (j-1);
  else
    return 0;
}

void tokenback () {
  i-=(2-tokenback_rec_mask1(i-2));
}

/*
aaaa
aaaaaa
aaaaaaa
aaaaaaaa()
*/

int stream ();
int followed ();
int compound ();
int or_operator ();
int repeat_operator ();

int or_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_SQUARE_BRACKET_OPEN) {
        statechar [x] = IF_BEGIN;
        x++;
        or_operator ();
        if (gettoken () != LEX_COMMA) {
            fprintf (stderr, "Komma vergessen");
            exit (1);
        }
        statechar [x] = IF_ELSE;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_SQUARE_BRACKET_CLOSE) {
            fprintf (stderr, "Klammer vergessen ]");
            exit (1);
        }
        statechar [x] = IF_END;
        x++;
        repeat_operator ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        repeat_operator ();
    }
}


int repeat_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_STAR) {
        statechar [x] = REPEAT_BEGIN;
        x++;
        stream ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        stream ();
    }
}

int stream () {
    int r = 0;
    r = compound ();
    r |= followed ();
    if (r) {
        or_operator();
    }
}

int followed () {
    int ch = gettoken ();
    int st, xtmp;
    if ((ch >= ASCIIMIN) \&amp;\&amp; (ch <= ASCIIMAX)) {
        statechar [x] = ch;
        x = x+1;
        or_operator ();
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int compound () {
    int ch;
    if ((ch = gettoken ()) == LEX_ROUND_BRACKET_OPEN) {
        statechar [x] = ROUND_BRACKET_OPEN;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_ROUND_BRACKET_CLOSE) {
            fprintf (stderr, "fehler klammer vergessen %c %in", expr [i], i);
            exit (1);
        }
        statechar [x] = ROUND_BRACKET_CLOSE;
        x++;
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int realstates1 [1024];
int realstates2 [1024];
int realstateschar [1024];


int stacks [32][1024];
int stacksp [32];

void push (int stackid, int state) {
  stacks [stackid][stacksp[stackid]++] = state;
}

int pop (int stackid) {
  stacksp [stackid]--;
  return stacks [stackid][stacksp[stackid]];
}

int emptystack (int stackid) {
    return (stacksp[stackid] == 0);
}

void automatgen ( ) {
    int xi = 0;
    int xreal = 0;
    int itwas = 0;
    int tmp;

    for (xi = 0; xi <= x;  ) {
        if ((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) {
            realstates1 [xreal] = xreal+1;
            realstates2 [xreal] = xreal+1;
            realstateschar [xreal] = statechar [xi];
            xreal++;
        }
        else if (statechar [xi] == ROUND_BRACKET_OPEN) {
            push (STACKLASTWASLABEL, ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == REPEAT_BEGIN) {
            xi++;
            if (statechar [xi] == ROUND_BRACKET_OPEN) {
                push (STACKXREALLOOPLABEL, xreal);
                push (STACKLASTWASLABEL, REPEAT_BEGIN);
            }
            else if ((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) {
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal+1;
                realstateschar [xreal] = statechar [xi];
                xreal++;
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal-1;
                realstateschar [xreal] = CONTROLSTATE;
                xreal++;
            }
        }
        else if (statechar [xi] == ROUND_BRACKET_CLOSE) {
            if ((itwas = pop (STACKLASTWASLABEL)) == REPEAT_BEGIN) {
                realstateschar [xreal] = CONTROLSTATE;
                realstates1 [xreal] = pop (STACKXREALLOOPLABEL);
                realstates2 [xreal] = xreal+1;
                xreal++;
            }
            else if (itwas = ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == IF_BEGIN) {
            realstateschar [xreal] = CONTROLSTATE;
            realstates1 [xreal] = xreal+1;
            push (STACKXREALIFBEGIN, xreal);
            xreal++;
        }
        else if (statechar [xi] == IF_ELSE) {
            realstates2 [pop (STACKXREALIFBEGIN)] = xreal;
            push (STACKXREALELSEBEGIN, xreal-1);
        }
        else if (statechar [xi] == IF_END) {
            realstates2 [tmp = pop (STACKXREALELSEBEGIN)] = xreal;
            realstates1 [tmp] = xreal;
        }
        xi++;
    }
    realstateschar [xreal] = REALEND;
    realstates1 [xreal] = REALEND;
    realstates2 [xreal] = REALEND;


    int k;
    for (k = 0;  k <= xreal;  k++) {
        printf ("(%i, ", realstateschar [k]);
        printf ("%i, ", realstates1 [k]);
        printf ("%i)n", realstates2 [k]);
    }

}

int xtext = 0;

int automat (int xreal) {
    int xtext2;
    int flag = 0;

    for (;  (realstates1 [xreal] != REALEND) \&amp;\&amp; (realstates2 [xreal] != REALEND) \&amp;\&amp; (xtext < strlen(text));  xreal = realstates1 [xreal], xtext++) {
        if (realstateschar [xreal] == text [xtext])
            printf ("succeded: %c %sn", text [xtext], text + xtext);
        else if ((realstateschar [xreal] == CONTROLSTATE) \&amp;\&amp; (realstates1 [xreal] != realstates2 [xreal])) {
            xtext2 = xtext;
            flag = (automat (realstates1 [xreal]) == 1);
            xtext = xtext2;
            flag |= (automat (realstates2 [xreal]) == 1);

            if (flag == 0) return -1;
            else return 1;
        }
        else if (realstates1 [xreal] == REALEND ) return 1;
        else {return -1;}
    }
    if (realstates1 [xreal] == REALEND) {
        return 1;
    }
    else {

        return -1;
    }
}

int main (void) {
    int k, l;
    initstates ();
    or_operator (0);

    for (l = 0;  l <= x;  l++) {
        if (statechar [l] == ROUND_BRACKET_OPEN)
            printf ("(ROUND_BRACKET_OPEN, %2i)n", l);
        else if (statechar [l] == ROUND_BRACKET_CLOSE)
            printf ("(ROUND_BRACKET_CLOSE, %2i)n", l);
        else if (statechar [l] == REPEAT_BEGIN)
            printf ("(REPEAT_BEGIN, %2i)n", l);
        else
            printf ("(%2c %2i)n", statechar [l], l);
    }
    automatgen ();
    printf ("%in", automat (0));
}