Hatena::ブログ(Diary)

IT戦記 このページをアンテナに追加 RSSフィード Twitter

2008-03-01

今日は Haskell Hackathon !

経過をここに晒していくよ!

ヽ(゚∀゚)ノウンコー!

lexer を考える

ここを参考に考えるよ!

今回作るやつは

  • Unicode は考えない!
  • オフサイドルールなし!
  • 8 進数なし!
  • 16 進数なし!
  • 指数(0.23e10 みたいなやつ)なし!
  • 文字列内の \ で改行を無効にするやつなし!
  • コメントなし!(ちょw
  • 全部正規表現使うよ!

工エエェェ(´д`)ェェエエ工

気にしない!

正規表現を考えるよ!
var regexps = {
  digit: /\d/,
  decimal: /\d+/,
  float: /\d+\.\d+/,
  integer: /\d+/,
  char: /'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'/,
  string: /"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,
  symbol: /[!#$%&*+\.\/<=>\?@^|-~\\]/,
  small: /[a-z_]/,
  large: /[A-Z]/,
  varid: /[a-z_]\w*/,
  varsym: /[!#$%&*+\.\/<=>\?@^|-~\\]+/
  conid: /[A-Z]\w*/,
  consym: /:[!#$%&*+\.\/<=>\?@^|-~\\]+:/,
  special: /\(|\)|,|;|\[|\]|`|{|}/,
  reservedop: /=>|\.\.|<-|->|::|:|=|\\|\||@|~/,
  literal: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,
  'qvarid or reservedid': /(?:[A-Z]\w*\.)?[a-z_]\w*/,
  qconid: /(?:[A-Z]\w*\.)?[A-Z]\w*/,
  qvarsym: /(?:[A-Z]\w*\.)?[!#$%&*+\.\/<=>\?@^|-~\\]+/
  qconsym: /(?:[A-Z]\w*\.)?:[!#$%&*+\.\/<=>\?@^|-~\\]+:/
  'qvarid or reservedid or qconid': /(?:[A-Z]\w*\.)?[A-Za-z_]\w*/,
  'qvarsym or qconsym': /(?:[A-Z]\w*\.)?(?::[!#$%&*+\.\/<=>\?@^|-~\\]+:|[!#$%&*+\.\/<=>\?@^|-~\\]+)/, 
  'qvarsym or qconsym or reservedop': /=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?(?::[!#$%&*+\.\/<=>\?@^|-~\\]+:|[!#$%&*+\.\/<=>\?@^|-~\\]+)|:/, 
  program: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|-~\\]+:|[!#$%&*+\.\/<=>\?@^|-~\\]+)|./, 
}
できたー゚+.(・∀・)゚+.゚
var source; // <- この source に Haskell のコードを入れる

// これでトークナイズできるお!
var tokens = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|-~\\]+:|[!#$%&*+\.\/<=>\?@^|-~\\]+)|./g);
今回は kmyacc を使うから yylex を作るよ
var tokens;

function prepareTokens(source) {

  // トークナイズするよ!
    tokens = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|-~\\]+:|[!#$%&*+\.\/<=>\?@^|-~\\]+)|./g);

    // 空白消すよ! (Firefox only
    tokens = tokens.filter(/[^\s]/);
}

function yylex() {
    var token = tokens.shift();

    yylex = token;
    if (token == undefined) return 0;

    // keyword
    switch (token) {
    case "infixl": return INFIXL; 
    case "infixr": return INFIXR; 
    case "infix": return INFIXN; 
    case "instance": return TINSTANCE;
    case "class": return TCLASS; 
    case "primitive": return PRIMITIVE;
    case "case": return CASEXP; 
    case "of": return OF;
    case "if": return IF;
    case "then": return THEN;
    case "else": return ELSE;
    case "where": return WHERE;
    case "type": return TYPE;
    case "data": return DATA;
    case "newtype": return TNEWTYPE;
    case "let": return LET;
    case "in": return IN;
    case "deriving": return DERIVING;
    case "default": return DEFAULT;
    case "import": return IMPORT; 
    case "module": return TMODULE;
    case "hiding": return HIDING;
    case "qualified": return QUALIFIED;
    case "as": return ASMOD;
    }

    // literal
    if (token.match(/^\d+(?:\.\d+)?$/)) return NUMLIT;
    if (token.match(/^'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'$/)) return CHARLIT;
    if (token.match(/^"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"$/)) return STRINGLIT;

    // var or con
    if (token.match(/^[!#$%&*+\.\/<=>\?@^|-~\\]+$/)) return VAROP;
    if (token.match(/^[a-z_]\w*$/)) return VARID;
    if (token.match(/^:[!#$%&*+\.\/<=>\?@^|-~\\]+:$/)) return CONOP;
    if (token.match(/^[A-Z]\w*$/)) return CONID;

    // qvar or qcon
    if (token.match(/^(?:[A-Z]\w*\.)?[!#$%&*+\.\/<=>\?@^|-~\\]+$/)) return QVAROP;
    if (token.match(/^(?:[A-Z]\w*\.)?[a-z_]\w*$/)) return QVARID;
    if (token.match(/^(?:[A-Z]\w*\.)?:[!#$%&*+\.\/<=>\?@^|-~\\]+:$/)) return QCONOP;
    if (token.match(/^(?:[A-Z]\w*\.)?[A-Z]\w*$/)) return QCONID;

    // op
    if (token == '::') return COCO;
    if (token == '..') return UPTO;
    if (token == '->') return ARROW;
    if (token == '<-') return FROM;
    if (token == '=>') return IMPLIES;

    return token.charCodeAt(0);
}

もう正規表現書きたくない(゚∀゚)=3ムッハー

parser を考える

Ψ(゚д゚)Ψ「あ」<(゚д゚)>「い」(q゚з゚)p「う」∠( ゚д゚)/「え」(┌゚Д゚)┌「お」

kmyacc 使うよ

http://www005.upp.so-net.ne.jp/kmori/kmyacc/

%token を定義するよ

Hugs のソースから持って来たよ

%token CASEXP     OF         DATA       TYPE       IF
%token THEN       ELSE       WHERE      LET        IN
%token INFIXN     INFIXL     INFIXR     PRIMITIVE  TNEWTYPE
%token DEFAULT    DERIVING   DO         TCLASS     TINSTANCE
%token NUMLIT     CHARLIT    STRINGLIT
%token VAROP      VARID      CONOP      CONID   
%token QVAROP     QVARID     QCONOP     QCONID  
%token COCO       '='        UPTO       '@'        '\\'
%token '|'        '-'        FROM       ARROW      '~'     
%token '!'        IMPLIES    '('        ','        ')'
%token '['        ';'        ']'        '`'    '.'
%token TMODULE    IMPORT     HIDING     QUALIFIED  ASMOD
yacc のルールを書いていくよくるよ

Hugs のソースとここを参考にするよ

start: module
      | modBody
      ;

module: TMODULE modname expspec WHERE '{' modBody '}'
      ;

modname: qconid
      ;

modid: qconid
      | STRINGLIT
      ;

modBody: // empty
      | ';' modBody
      | topDecls
      | impDecls
      | impDecls ';' topDecls
      ;

expspec: // empty
      | '(' ')'
      | '(' ',' ')'
      | '(' exports ')'
      | '(' exports ',' ')'
      ; 

exports: exports ',' export
      | export
      ;

export: qvar
      | qcon
      | qconid '(' UPTO ')'
      | qconid '(' qnames ')'
      | TMODULE modid
      ; 

qnames: // empty
      | ','
      | qnames1
      | qnames1 ','
      ; 

qnames1: qnames1 ',' qname
      | qname
      ; 

qname: qvar
      | qcon
      ;


impDecls: impDecls ';' impDecl
      | impDecls ';'
      | impDecl
      ;

impDecl: IMPORT modid impspec
      | IMPORT modid ASMOD modid impspec 
      | IMPORT QUALIFIED modid ASMOD modid impspec
      | IMPORT QUALIFIED modid impspec
      ;

impspec: // empty
      | HIDING '(' imports ')'
      | '(' imports ')'
      ;

imports: // empty
      | ','
      | imports1 
      | imports1 ','
      ;

imports1: imports1 ',' import
      | import
      ;

import: var
      | CONID 
      | CONID '(' UPTO ')'
      | CONID '(' names ')'
      ;

names: // empty
      | ','
      | names1 
      | names1 ','
      ;

names1: names1 ',' name
      | name
      ;

name: var
      | con
      ;

topDecl: TYPE tyLhs '=' type
      | TYPE tyLhs '=' type IN invars
      | DATA btype2 '=' constrs deriving
      | DATA context IMPLIES tyLhs '=' constrs deriving
      | DATA btype2
      | DATA context IMPLIES tyLhs
      | TNEWTYPE btype2 '=' constr deriving
      | TNEWTYPE context IMPLIES tyLhs '=' constr deriving
      | PRIMITIVE prims COCO topType
      | TCLASS crule fds wherePart 
      | TINSTANCE irule wherePart 
      | DEFAULT '(' dtypes ')' 
      ;

tyLhs: tyLhs varid
      | CONID
      ;

invars: invars ',' invar
      | invar
      ;

invar: var COCO topType
      | var
      ;

constrs: constrs '|' constr
      | constr
      ;

constr: '!' btype conop bbtype
      | btype1    conop bbtype
      | btype2    conop bbtype
      | polyType conop bbtype
      | btype2
      | btype3
      | con '{' fieldspecs '}'
      | con '{' '}'
      ;

btype3: btype2 '!' atype
      | btype2 polyType
      | btype3 atype
      | btype3 '!' atype
      | btype3 polyType
      | '(' CONOP ')'
      ;

bbtype: '!' btype
      | btype
      | polyType
      ;

fieldspecs: fieldspecs ',' fieldspec
      | fieldspec
      ;

fieldspec: vars COCO polyType
      | vars COCO type
      | vars COCO '!' type
      ;

deriving: // empty
      | DERIVING qconid
      | DERIVING '(' derivs0 ')'
      ;

derivs0: // empty
      | derivs
      ;

derivs    : derivs ',' qconid
      | qconid
      ;

prims: prims ',' prim
      | prim
      ; 

prim: var STRINGLIT
      | var
      ; 

crule: context IMPLIES btype2
      | btype2
      ;

irule: context IMPLIES btype2
      | btype2
      ;
dtypes: // empty
      | dtypes1
      ;

dtypes1: dtypes1 ',' type
      | type
      ;

fds: // empty
      | '|' fds1
      ;

fds1: fds1 ',' fd
      | fd
      ;

fd: varids0 ARROW varids0
      ;

varids0: // empty
      | varids0 varid
      ;

topType  : context IMPLIES topType0
      | topType0
      ;

topType0  : polyType ARROW topType0
      | btype1    ARROW topType0
      | btype2    ARROW topType0
      | btype
      ;

polyType: polyType : '(' polyType ')'
      | '(' lcontext IMPLIES type ')'
      ;

varids: varids varid
      | varid
      ;

sigType: context IMPLIES type
      | type
      ;

context: '(' ')'
      | btype2
      | '(' btype2 ')'
      | '(' btypes2 ')'
      | lacks
      | '(' lacks1 ')'
      ;

lcontext: lacks
      | '(' lacks1 ')'
      ;

lacks: varid '\\' varid
      ;

lacks1: btypes2 ',' lacks
      | lacks1  ',' btype2
      | lacks1  ',' lacks
      | btype2  ',' lacks
      | lacks
      ;

type: type1
      | btype2
      ;

type1: btype1
      | bpolyType ARROW type
      | btype1    ARROW type
      | btype2    ARROW type
      ;

btype: btype1
      | btype2
      ;

btype1: btype1 atype
      | atype1
      ;

btype2: btype2 atype
      | qconid
      ;

atype: atype1
      | qconid
      ;

atype1: varid
      | '(' ')'
      | '(' ARROW ')'
      | '(' type1 ')'
      | '(' btype2 ')'
      | '(' tupCommas ')'
      | '(' btypes2 ')'
      | '(' typeTuple ')'
      | '(' tfields ')'
      | '[' type ']'
      | '[' ']'
      | '_' 
      ;

btypes2: btypes2 ',' btype2
      | btype2  ',' btype2
      ;

typeTuple: type1     ',' type
      | btype2    ',' type1
      | btypes2   ',' type1
      | typeTuple ',' type
      ;

gendecl   : INFIXN optDigit ops
      | INFIXL optDigit ops
      | INFIXR optDigit ops
      | vars COCO topType
      ;

optDigit  : NUMLIT
      | // empty
      ;

ops   : ops ',' op
      | op
      ;

vars: vars ',' var
      | var
      ;

decls: '{' decls0 '}'
      | '{' decls1 '}'
      ;

decls0: // empty
      | decls0 ';'
      | decls1 ';'
      ;

decls1: decls0 decl
      ;

decl: gendecl
      | funlhs rhs
      | funlhs COCO type rhs
      | pat0 rhs
      ;

funlhs: funlhs0
      | funlhs1
      | npk
      ;

funlhs0: pat10_vI varop    pat0
      | infixPat varop    pat0
      | NUMLIT   varop    pat0
      | var      varop_pl pat0
      | var      '+'      pat0_INT
      ;

funlhs1: '(' funlhs0 ')' apat
      | '(' funlhs1 ')' apat
      | '(' npk     ')' apat
      | var     apat
      | funlhs1 apat
      ;

rhs: rhs1 wherePart
      ;

rhs1: '=' exp
      | gdrhs
      ;

gdrhs: gdrhs gddef
      | gddef
      ;

gddef: '|' exp0 '=' exp
      ;

wherePart : // empty
      | WHERE decls
      ;

lwherePart : // empty
      | WHERE ldecls
      ;

ldecls: '{' ldecls0 '}'
      | '{' ldecls1 '}'
      ;

ldecls0: // empty
      | ldecls0 ';'
      | ldecls1 ';'
      ;

ldecls1: ldecls0 ldecl
      ;

ldecl: decl
      ;

pat: npk
      | pat_npk
      ;

pat_npk: pat0 COCO type
      | pat0
      ;

npk: var '+' NUMLIT
      ;

pat0: var
      | NUMLIT
      | pat0_vI
      ;

pat0_INT: var
      | pat0_vI
      ;

pat0_vI: pat10_vI
      | infixPat
      ;

infixPat: '-' pat10
      | var qconop pat10
      | var qconop '-' pat10
      | NUMLIT qconop pat10
      | NUMLIT qconop '-' pat10
      | pat10_vI qconop pat10
      | pat10_vI qconop '-' pat10
      | infixPat qconop pat10
      | infixPat qconop '-' pat10
      ;

pat10: fpat
      | apat
      ;

pat10_vI: fpat
      | apat_vI
      ;

fpat: fpat apat
      | gcon apat
      ;

apat: NUMLIT
      | var
      | apat_vI
      ;

apat_vI: var '@' apat
      | gcon
      | qcon '{' patbinds '}'
      | CHARLIT
      | STRINGLIT  
      | '_'
      | '(' pat_npk ')'
      | '(' npk ')'
      | '(' pats2 ')'
      | '[' pats1 ']'
      | '~' apat
      ;

pats2: pats2 ',' pat
      | pat ',' pat
      ;                       

pats1: pats1 ',' pat
      | pat
      ;

patbinds  : // empty
      | patbinds1
      ;

patbinds1: patbinds1 ',' patbind
      | patbind
      ;              

patbind: qvar '=' pat
      | var
      ;

exp: exp0a COCO sigType
      | exp0
      ;
exp0: exp0a
      | exp0b
      ;

exp0a: infixExpa
      | exp10a
      ; 

exp0b: infixExpb
      | exp10b
      ; 

infixExpa : infixExpa qop '-' exp10a
      | infixExpa qop exp10a
      | '-' exp10a
      | exp10a qop '-' exp10a
      | exp10a qop exp10a
      ; 

infixExpb : infixExpa qop '-' exp10b
      | infixExpa qop exp10b
      | '-' exp10b
      | exp10a qop '-' exp10b
      | exp10a qop exp10b
      ; 

exp10a    : CASEXP exp OF '{' alts '}'
      | DO '{' stmts '}'
      | LET ldecls IN exp
      | IF exp then_exp else_exp
      ;

exp10b    : '\\' pats ARROW exp
      | LET ldecls IN exp
      | IF exp then_exp else_exp
      ;

then_exp  : ';' THEN exp
      | THEN exp
      ;

else_exp  : ';' ELSE exp
      | ELSE exp
      ;

pats      : pats apat
      | apat
      ;

appExp    : appExp aexp
      | aexp
      ;

aexp      : qvar
      | qvar '@' aexp
      | '~' aexp
      | '_'
      | gcon
      | qcon '{' fbinds '}'
      | aexp '{' fbinds '}'
      | NUMLIT
      | CHARLIT
      | STRINGLIT
      | '(' exp ')'
      | '(' exps2 ')'
      | '[' list ']'
      | '(' exp10a qop ')'
      | '(' qvarop_mi exp0 ')'
      | '(' qconop exp0 ')'
      ; 

exps2     : exps2 ',' exp
      | exp ',' exp
      ; 

alts      : alts1
      | ';' alts
      ;

alts1     : alts1 ';' alt
      | alts1 ';'
      | alt
      ;

alt   : pat altRhs wherePart
      ;

altRhs    : guardAlts
      | ARROW exp
      ;

guardAlts : guardAlts guardAlt
      | guardAlt
      ;

guardAlt  : '|' exp0 ARROW exp
      ;

stmts     : stmts1
      | ';' stmts
      ;
stmts1    : stmts1 ';' stmt
      | stmts1 ';'
      | stmt
      ;

stmt : exp FROM exp
      | LET ldecls
      | exp
      ;

fbinds: // empty
      | fbinds1
      ;

fbinds1   : fbinds1 ',' fbind
      | fbind
      ;

fbind: var
      | qvar '=' exp
      ;

list      : exp
      | exps2
      | exp zipquals
      | exp         UPTO exp
      | exp ',' exp UPTO
      | exp         UPTO
      | exp ',' exp UPTO exp
      ;

zipquals  : zipquals '|' quals
      | '|' quals
      ;

quals     : quals ',' qual
      | qual
      ;

qual      : exp FROM exp
      | exp
      | LET ldecls
      ;

gcon      : qcon
      | '(' ')'
      | '[' ']'
      | '(' tupCommas ')'
      ; 

tupCommas : tupCommas ','
      | ','
      ; 

varid     : VARID
      | HIDING
      | QUALIFIED
      | ASMOD
      ; 

qconid    : QCONID
      | CONID
      ; 

var   : varid
      | '(' VAROP ')'
      | '(' '+' ')'
      | '(' '-' ')'
      | '(' '!' ')'
      | '(' '.' ')'
      ;

qvar      : QVARID
      | '(' QVAROP ')'
      | var
      ;

con   : CONID
      | '(' CONOP ')'
      ;

qcon      : QCONID
      | '(' QCONOP ')'
      | con
      ;

varop     : '+'
      | '-'
      | varop_mipl
      ;

varop_mi  : '+'
      | varop_mipl
      ;

varop_pl  : '-'
      | varop_mipl
      ;

varop_mipl: VAROP
      | '`' varid '`'
      | '!'
      | '.'
      ;

qvarop    : '-'
      | qvarop_mi
      ;

qvarop_mi : QVAROP
      | '`' QVARID '`'
      | varop_mi
      ;

conop     : CONOP
      | '`' CONID  '`'
      ;

qconop    : QCONOP
      | '`' QCONID '`'
      | conop
      ;

op    : varop
      | conop
      ;

qop   : qvarop
      | qconop
      ;

Hugs のをほぼ、写経してみたが旨くいかない。。。

作戦変更(こつこつやっていく)