#!/usr/bin/env python # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - """\ Very first try to parse delphi source """ __author__ = "Benoit Kogut-Kubiak" __email__ = "benoit.kogutkubiak@netasq.com" __version__ = "$Revision: 0.0 $"[11:-2] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - import string import tpg # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class Statement(tpg.Parser): r""" set word_boundary= True set lexer_ignorecase= True # SEPARATORS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - separator spaces '[\s\n]+' ; separator sngleComment '//.*?(?:\n|$)' ; # (?s) => DOTALL separator multiComment '(?s)(?:\(\*.*?\*\)|\{.*?\})' ; # TOKENS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Hopefully we won't need to parse assembler within delphi code; Sadly # we have to EXPLICTLY take out comments to avoid any wrong 'end' match token ASSEMBLER '(?s)\bASM\b(?:(?://|;).*?(?:\n|$)|\(\*.*?\*\)|\{.*?\}|.)*?\bEND\b' ; token VREAL '\d+\.\d+(?:e[-+]?\d+)?|\d+e[-+]?\d+' ; token VINT '\d+' ; token VBOOL '\b(?:TRUE|FALSE)\b' ; # no backslashing madness :) token VSTR '(?:\'[^\n]*?\')+' ; # parenthesis tokens should not be confused with multi line comment delimiters token LP '\((?!\*)' ; token RP '(?=' ; token NE '<>(?!=)' ; token LS '<(?![>=])' ; token GT '(?(?!=)' ; token EQ '(?:])=' ; token CIRC '\^' ; token AT '@' ; token COMMA ',' ; token AFFECT ':=' ; token COLON ':(?!=)' ; token SCOLN ';' ; # .. must be matched before . token RANGE '\.\.' ; # avoid .. and . confusion token DOT '(? StmtList/s ; # Statement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Statement/s -> LabelId COLON Statement/s | SimpleStatement/s | StructStmt/s | $ s = '' ; StmtList/ls -> Statement/ls ( SCOLN Statement/s $ ls = '%s;\n%s' % (ls, s) )* ; SimpleStatement/ss -> # !WARNING rules order does matter here Designator/d AFFECT Expression/e $ ss = '%s:= %s' % (d, e) | ProcedureCall/ss | 'INHERITED'/ss | 'GOTO' LabelId/ss $ ss = 'GOTO %s' % (ss,) ; StructStmt/ss -> CompoundStmt/ss | ConditionalStmt/ss | LoopStmt/ss | WithStmt/ss | TryFinallyStmt/ss | TryExceptStmt/ss | AsmStmt/ss | RaiseStmt/ss ; CompoundStmt/sc -> 'BEGIN' StmtList/sc 'END' $ sc = 'BEGIN %s END' % (sc,) ; # ConditionalStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - ConditionalStmt/cs -> IfStmt/cs | CaseStmt/cs ; IfStmt/ifs -> 'IF' Expression/e 'THEN' IfStmtThen/ift $ ifs = 'IF %s THEN %s' % (e, ift) ; IfStmtThen/ift -> Statement/ift ( 'ELSE' Statement/s $ ift = '%s ELSE %s' % (ift, s) )? ; CaseStmt/cs -> 'CASE' Expression/e 'OF' CaseSelectorList/lcs CaseStmtTail/cst $ cs = 'CASE %s OF %s %s' % (e, lcs, cst) ; CaseStmtTail/cst -> 'ELSE' StmtList/sl 'END' $ cst = 'ELSE %s END' % (sl,) | 'END'/cst ; CaseSelectorList/lcs -> CaseSelector/lcs ( CaseSelector/cs $ lcs = '%s %s' % (lcs, cs) )* ; CaseSelector/cs -> CaseLabelList/lcl COLON Statement/s SCOLN $ cs = '%s : %s;' % (lcl, s) ; CaseLabelList/lcl -> CaseLabel/lcl ( COMMA CaseLabel/cl $ lcl = '%s, %s' % (lcl, cl) )* ; CaseLabel/cl -> ConstExpr/cl ( RANGE ConstExpr/ce $ cl = '%s..%s' % (cl, ce) )? ; # LoopStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - LoopStmt/sl -> RepeatStmt/sl | WhileStmt/sl | ForStmt/sl ; RepeatStmt/sr -> 'REPEAT' Statement/sr 'UNTIL' Expression/e $ sr = 'REPEAT %s UNTIL (%s)' % (sr, e) ; WhileStmt/sw -> 'WHILE' Expression/sw 'DO' Statement/s $ sw = 'WHILE (%s) DO %s\n' % (sw, s) ; ForStmt/sf -> 'FOR' QualId/i AFFECT Expression/b FOR_RANGE/d Expression/e 'DO' Statement/s $ sf = 'FOR %s:= %s %s %s DO %s' % (i, b, d, e, s) ; # WithStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - WithStmt/sw -> 'WITH' IdentList/li 'DO' Statement/s $ sw = 'WITH %s DO %s' % (li, s) ; # Exception related Statements - - - - - - - - - - - - - - - - - - - - - RaiseStmt/sr -> 'RAISE'/sr ( RaiseStmtTail/srt $ sr = 'RAISE %s' % (srt,) )? ; RaiseStmtTail/srt -> Exception/srt ( 'AT' Address/a $ srt = '%s AT %s' % (srt, a) )? ; Exception/e -> # procedure call "TException.Create(...)" Designator/pc Arguments/a $ e = '%s%s' % (pc, a) # single designator | Designator/e ; TryFinallyStmt/stf -> 'TRY' StmtList/ts 'FINALLY' StmtList/fs 'END' $ stf = 'TRY %s FINALLY %s END' % (ts, fs) ; TryExceptStmt/ste -> 'TRY' StmtList/ts 'EXCEPT' TryExceptTail/tet $ ste = 'TRY %s EXCEPT %s' % (ts, tet) ; TryExceptTail/tet -> ExceptBlock/be 'END' $ tet = '%s \nEND' % (be,) | StmtList/ls 'END' $ tet = '%s END' % (ls,) ; ExceptBlock/be -> ExceptSelectorList/be ( 'ELSE' StmtList/ls $ be = '%s\nELSE %s' % (be, ls) )? ; ExceptSelectorList/esl -> ExceptSelector/esl ( SCOLN ExceptSelector/es $ esl = '%s; %s' % (esl, es) )* ; ExceptSelector/es -> # fsckin optionnal last ';' 'ON' ExceptType/te 'DO' Statement/s $ es = '\nON %s DO %s' % (te, s) | $ es = '' ; ExceptType/te -> ID/id COLON TypeId/t $ te = '%s: %s' % (id, t) | TypeId/te ; # AsmStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - AsmStmt/asm -> # see token definition ASSEMBLER/asm $ asm = '[ASM_CODE]' ; # Expression - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Expression/e -> SimpleExpr/e ( RelOp/op SimpleExpr/se $ e = '[E: %s.%s.%s]' % (op, e, se) )* ; ExprList/le -> Expression/le ( COMMA Expression/e $ le = '%s, %s' % (le, e) )* ; # A constant expression is an expression that the compiler can evaluate # without executing the program in which it occurs. # Constant expressions include numerals, character strings, true # constants, values of enumerated types, the special constants True, # False, and nil, and expressions built exclusively from these # elements with operators, typecasts, and set constructors. # Constant expressions cannot include variables, pointers, or function # calls, except calls to the following predefined functions: # # Abs, Chr, Hi High, Length, Lo Low, Odd, Ord Pred, Round, SizeOf, # Succ, Swap, Trunc ConstExpr/ce -> # Blame me, I choosed the easy way Expression/ce ; SimpleExpr/se -> '[+-]'/op SimpleExpr/se $ se = '%s%s' % (op, se) | Term/se ( AddOp/op Term/t $ se = '[SE: %s.%s.%s]' % (op, se, t) )* ; Term/t -> Factor/t ( MulOp/op Factor/f $ t = '[T: %s.%s.%s]' % (op, t, f) )* ; Factor/f -> # procedure call Designator/pc Arguments/a $ f = '[CALL %s%s]' % (pc, a) # variable | Designator/f $ f = '[VAR %s]' % (f,) | Address/f | Number/f | Boolean/f | String/f | NIL/f | LP Expression/f RP $ f = '(%s)' % (f,) | NOT Factor/f $ f = 'not %s' % (f,) | SetConstructor/f | TypeId/tid LP Expression/e RP $ f = '%s(%s)' % (tid, e) ; # Sets - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - SetConstructor/sc -> LB SetElement/sc RB $ sc = '[%s]' % (sc,) | LB SetElement/sc ( COMMA SetElement/se $ sc = '[%s, %s]' % (sc, se) )+ RB ; SetElement/se -> Expression/se ( RANGE Expression/e $se = '%s..%s' % (se, e) )? ; # ProcedureCall - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ProcedureCall/pc -> Designator/pc ( Arguments/a $ pc = '%s%s' % (pc, a) )? ; Arguments/a -> LP ArgsTail/at $ a = '(%s' % (at,) ; ArgsTail/at -> ExprList/le RP $ at = '%s)' % (le,) | RP/at ; # Operators - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - RelOp/op -> LE/op | GE/op | NE/op | LS/op | GT/op | EQ/op | 'IN'/op | 'IS'/op | 'AS'/op ; AddOp/op -> '[+-]'/op | 'OR'/op | 'XOR'/op ; MulOp/op -> SLASH/op | STAR/op | 'DIV'/op | 'MOD'/op | 'AND'/op | 'SHL'/op | 'SHR'/op ; # Idents and Misc. Parsing - - - - - - - - - - - - - - - - - - - - - - - Address/a -> AT Designator/d $ a = '@%s' % (d,) ; Designator/d -> QualId/d ( DesignatorTail/dt $ d = '%s%s' % (d,dt) )* ; DesignatorTail/dt -> DOT Ident/id $ dt = '.%s' % (id,) | LB ExprList/el RB $ dt = '[%s]' % (el,) | CIRC/dt ; LabelId/lid -> ID/lid ; QualId/qid -> UnitId/uid DOT QualId/qid $ qid = '%s.%s' % (uid, qid) | Ident/qid ; UnitId/uid -> ID/uid ; TypeId/tid -> UnitId/uid DOT TypeId/tid $ tid = '%s.%s' % (uid, tid) | ID/tid ; Ident/id -> ID/id ( DOT ID/i $ id = '%s.%s' % (id, i) )* ; IdentList/lid -> Ident/lid ( COMMA Ident/id $ lid = '%s, %s' % (lid, id) )* ; Number/n -> VINT/n | VREAL/n ; Boolean/b -> VBOOL/b ; String/s -> VSTR/s ; """ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if __name__ == '__main__' : stmt= Statement() file= open('./statement.txt') try : #~ try : print stmt(file.read()) #~ except Exception, e : #~ print e finally : file.close() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -