#!/usr/bin/env python # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - """\ Very first try to parse delphi source """ __author__ = "Benoit Kogut-Kubiak" __email__ = "benoit.kogutkubiak@netasq.com" __version__ = "$Revision: 0.0 $"[11:-2] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - import string import tpg # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class DeclSection(tpg.Parser): r""" # Can't use NamedGroupLexer anymore, since we've reached (actually # 'went far beyond' would be a better statement) the feared 100 named # groups limit [TPG help : Chapter 5 Grammar structure] set lexer = Lexer set word_boundary= True set lexer_ignorecase= True # SEPARATORS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - separator spaces '[\s\n]+' ; separator sngleComment '//.*?(?:\n|$)' ; # (?s) => DOTALL separator multiComment '(?s)(?:\(\*.*?\*\)|\{.*?\})' ; # TOKENS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Hopefully we won't need to parse assembler within delphi code; Sadly # we have to EXPLICTLY take out comments to avoid any wrong 'end' match token ASSEMBLER '(?s)\bASM\b(?:(?://|;).*?(?:\n|$)|\(\*.*?\*\)|\{.*?\}|.)*?\bEND\b' ; token VREAL '\d+\.\d+(?:e[-+]?\d+)?|\d+e[-+]?\d+' ; token VINT '\d+' ; token VBOOL '\b(?:TRUE|FALSE)\b' ; # no backslashing madness :) token VSTR '(?:\'[^\n]*?\')+' ; # parenthesis tokens should not be confused with multi line comment delimiters token LP '\((?!\*)' ; token RP '(?=' ; token NE '<>(?!=)' ; token LS '<(?![>=])' ; token GT '(?(?!=)' ; token EQ '(?:])=' ; token CIRC '\^' ; token AT '@' ; token COMMA ',' ; token AFFECT ':=' ; token COLON ':(?!=)' ; token SCOLN ';' ; # .. must be matched before . token RANGE '\.\.' ; # avoid .. and . confusion token DOT '(? StmtList/s ; # DeclSection - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - DeclSection/sd -> LabelDecl/sd | ConstSection/sd | TypeSection/sd #~ | VarSection/sd #~ | ProcDeclSection/sd ; # Constants Delaration - - - - - - - - - - - - - - - - - - - - - - - - - ConstSection/sc -> 'CONST'/sc ( ConstantDecl/cd SCOLN $ sc = 'CONST %s;' % (cd,) )* ; ConstantDecl/cd -> ID/id EQ ConstExpr/ce $ cd = '%s= %s' % (id, ce) | ID/id COLON TypeId/tp EQ TypedConstant/ct $ cd = '%s : %s= %s' % (id, tp, ct) ; TypedConstant/ct -> ConstExpr/ct | ArrayConstant/ct | RecordConstant/ct ; ArrayConstant/ca -> LP ArrayConstantVal/cav RP $ ca = '(%s)' % (cav,) ; ArrayConstantVal/cav -> TypedConstant/cav ( COMMA TypedConstant/ct $ cav = '%s, %s' % (cav, ct) )* ; RecordConstant/cr -> LP RP $ cr = '()' | LP RecordConstantVal/crv RP $ cr = '(%s)' % (crv,) ; RecordConstantVal/crv -> RecordFieldConstant/crv ( RecordFieldConstant/crf $ crv = '%s %s' % (crv, crf) )* ; RecordFieldConstant/crf -> Ident/id COLON TypedConstant/ct SCOLN $ crf = '%s : %s;' % (id, ct) ; # Type Delaration - - - - - - - - - - - - - - - - - - - - - - - - - - - TypeSection/st -> 'TYPE'/st ( TypeDecl/td SCOLN $ st = 'TYPE %s;' % (td,) )* ; TypeDecl/td -> ID/id EQ TypeKind/tk $ td = '%s= %s' % (id, tk) ; TypeKind/tk Type/tk #~ | RestrictedType/tk ; Type/t -> TypeId/t | SimpleType/t #~ | StrucType/t | PointerType/t | StringType/t #~ | ProcedureType/t | VariantType/t | ClassRefType/t ; RestrictedType/rt -> #~ ObjectType/rt #~ | ClassType/rt #~ | InterfaceType/rt ; # Misc Type Delaration - - - - - - - - - - - - - - - - - - - - - - - - ArrayType/ta -> 'ARRAY' 'OF' Type/t $ ta = 'ARRAY OF %s' % (t,) | 'ARRAY' LB OrdinalTypeList/lto RB 'OF' Type/t $ ta = 'ARRAY [%s] OF %s' % (lto,t) ; PointerType/tp -> CIRC TypeId/id $ tp = '^%s' %(id,) ; StringType/ts -> 'STRING' LB ConstExpr/ce RB $ ts = 'STRING[%s]' % (ce,) | StringId/ts ; VariantType/tv -> # scroll down to bottom VariantId/tv ; FileType/tf -> 'FILE' 'OF' TypeId/id $ tf = 'FILE OF %s' % (id,) ; ClassRefType/tcr -> 'CLASS' 'OF' TypeId/id $ tcr = 'CLASS OF %s' % (id,) ; # 'Simple' Type Delaration - - - - - - - - - - - - - - - - - - - - - - SimpleType/ts -> # RealId : scroll down to grammar bottom RealId/ts | OrdinalType/ts ; OrdinalType/to -> OrdinalId/to | SubrangeType/to | EnumeratedType/to ; OrdinalTypeList/lto -> OrdinalType/lto ( COMMA OrdinalType/to $ lto = '%s, %s' % (lto, to) )* ; SubRangeType/tsr -> ConstExpr/lce RANGE ConstExpr/rce $ tsr = '%s..%s' % (lce, rce) ; EnumeratedType/te -> LP EnumTypeItemList/leti RP $ te = '(%s)' % (leti,) ; EnumTypeItem/eti -> Ident/eti ( EQ ConstExpr/ce $ eti = '%s= %s' %(eti, ce) )? ; EnumTypeItemList/leti -> EnumTypeItem/leti ( COMMA EnumTypeItem/eti $ leti = '%s, %s' % (leti, eti) )* ; # Statement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Statement/s -> LabelId COLON Statement/s | SimpleStatement/s | StructStmt/s | $ s = '' ; StmtList/ls -> Statement/ls ( SCOLN Statement/s $ ls = '%s;\n%s' % (ls, s) )* ; SimpleStatement/ss -> # !WARNING rules order does matter here Designator/d AFFECT Expression/e $ ss = '%s:= %s' % (d, e) | ProcedureCall/ss | 'INHERITED'/ss | GotoStmt/ss ; StructStmt/ss -> CompoundStmt/ss | ConditionalStmt/ss | LoopStmt/ss | WithStmt/ss | TryFinallyStmt/ss | TryExceptStmt/ss | AsmStmt/ss | RaiseStmt/ss ; CompoundStmt/sc -> 'BEGIN' StmtList/sc 'END' $ sc = 'BEGIN %s END' % (sc,) ; # ConditionalStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - ConditionalStmt/cs -> IfStmt/cs | CaseStmt/cs ; IfStmt/ifs -> 'IF' Expression/e 'THEN' IfStmtThen/ift $ ifs = 'IF %s THEN %s' % (e, ift) ; IfStmtThen/ift -> Statement/ift ( 'ELSE' Statement/s $ ift = '%s ELSE %s' % (ift, s) )? ; CaseStmt/cs -> 'CASE' Expression/e 'OF' CaseSelectorList/lcs CaseStmtTail/cst $ cs = 'CASE %s OF %s %s' % (e, lcs, cst) ; CaseStmtTail/cst -> 'ELSE' StmtList/sl 'END' $ cst = 'ELSE %s END' % (sl,) | 'END'/cst ; CaseSelectorList/lcs -> CaseSelector/lcs ( CaseSelector/cs $ lcs = '%s %s' % (lcs, cs) )* ; CaseSelector/cs -> CaseLabelList/lcl COLON Statement/s SCOLN $ cs = '%s : %s;' % (lcl, s) ; CaseLabelList/lcl -> CaseLabel/lcl ( COMMA CaseLabel/cl $ lcl = '%s, %s' % (lcl, cl) )* ; CaseLabel/cl -> ConstExpr/cl ( RANGE ConstExpr/ce $ cl = '%s..%s' % (cl, ce) )? ; # LoopStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - LoopStmt/sl -> RepeatStmt/sl | WhileStmt/sl | ForStmt/sl ; RepeatStmt/sr -> 'REPEAT' Statement/sr 'UNTIL' Expression/e $ sr = 'REPEAT %s UNTIL (%s)' % (sr, e) ; WhileStmt/sw -> 'WHILE' Expression/sw 'DO' Statement/s $ sw = 'WHILE (%s) DO %s\n' % (sw, s) ; ForStmt/sf -> 'FOR' QualId/i AFFECT Expression/b FOR_RANGE/d Expression/e 'DO' Statement/s $ sf = 'FOR %s:= %s %s %s DO %s' % (i, b, d, e, s) ; # WithStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - WithStmt/sw -> 'WITH' IdentList/li 'DO' Statement/s $ sw = 'WITH %s DO %s' % (li, s) ; # Exception related Statements - - - - - - - - - - - - - - - - - - - - - RaiseStmt/sr -> 'RAISE'/sr ( RaiseStmtTail/srt $ sr = 'RAISE %s' % (srt,) )? ; RaiseStmtTail/srt -> Exception/srt ( 'AT' Address/a $ srt = '%s AT %s' % (srt, a) )? ; Exception/e -> # procedure call "TException.Create(...)" Designator/pc Arguments/a $ e = '%s%s' % (pc, a) # single designator | Designator/e ; TryFinallyStmt/stf -> 'TRY' StmtList/ts 'FINALLY' StmtList/fs 'END' $ stf = 'TRY %s FINALLY %s END' % (ts, fs) ; TryExceptStmt/ste -> 'TRY' StmtList/ts 'EXCEPT' TryExceptTail/tet $ ste = 'TRY %s EXCEPT %s' % (ts, tet) ; TryExceptTail/tet -> ExceptBlock/be 'END' $ tet = '%s \nEND' % (be,) | StmtList/ls 'END' $ tet = '%s END' % (ls,) ; ExceptBlock/be -> ExceptSelectorList/be ( 'ELSE' StmtList/ls $ be = '%s\nELSE %s' % (be, ls) )? ; ExceptSelectorList/esl -> ExceptSelector/esl ( SCOLN ExceptSelector/es $ esl = '%s; %s' % (esl, es) )* ; ExceptSelector/es -> # fsckin optionnal last ';' 'ON' ExceptType/te 'DO' Statement/s $ es = '\nON %s DO %s' % (te, s) | $ es = '' ; ExceptType/te -> ID/id COLON TypeId/t $ te = '%s: %s' % (id, t) | TypeId/te ; # AsmStmt - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - AsmStmt/asm -> # see token definition ASSEMBLER/asm $ asm = '[ASM_CODE]' ; # Expression - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Expression/e -> SimpleExpr/e ( RelOp/op SimpleExpr/se $ e = '[E: %s.%s.%s]' % (op, e, se) )* ; ExprList/le -> Expression/le ( COMMA Expression/e $ le = '%s, %s' % (le, e) )* ; # A constant expression is an expression that the compiler can evaluate # without executing the program in which it occurs. # Constant expressions include numerals, character strings, true # constants, values of enumerated types, the special constants True, # False, and nil, and expressions built exclusively from these # elements with operators, typecasts, and set constructors. # Constant expressions cannot include variables, pointers, or function # calls, except calls to the following predefined functions: # # Abs, Chr, Hi High, Length, Lo Low, Odd, Ord Pred, Round, SizeOf, # Succ, Swap, Trunc ConstExpr/ce -> # Blame me, I choosed the easy way Expression/ce ; SimpleExpr/se -> '[+-]'/op SimpleExpr/se $ se = '%s%s' % (op, se) | Term/se ( AddOp/op Term/t $ se = '[SE: %s.%s.%s]' % (op, se, t) )* ; Term/t -> Factor/t ( MulOp/op Factor/f $ t = '[T: %s.%s.%s]' % (op, t, f) )* ; Factor/f -> # procedure call Designator/pc Arguments/a $ f = '[CALL %s%s]' % (pc, a) # variable | Designator/f $ f = '[VAR %s]' % (f,) | Address/f | Number/f | Boolean/f | String/f | NIL/f | LP Expression/f RP $ f = '(%s)' % (f,) | NOT Factor/f $ f = 'not %s' % (f,) | SetConstructor/f | TypeId/tid LP Expression/e RP $ f = '%s(%s)' % (tid, e) ; # Sets - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - SetConstructor/sc -> LB SetElement/sc RB $ sc = '[%s]' % (sc,) | LB SetElement/sc ( COMMA SetElement/se $ sc = '[%s, %s]' % (sc, se) )+ RB ; SetElement/se -> Expression/se ( RANGE Expression/e $se = '%s..%s' % (se, e) )? ; # ProcedureCall - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ProcedureCall/pc -> Designator/pc ( Arguments/a $ pc = '%s%s' % (pc, a) )? ; Arguments/a -> LP ArgsTail/at $ a = '(%s' % (at,) ; ArgsTail/at -> ExprList/le RP $ at = '%s)' % (le,) | RP/at ; # Goto / Label - - - - - - - - - - - - - - - - - - - - - - - - - - - - - LabelDecl/ld -> 'LABEL' LabelId/lid $ ld = 'LABEL %s' % (lid,) ; GotoStmt/sg -> 'GOTO' LabelId/lid $ sg = 'GOTO %s' % (lid,) ; # Operators - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - RelOp/op -> LE/op | GE/op | NE/op | LS/op | GT/op | EQ/op | 'IN'/op | 'IS'/op | 'AS'/op ; AddOp/op -> '[+-]'/op | 'OR'/op | 'XOR'/op ; MulOp/op -> SLASH/op | STAR/op | 'DIV'/op | 'MOD'/op | 'AND'/op | 'SHL'/op | 'SHR'/op ; # Misc type keywords - - - - - - - - - - - - - - - - - - - - - - - - - - RealId/ri -> 'REAL48'/ri | 'REAL'/ri | 'SINGLE'/ri | 'DOUBLE'/ri | 'EXTENDED'/ri | 'CURRENCY'/ri | 'COMP'/ri ; OrdinalId/oi -> 'SHORTINT'/oi | 'SMALLINT'/oi | 'INTEGER'/oi | 'CARDINAL'/oi | 'BYTE'/oi | 'LONGINT'/oi | 'INT64'/oi | 'WORD'/oi | 'BOOLEAN'/oi | 'BYTEBOOL'/oi | 'WORDBOOL'/oi | 'LONGBOOL'/oi | 'BOOL'/oi | 'CHAR'/oi | 'WIDECHAR'/oi | 'LONGWORD'/oi | 'PCHAR'/oi | 'PWIDECHAR'/oi ; StringId/si -> 'STRING'/si | 'ANSISTRING'/si | 'SHORTSTRING'/si | 'WIDESTRING'/si ; VariantId/vi -> 'VARIANT'/vi | 'OLEVARIANT'/vi ; # Class member visibility - - - - - - - - - - - - - - - - - - - - - - - - ClassVisibility/cv -> 'PUBLIC'/cv | 'PROTECTED'/cv | 'PRIVATE'/cv | 'PUBLISHED'/cv ; # Directives - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directive/d -> 'CDECL'/d | 'REGISTER'/d | 'DYNAMIC'/d | 'VIRTUAL'/d | 'EXPORT'/d | 'EXTERNAL'/d | 'FAR'/d | 'FORWARD'/d | 'MESSAGE'/d | 'OVERRIDE'/d | 'OVERLOAD'/d | 'PASCAL'/d | 'REINTRODUCE'/d | 'SAFECALL'/d | 'STDCALL'/d ; # Idents and Misc. Parsing - - - - - - - - - - - - - - - - - - - - - - - Address/a -> AT Designator/d $ a = '@%s' % (d,) ; Designator/d -> QualId/d ( DesignatorTail/dt $ d = '%s%s' % (d,dt) )* ; DesignatorTail/dt -> DOT Ident/id $ dt = '.%s' % (id,) | LB ExprList/el RB $ dt = '[%s]' % (el,) | CIRC/dt ; LabelId/lid -> ID/lid ; QualId/qid -> UnitId/uid DOT QualId/qid $ qid = '%s.%s' % (uid, qid) | Ident/qid ; UnitId/uid -> ID/uid ; TypeId/tid -> UnitId/uid DOT TypeId/tid $ tid = '%s.%s' % (uid, tid) | ID/tid ; Ident/id -> ID/id ( DOT ID/i $ id = '%s.%s' % (id, i) )* ; IdentList/lid -> Ident/lid ( COMMA Ident/id $ lid = '%s, %s' % (lid, id) )* ; Number/n -> VINT/n | VREAL/n ; Boolean/b -> VBOOL/b ; String/s -> VSTR/s ; """ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if __name__ == '__main__' : stmt= DeclSection() file= open('./DeclSection.txt') try : #~ try : print stmt(file.read()) #~ except Exception, e : #~ print e finally : file.close() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -