バイナリ解析器ジェネレータ
世界で最も幸せなプログラムは、プログラムを書くプログラムである
30分プログラム、その110。バイナリデータのパーサを自動生成してみる。
id:mzp:20070816:classfileがほとんど手書きで書けそうだったので。
使い方
$ cat classfile.h ClassFile{ u4 magic; u2 minor_version; u2 major_version; u2 constant_pool_count; } $ mkparser classfile.h > classfile.hs $ ghc --make runner $ runner Test.class magic: -889275714 0xcafebabe minor_version: 0 0x0 major_version: 49 0x31 constant_pool_count: 34 0x22
ヘッダファイルにstructがないのは、Java仮想マシン仕様 (The Java series)に合わせたからです。
あと、classfile.hsをコンパイルするためにいくつかの補助モジュールを使っています。
binary.hs
前回書いたやつから、一部とりだしたモジュール。
module Binary(u1,u2,u4,uN,BParser) where import Control.Monad import Data.Word import Data.Bits import Text.ParserCombinators.Parsec x << n = shift x n type BParser a = GenParser Word8 () a bsatisfy :: (Word8 -> Bool) -> BParser Word8 bsatisfy f = tokenPrim (\s -> show s) (\pos _ _ -> incSourceLine pos 1) (\s -> if f s then Just s else Nothing) u1 :: BParser Int u1 = bsatisfy (const True) >>= return.fromIntegral uN :: Int->BParser Int uN n = do xs <- replicateM n u1 return $ foldl1 (\x y-> (x<<8)+y) $ take n xs u2 :: BParser Int u2 = uN 2 u4 :: BParser Int u4 = uN 4
runner.hs
main関数。
import Classfile import System.Environment main = do (x:_) <- getArgs c <- readFile x case parse x $ map (fromIntegral.fromEnum) c of Left err -> print err Right x -> print x
ソースコード
自動生成するコードだと、とてもじゃないけどレイアウトルールは使えないね。
-- Dump Parser Generator -- -- Sample Input: -- -- ClassFile { -- u4 magic; -- u2 minor_version; -- } import Data.Char import Data.List import Text.ParserCombinators.Parsec import Text.Printf import qualified Text.ParserCombinators.Parsec.Token as P import Text.ParserCombinators.Parsec.Language( javaStyle ) import System.Environment data JavaType = U1 | U2 | U4 deriving (Read,Show) data HaskellType = Int deriving Show data Struct = Struct Name [(JavaType,Name)] deriving Show type Name = String -- haskellType _ = Int -- string util -- upcase "abc" -> "ABC" upcase :: String -> String upcase = map toUpper -- downcase "ABC" -> "abc" downcase :: String -> String downcase = map toLower -- captilize "abc" -> "Abc" capitalize :: String -> String capitalise [] = [] capitalize (x:xs) = (toUpper x):xs -- parser lexer = P.makeTokenParser javaStyle ident = P.identifier lexer whiteSpace = P.whiteSpace lexer semi = P.semi lexer braces = P.braces lexer pParam :: Parser (JavaType,Name) pParam = do t <- ident whiteSpace name <- ident semi return (read $ capitalize t,name) pStruct :: Parser Struct pStruct = do name <- ident xs <- braces $ many pParam return $ Struct name xs run p input filename = case (parse p filename input) of Left err -> error $ show err Right x -> x f = run pStruct "ClassFile { u4 magic; u2 minor_version; u1 b;}" "" -- generator unwordsMap f xs = unwords $ map f xs join c xs = concat $ intersperse c xs dataType (Struct name _) = name parseFunc (Struct name _) = "p"++name moduleName (Struct name _) = capitalize $ downcase name gData :: Struct -> String gData s@(Struct _ xs) = printf "data %s = %s {%s}" (dataType s) (dataType s) $ join "," $ map field xs where field :: (JavaType,Name) -> String field (jType,name) = printf "%s::%s" name (show $ haskellType jType) gShow :: Struct -> String gShow s@(Struct _ xs) = join "\n" [printf "instance Show %s where{" (dataType s), printf "show s = printf \"%s\" %s" (join "\\n" $ map format xs) (join " " $ map field xs), "}"] where format (_,name) = printf "%s:\t%%d\t0x%%x" name field (_,name) = printf "(%s s) (%s s)" name name gParser :: Struct -> String gParser s@(Struct _ xs) = printf "%s = do { %s return $ %s %s}" (parseFunc s) (concatMap clause xs) (dataType s) $ unwordsMap (varName.snd) xs where varName x = "t_"++x clause :: (JavaType,Name) -> String clause (jType,name) = printf "%s <- %s;" (varName name) (downcase $show jType) gHeader :: Struct -> String gHeader s = join "\n" [printf "module %s(parse) where" (moduleName s), "import Binary", "import Text.Printf", "import qualified Text.ParserCombinators.Parsec as P", printf "parse name input = P.parse %s name input" (parseFunc s)] g = gData f h = gParser f i = gHeader f -- runner main = do (x:_) <- getArgs content <- readFile x let struct = run pStruct content x mapM_ putStrLn [gHeader struct, gData struct, gShow struct, gParser struct]
これで自動生成したコードはこうなる。
module Classfile(parse) where import Binary import qualified Text.ParserCombinators.Parsec as P parse name input = P.parse pClassFile name input data ClassFile = ClassFile {magic::Int,minor_version::Int,major_version::Int,constant_pool_count::Int} deriving Show pClassFile = do { t_magic <- u4;t_minor_version <- u2;t_major_version <- u2;t_constant_pool_count <- u2; return $ ClassFile t_magic t_minor_version t_major_version t_constant_pool_count}