バイナリ解析器ジェネレータ

世界で最も幸せなプログラムは、プログラムを書くプログラムである

30分プログラム、その110。バイナリデータのパーサを自動生成してみる。

id:mzp:20070816:classfileがほとんど手書きで書けそうだったので。

使い方

$ cat classfile.h
ClassFile{
  u4 magic;
  u2 minor_version;
  u2 major_version;
  u2 constant_pool_count;
}

$ mkparser classfile.h > classfile.hs

$ ghc --make runner

$ runner Test.class
magic:  -889275714      0xcafebabe
minor_version:  0       0x0
major_version:  49      0x31
constant_pool_count:    34      0x22

ヘッダファイルにstructがないのは、Java仮想マシン仕様 (The Java series)に合わせたからです。

あと、classfile.hsをコンパイルするためにいくつかの補助モジュールを使っています。

binary.hs

前回書いたやつから、一部とりだしたモジュール。

module Binary(u1,u2,u4,uN,BParser) where
import Control.Monad
import Data.Word
import Data.Bits
import Text.ParserCombinators.Parsec

x << n = shift x n

type BParser a = GenParser Word8 () a

bsatisfy :: (Word8 -> Bool) -> BParser Word8
bsatisfy f = tokenPrim 
             (\s -> show s) 
             (\pos _ _ -> incSourceLine pos 1) 
             (\s -> if f s then Just s else Nothing)

u1 :: BParser Int
u1 = bsatisfy (const True) >>= return.fromIntegral
             
uN :: Int->BParser Int
uN n = do xs <- replicateM n u1
          return $ foldl1 (\x y-> (x<<8)+y) $ take n xs

u2 :: BParser Int
u2 = uN 2

u4 :: BParser Int
u4 = uN 4
runner.hs

main関数。

import Classfile
import System.Environment
main = do (x:_) <- getArgs
          c <- readFile x
          case parse x $ map (fromIntegral.fromEnum) c of
            Left err -> print err
            Right x -> print x 

ソースコード

自動生成するコードだと、とてもじゃないけどレイアウトルールは使えないね。

-- Dump Parser Generator
-- 
-- Sample Input:
--
-- ClassFile {
--  u4 magic;
--  u2 minor_version;
-- }

import Data.Char
import Data.List
import Text.ParserCombinators.Parsec
import Text.Printf
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language( javaStyle )
import System.Environment

data JavaType = U1 | U2 | U4 deriving (Read,Show)
data HaskellType = Int deriving Show

data Struct = Struct Name [(JavaType,Name)]  deriving Show
type Name = String

--
haskellType _ = Int

-- string util
-- upcase "abc" -> "ABC"
upcase :: String -> String
upcase = map toUpper

-- downcase "ABC" -> "abc"
downcase :: String -> String
downcase = map toLower

-- captilize "abc" -> "Abc"
capitalize :: String -> String
capitalise [] = []
capitalize (x:xs) = (toUpper x):xs

-- parser
lexer = P.makeTokenParser javaStyle
ident = P.identifier lexer
whiteSpace = P.whiteSpace lexer
semi = P.semi lexer
braces = P.braces lexer

pParam :: Parser (JavaType,Name)
pParam = do t <- ident
            whiteSpace
            name <- ident
            semi
            return (read $ capitalize t,name)

pStruct :: Parser Struct
pStruct = do name <- ident
             xs <- braces $ many pParam
             return $ Struct name xs

run p input filename = case (parse p filename input) of
                         Left err -> error $ show err
                         Right x -> x

f = run pStruct "ClassFile { u4 magic; u2 minor_version; u1 b;}" ""

-- generator
unwordsMap f xs = unwords $ map f xs
join c xs = concat $ intersperse c xs

dataType (Struct name _) = name
parseFunc (Struct name _) = "p"++name
moduleName (Struct name _) = capitalize $ downcase name

gData :: Struct -> String
gData s@(Struct _ xs) = printf "data %s = %s {%s}" 
                           (dataType s)
                           (dataType s)
                           $ join "," $ map field xs
    where field :: (JavaType,Name) -> String
          field (jType,name) = printf "%s::%s" name (show $ haskellType jType)

gShow :: Struct -> String
gShow s@(Struct _ xs) = join "\n" [printf "instance Show %s where{" (dataType s),
                                   printf "show s = printf \"%s\" %s"
                                           (join "\\n" $ map format xs)
                                           (join " " $ map field xs),
                                   "}"]
    where format (_,name) = printf "%s:\t%%d\t0x%%x" name
          field (_,name) = printf "(%s s) (%s s)" name name

gParser :: Struct -> String
gParser s@(Struct _ xs) = printf "%s = do { %s return $ %s %s}"
                             (parseFunc s)
                             (concatMap clause xs)
                             (dataType s)
                             $ unwordsMap (varName.snd) xs
    where varName x = "t_"++x
          clause :: (JavaType,Name) -> String
          clause (jType,name) = printf "%s <- %s;" (varName name) (downcase $show jType)

gHeader :: Struct -> String
gHeader s = join "\n" [printf "module %s(parse) where" (moduleName s),
                       "import Binary",
                       "import Text.Printf",
                       "import qualified Text.ParserCombinators.Parsec as P",
                       printf "parse name input = P.parse %s name input" (parseFunc s)]
g = gData f
h = gParser f
i = gHeader f
-- runner
main = do (x:_) <- getArgs
          content <- readFile x
          let struct = run pStruct content x
          mapM_ putStrLn [gHeader struct,
                          gData struct,
                          gShow struct,
                          gParser struct]

これで自動生成したコードはこうなる。

module Classfile(parse) where
import Binary
import qualified Text.ParserCombinators.Parsec as P
parse name input = P.parse pClassFile name input
data ClassFile = ClassFile {magic::Int,minor_version::Int,major_version::Int,constant_pool_count::Int} deriving Show
pClassFile = do { t_magic <- u4;t_minor_version <- u2;t_major_version <- u2;t_constant_pool_count <- u2; return $ ClassFile t_magic t_minor_version t_major_version t_constant_pool_count}