> {-# OPTIONS_GHC -Wall -fno-warn-unused-matches -fno-warn-name-shadowing #-} Scanner for SystemVerilog > module SystemVerilogScanner(scan) where > import Data.Char > import Data.List > import Numeric(readDec, readFloat) > import Text.Regex > import qualified Data.Map as M > import Position > import Error(ErrMsg(..), internalError) > import SystemVerilogTokens > import SystemVerilogKeywords > import FStringCompat > newtype ScannerState = ScannerState (Position, String) > type Scanner = ScannerState -> [SV_Token] Should we present comments as tokens? > preserveComments :: Bool > preserveComments = False Scan is not in a monad because of laziness requirements (space efficiency) > scan :: Position -- initial position > -> String -- input > -> [SV_Token] > scan initPos input = scanMain (ScannerState (initPos, input)) Toplevel scanner function > scanMain :: Scanner > -- end of file > -- note that we don't know its position, so we can't accurately reproduce > -- spaces at the end of the file > scanMain state@(ScannerState (pos, [])) = [] > scanMain state@(ScannerState (pos, '#':input)) > |(isFirstColumn pos)&& (isCPreprocessorDirective input) = > [SV_Token_Error { start_position = pos, > end_position = pos, > errMessage = ECPPDirective}] > -- line comment > scanMain state@(ScannerState (pos, '/':'/':input)) = > scanLineComment pos (eatChars 2 state) > -- block comment > scanMain state@(ScannerState (pos, '/':'*':input)) = > scanBlockComment pos "" (eatChars 2 state) > -- string > scanMain state@(ScannerState (pos, '\"':input)) = > scanString pos "" (eatChars 1 state) > -- system identifier > scanMain state@(ScannerState (pos, input)) | matchesTaskId input = > let (idChars, restOfInput) = span isIdChar input > (end_pos, next_pos) = posAtEndAndAfterString pos idChars > nextScannerState = ScannerState (next_pos, restOfInput) > token = SV_Token_System_Id { start_position = pos, > end_position = end_pos, > name = idChars } > in token : scanMain nextScannerState > -- escaped identifier (SV 3.1a LRM A.9.3, also V2001 LRM section 2.7.1) > scanMain state@(ScannerState (pos, '\\':input)) = > let (idChars, restOfInput) = span (not . isWhitespace) input > (end_pos, next_pos) = posAtEndAndAfterString pos ('\\':idChars) > nextScannerState = ScannerState (next_pos, restOfInput) > token = SV_Token_Id { start_position = pos, > end_position = end_pos, > name = idChars } > in token : scanMain nextScannerState > -- `line compiler directive > scanMain state@(ScannerState (pos, ('`':'l':'i':'n':'e':'(':restOfInput))) > = scanLinePosDirective pos (eatChars 6 state) > scanMain state@(ScannerState (pos, ('`':'l':'i':'n':'e':c:restOfInput))) > | isWhitespace c = scanLineDirective pos (eatChars 6 state) > -- other compiler directives; assume the words are like identifiers > scanMain state@(ScannerState (pos, ('`':c:input))) > | isIdStart c = > let (restDirective, restOfInput) = span isIdChar input > directive = '`':c:restDirective > (end_pos, next_pos) = posAtEndAndAfterString pos directive > token = SV_Token_Directive { start_position = pos, > end_position = end_pos, > name = directive } > nextScannerState = ScannerState (next_pos, restOfInput) > in token : scanMain nextScannerState > -- whitespace > scanMain state@(ScannerState (pos, c:restOfInput)) | isWhitespace c = > scanMain (eatChars 1 state) > -- keyword or simple identifier > scanMain state@(ScannerState (pos, c:input)) | isIdStart c = > let (idRest, restOfInput) = span isIdChar input > idChars = c:idRest > (end_pos, next_pos) = posAtEndAndAfterString pos idChars > token = case M.lookup idChars svKeywords of > Just kw -> SV_Token_Keyword { start_position = pos, > end_position = end_pos, > keyword = kw } > Nothing -> SV_Token_Id { start_position = pos, > end_position = end_pos, > name = idChars } > nextScannerState = ScannerState (next_pos, restOfInput) > in token : scanMain nextScannerState > -- unbased unsized literal ('0, '1, 'x, 'z, ...) > scanMain state@(ScannerState (pos, '\'':c:restOfInput)) > | c /= '_' && isBinXZ_ c = > let (end_pos, next_pos) = posAtEndAndAfterString pos ['\'',c] > token = SV_Token_Number { start_position = pos, > end_position = end_pos, > value = SV_NUM_Repeated (readBit c), > bitwidth = Nothing, > base = Nothing, > signed = False, > originalText = ['\'',c] } > nextScannerState = ScannerState (next_pos, restOfInput) > in token : scanMain nextScannerState > -- unbased number, sized based number, timescale > scanMain state@(ScannerState (pos, d:_)) | isDigit d = scanNumber state > -- unsized based number > scanMain (ScannerState (pos, '\'':char1:char2:rest)) > | base `elem` "dboh" = -- decimal, binary, octal, or hex base > let (consumedChars, restOfInput) = if signed > then (['\'',char1,char2], rest) > else (['\'',char1], char2:rest) > (_ {-end_pos-}, next_pos) = posAtEndAndAfterString pos consumedChars > nextState = ScannerState (next_pos, restOfInput) > in scanBasedNumber pos Nothing consumedChars signed base > nextState > where signed = char1 == 's' || char1 == 'S' > base = toLower (if signed then char2 else char1) > -- operators, parsed greedily > scanMain state@(ScannerState (pos, input@(c:_))) | isSymChar c = > let (opChars, restOfInput) = span isSymChar input > -- XXX using findLongestMatch is inefficient > findLongestMatch table "" revRest = Nothing > findLongestMatch table str revRest = > case M.lookup str svSymbols of > Just op -> Just (op, str, reverse revRest) > Nothing -> findLongestMatch table (init str) (last str : revRest) > in case findLongestMatch svSymbols opChars "" of > Nothing -> > let (end_pos, _ {- next_pos -}) = posAtEndAndAfterString pos opChars > in [SV_Token_Error { start_position = pos, > end_position = end_pos, > errMessage = EBadSymbol opChars}] > Just (op, eatenChars, revRest) -> > let (end_pos, next_pos) = posAtEndAndAfterString pos eatenChars > token = SV_Token_Symbol { start_position = pos, > end_position = end_pos, > symbol = op } > nextScannerState = > ScannerState (next_pos, reverse revRest ++ restOfInput) > in token : scanMain nextScannerState > -- unknown character > scanMain state@(ScannerState (pos, c:_)) = > [SV_Token_Error { start_position = pos, > end_position = pos, > errMessage = EBadLexChar c }] > isCPreprocessorDirective :: String -> Bool > isCPreprocessorDirective rest_of_input = > let > match_init :: String -> Bool > match_init cpp = ((and (zipWith (==) cpp rest_of_input))&& > (notIdChar (drop (length cpp) rest_of_input))) > notIdChar :: String -> Bool > notIdChar [] = True > notIdChar (c:_) = isWhitespace c > in > or (map match_init > ["define" > ,"undef" > ,"include" > ,"line" > ,"pragma" > ,"endif" > ,"ifdef" > ,"ifndef" > ,"if" > ,"elif" > ,"else"]) Generalize recognizing task ids so that we can treat an identifier without a leading $ as a task id > matchesTaskId :: String -> Bool > matchesTaskId ('$':_) = True > -- matchesTaskId ('f':'m':'t':x:_) | not (isIdChar x) = True > matchesTaskId _ = False Fast lookup table of keywords > svKeywords :: M.Map String SV_Keyword > svKeywords = M.fromList [(name, kw) | (kw, name, _) <- svKeywordTable] Fast lookup table of operators > svSymbols :: M.Map String SV_Symbol > svSymbols = M.fromList [(name, op) | (op, name, _) <- svSymbolTable] STRINGS Recognize string, handling escape identifiers: Verilog 2001: \n \t \\ \" \ddd SystemVerilog: \v \f \a \xhh \NEWLINE > scanString :: Position -- init position > -> String -- characters collected so far, reversed > -> Scanner -- the scanner, i.e., state -> tokens > -- end of string > scanString initPos reversedSoFar > state@(ScannerState (pos, '\"':input)) = > let token = SV_Token_String { start_position = initPos, > end_position = pos, > contents = reverse reversedSoFar } > in token : scanMain (eatChars 1 state) > -- escaped newline > -- note that we actually drop the \ and the newline, so we can't output > -- the same characters quite right > scanString initPos reversedSoFar > state@(ScannerState (pos, '\\':'\n':input))= > scanString initPos reversedSoFar (eatChars 2 state) > -- octal escape > scanString initPos reversedSoFar > state@(ScannerState (pos, '\\':d1:d2:d3:input)) > | isOctDigit d1 && isOctDigit d2 && isOctDigit d3 = > let c = chr (digitToInt d3 + 8 * digitToInt d2 > + 64 * digitToInt d1) > in scanString initPos (c:reversedSoFar) (eatChars 4 state) > -- hexadecimal escape > scanString initPos reversedSoFar > state@(ScannerState (pos, '\\':'x':d1:d2:input)) > | isHexDigit d1 && isHexDigit d2 = > let c = chr (digitToInt d2 + 16 * digitToInt d1) > in scanString initPos (c:reversedSoFar) (eatChars 4 state) > -- generic escaped char > scanString initPos reversedSoFar > state@(ScannerState (pos, '\\':e:input)) = > -- list is fast enough -- escapes don't happen too often > let escapes = [('n', '\n'), ('t', '\t'), ('\\', '\\'), ('\"', '\"'), > ('v', '\v'), ('f', '\f'), ('a', '\a')] > in case lookup e escapes of > Just resultChar -> > scanString initPos (resultChar:reversedSoFar) (eatChars 2 state) > Nothing -> > [SV_Token_Error { start_position = pos, > end_position = updatePosChar pos '\\', > errMessage = EBadStringEscapeChar e }] > scanString initPos reversedSoFar > state@(ScannerState (pos, '\n':input)) = > [SV_Token_Error { start_position = pos, end_position = pos, > errMessage = EStringNewline }] > scanString initPos reversedSoFar > state@(ScannerState (pos, c:restOfInput)) = > scanString initPos (c:reversedSoFar) (eatChars 1 state) > scanString initPos reversedSoFar state@(ScannerState (pos, "")) = > [SV_Token_Error { start_position = pos, > end_position = pos, > errMessage = EStringEOF }] Line comments > scanLineComment :: Position -- init position > -> Scanner -- the scanner, i.e., state -> tokens > scanLineComment initPos (ScannerState (pos, input)) = > let (commentText, afterCommentText) = span (/= '\n') input > (end_pos, next_pos) = posAtEndAndAfterString pos commentText > token = SV_Token_Comment { start_position = initPos, > end_position = end_pos, > isBlock = False, > contents = commentText } > nextScannerState = > case afterCommentText of > '\n':restOfInput -> -- skip newline > ScannerState (updatePosChar next_pos '\n', restOfInput) > -- XXX should an unterminated line comment be an error? > restOfInput -> ScannerState (next_pos, restOfInput) > in if preserveComments > then token : scanMain nextScannerState > else scanMain nextScannerState > -- XXX this reversed text business is nasty and inefficient for long comments > scanBlockComment :: Position -- init position > -> String -- comment text scanned so far, reversed > -> Scanner -- the scanner, i.e., state -> tokens > scanBlockComment initPos reversedText (ScannerState (pos, '*':'/':input)) = > let (end_pos, next_pos) = posAtEndAndAfterString pos "*/" > token = SV_Token_Comment { start_position = initPos, > end_position = end_pos, > isBlock = True, > contents = reverse reversedText } > nextScannerState = ScannerState (next_pos, input) > in if preserveComments > then token : scanMain nextScannerState > else scanMain nextScannerState > scanBlockComment initPos reversedText (ScannerState (pos, char:rest)) = > scanBlockComment initPos (char : reversedText) > (ScannerState (updatePosChar pos char, rest)) > scanBlockComment initPos reversedText state@(ScannerState (pos, rest@"")) = > let token = SV_Token_Error { start_position = pos, > end_position = pos, > errMessage = EUntermComm initPos } > in token : scanMain state line Pos directive. Emitted by the preprocessor One day the gods will smite me for this. > scanLinePosDirective ::Position -> Scanner > scanLinePosDirective ipos state@(ScannerState (pos,input)) = > let (directive, _:restOfInput) = span (/= ')') input > list = Data.List.groupBy (\x -> \y -> (x /= ',') && (y /= ',')) directive > param_list = map (filter (\x -> (not (isWhitespace x)))) > (filter ( /= ",") list) > (f:l:c:_ {-level-}:_) = param_list > in > scanMain (ScannerState (updatePosFileLineCol pos (mkFString f) > (read l) (read c), restOfInput)) line directives Verilog 2001 standard specifies they should look like `line 123 "/foo/../foobar.v" 0 vpp, though, produces `line 123 foo.v we accept both, for now > -- this requires that the `line directive be followed by a newline > -- it's also an inelegant hack > scanLineDirective :: Position -> Scanner > scanLineDirective initPos state@(ScannerState (pos,input)) = > let (directive, eolRestOfInput) = span (/= '\n') input > restOfInput = case eolRestOfInput of { '\n':cs -> cs; cs -> cs } > (end_pos, _) = posAtEndAndAfterString pos directive > err = [SV_Token_Error { start_position = initPos, > end_position = end_pos, > errMessage = EBadLineDirective }] > unquote ('"':cs@(_:_)) | last cs == '"' = Just (init cs) > | otherwise = Nothing > unquote cs = Just cs > scanLine' :: String -> String -> String -> [SV_Token] > scanLine' lineNo filename level = > case (reads lineNo, unquote filename) of > ([(l, "")], Just f) -> > scanMain (ScannerState (newLinePosition f l, restOfInput)) > _ -> err > wordsDirective :: [String] > wordsDirective = > case (matchRegex scanLineRegexLevel directive) of > Just x -> x > Nothing -> > case (matchRegex scanLineRegexNoLevel directive) of > Just x -> x > Nothing -> [] -- this will FAIL the case > -- statement below > > in case wordsDirective of > [lineNo, filename, level] -> scanLine' lineNo filename level > [lineNo, filename] -> scanLine' lineNo filename "0" > _ -> err These Regexes are defined at the top level (instead of inside a "let" in the function above) for efficiency. > scanLineRegexLevel :: Regex > scanLineRegexLevel = > mkRegex "^[ ]*([0-9]+)[ ]+(.+)[ ]+([0-9]+)[ ]*$" if you have a non-number as the "level" (third argument), it will get parsed as part of a filename with spaces in it. (But if the filename has quotes, then the unquote function will cause it to bomb out.) > scanLineRegexNoLevel :: Regex > scanLineRegexNoLevel = > mkRegex "^[ ]*([0-9]+)[ ]+(.+)" SYSTEM VERILOG CHARACTER CLASSES Is a character whitespace? (SV 3.1a LRM A.9.4) (Note that Verilog 2001 LRM section 2.2 -- but not its grammar -- also has \f) (Also note that we include \r for Windows compatibility) > isWhitespace :: Char -> Bool > isWhitespace ' ' = True; isWhitespace '\t' = True; isWhitespace '\n' = True > isWhitespace '\f' = True; isWhitespace '\r' = True; isWhitespace _ = False > isHorizontalWhitespace :: Char -> Bool > isHorizontalWhitespace ' ' = True; isHorizontalWhitespace '\t' = True > isHorizontalWhitespace _ = False Does a character start a valid identifier? (SV 3.1a LRM A.9.3) > isIdStart :: Char -> Bool > isIdStart '_' = True -- might be faster as a full table > isIdStart c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') Does a character continue a valid identifier? (SV 3.1a LRM A.9.3) > isIdChar :: Char -> Bool > isIdChar '_' = True -- might be faster as a full table > isIdChar '$' = True > isIdChar c = ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') > || (c >= '0' && c <= '9')) Is a character part of a valid operator? > isSymChar :: Char -> Bool > isSymChar '!' = True; isSymChar '%' = True; isSymChar '&' = True > isSymChar '*' = True; isSymChar '+' = True; isSymChar '-' = True > isSymChar '/' = True; isSymChar '<' = True; isSymChar '>' = True > isSymChar '=' = True; isSymChar '?' = True; isSymChar '^' = True > isSymChar '|' = True; isSymChar '~' = True; isSymChar '.' = True > isSymChar '(' = True; isSymChar ')' = True; isSymChar ',' = True > isSymChar '[' = True; isSymChar ']' = True; isSymChar ';' = True > isSymChar '{' = True; isSymChar '}' = True; isSymChar '#' = True > isSymChar '`' = True; isSymChar ':' = True; isSymChar '\'' = True > isSymChar _ = False Is a character a decimal digit or an underscore? > isDecDigitOr_ :: Char -> Bool > isDecDigitOr_ '_' = True > isDecDigitOr_ d = isDigit d Is a character a binary digit, undefined (x), high-impedance (z, ?), or _? > isBinXZ_ :: Char -> Bool > isBinXZ_ '0' = True; isBinXZ_ '1' = True; isBinXZ_ 'X' = True > isBinXZ_ 'x' = True; isBinXZ_ 'Z' = True; isBinXZ_ 'z' = True > isBinXZ_ '?' = True; isBinXZ_ '_' = True; isBinXZ_ _ = False Is a character an undefined (x) or high-impedance (z, ?) digit? > isXorZ :: Char -> Bool > isXorZ 'X' = True; isXorZ 'x' = True; isXorZ 'Z' = True; isXorZ 'z' = True; > isXorZ '?' = True; isXorZ _ = False Turn a binary digit into a SystemVerilog bit > readBit :: Char -> SV_Bit > readBit '0' = SV_BIT_0; readBit '1' = SV_BIT_1; > readBit 'x' = SV_BIT_X; readBit 'X' = SV_BIT_X; > readBit 'z' = SV_BIT_Z; readBit 'Z' = SV_BIT_Z; > readBit '?' = SV_BIT_DontCare; > readBit c = internalError ("readBit: can't read \"" ++ [c] ++ "\"") Turn an octal digit into SystemVerilog bits > octToBits :: Char -> [SV_Bit] > octToBits '0' = map readBit "000"; octToBits '1' = map readBit "001" > octToBits '2' = map readBit "010"; octToBits '3' = map readBit "011" > octToBits '4' = map readBit "100"; octToBits '5' = map readBit "101" > octToBits '6' = map readBit "110"; octToBits '7' = map readBit "111" > octToBits 'X' = map readBit "XXX"; octToBits 'x' = map readBit "xxx" > octToBits 'Z' = map readBit "ZZZ"; octToBits 'z' = map readBit "zzz" > octToBits '?' = map readBit "???" > octToBits d = internalError ("octToBits: \""++[d]++"\" not an octal digit") Turn a hex digit into SystemVerilog bits > hexToBits :: Char -> [SV_Bit] > hexToBits '0' = map readBit "0000"; hexToBits '1' = map readBit "0001" > hexToBits '2' = map readBit "0010"; hexToBits '3' = map readBit "0011" > hexToBits '4' = map readBit "0100"; hexToBits '5' = map readBit "0101" > hexToBits '6' = map readBit "0110"; hexToBits '7' = map readBit "0111" > hexToBits '8' = map readBit "1000"; hexToBits '9' = map readBit "1001" > hexToBits 'a' = map readBit "1010"; hexToBits 'b' = map readBit "1011" > hexToBits 'c' = map readBit "1100"; hexToBits 'd' = map readBit "1101" > hexToBits 'e' = map readBit "1110"; hexToBits 'f' = map readBit "1111" > hexToBits 'X' = map readBit "XXXX"; hexToBits 'x' = map readBit "xxxx" > hexToBits 'Z' = map readBit "ZZZZ"; hexToBits 'z' = map readBit "zzzz" > hexToBits '?' = map readBit "????" > hexToBits d = internalError ("hexToBits: \""++[d]++"\" not a hex digit") Turn a hex digit into an Integer > digitToInteger :: Char -> Integer > digitToInteger d | d >= '0' && d <= '9' = toInteger (ord d - ord '0') > | d >= 'a' && d <= 'f' = 10 + toInteger (ord d - ord 'a') > | d >= 'A' && d <= 'F' = 10 + toInteger (ord d - ord 'A') > | otherwise = internalError ("digitToInt: \""++[d]++"\"") distinguish real digits from X, Z, ? > isSpecialDigit :: Char -> Bool > isSpecialDigit d | (d == 'X' || d == 'x' || > d == 'Z' || d == 'z' || > d == '?') = True > isSpecialDigit _ = False group real digits together and special digits with themselves > groupDigits :: String -> [String] > groupDigits ds = > let digitEq d1 d2 | not (isSpecialDigit d1 || isSpecialDigit d2) = True > digitEq d1 d2 = (d1 == d2) > in groupBy digitEq ds UTILITY FUNCTIONS Eat n characters of input, updating the position > eatChars :: Int -> ScannerState -> ScannerState > eatChars 0 state = state > eatChars n (ScannerState (pos, c:input)) = > eatChars (n-1) (ScannerState (updatePosChar pos c, input)) > eatChars n (ScannerState (pos, "")) = > internalError ("eatChars " ++ show n ++ ": no input left") Eat input characters while they match a predicate > eatCharsWhile :: (Char -> Bool) -> ScannerState -> ScannerState > eatCharsWhile isEdible state@(ScannerState (pos, "")) = state > eatCharsWhile isEdible state@(ScannerState (pos, c:restOfInput)) > | isEdible c = (eatCharsWhile isEdible > (ScannerState (updatePosChar pos c, restOfInput))) > | otherwise = state NUMBERS AND TIMESCALES Scan some decimal digits, and subsequent non-newline whitespace. they could be - a standalone integer - a bitsize for a base constant - a whole-number part of a real number > scanNumber :: Scanner > scanNumber (ScannerState (pos, input)) = > let (digits, wsAndRest) = span isDecDigitOr_ input > (whitespace, restOfInput) = span isHorizontalWhitespace wsAndRest > original = digits ++ whitespace > nextPos = updatePosString (updatePosString pos digits) whitespace > in scanNumberWith pos digits (not (null whitespace)) original > (ScannerState (nextPos, restOfInput)) Utility functions for confirming that more digits follow > areMoreDigits :: String -> Bool > areMoreDigits (d:_) | isDigit d = True > areMoreDigits ('_':rest) = areMoreDigits rest > areMoreDigits _ = False > areMoreSignedDigits :: String -> Bool > areMoreSignedDigits (s:rest) | (s == '+' || s == '-') = areMoreDigits rest > areMoreSignedDigits ds = areMoreDigits ds Scan parts of a number following the first, if any; input must start with non-whitespace > scanNumberWith :: Position -- where the number starts > -> String -- digits of first number parsed > -> Bool -- was there whitespace after the number? > -> String -- characters scanned so far > -> Scanner -- resulting scanner > -- decimal point after no space -- scan fractional part and try exponent > scanNumberWith initPos preDigits False origSoFar (ScannerState (pos, '.':input)) > | areMoreDigits input = > let (postDigits, restOfInput) = span isDecDigitOr_ input > nextOrig = origSoFar ++ ('.':postDigits) > nextState = ScannerState (updatePosString pos ('.':postDigits), > restOfInput) > in scanRealExponent initPos preDigits postDigits nextOrig nextState > -- exponent after no space -- scan the exponent > scanNumberWith initPos digits False origSoFar state@(ScannerState (pos, c:_)) > | c == 'e' || c == 'E' = scanRealExponent initPos digits "0" origSoFar state > -- numeric bases > scanNumberWith initPos widthDigits hadWS origSoFar > (ScannerState (pos, '\'':char1:char2:rest)) > | base `elem` "dboh" = -- decimal, binary, octal, or hex base > let width = readDecDigitsAnd_ widthDigits > (tconsumedChars, wsrestOfInput) = if signed > then (['\'',char1,char2], rest) > else (['\'',char1], char2:rest) > --removes whitespace between base and number > (ws,restOfInput) = span isHorizontalWhitespace wsrestOfInput > consumedChars = tconsumedChars++ws > newPos = updatePosString pos consumedChars > nextState = ScannerState (newPos, restOfInput) > orig = origSoFar ++ consumedChars > in scanBasedNumber initPos (Just width) orig signed base nextState > where signed = char1 == 's' || char1 == 'S' > base = toLower (if signed then char2 else char1) > -- nothing interesting follows; return the integer > scanNumberWith initPos digits hadWS origText state@(ScannerState (pos, _)) = > let num = readDecDigitsAnd_ digits > token = SV_Token_Number { start_position = initPos, > end_position = updatePosBackspace pos, > value = SV_NUM_Integer num, > bitwidth = Nothing, > base = Nothing, > signed = False, > originalText = origText } > in token : scanMain state Scan a number after a base specifier; the base specifier is always lowercase > scanBasedNumber :: Position -- init position > -> Maybe Integer -- bit width, if specified > -> String -- text of token scanned so far > -> Bool -- true iff number is signed ('s/'S) > -> Char -- base: 'd', 'b', 'o', or 'h' > -> Scanner -- the scanner, i.e., state -> tokens > -- decimal X, x, Z, z, with underscores > scanBasedNumber initPos width origSoFar sign 'd' > (ScannerState (pos, input@(c:rest))) > | isXorZ c = > let (underscores, restOfInput) = span (== '_') rest > (end_pos,next_pos) = posAtEndAndAfterString pos (c:underscores) > origText = origSoFar ++ (c:underscores) > number = SV_NUM_Repeated (readBit c) > token = SV_Token_Number { start_position = initPos, > end_position = end_pos, > value = number, > bitwidth = width, > base = Just SV_NUM_BASE_Dec, > signed = sign, > originalText = origText } > in token : scanMain (ScannerState (next_pos, restOfInput)) > -- decimal with no Xs or Zs > scanBasedNumber initPos width origSoFar sign 'd' state = > let -- not used because acceptXZ is false > decToBitsErr d = internalError ("scanBasedNumber: \"" ++ [d] ++ > "\" in decimal number") > in (scanBasedNumberCommon initPos width origSoFar sign > SV_NUM_BASE_Dec False isDigit decToBitsErr state) > -- binary > scanBasedNumber initPos width origSoFar sign 'b' state = > let isBinDigit '0' = True; isBinDigit '1' = True; isBinDigit _ = False > binToBits = (:[]) . readBit > in (scanBasedNumberCommon initPos width origSoFar sign > SV_NUM_BASE_Bin True isBinDigit binToBits state) > -- octal > scanBasedNumber initPos width origSoFar sign 'o' state = > (scanBasedNumberCommon initPos width origSoFar sign > SV_NUM_BASE_Oct True isOctDigit octToBits state) > -- hexadecimal > scanBasedNumber initPos width origSoFar sign 'h' state = > (scanBasedNumberCommon initPos width origSoFar sign > SV_NUM_BASE_Hex True isHexDigit (hexToBits . toLower) state) > scanBasedNumber _ _ _ _ base _ = > internalError ("SystemVerilogScanner.scanBasedNumber: unknown base \"" > ++ [base] ++ "\"") Given base, a digit tester, and a digit converter, scan a number of any base > scanBasedNumberCommon :: Position -- position of start of number > -> Maybe Integer -- width, if specified > -> String -- text of token scanned so far > -> Bool -- specified as signed? > -> SV_Numeric_Base -- number base > -> Bool -- accept X or Z as digits? > -> (Char -> Bool) -- acceptable digit test > -> (Char -> [SV_Bit]) -- turn digit to bits > -> Scanner -- resulting scanner > scanBasedNumberCommon initPos width origSoFar sign svBase acceptXZ isDigit > digitToBits (ScannerState (pos, input)) = > let isOkDigit '_' = True > isOkDigit d = isDigit d || (acceptXZ && d `elem` "xXzZ?") > (digits, restOfInput) = span isOkDigit input > (end_pos, next_pos) = posAtEndAndAfterString pos digits > numericDigits = [d | d <- digits, d /= '_'] > baseVal = svNumericBaseValue svBase > mkNum = foldl (\sofar d -> baseVal * sofar + digitToInteger d) 0 > -- XXX error out if numericDigits = "" > num = if any (not . isDigit) numericDigits -- any funky digits? > then let grouped_digits = groupDigits numericDigits > mkPair ds@(d:_) | isSpecialDigit d = > case (digitToBits d) of > (b:_) -> (genericLength ds, Right b) > _ -> internalError ("scanBasedNumberCommon: " > ++ "no digits") > mkPair ds = (genericLength ds, Left (mkNum ds)) > in SV_NUM_Mixed (map mkPair grouped_digits) > else SV_NUM_Integer (mkNum numericDigits) > token = SV_Token_Number { start_position = initPos, > end_position = end_pos, > value = num, > bitwidth = width, > base = Just svBase, > signed = sign, > originalText = origSoFar ++ digits } > in token : scanMain (ScannerState (next_pos, restOfInput)) Scan the exponent of a real number, if any > scanRealExponent :: Position -- position of start of number > -> String -- whole part of the number > -> String -- fractional part of the number > -> String -- text of token scanned so far > -> Scanner -- resulting scanner > scanRealExponent initPos whole fractional origSoFar > (ScannerState (pos, e:input)) > | (e == 'e' || e == 'E') -- exponent present > && (areMoreSignedDigits input) = > let (signChars, inputAfterSign) = > case input of > ('+':rest) -> ("+",rest) > ('-':rest) -> ("-",rest) > _ -> ("",input) > (expDigits, restOfInput) = span isDecDigitOr_ inputAfterSign > consumedChars = e:(signChars ++ expDigits) > (end_pos, next_pos) = posAtEndAndAfterString pos consumedChars > -- put it all together > floatChars = whole ++ "." ++ fractional ++ consumedChars > -- XXX since "readFloat" can take forever on large exponents > -- XXX we special case for large exponents > num = let e = readDecDigitsAnd_ expDigits > in if (e > 100000) > then if signChars == "-" > then 0 -- zero > else 1/0 -- Infinity > else readFloatDigitsAnd_ floatChars > origText = origSoFar ++ consumedChars > token = SV_Token_Number { start_position = initPos, > end_position = end_pos, > value = SV_NUM_Real num, > signed = False, > bitwidth = Nothing, > base = Nothing, > originalText = origText } > in token : scanMain (ScannerState (next_pos, restOfInput)) > scanRealExponent initPos whole fractional origText > state@(ScannerState (pos,_)) = > let -- put it all together > floatChars = whole ++ "." ++ fractional > number = SV_NUM_Real (readFloatDigitsAnd_ floatChars) > token = SV_Token_Number { start_position = initPos, > end_position = updatePosBackspace pos, > value = number, > signed = False, > bitwidth = Nothing, > base = Nothing, > originalText = origText } > in token : scanMain state Strip underscores and convert remaining decimal digits to an Integer > readDecDigitsAnd_ :: String -> Integer > readDecDigitsAnd_ digits = > case readDec [d | d <- digits, d /= '_'] of > [(num, "")] -> num > [] -> internalError ("readDecDigitsAnd_: no parses of \"" > ++ digits ++ "\"") > ps -> internalError ("readDecDigitsAnd_: multiple parses of \"" ++ digits > ++ "\": " ++ show ps) Strip underscores and convert remaining characters to a Double (may include a decimal place and a signed exponent) > readFloatDigitsAnd_ :: String -> Double > readFloatDigitsAnd_ digits = > case readFloat [d | d <- digits, d /= '_'] of > [(num, "")] -> num > [] -> internalError ("readDecDigitsAnd_: no parses of \"" > ++ digits ++ "\"") > ps -> internalError ("readDecDigitsAnd_: multiple parses of \"" ++ digits > ++ "\": " ++ show ps)