

            A Lexical Analyzer for C++

             

            [lexical_analyzer.h]

            #ifndef LEXICAL_ANALYZER_H
            #define LEXICAL_ANALYZER_H

            #include <istream>
            #include <string>

            enum lexical_status
            {
                status_success,
                status_eof,
                status_invalid_char,
                status_unknown = -1,
            };

            enum token_category
            {
                token_error,
                token_keyword,
                token_identifier,
                token_number,
                token_char,
                token_string,
                token_operator,
                token_punctuator,
            };

            struct token
            {
                token_category category;
                std::string value;
            };

            class lexical_analyzer
            {
            public:
                explicit lexical_analyzer(std::istream& ifs);
                lexical_status get_token(token& t);
                int current_line() const;
            private:
                bool filter_space();                        // skip whitespace; true if any was consumed
                bool filter_comment();                      // skip a // or /* */ comment; true if one was consumed
                std::string get_hex_string();               // read a run of hexadecimal digits
                std::string get_digital_string();           // read a run of decimal digits
                std::string get_string(char delimiter);     // read up to delimiter, expanding escape sequences
                bool is_keyword(const std::string& str);    // is str a C++ keyword?
                int get_char();                             // read one character, tracking the line count
                int peek_char();                            // look at the next character without consuming it
                void putback(char ch);                      // push a character back, adjusting the line count
                void skip_char();                           // consume and discard one character
            private:
                std::istream* m_pstream;
                int m_line;
            };

            inline lexical_analyzer::lexical_analyzer(std::istream& ifs)
                :m_pstream(&ifs), m_line(1)
            {
            }

            inline int lexical_analyzer::current_line() const
            {
                return m_line;
            }

            inline int lexical_analyzer::peek_char()
            {
                return m_pstream->peek();
            }

            inline void lexical_analyzer::skip_char()
            {
                get_char();
            }

            #endif // LEXICAL_ANALYZER_H




            [lexical_analyzer.cpp]


            #include <cctype>
            #include <cstdio>
            #include "lexical_analyzer.h"

            int lexical_analyzer::get_char()
            {
                int ch = m_pstream->get();
                if (ch=='\n')
                    ++m_line;
                return ch;
            }

            void lexical_analyzer::putback(char ch)
            {
                if (ch=='\n')
                    --m_line;
                m_pstream->putback(ch);
            }

            bool lexical_analyzer::filter_space()
            {
                int ch = peek_char();
                if (ch!=EOF && isspace(ch))
                {
                    do
                    {
                        skip_char();
                        ch = peek_char();
                    }
                    while (ch!=EOF && isspace(ch));
                    return true;
                }
                return false;
            }

            bool lexical_analyzer::filter_comment()
            {
                if(peek_char()=='/')
                {
                    skip_char();
                    int ch = peek_char();
                    if(ch=='/')                             // line comment: skip to end of line
                    {
                        skip_char();
                        while(peek_char()!='\n' && peek_char()!=EOF)
                        {
                            skip_char();
                        }
                    }
                    else if(ch=='*')                        // block comment: skip until "*/" or end of input
                    {
                        skip_char();
                        int prev = 0;
                        for(;;)
                        {
                            ch = get_char();
                            if(ch==EOF || (prev=='*' && ch=='/'))
                                break;
                            prev = ch;
                        }
                    }
                    else                                    // not a comment: give the '/' back
                    {
                        putback('/');
                        return false;
                    }
                    return true;
                }
                else
                {
                    return false;
                }
            }

            bool lexical_analyzer::is_keyword(const std::string& str)
            {
                static const char* const keywords[]=
                {
                    "asm",      "auto",         "bad_cast",     "bad_typeid",
                    "bool",     "break",        "case",         "catch",
                    "char",     "class",        "const",        "const_cast",
                    "continue", "default",      "delete",       "do",
                    "double",   "dynamic_cast", "else",         "enum",
                    "except",   "explicit",     "extern",       "false",
                    "finally",  "float",        "for",          "friend",
                    "goto",     "if",           "inline",       "int",
                    "long",     "mutable",      "namespace",    "new",
                    "operator", "private",      "protected",    "public",
                    "register", "reinterpret_cast",     "return",   "short",
                    "signed",   "sizeof",       "static",       "static_cast",
                    "struct",   "switch",       "template",     "this",
                    "throw",    "true",         "try",          "typedef",
                    "typeid",   "typename",     "union",        "unsigned",
                    "using",    "virtual",      "void",         "volatile",
                    "while", 
                };

                for(int i=0; i<sizeof(keywords)/sizeof(keywords[0]); i++)
                {
                    if(str.compare(keywords[i])==0)
                        return true;
                }

                return false;
            }
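
            A note on the table above: "bad_cast", "bad_typeid", "except" and "finally" are not ISO C++ keywords (the first two are standard-library types; the last two look like Microsoft SEH extensions), so the lexer also classifies those identifiers as keywords. Since the table is fixed, the linear scan can be traded for a set lookup. The sketch below is my own illustration rather than part of the original code; it keeps only the ISO keywords and otherwise behaves like is_keyword.

            #include <set>
            #include <string>

            // Hypothetical alternative to lexical_analyzer::is_keyword (not from the
            // original post): build a std::set once and look keywords up in O(log n).
            static bool is_keyword_set(const std::string& str)
            {
                static const char* const keywords[] =
                {
                    "asm",      "auto",     "bool",     "break",    "case",
                    "catch",    "char",     "class",    "const",    "const_cast",
                    "continue", "default",  "delete",   "do",       "double",
                    "dynamic_cast",         "else",     "enum",     "explicit",
                    "extern",   "false",    "float",    "for",      "friend",
                    "goto",     "if",       "inline",   "int",      "long",
                    "mutable",  "namespace","new",      "operator", "private",
                    "protected","public",   "register", "reinterpret_cast",
                    "return",   "short",    "signed",   "sizeof",   "static",
                    "static_cast",          "struct",   "switch",   "template",
                    "this",     "throw",    "true",     "try",      "typedef",
                    "typeid",   "typename", "union",    "unsigned", "using",
                    "virtual",  "void",     "volatile", "while",
                };

                static const std::set<std::string> table(
                    keywords, keywords + sizeof(keywords) / sizeof(keywords[0]));
                return table.count(str) != 0;
            }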

            std::string lexical_analyzer::get_string(char delimiter)
            {
                std::string result;
                for(;;)
                {
                    int ch = get_char();
                    if(ch==delimiter || ch==EOF)            // stop at the closing delimiter or at end of input
                        break;
                    else if(ch=='\\')
                    {
                        ch = get_char();
                        switch(ch)
                        {
                        case '\"':
                            ch = '\"';
                            break;
                        case '\'':
                            ch = '\'';
                            break;
                        case 'r':
                            ch = '\r';
                            break;

                        case 'n':
                            ch = '\n';
                            break;
                        case 'v':
                            ch = '\v';
                            break;
                        case 't':
                            ch = '\t';
                            break;
                        case 'a':
                            ch = '\a';
                            break;
                        case 'b':
                            ch = '\b';
                            break;
                        case 'f':
                            ch = '\f';
                            break;
                        case '\r':                          // line splice
                        case '\n':
                            continue;
                            break;
                        default:
                            break;
                        }

                        if(ch=='x' || ch=='X')
                        {
                            std::string s = get_hex_string();
                            int x = 0;
                            for(int i=0; i<s.length(); i++)
                            {
                                x *= 16;
                                if(s[i]>='A' && s[i]<='F')
                                    x += s[i]-'A' + 10;
                                else if(s[i]>='a' && s[i]<='f')
                                    x += s[i]-'a' + 10;
                                else
                                    x += s[i]-'0';
                            }
                            ch = (char)x;
                        }

                    }

                    result += (char)ch;
                }

                return result;
            }
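
            The escape handling above converts \x hex escapes by hand. An equivalent, shorter form (my own sketch, not from the original post) hands the digit string returned by get_hex_string to strtol with base 16:

            #include <cstdlib>
            #include <string>

            // Hypothetical helper, equivalent to the manual conversion loop in get_string:
            // decode a run of hex digits into a single character value.
            static char decode_hex_escape(const std::string& digits)
            {
                // strtol with base 16 accepts both 'A'-'F' and 'a'-'f', matching the two
                // cases handled explicitly above; an empty string yields '\0', as before.
                return (char)std::strtol(digits.c_str(), 0, 16);
            }

            With this helper, the body of the if(ch=='x' || ch=='X') block reduces to ch = decode_hex_escape(get_hex_string());.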

            std::string lexical_analyzer::get_digital_string()
            {
                std::string result;
                int ch;
                while(isdigit(ch=get_char()))
                {
                    result += (char)ch;
                }
                if(ch!=EOF)
                    putback((char)ch);

                return result;
            }

            std::string lexical_analyzer::get_hex_string()
            {
                std::string result;
                int ch;
                while(isxdigit(ch=get_char()))
                {
                    result += (char)ch;
                }
                if(ch!=EOF)
                    putback((char)ch);

                return result;
            }

            lexical_status lexical_analyzer::get_token(token& t)
            {
                if(m_pstream->eof())
                    return status_eof;

                while(filter_space() || filter_comment())   // skip any mix of whitespace and comments
                {
                }

                if(peek_char()==EOF || m_pstream->eof())
                    return status_eof;

                t.value.resize(0);

                char ch = get_char();
                if(ch=='_' || isalpha(ch) || ch=='$')       // identifier: starts with a letter, '_' or '$'
                {
                    t.category = token_identifier;
                    do
                    {
                        t.value += ch;
                        ch = get_char();
                    }while(ch=='_' || isalpha(ch) || isdigit(ch) || ch=='$');
                    putback(ch);
                }
                else if(isdigit(ch))                        // numeric literal
                {
                    t.category = token_number;
                    t.value += ch;
                    ch = get_char();
                    if(ch=='x' || ch=='X')                  // hexadecimal, e.g. 0x1F
                    {
                        t.value += ch;
                        t.value += get_hex_string();
                    }
                    else if(isdigit(ch))
                    {
                        t.value += ch;
                        t.value += get_digital_string();
                    }
                    else                                    // single digit: give the lookahead back
                    {
                        putback(ch);
                    }
                }
                else if(ch=='\"')
                {
                    t.category = token_string;
                    t.value = get_string('\"');
                }
                else if(ch=='\'')
                {
                    t.category = token_char;
                    t.value = get_string('\'');
                }
                else
                {
                    t.category = token_operator;
                    if(ch=='=' || ch=='&' || ch=='|' || ch==':')
                    {
                        t.value = ch;
                        if(peek_char()==ch)
                        {
                            t.value += ch;
                            skip_char();
                        }
                    }
                    else if(ch=='+' || ch=='-')
                    {
                        t.value = ch;
                        char cc = get_char();
                        if(cc==ch)
                        {
                            t.value += ch;
                        }
                        else if(cc=='=')
                        {
                            t.value += '=';
                        }
                        else if(ch=='-' && cc=='>')
                        {
                            t.value += '>';                         // ->
                            cc = peek_char();
                            if(cc=='*')
                            {
                                skip_char();

                                t.value += '*';                     // ->*
                            }
                        }
                        else
                        {
                            putback(cc);
                        }
                    }
                    else if(ch=='*' || ch=='/' || ch=='%' || ch=='^' || ch=='!')
                    {

                        t.value = ch;
                        ch = peek_char();
                        if(ch=='=')
                        {
                            t.value+='=';
                            skip_char();
                        }
                    }
                    else if(ch=='<' || ch=='>')
                    {
                        t.value = ch;
                        char cc = get_char();
                        if(ch==cc)                              // << >>
                        {
                            t.value += cc;
                            cc = peek_char();
                            if(cc=='=')                         // <<= >>=
                            {
                                skip_char();
                                t.value += '=';
                            }
                        }
                        else if(cc=='=')
                        {
                            t.value += '=';
                        }
                        else
                        {
                            putback(cc);
                        }
                    }
                    else if(ch=='.')
                    {
                        t.value = '.';                          // .
                        ch = get_char();
                        if(ch=='*')
                        {
                            t.value += '*';                     // .*
                        }
                        else if(ch=='.')
                        {
                            char cc = get_char();
                            if(cc=='.')                         // ...
                            {
                                t.value += "..";
                            }
                            else
                            {
                                putback(cc);
                                putback(ch);
                            }
                        }
                        else
                        {
                            putback(ch);
                        }
                    }
                    else if(ch=='~' || ch =='?' ||
                        ch=='[' || ch==']' ||
                        ch=='(' || ch==')'
                        )
                    {
                        t.value = ch;
                    }
                    else if(ch==';' || ch=='{'|| ch=='}'|| ch==','|| ch=='#' )
                    {
                        t.category = token_punctuator;
                        t.value = ch;
                    }
                    else if(ch=='\\')
                    {
                        ch = peek_char();
                        if(ch=='\r' || ch=='\n')            // line splice: swallow it, then fetch a real token
                        {
                            skip_char();
                            return get_token(t);
                        }
                        else
                        {
                            t.category = token_error;
                            t.value = ch;
                        }
                    }
                    else
                    {
                        t.category = token_error;
                        t.value = ch;
                        return status_invalid_char;
                    }
                }

                if(t.category == token_identifier && is_keyword(t.value))
                {
                    t.category = token_keyword;
                }

                return status_success;
            }



            [main.cpp], test program
            #include <fstream>
            #include <string>
            #include <iostream>

            #include "lexical_analyzer.h"

            int main()
            {
                std::ifstream ifs("D:\\ThreadFuncs.cpp", std::ios::in | std::ios::binary);
                lexical_analyzer lex(std::cin);
                //lexical_analyzer lex(ifs);
                std::ofstream ofs("D:\\out.cpp");
                //std::ostream& os = ofs;
                std::ostream& os = std::cout;
                token t;
                lexical_status status;
                while((status=lex.get_token(t))!=status_eof)
                {
                    if(status==status_success)
                        os << t.value << '\n';
                    else if(status==status_invalid_char)
                        std::cerr << "Line:" << lex.current_line() << "invalid_char: " << t.value << '\n';
                }
                return 0;
            }
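
            For a quick self-contained check that does not depend on D:\ThreadFuncs.cpp or on typing into std::cin, the analyzer can also be fed from an in-memory stream. The file name and sample input below are my own choices, not from the original post:

            // test_inline.cpp -- hypothetical standalone test of the lexer
            #include <sstream>
            #include <iostream>

            #include "lexical_analyzer.h"

            int main()
            {
                std::istringstream src("int n = 0x1F; // answer\nif (n >= 10) n += 2;");
                lexical_analyzer lex(src);

                token t;
                lexical_status status;
                while((status=lex.get_token(t))!=status_eof)
                {
                    if(status==status_success)
                        std::cout << t.value << '\n';
                }
                // Expected token values, one per line:
                // int  n  =  0x1F  ;  if  (  n  >=  10  )  n  +=  2  ;
                return 0;
            }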

            posted on 2008-05-17 00:09 by 肥仔 | Category: LEX & YACC
