近來 LingosHook 更新很慢,是因為比較忙——當然是忙工作了……(我就不說我的夜生活很豐富了……)
找到了一個 HTML 分析時重複處理的問題:Tidy 和 PreProcess 兩個過程有重複操作,導致 Dict 數據定位不準確,終於還是讓我「想起來」了……這樣下個版本應該快了~
這裡貼一下 HtmlDictParser 對象。此對象用於分析 HTML 中的 Dict 數據;跟 DictObject 的區別是:它只分析 Dict 本身的數據(如 ID)和單詞數據,不再像 DictObject 對象那樣要分析具體的詞典結果數據。HtmlDictParser 是 LingosHook 擺脫 Dict 限制的主要對象。
#ifndef __HTMLDICTPARSER_H__
#define __HTMLDICTPARSER_H__

#include <map>
#include <vector>

#include "wx/wx.h"

#include "DBAccess.h"
#include "TinyHtmlParser.h"

namespace HtmlDictParser


{

// Configuration attached to one dictionary: two integer parameters, one named
// for loading and one for storing. How each value is interpreted is decided by
// the code that consumes it, which is not part of this header.
// Kept as a plain aggregate so it can still be brace-initialised.
struct TDictConfig {
    int m_iLoadParam;   // parameter associated with loading
    int m_iStoreParam;  // parameter associated with storing
};

// Lookup table from a dictionary index to that dictionary's configuration
// (key: index, value: config).
typedef std::map<int, TDictConfig> TDictConfigMap;

// Descriptive record for a single dictionary: its string identifier, its
// title, and the configuration parameters that go with it.
struct TDictInfo {
    std::wstring m_strDictID;  // dictionary identifier
    std::wstring m_strTitle;   // dictionary title

    TDictConfig m_stConfig;    // load/store parameters of this dictionary
};

// Lookup table from a dictionary ID string to its dictionary index
// (key: dictid, value: dictindex).
typedef std::map<std::wstring, int> TDictIDMap;
// Lookup table from a dictionary index to that dictionary's info record
// (key: dictindex, value: info).
typedef std::map<int, TDictInfo> TDictIndexMap;

// Registry of dictionaries. It keeps two tables side by side: one from a
// dictionary ID string to an index, and one from that index to the
// dictionary's info record. Only declarations live here; the member function
// definitions are in a separate translation unit.
class CDictInfoObject {
public:
    CDictInfoObject() {}

    // Virtual so the class can be destroyed safely through a base pointer.
    virtual ~CDictInfoObject() {}

    // Initialises the object using the given database handle.
    // NOTE(review): presumably fills both tables from stored dictionary
    // records — confirm against the implementation. The meaning of the
    // returned int is defined there as well.
    int Init(CDBAccess::TDatabase& db);

    // Registers `info` under `index`.
    // NOTE(review): return-code convention not visible here — confirm.
    int Insert(int index, const TDictInfo& info);

    // Looks up the index registered for dictionary ID `id`; does not modify
    // the object.
    // NOTE(review): the value returned for an unknown ID is not visible
    // here — confirm.
    int GetDictIndex(const std::wstring& id) const;

protected:
    TDictIDMap _mapDictID;        // dictid -> dictindex
    TDictIndexMap _mapDictIndex;  // dictindex -> info
};

// Result entry for one dictionary: the dictionary's index together with a
// start/end pair.
// NOTE(review): start/end look like positions delimiting that dictionary's
// section inside the analysed HTML — confirm units and inclusivity against
// the parser implementation.
// Kept as a plain aggregate so it can still be brace-initialised.
struct TDictResult {
    int m_iDictIndex;  // index of the dictionary this entry belongs to

    int m_iDictStart;  // start of the dictionary's range
    int m_iDictEnd;    // end of the dictionary's range
};

// Ordered list of per-dictionary result entries.
typedef std::vector<TDictResult> TDictResultVector;
// Lookup table from a wide-string key to a list of result entries.
// NOTE(review): the key is presumably the dictionary ID — confirm against the
// parser implementation.
typedef std::map<std::wstring, TDictResultVector> TDictResultMap;


// Parser that extracts per-dictionary data from an HTML result and persists
// or reloads it through the database layer. Only the interface is declared
// here; every member function is defined in a separate translation unit, so
// the meaning of each int return value must be checked there.
// All public operations are virtual, allowing subclasses to override them.
class CParser {
public:
    CParser() {}

    virtual ~CParser() {}

    // Prepares the parser using the given database handle.
    // NOTE(review): presumably initialises the embedded dictionary registry —
    // confirm.
    virtual int Init(CDBAccess::TDatabase& db);

    // Analyses `html` and writes the per-dictionary entries into `result`.
    virtual int ParserHTML(const std::wstring& html, TDictResultVector& result);

    // Analyses one dictionary element (`dict`) of an already-parsed document
    // (`doc`) for dictionary `dictid`, writing into the keyed `result` map.
    // NOTE(review): `db` is presumably used to record newly seen
    // dictionaries — confirm.
    virtual int ParserHTML(CDBAccess::TDatabase& db,
                           const std::wstring& dictid,
                           const std::wstring& html,
                           TinyHtmlParser::CDocumentObject& doc,
                           const TinyHtmlParser::CElementObject* dict,
                           TDictResultMap& result);

    // Persistence of results associated with a word ID.
    virtual int SaveResult(CDBAccess::TDatabase& db, int wordid, const TDictResultMap& result);
    virtual int GetResult(CDBAccess::TDatabase& db, int wordid, TDictResultMap& result);
    virtual int RemoveResult(CDBAccess::TDatabase& db, int wordid);

    // Produces an HTML string in `result` from the entries in `vct` and the
    // source `html`; does not modify the parser.
    virtual int GenHtmlResult(const TDictResultVector& vct,
                              const std::wstring& html,
                              wxString& result) const;

protected:
    // Internal helpers (non-virtual).
    int CheckDictHtml();
    int UpdateDictInfo(CDBAccess::TDatabase& db,
                       const std::wstring& dictid,
                       const std::wstring& html,
                       TinyHtmlParser::CDocumentObject& doc,
                       const TinyHtmlParser::CElementObject* dict);
    int UpdateDictInfo(CDBAccess::TDatabase& db,
                       const std::wstring& dictid,
                       const std::wstring& title);

    CDictInfoObject _objDictInfo;  // registry of dictionaries
};

}

#endif
