// mydocument.cpp
#include "stdafx.h"
#include "SiteDownload.h"
#include "MyDocument.h"
#include "MyView.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/////////////////////////////////////////////////////////////////////////////
// CMyDoc
IMPLEMENT_DYNCREATE(CMyDoc, CDocument)
BEGIN_MESSAGE_MAP(CMyDoc, CDocument)
//{{AFX_MSG_MAP(CMyDoc)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CMyDoc construction/destruction
// Constructs an empty document: no project loaded, crawl level zero,
// hash tables sized, and the default download options filled in.
CMyDoc::CMyDoc()
{
    // Document state: empty project, not in auto mode, at level zero.
    m_nLevel = 0;
    m_bAutoMode = FALSE;
    m_bProjectLoaded = FALSE;

    // Set the hash table sizes for the "done" maps.
    m_arrMediaDone.InitHashTable(2400);
    m_arrPagesDone.InitHashTable(1200);

    // Default download options.
    m_Options.bOffsiteLinks = TRUE;
    m_Options.bMultimedia = TRUE;
    m_Options.bContents = TRUE;
    m_Options.bFixupLinks = TRUE;
    m_Options.nMaxPages = 0;
    m_Options.nMaxDepth = 3;
}
// Destructor: calls ClearCacheMaps() and deliberately swallows anything it
// throws so that no exception can escape the destructor.
CMyDoc::~CMyDoc()
{
    try { ClearCacheMaps(); }
    catch(...) { /* intentionally ignored: nothing useful can be done here */ }
}
// Handles creation of a new document.
//
// The very first call (made when the application starts up) only gives the
// document the "no project" title and defers to the base class; it does not
// mark a project as loaded. Every later call resets the start page and the
// download statistics and marks the project as loaded.
//
// Returns FALSE if CDocument::OnNewDocument() fails, TRUE otherwise.
BOOL CMyDoc::OnNewDocument()
{
    // Explicitly typed: the original relied on implicit int, which is not
    // valid standard C++.
    static BOOL bFirstTime = TRUE;

    if(bFirstTime)
    {
        bFirstTime = FALSE;

        CString strDefName;
        strDefName.LoadString(IDS_NO_PROJECT);
        SetTitle(strDefName);

        if(!CDocument::OnNewDocument())
            return FALSE;
        return TRUE;
    }

    if(!CDocument::OnNewDocument())
        return FALSE;

    // A real (empty) project now exists: start from clean statistics.
    m_bProjectLoaded = TRUE;
    m_strStartPage.Empty();
    m_nGottenPageCount = 0;
    m_nGottenFileCount = 0;
    m_nQueuedPageCount = 0;
    m_nTotalBytes = 0;
    return TRUE;
}
// Opens the project file lpszPathName.
//
// The current project is saved first, the view's tree is cleared, and the
// base class then loads the file (which ends up in Serialize()). On success
// the path name, the project directory and the title are derived from
// lpszPathName and the project is marked as loaded.
//
// Returns FALSE if the document has no view or if the base class fails to
// open the file, TRUE otherwise.
BOOL CMyDoc::OnOpenDocument(LPCTSTR lpszPathName)
{
    POSITION pos = GetFirstViewPosition();
    CMyView* pView = (CMyView *) GetNextView(pos);

    // Save the current project first.
    SaveModified();

    // Both the tree reset below and Serialize() need the view; without one
    // the file cannot be loaded, so fail instead of dereferencing NULL.
    if(pView == NULL)
        return FALSE;

    // Clear whatever the tree control currently displays.
    pView->ClearTree();

    if (!CDocument::OnOpenDocument(lpszPathName))
        return FALSE;

    SetPathName(lpszPathName);

    // The project directory is drive + path + file name (no extension),
    // followed by a backslash.
    m_strDirectory = CInternetDownload::SplitFileName(lpszPathName,
        CInternetDownload::DRIVE|CInternetDownload::PATH|CInternetDownload::FNAME)+"\\";
    SetTitle(CInternetDownload::SplitFileName(lpszPathName,CInternetDownload::FNAME|CInternetDownload::EXT));

    m_bProjectLoaded = TRUE;
    // NOTE(review): the document is flagged as modified right after loading;
    // presumably so it is always re-saved on close - confirm.
    SetModifiedFlag(TRUE);
    return TRUE;
}
// Saves the document if it has been modified.
// Returns the result of CDocument::DoFileSave() when a save was needed,
// TRUE when the document was unmodified.
BOOL CMyDoc::SaveModified()
{
    if(!IsModified())
        return(TRUE);

    return CDocument::DoFileSave();
}
// Decides whether the user may safely close the frame / exit the program.
// Closing is refused while the view's GetSnagging() reports TRUE.
// pFrame is not used.
BOOL CMyDoc::CanCloseFrame(CFrameWnd* pFrame)
{
    POSITION pos = GetFirstViewPosition();
    CMyView* pView = (CMyView *) GetNextView(pos);

    // With no view attached there is nothing to query, so closing is allowed
    // (previously this dereferenced a NULL pointer).
    if(pView == NULL)
        return(TRUE);

    return(!pView->GetSnagging());
}
// Resets the document to a fresh, unloaded state: path, directory, start
// page and all download statistics are cleared and the modified flag is
// reset.
//
// lpszProjName - title to show for the document; when NULL the
//                IDS_NO_PROJECT resource string is used instead.
void CMyDoc::Reset(LPCTSTR lpszProjName)
{
    CString strNewProjName;

    // Fall back to the "no project" resource string only when the caller did
    // not supply a name. (The fallback used to run unconditionally and so
    // always overwrote the supplied name.)
    if(lpszProjName)
        strNewProjName = lpszProjName;
    else
        strNewProjName.LoadString(IDS_NO_PROJECT);

    m_strPathName.Empty();
    m_strDirectory.Empty();
    m_bProjectLoaded = FALSE;
    SetModifiedFlag(FALSE);
    SetTitle(strNewProjName);

    // Clear the start page and the download statistics.
    m_strStartPage.Empty();
    m_nGottenPageCount = 0;
    m_nGottenFileCount = 0;
    m_nQueuedPageCount = 0;
    m_nTotalBytes = 0;
}
// Saves the document (if modified) and then closes it.
void CMyDoc::OnCloseDocument()
{
    // The save result is intentionally not checked: closing proceeds either way.
    (void) SaveModified();

    // NOTE(review): per the MFC documentation the base implementation may
    // destroy this object, so keep this as the last statement - confirm.
    CDocument::OnCloseDocument();
}
// Copies the project's download configuration into Options.
// Only the six fields below are copied; any other CConfigure members (for
// example the proxy settings read elsewhere in this file) are left untouched
// in Options.
void CMyDoc::GetOptions(CConfigure& Options)
{
    const CConfigure& current = m_Options;

    Options.bOffsiteLinks = current.bOffsiteLinks;
    Options.bMultimedia   = current.bMultimedia;
    Options.bContents     = current.bContents;
    Options.bFixupLinks   = current.bFixupLinks;
    Options.nMaxPages     = current.nMaxPages;
    Options.nMaxDepth     = current.nMaxDepth;
}
// Replaces the project's download configuration with the values in Options.
// Only the six fields below are copied; any other CConfigure members (for
// example the proxy settings read elsewhere in this file) keep their current
// values in the document.
void CMyDoc::SetOptions(CConfigure& Options)
{
    CConfigure& target = m_Options;

    target.bOffsiteLinks = Options.bOffsiteLinks;
    target.bMultimedia   = Options.bMultimedia;
    target.bContents     = Options.bContents;
    target.bFixupLinks   = Options.bFixupLinks;
    target.nMaxPages     = Options.nMaxPages;
    target.nMaxDepth     = Options.nMaxDepth;
}
// Serializes the document to or from the archive.
//
// Archive layout, in order: page count, file count, total bytes, the
// download options (m_Options), then the view's tree contents. When loading,
// the queued-page count is reset to zero. The current level is reset to zero
// in both directions.
void CMyDoc::Serialize(CArchive& ar)
{
    if (!ar.IsStoring())
    {
        // Loading: read the statistics back; nothing is queued yet.
        ar >> m_nGottenPageCount;
        ar >> m_nGottenFileCount;
        ar >> m_nTotalBytes;
        m_nQueuedPageCount = 0;
    }
    else
    {
        // Storing: write the statistics.
        ar << m_nGottenPageCount;
        ar << m_nGottenFileCount;
        ar << m_nTotalBytes;
    }

    m_Options.Serialize(ar);

    // The tree control shown by the view is serialized as well.
    POSITION posView = GetFirstViewPosition();
    CMyView* pTreeView = (CMyView *) GetNextView(posView);
    pTreeView->SerializeTree(ar);

    m_nLevel = 0;
}
// Obtains the page strPage, either through CInternetDownload or - when the
// page is already recorded as downloaded - from the copy on the local disk.
// A page fetched from the network is saved to disk. In both cases the page
// is parsed for the links and multimedia items it contains.
//
// strPage     - URL of the page; passed by reference to m_Inet.GetPage()
//               (NOTE(review): presumably updated on redirect - confirm).
// strFileName - local file name. For a cached page it is overwritten with
//               the name stored in the cache entry; for a new page it is the
//               name the page is saved under in m_strDirectory.
// linkEntry   - link-stack entry to fill. It is reset first; arrLinks and
//               arrOffsite receive every link accepted by ShouldQueuePage(),
//               arrMedia receives the not-yet-downloaded media items (only
//               for pages fetched from the network).
//
// Returns TRUE if the page content was obtained, FALSE otherwise.
BOOL CMyDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
    BYTE *pbyBuffer = m_byBuffer;
    int nLen = 0;
    BOOL bPageInCache = FALSE;
    BOOL bRet = FALSE;
    CInternetDownload::RESULTS ret;
    MAP_FILES* pMapEntry = NULL;

    // Initialize the link-stack entry.
    linkEntry.arrLinks.SetSize(0,100);
    linkEntry.arrMedia.SetSize(0,100);
    linkEntry.arrOffsite.SetSize(0,100);
    linkEntry.nIndex = 0;

    // Decide whether the page comes from the network or from the local disk.
    if(ShouldGetPage(strPage,pMapEntry))
    {
        // Network resource.
        ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
        if(ret == CInternetDownload::SUCCESS)
        {
            bRet = TRUE;
            m_nTotalBytes += nLen;
        }
    }
    else
    {
        // Disk resource: read the previously saved copy.
        CFile fileIn;
        CFileException ex;
        strFileName = pMapEntry->strFileName;
        CString strTempFileName = m_strDirectory+strFileName;
        if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
        {
            nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
            fileIn.Close();
            bRet = TRUE;
        }
        // Remember that this page did not come from CInternetDownload.
        bPageInCache = TRUE;
    }

    if(!bRet)
        return(FALSE);

    // Parse the page for the links and media it contains.
    CHTMLFileParser Parser;
    Parser.SetPageURL(strPage);
    // Never hand the parser more than the buffer can hold.
    if(nLen > MAX_INET_BUFFER)
        nLen = MAX_INET_BUFFER;
    pbyBuffer = m_byBuffer;
    Parser.SetFixupMode(FALSE);
    Parser.ResetArrays();
    Parser.SetGetMedia(m_Options.bMultimedia);
    Parser.ParseText((char *)pbyBuffer,nLen);
    m_strPageTitle = Parser.GetTitle();

    // Save a newly downloaded page to disk.
    if(!bPageInCache)
    {
        pbyBuffer = m_byBuffer;
        m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
    }

    // Evaluate every link and decide whether it goes into the download queue.
    BOOL bOffsite;
    CString strNewPage;
    int nLinks = Parser.GetLinks().GetSize();
    for(int nLink = 0; nLink < nLinks; nLink++)
    {
        // URL of this link and its offsite flag.
        strNewPage = Parser.GetLinks().GetAt(nLink);
        bOffsite = Parser.GetOffsiteFlags().GetAt(nLink);

        if(ShouldQueuePage(strNewPage,bOffsite))
        {
            linkEntry.arrLinks.Add(strNewPage);
            linkEntry.arrOffsite.Add(bOffsite);
        }
    }

    // Media is only collected for pages that were just downloaded.
    if(!bPageInCache)
    {
        int nMedia = Parser.GetMedia().GetSize();
        CString strMedia;
        // Uses its own index: the link loop's index is out of scope here
        // under standard C++ for-scope rules.
        for(int nItem = 0; nItem < nMedia; nItem++)
        {
            strMedia = Parser.GetMedia().GetAt(nItem);
            if(ShouldGetMedia(strMedia,pMapEntry))
                linkEntry.arrMedia.Add(strMedia);
        }
    }

    return(TRUE);
}
// Fetches a multimedia item of a web page through CInternetDownload and
// saves it under strFileName in the project directory.
// Returns TRUE if the download succeeded, FALSE otherwise.
BOOL CMyDoc::GetMedia(CString& strMedia, CString& strFileName)
{
    BYTE *pbyData = m_byBuffer;
    int nBytes;

    // Get the file from Inet; bail out on anything but success.
    const CInternetDownload::RESULTS result = m_Inet.GetFile(strMedia,&pbyData,nBytes);
    if(result != CInternetDownload::SUCCESS)
        return FALSE;

    m_nTotalBytes += nBytes;

    // Save the file.
    m_Inet.SaveFile(strFileName,m_strDirectory,pbyData,nBytes);
    return TRUE;
}
// Tells whether the page still has to be downloaded through Inet.
// The lookup key is the lower-cased URL cut off at the first '#'.
// Returns TRUE when the page is not in m_arrPagesDone; when it is, returns
// FALSE and pMapEntry refers to the entry found in the map.
BOOL CMyDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
    CString strKey(strPage);
    strKey.MakeLower();
    strKey = strKey.SpanExcluding("#");

    const BOOL bAlreadyDone = m_arrPagesDone.Lookup(strKey,(CObject *&) pMapEntry);
    return(!bAlreadyDone);
}
// Tells whether the multimedia item still has to be downloaded through Inet.
// The lookup key is the lower-cased URL cut off at the first '#'.
// Returns TRUE when the item is not in m_arrMediaDone; when it is, returns
// FALSE and pMapEntry refers to the entry found in the map.
BOOL CMyDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
    CString strKey(strMedia);
    strKey.MakeLower();
    strKey = strKey.SpanExcluding("#");

    const BOOL bAlreadyDone = m_arrMediaDone.Lookup(strKey,(CObject *&) pMapEntry);
    return(!bAlreadyDone);
}
// Decides whether strNewPage should be added to the download queue.
//
// - Page already downloaded: it is accepted immediately when a maximum depth
//   is configured and the current level is at or beyond the level recorded
//   in its cache entry.
// - Page not downloaded yet: it is rejected when it is already waiting in
//   the link list of any earlier level.
// - Otherwise the page is accepted unless it is offsite and offsite links
//   are disabled in the options.
//
// NOTE(review): the immediate TRUE for already-downloaded pages skips the
// offsite filter - confirm this is intended and not meant to be FALSE.
BOOL CMyDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
    MAP_FILES* pCached;
    const BOOL bNeedsDownload = ShouldGetPage(strNewPage,pCached);

    if(!bNeedsDownload)
    {
        // Already downloaded: were its links followed all the way to the
        // maximum level?
        if(m_Options.nMaxDepth && m_nLevel >= pCached->nMaxLevel)
            return(TRUE);
    }
    else
    {
        // Not downloaded: look for it among the pages queued at the
        // previous levels.
        for(int nLevel = 0; nLevel < m_nLevel; nLevel++)
        {
            for(int nLink = 0; nLink < m_aLinks[nLevel].arrLinks.GetSize(); nLink++)
            {
                if(strNewPage == m_aLinks[nLevel].arrLinks.GetAt(nLink))
                    return(FALSE);
            }
        }
    }

    // Offsite pages are only queued when offsite links are allowed.
    const BOOL bBlockedOffsite = (bOffsite && !m_Options.bOffsiteLinks);
    return(bBlockedOffsite ? FALSE : TRUE);
}
// Initializes the specified link stack entry
void CMyDoc::ResetLink(int nLevel)
{
m_aLinks[nLevel].nIndex = 0;
m_aLinks[nLevel].arrLinks.SetSize(0,100);
m_aLinks[nLevel].arrMedia.SetSize(0,100);
m_aLinks[nLevel].arrOffsite.SetSize(0,100);
}
// The workhouse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files. This process is
// spawned indirectrly in RecursiveDownload() using the AfxBeginThread() call.
UINT CMyDoc::DownloadThread(LPVOID lpvData)
{
HTREEITEM htreePage;
// Static methods can't have a "this" pointer to get the parent class's
// pointer which the call passes as a parameter
CMyDoc *pThis = (CMyDoc *) lpvData;
int nMaxDepth = pThis->m_Options.nMaxDepth-1;
int nCount;
CString strPage = pThis->m_strStartPage;
CString strFileName;
CString strLogData;
CString strText;
POSITION pos = pThis->GetFirstViewPosition();
CMyView* pView = (CMyView *) pThis->GetNextView(pos);
BOOL bIsOffsite = FALSE;
// Establish the WinInet Session
try
{
pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
}
catch(...)
{
}
// Create the log file
pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
CFile::modeCreate|CFile::modeWrite);
// Create the table of contents file
if(pThis->m_Options.bContents)
{
pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
CFile::modeCreate|CFile::modeWrite);
// Add the TOC to the list of downloaded files
pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);
// Add the TOC to the tree control
CString strTitle = "Contents Page 1 (SnagCon1.htm)";
pView->AddTreeContent(strTitle);
// Write the beginning of the first TOC page
strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
strText += "</HEAD\r\n<BODY>\r\n";
strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
pThis->m_fileContents.Write(strText,strText.GetLength());
}
// Initialize the index for the first link level, start with the first level
pThis->m_nLevel = 0;
pThis->m_aLinks[0].nIndex = 0;
pThis->m_Inet.ResetUniqueCount();
// Recusively search web links until either we've searched them all (m_nLevel is
// -1 or if the user decides to abort
while(pThis->m_nLevel >= 0 )
{
// Get the name of a new page in a second dimension element
if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
{
// Save the URL and whether it's offsite
int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);
// Bump to the next level so we can get the page's links
pThis->m_nLevel++;
}
// Generate a unique filename for this page
pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
pThis->m_arrPagesDone,TRUE);
// Write a log entry for this page -- leave room for the result
strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
CString strOrigPage = strPage;
// Get the page from Inet or from local file
if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
{
MAP_FILES *pMapEntry;
// Get the count of links
nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// Did we just download this new page??
if(pThis->ShouldGetPage(strPage,pMapEntry))
{
// Yes, add it to the list of retrieved pages
pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);
// If the page was redirected then add its original name too
if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);
// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
if(bIsOffsite)
strText = strPage+" - ";
else strText.Empty();
// Add the page's title and local filename
strText += pThis->m_strPageTitle+" ("+
strFileName.SpanExcluding("#")+")";
htreePage = pView->AddTreePage(strText,bIsOffsite);
strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
pThis->m_strPageTitle,
strFileName.SpanExcluding("#"),strPage);
pThis->m_fileContents.Write(strText,strText.GetLength());
// Update the statistics
pThis->m_nGottenPageCount++;
pThis->m_nGottenFileCount++;
}
else
{
// Set the new depth level if necessary
if(nMaxDepth)
{
// Have we gone to the max level yet???
if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
nCount = 0;
else pMapEntry->nMaxLevel = pThis->m_nLevel;
}
}
// Log the results
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -