// crobotinternet.cpp
////////////////////////////////////////////////////////////////////
//
// CRobotInternet.cpp - CRobotInternet class implementation
//
// Source: "Programming Robots, Spiders and Intelligent Agents
// using Visual C++"
//
// Copyright (C) 1999 David Pallmann. All Rights Reserved.
#include <stdafx.h>
#include <afxinet.h>
#include "CRobot.h"
#include "CRobotInternet.h"
// Constructor
//
// Puts every setting into its default state: the cache is neither read
// nor written, the session context counter starts at zero, no server or
// proxy credentials are configured, and the user agent is "Mozilla".
CRobotInternet::CRobotInternet()
{
    // Session bookkeeping
    m_nContext = 0;
    m_sUserAgent = "Mozilla";

    // Cache behaviour: off in both directions until a caller enables it
    m_bWriteToCache = false;
    m_bReadFromCache = false;

    // Server logon credentials (none)
    m_sLogonPassword = "";
    m_sLogonUsername = "";

    // Proxy logon credentials (none)
    m_sProxyLogonPassword = "";
    m_sProxyLogonUsername = "";
    m_sProxyLogonMethod = "";
}
// Destructor
//
// Explicitly empties the stored proxy logon settings before the object
// goes away.
CRobotInternet::~CRobotInternet()
{
    m_sProxyLogonPassword.Empty();
    m_sProxyLogonUsername.Empty();
    m_sProxyLogonMethod.Empty();
}
////////////////////////////////////////////////////////////////////
//
// Support functions
// ********************** private
// * *
// * EncodeTextBase64 *
// * *
// **********************
// Function: Returns the Base64-encoded version of a text string.
//
// Inputs:   sText - The text to encode; any length, including empty.
//                   Each character is treated as one 8-bit byte.
//
// Outputs:  <function_result> - The encoded text. Its length is always
//                               a multiple of four; '=' pads the final
//                               group when the input length is not a
//                               multiple of three. Empty input yields
//                               an empty string.
CString CRobotInternet::EncodeTextBase64(const CString& sText)
{
    // Base64 alphabet: a 6-bit value indexes straight into this table.
    // The pad character '=' is not part of the alphabet and is emitted
    // explicitly below.
    static const char cTable[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";

    const int nChars = sText.GetLength();
    CString sBase64;

    //   byte 1            byte 2            byte 3
    //   7 6 5 4 3 2 1 0   7 6 5 4 3 2 1 0   7 6 5 4 3 2 1 0
    //   \---------/ \-----------/ \-----------/ \---------/
    //     nIndex1      nIndex2       nIndex3      nIndex4
    //
    // The input is read directly from sText, three bytes per pass, so no
    // fixed-size scratch buffer limits the input length.
    for (int nPos = 0; nPos < nChars; nPos += 3)
    {
        const bool bHaveSecond = (nPos + 1 < nChars);
        const bool bHaveThird  = (nPos + 2 < nChars);

        // Bytes beyond the end of the input contribute zero bits; using
        // any other filler would corrupt the last data symbol.
        const unsigned char cByte1 =
            static_cast<unsigned char>(sText.GetAt(nPos));
        const unsigned char cByte2 = bHaveSecond
            ? static_cast<unsigned char>(sText.GetAt(nPos + 1)) : 0;
        const unsigned char cByte3 = bHaveThird
            ? static_cast<unsigned char>(sText.GetAt(nPos + 2)) : 0;

        const int nIndex1 = (cByte1 >> 2) & 0x3F;
        const int nIndex2 = (((cByte1 & 0x03) << 4) | (cByte2 >> 4)) & 0x3F;
        const int nIndex3 = (((cByte2 & 0x0F) << 2) | (cByte3 >> 6)) & 0x3F;
        const int nIndex4 = cByte3 & 0x3F;

        // The first two symbols always carry data; the last two become
        // '=' padding when the corresponding input byte does not exist.
        sBase64 += cTable[nIndex1];
        sBase64 += cTable[nIndex2];
        sBase64 += bHaveSecond ? cTable[nIndex3] : '=';
        sBase64 += bHaveThird  ? cTable[nIndex4] : '=';
    } // End for

    return sBase64;
}
// ********************* private
// * *
// * ResponseMessage *
// * *
// *********************
// Function: Returns a textual message describing a numeric response
//           code (the 2xx/3xx/4xx/5xx status values listed below).
//
// Inputs:   nCode - The response code to describe.
//
// Outputs:  <function_result> - The description for a listed code, or
//                               "Error <nCode>" for any other value.
CString CRobotInternet::ResponseMessage(const int nCode)
{
    struct StatusText
    {
        int         nStatus;
        const char* pszText;
    };

    static const StatusText aKnown[] =
    {
        //---- 200 series (success) ----
        { 200, "OK, request succeeded" },
        { 201, "OK, new resource created." },
        { 202, "Request accepted but processing not completed." },
        { 204, "OK, but no content to return." },
        //---- 300 series (redirection) ----
        { 301, "Requested resource has been assigned a "
               "new permanent URL." },
        { 302, "Requested resource resides temporarily "
               "under a different URL." },
        { 304, "Document has not been modified." },
        //---- 400 series (client error) ----
        { 400, "Bad request." },
        { 401, "Unauthorized; request requires "
               "user authentication." },
        { 403, "Forbidden for unspecified reason." },
        { 404, "Not Found." },
        { 407, "Unauthorized; reject by proxy server." },
        //---- 500 series (server error) ----
        { 500, "Internal server error." },
        { 501, "Not implemented." },
        { 502, "Bad gateway; invalid response from "
               "gateway or upstream server." },
        { 503, "Service temporarily unavailable." }
    };
    const int nKnown = sizeof(aKnown) / sizeof(aKnown[0]);

    // Linear scan: the table is tiny and each code appears once.
    for (int i = 0; i < nKnown; ++i)
    {
        if (aKnown[i].nStatus == nCode)
            return CString(aKnown[i].pszText);
    } // End for

    // Code not in the table - report it numerically.
    CString sFallback;
    sFallback.Format("Error %d", nCode);
    return sFallback;
}
// ****************** private
// * *
// * ErrorMessage *
// * *
// ******************
// Function: Returns a textual message describing a CRobot
//           error code (the CROBOT_ERR_* values).
//
// Inputs:   nError - The error code to describe.
//
// Outputs:  <function_result> - The description for a recognised code,
//                               or "CRobotInternet error <nError>" for
//                               any other value.
CString CRobotInternet::ErrorMessage(const int nError)
{
    // Pick the fixed text for a recognised code; stays NULL otherwise.
    const char* pszKnown = NULL;
    switch (nError)
    {
    case CROBOT_ERR_SUCCESS:           pszKnown = "Successful";        break;
    case CROBOT_ERR_INVALID_URL:       pszKnown = "Invalid URL";       break;
    case CROBOT_ERR_INVALID_PARAMETER: pszKnown = "Invalid parameter"; break;
    case CROBOT_ERR_CONNECTION_FAILED: pszKnown = "Connection failed"; break;
    case CROBOT_ERR_TIMED_OUT:         pszKnown = "Timed out";         break;
    case CROBOT_ERR_NOT_FOUND:         pszKnown = "Not found";         break;
    case CROBOT_ERR_NOT_AUTHORIZED:    pszKnown = "Not authorized";    break;
    case CROBOT_ERR_DISK_FILE_ERROR:   pszKnown = "Disk/file error";   break;
    default:                                                           break;
    } // End switch

    CString sText;
    if (pszKnown != NULL)
        sText = pszKnown;
    else
        sText.Format("CRobotInternet error %d", nError);
    return sText;
}
// **************************************************************
// * *
// * *
// * H T T P F u n c t i o n s *
// * *
// * *
// **************************************************************
// ---------------------------------------------------------------
// ************************** private
// * *
// * CreateStandardHeader *
// * *
// **************************
// Function: Builds the request header block passed to OpenURL calls.
//
//           The block always starts with "Accept: */*". When the proxy
//           logon method is "basic" and a proxy username is set, a
//           "Proxy-authorization: Basic" line is added. When a logon
//           username is set, an "Authorization: Basic" line is added.
//           In both cases the credentials are "username:password",
//           Base64-encoded. The block ends with an empty line.
//
// Outputs:  <function_result> - The header text, CRLF-delimited.
//
// This is a private function called by various public functions.
CString CRobotInternet::CreateStandardHeader()
{
    CString sHeader("Accept: */*\r\n");

    // Proxy credentials - only for the "basic" method with a username.
    const bool bProxyIsBasic = (m_sProxyLogonMethod == "basic");
    if (bProxyIsBasic && m_sProxyLogonUsername != "")
    {
        const CString sProxyCredentials =
            m_sProxyLogonUsername + ":" + m_sProxyLogonPassword;

        sHeader += "Proxy-authorization: Basic ";
        sHeader += EncodeTextBase64(sProxyCredentials);
        sHeader += "\r\n";
    } // End if

    // Server credentials - whenever a username has been supplied.
    if (m_sLogonUsername != "")
    {
        const CString sCredentials =
            m_sLogonUsername + ":" + m_sLogonPassword;

        sHeader += "Authorization: Basic ";
        sHeader += EncodeTextBase64(sCredentials);
        sHeader += "\r\n";
    } // End if

    // Blank line terminates the header block.
    sHeader += "\r\n";
    return sHeader;
}
// --------------------------------------------------------------
// ************* public
// * *
// * httpGet *
// * *
// *************
// Function: Retrieves a URL and returns it in CString form.
//
// Inputs:   sURL      - The URL to access
//                       (example: "www.mysite.com").
//                       Leading/trailing whitespace is ignored. If the
//                       URL contains no ':' it is prefixed with "http:"
//                       (when it starts with '/') or "http://".
//
// Outputs:  <function_result> - True if data was successfully
//                               retrieved, false otherwise
//           sResponse - The HTML retrieved. Assigned on success, and
//                       with whatever was read so far if an exception
//                       occurs; left untouched for an HTTP status of
//                       400 or above and for rejected URLs.
//           nResult   - Completion code. 0 = success,
//                       n = error (defined in CRobot.h):
//                         empty URL               -> INVALID_PARAMETER
//                         unparsable / non-HTTP   -> INVALID_URL
//                         HTTP 404                -> NOT_FOUND
//                         HTTP 401, 403, 407      -> NOT_AUTHORIZED
//                         other HTTP status >=400,
//                         OpenURL failure or any
//                         exception               -> CONNECTION_FAILED
//           sErrMsg   - ErrorMessage(nResult); always set, including
//                       on success.
//
// NOTE(review): each chunk read is appended as a NUL-terminated C
// string, so a response containing embedded NUL bytes is truncated at
// the first NUL of each chunk. Fine for HTML; not for binary payloads.
BOOL CRobotInternet::httpGet(const CString& sURL,
                             CString& sResponse,
                             int& nResult,
                             CString& sErrMsg)
{
    // Initialised here (not inside the try) so the clean-up code below
    // always sees well-defined pointers.
    CInternetSession* pSession  = NULL;
    CHttpFile*        pHttpFile = NULL;
    CString           sResult;

    // Read buffer lives on the stack: nothing to allocate or free. One
    // byte is reserved for the terminator added after each read.
    char szBuffer[1024];

    sErrMsg = "";
    nResult = CROBOT_ERR_SUCCESS;

    try
    {
        const CString sHeader = CreateStandardHeader();

        /* Trim URL and add http:// if it contains no
           protocol identifier */
        CString sWorkingUrl = sURL;
        sWorkingUrl.TrimLeft();
        sWorkingUrl.TrimRight();
        if (sWorkingUrl.Find(":") == -1)
        {
            if (sWorkingUrl.Left(1) == "/")
                sWorkingUrl = "http:" + sWorkingUrl;
            else
                sWorkingUrl = "http://" + sWorkingUrl;
        } // End if

        DWORD          dwServiceType = 0;
        CString        sServer, sObject;
        unsigned short nPort = 0;

        if (sURL.IsEmpty())
        {
            // Empty URL
            nResult = CROBOT_ERR_INVALID_PARAMETER;
        }
        else if (!AfxParseURL(sWorkingUrl,
                              dwServiceType,
                              sServer,
                              sObject,
                              nPort))
        {
            // Invalid URL
            nResult = CROBOT_ERR_INVALID_URL;
        }
        else if (dwServiceType != AFX_INET_SERVICE_HTTP)
        {
            // Wrong service type
            nResult = CROBOT_ERR_INVALID_URL;
        }
        else
        {
            // URL is valid and of the 'http:' service type
            pSession = new CInternetSession(m_sUserAgent,
                                            ++m_nContext,
                                            INTERNET_OPEN_TYPE_PRECONFIG);

            // Cache use is opt-in through the two member flags.
            DWORD dwFlags = INTERNET_FLAG_TRANSFER_BINARY
                          | INTERNET_FLAG_EXISTING_CONNECT;
            if (!m_bReadFromCache)
                dwFlags |= INTERNET_FLAG_RELOAD;
            if (!m_bWriteToCache)
                dwFlags |= INTERNET_FLAG_DONT_CACHE;

            pHttpFile = static_cast<CHttpFile*>(
                            pSession->OpenURL(sWorkingUrl,
                                              1,
                                              dwFlags,
                                              sHeader,
                                              -1L));

            if (pHttpFile == NULL)
            {
                // OpenURL failed
                nResult = CROBOT_ERR_CONNECTION_FAILED;
            }
            else
            {
                // Check the HTTP return code; if it cannot be queried,
                // carry on as though the request succeeded.
                DWORD dwHttpStatus = 0;
                if (!pHttpFile->QueryInfoStatusCode(dwHttpStatus))
                    dwHttpStatus = 200;

                if (dwHttpStatus >= 400)
                {
                    switch (dwHttpStatus)
                    {
                    case 404:
                        nResult = CROBOT_ERR_NOT_FOUND;
                        break;
                    case 401:   // authentication required by server
                    case 403:
                    case 407:
                        nResult = CROBOT_ERR_NOT_AUTHORIZED;
                        break;
                    default:
                        nResult = CROBOT_ERR_CONNECTION_FAILED;
                        break;
                    } // End switch
                }
                else
                {
                    // No error - read response data until Read
                    // reports that nothing more is available.
                    nResult = CROBOT_ERR_SUCCESS;
                    UINT nRead = 0;
                    while ((nRead = pHttpFile->Read(szBuffer,
                                                    sizeof(szBuffer) - 1))
                           != 0)
                    {
                        szBuffer[nRead] = '\0';
                        sResult += szBuffer;
                    } // End while
                    sResponse = sResult;
                } // End else
            } // End else pHttpFile
        } // End else
    } // End try
    catch (CInternetException* e)
    {
        // Internet exception occurred; hand back what was read so far
        e->Delete();
        sResponse = sResult;
        nResult = CROBOT_ERR_CONNECTION_FAILED;
    } // End catch
    catch (...)
    {
        // Exception occurred; hand back what was read so far
        sResponse = sResult;
        nResult = CROBOT_ERR_CONNECTION_FAILED;
    } // End catch

    // Clean up and exit function
    if (pHttpFile != NULL)
    {
        pHttpFile->Close();
        delete pHttpFile;
    } // End if
    if (pSession != NULL)
    {
        pSession->Close();
        delete pSession;
    } // End if

    sErrMsg = ErrorMessage(nResult);
    return (nResult == CROBOT_ERR_SUCCESS);
}
// --------------------------------------------------------------
// ***************** public
// * *
// * httpGetFile *
// * *
// *****************
// Function: Retrieves a URL and outputs it to a local file
//
// Inputs: sURL - The URL to access
// (example: "www.mysite.com")
// sFile - File to output to
// (example: "c:\temp\file1.gif")
//
// Outputs: <function_result> - True if data was successfully
// retrieved, false otherwise
// nResult - Completion code. 0 = success,
// n = error (defined in CRobot.h)
// sErrMsg - The error message, if nResult != 0
BOOL CRobotInternet::httpGetFile(const CString& sURL,
const CString& sOutputFilespec,
int& nResult,
CString& sErrMsg)
{
CInternetSession* pSession;
CHttpFile* pHttpFile;
CFile* pLocalFile;
CFileException exFile;
CString sHeader;
int nRead;
LPSTR pBuffer = NULL;
CString sResult;
CString sWorkingUrl;
CString sTemp;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -