// crobotinternet.cpp
AfxParseURL(sTempURL, dwService, sServer, sObject, nPort);
sPath = sObject;
if (sPath=="")
sPath = "/";
if (sPath.Find(".") != -1)
{
nPos = sPath.ReverseFind('/');
if (nPos != -1)
sPath = sPath.Left(nPos + 1);
} // End if
if (sPath.Right(1) != "/")
sPath += "/";
return sPath;
}
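// Example (illustrative): for "http://www.example.com/docs/page.html"
// the code above yields "/docs/"; for a URL with no path, such as
// "http://www.example.com", it typically yields "/".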
// --------------------------------------------------------------
// ******************* public
// * *
// * RobotExcluded *
// * *
// *******************
// Function: Scans a robot policy file (previously acquired from a site),
// and reports whether a particular URL is considered
// accessible. This function expects the m_sUserAgent field
// to have been set in advance to identify the agent.
//
// Inputs: sRobotPolicy - A site's robots.txt file (httpGet may
// be used to acquire this file)
// sUrlToAccess - A fully qualified URL
//
// Outputs: <function_result> - True if access is excluded (denied);
// false if access is not explicitly
// prohibited
BOOL CRobotInternet::RobotExcluded(const CString& sRobotPolicy,
const CString& sUrlToAccess)
{
CString sPath = ParsePathFromURL(sUrlToAccess);
BOOL bIsExcluded;
/* Unless the user agent value is blank, first check for an
explicit reference to this agent */
if (m_sUserAgent != "")
if (CheckExclusion(sRobotPolicy,
m_sUserAgent,
sPath,
bIsExcluded))
/* There is an entry for the user agent */
return bIsExcluded;
/* If there is no entry for the user agent, look for an entry
that applies to * (all other robots) */
if (CheckExclusion(sRobotPolicy, "*", sPath, bIsExcluded))
/* there is an entry for "*" */
return bIsExcluded;
/* Robots.txt contains neither specific nor default prohibitions.
Return false to indicate no exclusion. */
return false;
}
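// --------------------------------------------------------------
// Usage sketch (illustrative only): checking a URL against a site's
// robots.txt before crawling it. The httpGet call shown is the
// class's own HTTP retrieval routine mentioned above; its exact
// parameter list, and direct access to m_sUserAgent, are assumed
// here purely for illustration.
//
// CRobotInternet robot;
// robot.m_sUserAgent = "MyRobot/1.0"; // identify the agent first
// CString sPolicy, sErrMsg;
// int nResult;
// if (robot.httpGet("http://www.example.com/robots.txt",
//                   sPolicy, nResult, sErrMsg)) // assumed signature
// {
//     // bDenied is true when the site's policy excludes this URL
//     BOOL bDenied = robot.RobotExcluded(sPolicy,
//         "http://www.example.com/private/index.html");
// } // End if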
// --------------------------------------------------------------
// ******************** private
// * *
// * CheckExclusion *
// * *
// ********************
// This is a private function called by the public
// function RobotExcluded
//
// Function: Searches the robot policy text for a user-agent entry
// matching sAgent and reports whether sPath is disallowed
// by that entry
//
// Inputs: sRobotPolicy - Text of robots.txt file previously
// acquired from a site.
// sAgent - User agent entry to search for. Can
// be a specific agent name, or "*".
// sPath - The path name to be visited on
// the site.
//
// Outputs: <function_result> - True if a section of robots.txt was
// found for sAgent
// bIsExcluded - Set to true if the access is
// explicitly denied; set to false
// otherwise
BOOL CRobotInternet::CheckExclusion(const CString& sRobotPolicy,
const CString& sAgent,
const CString& sPath,
BOOL& bIsExcluded)
{
int nPos;
int nAgentLen, nExclusionLen;
CString sRobotLower = sRobotPolicy;
sRobotLower.MakeLower();
CString sAgentLower = sAgent;
sAgentLower.MakeLower();
nAgentLen = sAgentLower.GetLength();
CString sPathLower = sPath;
sPathLower.MakeLower();
CString sEntryLower = "";
CString sExclusion = "";
BOOL bScanningUserAgent, bScanningDisallow;
/* Find each user-agent: entry and compare it to the
agent name specified */
bScanningUserAgent = true;
while (bScanningUserAgent)
{
nPos = sRobotLower.Find("user-agent:");
if (nPos != -1)
{
sRobotLower = sRobotLower.Mid(nPos + 11);
sRobotLower.TrimLeft();
if (sRobotLower.Left(nAgentLen) == sAgentLower)
{
// Found entry for this agent
nPos = sRobotLower.Find("user-agent:");
if (nPos == -1)
sEntryLower = sRobotLower;
else
sEntryLower = sRobotLower.Left(nPos);
// Find each disallow: statement in the entry
bScanningDisallow = true;
while (bScanningDisallow)
{
nPos = sEntryLower.Find("disallow:");
if (nPos != -1)
{
sEntryLower = sEntryLower.Mid(nPos + 9);
sEntryLower.TrimLeft();
nPos = sEntryLower.Find("disallow:");
if (nPos == -1)
sExclusion = sEntryLower;
else
sExclusion = sEntryLower.Left(nPos);
sExclusion.TrimRight();
if (sExclusion.Right(1) != "/")
sExclusion += "/";
nExclusionLen = sExclusion.GetLength();
/* Compare the exclusion to the lowercased target
path to see if there is a match */
if (sPathLower.Left(nExclusionLen) == sExclusion)
{
bIsExcluded = true;
return true;
} // End if sPath
} // End if nPos
else
bScanningDisallow = false;
} // End while bScanningDisallow
/* No exclusion encountered or function would have
already returned */
bIsExcluded = false;
// There is an entry for this agent
return true;
} // End if
} // end if nPos
else
bScanningUserAgent = false;
} // End while bScanningUserAgent
bIsExcluded = false;
return false; /* Agent entry not found */
}
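// --------------------------------------------------------------
// For reference, a robots.txt fragment of the form the scan above
// handles (illustrative):
//
//     User-agent: MyRobot
//     Disallow: /private/
//     Disallow: /tmp/
//
//     User-agent: *
//     Disallow: /cgi-bin/
//
// With this policy, CheckExclusion(sPolicy, "MyRobot", "/private/", b)
// locates the MyRobot section, matches the "/private/" exclusion
// against the path, sets b to true, and returns true; for the path
// "/docs/" it returns true with b set to false (a section exists for
// the agent, but the path is not disallowed).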
// **************************************************************
// * *
// * *
// * F T P F u n c t i o n s *
// * *
// * *
// **************************************************************
// --------------------------------------------------------------
// ****************
// * *
// * ftpGetFile *
// * *
// ****************
// Function: Retrieves a file from an FTP site
//
// Inputs: sURL - URL to access, including user id
// and password info if required
// sDir - Directory to move to (optional;
// leave blank or NULL to access
// default directory).
// sRemoteFilespec - Name of file on remote FTP server
// to be retrieved.
// sLocalFilespec - Name to give retrieved file on
// local system.
//
// Outputs: <function-result> - True if successful, false if an
// error occurred
// nResult - Error code (0 = no error,
// n = error code)
// sErrMsg - Error message text of error (if any)
BOOL CRobotInternet::ftpGetFile(const CString& sUrl,
const CString& sDir,
const CString& sRemoteFilespec,
const CString& sLocalFilespec,
int& nResult, CString& sErrMsg)
{
TCHAR sz[1024];
CInternetSession* pSession = NULL;
CStdioFile* pFile = NULL;
DWORD nRead = 0;
CFile* pMyFile = NULL;
CString sId, sPassword;
CString sTemp;
CString sWorkingUrl, sWorkingDir;
nResult = CROBOT_ERR_SUCCESS;
CString sMsg;
CString sHeader = "Accept: */*\r\n\r\n";
try
{
pSession = new CInternetSession(m_sUserAgent,
1,
INTERNET_OPEN_TYPE_PRECONFIG);
sWorkingUrl = sUrl;
sWorkingUrl.TrimLeft();
sWorkingUrl.TrimRight();
// Check for invalid parameters
if (!(sUrl.IsEmpty())
&& !(sLocalFilespec.IsEmpty())
&& !(sRemoteFilespec.IsEmpty()))
{
sId = m_sLogonUsername;
sPassword = m_sLogonPassword;
sTemp = sWorkingUrl.Left(4);
sTemp.MakeLower();
if (sTemp == "ftp:") sWorkingUrl = sWorkingUrl.Mid(4);
if (sWorkingUrl.Left(2) == "//")
sWorkingUrl = sWorkingUrl.Mid(2);
int nPos1 = sWorkingUrl.Find(":");
int nPos2 = sWorkingUrl.Find("@");
if (nPos1 > 0 && nPos2 > nPos1)
{
sId = sWorkingUrl.Left(nPos1);
sPassword = sWorkingUrl.Mid(nPos1 + 1,
nPos2 - nPos1 - 1);
sWorkingUrl = sWorkingUrl.Mid(nPos2 + 1);
} // End if
if (sId == "")
sWorkingUrl = "ftp://" + sWorkingUrl;
else
sWorkingUrl = "ftp://"
+ sId
+ ":"
+ sPassword
+ "@"
+ sWorkingUrl;
if (sWorkingUrl.Right(1) != "/") sWorkingUrl += "/";
sWorkingDir = sDir;
if (sWorkingDir != "")
{
if (sWorkingDir.Left(1) == "/")
sWorkingDir = sWorkingDir.Mid(1);
sWorkingUrl += sWorkingDir;
if (sWorkingUrl.Right(1) != "/") sWorkingUrl += "/";
} // End if
sWorkingUrl += sRemoteFilespec;
pMyFile = new CFile;
if (pMyFile->Open(sLocalFilespec,
CFile::modeCreate
| CFile::modeReadWrite))
{
pFile = pSession->OpenURL (
sWorkingUrl,
1,
INTERNET_FLAG_RELOAD
| INTERNET_FLAG_TRANSFER_BINARY,
sHeader, // szHead
-1L);
if (pFile) /* OpenURL worked */
{
nResult = CROBOT_ERR_SUCCESS;
// Get data
do
{
nRead = pFile->Read(sz, 1023);
if (nRead != 0)
{
sz[nRead] = 0;
pMyFile->Write (sz, nRead);
} // End if
} while (nRead != 0); // End do ... while
nResult = CROBOT_ERR_SUCCESS;
} // End if pFile
else /* OpenURL failed */
{
nResult = CROBOT_ERR_CONNECTION_FAILED;
} // End else
} // End if
else
{
nResult = CROBOT_ERR_DISK_FILE_ERROR;
} // End else
} // End if
else
nResult = CROBOT_ERR_INVALID_PARAMETER;
} // End try
catch (CInternetException *pEx)
{
switch(pEx->m_dwError)
{
case ERROR_INTERNET_TIMEOUT:
nResult = CROBOT_ERR_TIMED_OUT;
break;
case ERROR_INTERNET_INVALID_URL:
nResult = CROBOT_ERR_INVALID_URL;
break;
case ERROR_INTERNET_EXTENDED_ERROR:
// Invalid or non-existing filename
nResult = CROBOT_ERR_NOT_FOUND;
break;
case ERROR_INTERNET_INCORRECT_USER_NAME:
case ERROR_INTERNET_INCORRECT_PASSWORD:
case ERROR_INTERNET_LOGIN_FAILURE:
nResult = CROBOT_ERR_NOT_AUTHORIZED;
break;
default:
nResult = CROBOT_ERR_CONNECTION_FAILED;
break;
} // End switch
pEx->Delete();
} // End catch
catch (CFileException *pEx)
{
int nErr = pEx->m_cause;
pEx->Delete();
nResult = CROBOT_ERR_FILE+nErr;
} // End catch
catch (...)
{
nResult = CROBOT_ERR_CONNECTION_FAILED;
} // End catch
// Clean up and exit function
if (pFile != NULL)
{
pFile->Close();
delete pFile;
} // End if
if (pMyFile != NULL)
{
pMyFile->Close();
delete pMyFile;
} // End if
if (pSession != NULL)
{
pSession->Close();
delete pSession;
} // End if
sErrMsg = ErrorMessage(nResult);
if (nResult == CROBOT_ERR_SUCCESS)
return true;
else
return false;
}
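// --------------------------------------------------------------
// Usage sketch (illustrative only): retrieving a file with
// ftpGetFile. Server, directory, and file names are placeholders;
// credentials may also be supplied in the URL as user:password@host.
//
// CRobotInternet robot;
// int nResult;
// CString sErrMsg;
// if (robot.ftpGetFile("ftp://ftp.example.com",
//                      "pub/docs",             // remote directory
//                      "readme.txt",           // file on the server
//                      "c:\\temp\\readme.txt", // local destination
//                      nResult, sErrMsg))
// {
//     // File retrieved successfully
// }
// else
// {
//     // nResult and sErrMsg describe the failure
// }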
// --------------------------------------------------------------
// ****************
// * *
// * ftpPutFile *
// * *
// ****************
// Function: Sends a local file to an FTP site
//
// Inputs: sURL - FTP server to access, including user id
// and password info if required
// sDir - Directory to move to (optional;
// leave blank or NULL to use the
// default directory).
// sLocalFilespec - Name of the local file to send.
// sRemoteFilespec - Name to give the file on the
// remote FTP server.
//
// Outputs: <function_result> - True if successful, false if an
// error occurred
// nResult - Error code (0 = no error,
// n = error code)
// sErrMsg - Error message text of error (if any)
BOOL CRobotInternet::ftpPutFile(const CString& sUrl,
const CString& sDir,
const CString& sLocalFilespec,
const CString& sRemoteFilespec,
int& nResult, CString& sErrMsg)
{
#define FTP_UNKNOWN 0
#define FTP_BLANK 1
#define FTP_OPEN 2
#define FTP_CLOSE 3
#define FTP_CD 4
#define FTP_LCD 5
#define FTP_GET 6
#define FTP_PUT 7
#define FTP_BYE 8
#define FTP_ASCII 9
#define FTP_BINARY 10
CInternetSession* pSession = NULL;
CFtpConnection* pFTPConnection = NULL;
CString LocalFile, RemoteFile;
CString sWorkingUrl;
CString sId, sPassword;
CString sTemp;
nResult = CROBOT_ERR_SUCCESS;
CString sMsg;
try
{
sWorkingUrl = sUrl;
sWorkingUrl.TrimLeft();
sWorkingUrl.TrimRight();
// Check for invalid parameters
if (!(sWorkingUrl.IsEmpty())
&& !(sLocalFilespec.IsEmpty())
&& !(sRemoteFilespec.IsEmpty()))
{
// See if the file to send exists and is available
CFileStatus fs;
if (CFile::GetStatus(sLocalFilespec, fs))
{
/* If user:password@ specified in URL, extract to
sID and sPassword and shorten sWorkingUrl */
sId = m_sLogonUsername;
sPassword = m_sLogonPassword;
sTemp = sWorkingUrl.Left(4);
sTemp.MakeLower();
if (sTemp == "ftp:")
sWorkingUrl = sWorkingUrl.Mid(4);
if (sWorkingUrl.Left(2) == "//")
sWorkingUrl = sWorkingUrl.Mid(2);
int nPos1 = sWorkingUrl.Find(":");
int nPos2 = sWorkingUrl.Find("@");
if (nPos1 > 0 && nPos2 > nPos1)
{
sId = sWorkingUrl.Left(nPos1);
sPassword = sWorkingUrl.Mid(nPos1 + 1,
nPos2 - nPos1 - 1);
sWorkingUrl = sWorkingUrl.Mid(nPos2 + 1);
} // End if
// Establish Internet connection
pSession = new CInternetSession(
m_sUserAgent,
1,
INTERNET_OPEN_TYPE_PRECONFIG);
if (pSession)
{
// Session established. Now open connection.
pFTPConnection = pSession->GetFtpConnection(
sWorkingUrl,
sId,
sPassword);
if (pFTPConnection)
{
/* Established FTP connection.
Set the directory (unless blank sDir
passed to function). */
if (sDir != ""
&& !pFTPConnection->SetCurrentDirectory(sDir))
// Set directory failed
nResult = CROBOT_ERR_CONNECTION_FAILED;
else
{
// Send the file
if (!pFTPConnection->PutFile(
sLocalFilespec,
sRemoteFilespec,
FTP_TRANSFER_TYPE_BINARY,
1))
// File transfer failed
nResult = CROBOT_ERR_CONNECTION_FAILED;
else
nResult = CROBOT_ERR_SUCCESS;
} // End else
} // End if
else
// FTP connection failed
nResult = CROBOT_ERR_CONNECTION_FAILED;
} // End if
else
// Unable to create session
nResult = CROBOT_ERR_CONNECTION_FAILED;
} // End if
else
// File to send does not exist
nResult = CROBOT_ERR_NOT_FOUND;
} // End if
else
// Empty parameter
nResult = CROBOT_ERR_INVALID_PARAMETER;
} // End try
catch(CInternetException* pEx)
{
switch(pEx->m_dwError)
{
case ERROR_INTERNET_TIMEOUT:
nResult = CROBOT_ERR_TIMED_OUT;
break;
case ERROR_INTERNET_INVALID_URL:
nResult = CROBOT_ERR_INVALID_URL;
break;
case ERROR_INTERNET_EXTENDED_ERROR:
// Invalid or non-existing filename
nResult = CROBOT_ERR_NOT_FOUND;
break;
case ERROR_INTERNET_INCORRECT_USER_NAME:
case ERROR_INTERNET_INCORRECT_PASSWORD:
case ERROR_INTERNET_LOGIN_FAILURE:
nResult = CROBOT_ERR_NOT_AUTHORIZED;
break;
default:
nResult = CROBOT_ERR_CONNECTION_FAILED;
break;
} // End switch
pEx->Delete();
} // End catch
catch(...)
{
nResult = CROBOT_ERR_CONNECTION_FAILED;
} // End catch
// Clean up and exit function
if (pFTPConnection != NULL)
{
pFTPConnection->Close();
delete pFTPConnection;
} // End if
if (pSession != NULL)
{
pSession->Close();
delete pSession;
} // End if
sErrMsg = ErrorMessage(nResult);
if (nResult == CROBOT_ERR_SUCCESS)
return true;
else
return false;
}
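// --------------------------------------------------------------
// Usage sketch (illustrative only): uploading a file with
// ftpPutFile. Note the parameter order differs from ftpGetFile (the
// local filespec comes before the remote one), and the URL should
// name the FTP server itself (optionally with ftp:// and
// user:password@), with any target directory passed in sDir.
//
// CRobotInternet robot;
// int nResult;
// CString sErrMsg;
// if (robot.ftpPutFile("ftp://ftp.example.com", // server
//                      "incoming",              // remote directory
//                      "c:\\temp\\report.txt",  // local file to send
//                      "report.txt",            // name on the server
//                      nResult, sErrMsg))
// {
//     // File uploaded successfully
// }
// else
// {
//     // nResult and sErrMsg describe the failure
// }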