?? libgmail.py
字號:
#!/usr/bin/env python
#
# libgmail -- Gmail access via Python
#
## To get the version number of the available libgmail version.
## Reminder: add date before next release. This attribute is also
## used in the setup script.
Version = '0.1.11' # (August 2008)

# Original author: follower@rancidbacon.com
# Maintainers: Waseem (wdaher@mit.edu) and Stas Z (stas@linux.isbeter.nl)
#
# License: GPL 2.0
#
# NOTE:
#   You should ensure you are permitted to use this script before using it
#   to access Google's Gmail servers.
#
#
# Gmail Implementation Notes
# ==========================
#
# * Folders contain message threads, not individual messages. At present I
#   do not know any way to list all messages without processing thread list.
#

LG_DEBUG = 0
from lgconstants import *

import os
import pprint
import re
import urllib
import urllib2
import mimetypes
import types
import mechanize as ClientCookie
from cPickle import load, dump

from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.MIMEMultipart import MIMEMultipart

GMAIL_URL_LOGIN = "https://www.google.com/accounts/ServiceLoginBoxAuth"
GMAIL_URL_GMAIL = "https://mail.google.com/mail/?ui=1&"

# Set to any value to use proxy.
PROXY_URL = None  # e.g. libgmail.PROXY_URL = 'myproxy.org:3128'

# TODO: Get these on the fly?
STANDARD_FOLDERS = [U_INBOX_SEARCH, U_STARRED_SEARCH,
                    U_ALL_SEARCH, U_DRAFTS_SEARCH,
                    U_SENT_SEARCH, U_SPAM_SEARCH]

# Constants with names not from the Gmail Javascript:
# TODO: Move to `lgconstants.py`?
U_SAVEDRAFT_VIEW = "sd"

D_DRAFTINFO = "di"
# NOTE: All other DI_* field offsets seem to match the MI_* field offsets
DI_BODY = 19

versionWarned = False  # If the Javascript version is different have we
                       # warned about it?


RE_SPLIT_PAGE_CONTENT = re.compile(r"D\((.*?)\);", re.DOTALL)


class GmailError(Exception):
    '''
    Exception thrown upon gmail-specific failures, in particular a
    failure to log in and a failure to parse responses.
    '''
    pass


class GmailSendError(Exception):
    '''
    Exception to throw if we are unable to send a message
    '''
    pass


def _parsePage(pageContent):
    """
    Parse the supplied HTML page and extract useful information from
    the embedded Javascript.

    `pageContent` is the page text. Only lines whose first character is
    one of ``D``, ``)``, ``,`` or ``]`` are kept; these are evaluated as
    a series of ``D([...])`` calls.

    Returns a dict mapping the first element of each ``D`` argument to a
    list of the values collected for that name.

    Raises `GmailError` if no page content was supplied or if the
    extracted data is not syntactically valid.
    """
    if pageContent is None:
        # `GmailAccount._retrievePage` returns None after an HTTP error;
        # report that as a parse failure rather than an AttributeError.
        raise GmailError('Failed to parse data returned from gmail.')

    lines = pageContent.splitlines()
    data = '\n'.join([x for x in lines
                      if x and x[0] in ('D', ')', ',', ']')])
    #data = data.replace(',,',',').replace(',,',',')
    # Prefix every double-quoted string literal with `u`.
    data = re.sub(r'("(?:[^\\"]|\\.)*")', r'u\1', data)
    # Collapse runs of commas (empty array slots) into a single comma.
    data = re.sub(',{2,}', ',', data)

    result = []
    try:
        # SECURITY NOTE(review): this executes text received from the
        # server. Builtins are disabled and only `D` is exposed, but that
        # is not a real sandbox; a proper parser would be safer.
        # `result.append` is passed directly (no closure) so the call
        # form of `exec` is accepted by every Python 2.6+ release.
        exec(data, {'__builtins__': None}, {'D': result.append})
    except SyntaxError as info:
        print(info)
        raise GmailError('Failed to parse data returned from gmail.')

    itemsDict = {}
    for item in result:
        name = item[0]
        try:
            parsedValue = item[1:]
        except Exception:
            parsedValue = ['']

        # A name may be used more than once (e.g. mail items, mail body
        # etc); all of its values are accumulated in one list.
        # TODO: Check this actually works properly, it's early... :-)
        values = itemsDict.setdefault(name, [])
        if parsedValue and isinstance(parsedValue[0], list):
            # A bunch of entries: add each entry separately.
            values.extend(parsedValue)
        else:
            values.append(parsedValue)

    return itemsDict


def _splitBunches(infoItems):  # Is this still needed ?? Stas
    """
    Utility to help make it easy to iterate over each item separately,
    even if they were bunched on the page.

    Returns a new list in which every tuple found in `infoItems` has
    been replaced by its elements; other items are kept as they are.
    """
    result = []
    # TODO: Decide if this is the best approach.
    for group in infoItems:
        if isinstance(group, tuple):
            result.extend(group)
        else:
            result.append(group)
    return result


class SmartRedirectHandler(ClientCookie.HTTPRedirectHandler):
    """
    Redirect handler that rewrites the `Host` header of the request
    when a 302 response points at a `*.google.com` host.
    """

    def __init__(self, cookiejar):
        self.cookiejar = cookiejar

    def http_error_302(self, req, fp, code, msg, headers):
        # The location redirect doesn't seem to change
        # the hostname header appropriately, so we do
        # by hand. (Is this a bug in urllib2?)
        location = headers.getheader('Location')
        # Guard against a missing header: re.match(pattern, None) would
        # raise TypeError.
        if location:
            new_host = re.match(r'http[s]*://(.*?\.google\.com)', location)
            if new_host:
                req.add_header("Host", new_host.groups()[0])
        result = ClientCookie.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        return result


def _buildURL(**kwargs):
    """
    Return the module-level `URL_GMAIL` followed by the url-encoded
    keyword arguments.

    NOTE(review): within this file `URL_GMAIL` is only assigned by
    `GmailAccount.__init__` -- confirm it is otherwise defined before
    calling this without having created an account.
    """
    return "%s%s" % (URL_GMAIL, urllib.urlencode(kwargs))


def _paramsToMime(params, filenames, files):
    """
    Build a multipart "form-data" MIME message.

    `params` is a mapping of form field names to values. `filenames`
    and `files` are optional sequences (either may be None/empty);
    string items are treated as file paths to read, anything else is
    assumed to be an `email.Message.Message` instance.

    Returns the `MIMEMultipart` message.
    """
    mimeMsg = MIMEMultipart("form-data")

    for name, value in params.items():
        mimeItem = MIMEText(value)
        mimeItem.add_header("Content-Disposition", "form-data", name=name)

        # TODO: Handle this better...?
        for hdr in ['Content-Type', 'MIME-Version',
                    'Content-Transfer-Encoding']:
            del mimeItem[hdr]

        mimeMsg.attach(mimeItem)

    if filenames or files:
        filenames = filenames or []
        files = files or []
        for idx, item in enumerate(filenames + files):
            # TODO: This is messy, tidy it...
            # `basestring` so that unicode paths are also recognised.
            if isinstance(item, basestring):
                # We assume it's a file path...
                filename = item
                contentType = mimetypes.guess_type(filename)[0]
                # Close the file promptly instead of leaking the handle.
                with open(filename, "rb") as attachment:
                    payload = attachment.read()
            else:
                # We assume it's an `email.Message.Message` instance...
                # TODO: Make more use of the pre-encoded information?
                filename = item.get_filename()
                contentType = item.get_content_type()
                payload = item.get_payload(decode=True)

            if not contentType:
                contentType = "application/octet-stream"

            mimeItem = MIMEBase(*contentType.split("/"))
            mimeItem.add_header("Content-Disposition", "form-data",
                                name="file%s" % idx, filename=filename)
            # TODO: Encode the payload?
            mimeItem.set_payload(payload)

            # TODO: Handle this better...?
            for hdr in ['MIME-Version', 'Content-Transfer-Encoding']:
                del mimeItem[hdr]

            mimeMsg.attach(mimeItem)

    del mimeMsg['MIME-Version']

    return mimeMsg


class GmailLoginFailure(Exception):
    """
    Raised whenever the login process fails--could be wrong username/password,
    or Gmail service error, for example.
    Extract the error message like this:
    try:
        foobar
    except GmailLoginFailure as e:
        mesg = e.message  # or
        print(e)          # uses the __str__
    """
    def __init__(self, message):
        # Initialise the base class as well so that `args` is populated.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return repr(self.message)


class GmailAccount:
    """
    A Gmail (or hosted-domain) account accessed through the web
    interface.
    """

    def __init__(self, name = "", pw = "", state = None, domain = None):
        """
        Create an account from either `name` and `pw`, or from a `state`
        object whose `state` attribute is a (name, cookie jar) pair.
        `domain` selects the hosted-domain login and mail URLs.

        Raises ValueError when neither credentials nor state are given.

        NOTE: the login/mail URLs are stored in the module-level globals
        `URL_LOGIN` and `URL_GMAIL`, so they are shared by every
        instance and reflect the most recently created account.
        """
        global URL_LOGIN, URL_GMAIL

        self.domain = domain
        if self.domain:
            URL_LOGIN = "https://www.google.com/a/" + self.domain + "/LoginAction2"
            URL_GMAIL = "http://mail.google.com/a/" + self.domain + "/?ui=1&"
        else:
            URL_LOGIN = GMAIL_URL_LOGIN
            URL_GMAIL = GMAIL_URL_GMAIL

        if name and pw:
            self.name = name
            self._pw = pw
            self._cookieJar = ClientCookie.LWPCookieJar()
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(self._cookieJar))
            ClientCookie.install_opener(opener)
        elif state:
            # TODO: Check for stale state cookies?
            self.name, self._cookieJar = state.state
        else:
            raise ValueError("GmailAccount must be instantiated with " \
                             "either GmailSessionState object or name " \
                             "and password.")

        # Build the opener on both construction paths. It used to be
        # created only for name/password accounts, which left
        # `self.opener` undefined for accounts restored from state.
        if PROXY_URL is not None:
            import gmail_transport

            self.opener = ClientCookie.build_opener(
                gmail_transport.ConnectHTTPHandler(proxy = PROXY_URL),
                gmail_transport.ConnectHTTPSHandler(proxy = PROXY_URL),
                SmartRedirectHandler(self._cookieJar))
        else:
            self.opener = ClientCookie.build_opener(
                ClientCookie.HTTPHandler(),
                ClientCookie.HTTPSHandler(),
                SmartRedirectHandler(self._cookieJar))

        self._cachedQuotaInfo = None
        self._cachedLabelNames = None

    def login(self):
        """
        Post the account credentials to the login URL.

        For non-domain accounts the redirect link found in the response
        is then requested as well, for the cookie returned with it.

        Raises `GmailLoginFailure` if the login page could not be
        retrieved or does not contain the expected redirect link.
        """
        # TODO: Throw exception if we were instantiated with state?
        if self.domain:
            data = urllib.urlencode({'continue': URL_GMAIL,
                                     'at'      : 'null',
                                     'service' : 'mail',
                                     'Email': self.name,
                                     'Passwd': self._pw,
                                     })
        else:
            data = urllib.urlencode({'continue': URL_GMAIL,
                                     'Email': self.name,
                                     'Passwd': self._pw,
                                     })

        headers = {'Host': 'www.google.com',
                   'User-Agent': 'Mozilla/5.0 (Compatible; libgmail-python)'}

        req = ClientCookie.Request(URL_LOGIN, data=data, headers=headers)
        pageData = self._retrievePage(req)

        if not self.domain:
            # The GV cookie no longer comes in this page for
            # "Apps", so this bottom portion is unnecessary for it.
            # This requests the page that provides the required "GV" cookie.
            RE_PAGE_REDIRECT = 'CheckCookie\?continue=([^"\']+)'

            # TODO: Catch more failure exceptions here...?
            # `pageData` is None when the request failed with an HTTP
            # error; treat that like a page without the redirect link.
            match = None
            if pageData is not None:
                match = re.search(RE_PAGE_REDIRECT, pageData)
            if match is None:
                raise GmailLoginFailure("Login failed. (Wrong username/password?)")

            redirectURL = urllib2.unquote(match.group(1))
            redirectURL = redirectURL.replace('\\x26', '&')

            # We aren't concerned with the actual content of this page,
            # just the cookie that is returned with it.
            pageData = self._retrievePage(redirectURL)

    def getCookie(self, cookiename):
        """
        Return the value of the first cookie in the jar named
        `cookiename`, or an empty string if there is none.
        """
        # TODO: Is there a way to extract the value directly?
        for cookie in self._cookieJar:
            if cookie.name == cookiename:
                return cookie.value
        return ""

    def _retrievePage(self, urlOrRequest):
        """
        Open `urlOrRequest` (a URL string or a request object) and
        return the response body.

        Returns None, after printing the error, when the server answers
        with an HTTP error. Raises `GmailError` if the account has no
        opener.
        """
        if getattr(self, 'opener', None) is None:
            # Was `raise "Cannot find urlopener"`: string exceptions are
            # not valid and end up as a TypeError.
            raise GmailError("Cannot find urlopener")

        # ClientCookieify it, if it hasn't been already
        if not isinstance(urlOrRequest, urllib2.Request):
            req = ClientCookie.Request(urlOrRequest)
        else:
            req = urlOrRequest

        req.add_header('User-Agent',
                       'Mozilla/5.0 (Compatible; libgmail-python)')

        try:
            resp = self.opener.open(req)
        except urllib2.HTTPError as info:
            print(info)
            return None
        pageData = resp.read()

        # TODO: This, for some reason, is still necessary?
        self._cookieJar.extract_cookies(resp, req)

        # TODO: Enable logging of page data for debugging purposes?
        return pageData

    def _parsePage(self, urlOrRequest):
        """
        Retrieve & then parse the requested page content.

        Returns the dict produced by the module-level `_parsePage`. As a
        side effect the quota info and label names are cached on the
        instance when the page contains them.
        """
        items = _parsePage(self._retrievePage(urlOrRequest))
        # Automatically cache some things like quota usage.
        # TODO: Cache more?
        # TODO: Expire cached values?
        # TODO: Do this better.
        try:
            self._cachedQuotaInfo = items[D_QUOTA]
        except KeyError:
            pass
        #pprint.pprint(items)

        try:
            self._cachedLabelNames = [category[CT_NAME]
                                      for category in items[D_CATEGORIES][0]]
        except KeyError:
            pass

        return items

    def _parseSearchResult(self, searchType, start = 0, **kwargs):
        """
        Request the thread-list view for `searchType`, beginning at
        offset `start`, and return the parsed page items. Extra keyword
        arguments are added to the request parameters.
        """
        params = {U_SEARCH: searchType,
                  U_START: start,
                  U_VIEW: U_THREADLIST_VIEW,
                  }
        params.update(kwargs)
        return self._parsePage(_buildURL(**params))

    def _parseThreadSearch(self, searchType, allPages = False, **kwargs):
        """
        Only works for thread-based results at present. # TODO: Change this?
        """
        start = 0
        tot = 0
        threadsInfo = []
        # Option to get *all* threads if multiple pages are used.
        while (start == 0) or (allPages and
                               len(threadsInfo) < threadListSummary[TS_TOTAL]):
            items = self._parseSearchResult(searchType, start, **kwargs)
            #TODO: Handle single & zero result case better? Does this work?
            # NOTE(review): the remainder of this method lies beyond the
            # end of the reviewed source and has been left untouched.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -