zhasm
11/3/2011 - 2:19 PM

Get unread Gmail messages and print their Subject, From, To, and Message-ID headers without modifying the messages' flags.

Get unread Gmail messages and print their Subject, From, To, and Message-ID headers without modifying the messages' flags.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import imaplib2 as imaplib
from email import Header
from email.parser import Parser

##USER CONFIG
# IMAP server host passed to the SSL connection.
HOST = 'imap.gmail.com'
# NOTE(review): PORT is not referenced anywhere in the visible code, and it is
# a string rather than an int -- confirm before relying on it.
PORT = '993'
# Credentials handed to login(); fill these in before running.
USER = ''
PAWD = ''
# Mailbox that is selected and scanned for messages.
HAM_F  = 'INBOX'

# Gmail label added to messages that the filters classify as spam.
SPAM_F = '0_learn_spam' #'[Gmail]/Spam' by default
LIMIT = 20  # How many messages to read on each run.

##Filter config. You need to know some regex.
# Maps a header name to the compiled pattern that marks a message as spam
# when it is found anywhere in that header's value (patterns are applied
# with .search()). Patterns flagged (?x) are verbose: whitespace and
# newlines inside them are ignored.
filters = {
    "To": re.compile(r"""(?xi)
        msn\d{3,8}\@fanfou\.com |
        (?:coderex|evernote)\@zhasm\.com
        """),
    # Dots are escaped so that e.g. "neweggXcomYcn" no longer matches.
    "From": re.compile(r"""(?xi)
        vipcareer\.net |
        calendar-notification\@google\.com |
        Tmall\@newsletter\.mail\.taobao\.com |
        newsletter\@newegg\.com\.cn |
        培训
        """),
    # NOTE(review): the unescaped (AD) alternative is a capture group, so it
    # matches the bare letters "AD" anywhere in a subject (case-sensitive),
    # which makes the \(AD\) alternative redundant -- confirm this breadth is
    # intended before tightening it.
    "Subject": re.compile(r"""(?x)
                              (AD)|
                              \(AD\)|
                              ^\.$
                           """),
    "Message-ID": re.compile(r"\.ru>$"),
    }

## system config


def GetUID(s):
    """Extract the message UID from a piece of IMAP FETCH response text.

    Args:
        s: Response text expected to contain ``UID <digits>``.

    Returns:
        The first run of digits that follows ``"UID "`` as a string.
        Returns ``""`` (after printing an ``Error!`` line) when no UID is
        present or when ``s`` is not searchable text.
    """
    try:
        match = re.search(r'(?<=UID )(\d+)', s)
    except TypeError as e:
        # Non-text input, e.g. None coming from a malformed response.
        print("Error! %s" % e)
        return ""

    if match is None:
        print("Error! no UID found in %r" % (s,))
        return ""
    return match.group(1)
    

def Filter(headers, rules=None):
    """Decide whether a message looks like spam based on its headers.

    A message is considered spam as soon as any one header value matches
    the pattern registered for that header name. This is aggressive, but
    simple to handle.

    Args:
        headers: Mapping of header name to header value (text).
        rules: Optional mapping of header name to compiled pattern. Defaults
            to the module-level ``filters`` table.

    Returns:
        True if at least one header matches its pattern, otherwise False.
        Headers that have no pattern registered are ignored instead of
        raising ``KeyError``.
    """
    if rules is None:
        rules = filters

    return any(
        name in rules and rules[name].search(value) is not None
        for name, value in headers.items()
    )
        
def DecodeSingleHeader(s):
    """Decode a possibly MIME-encoded header value into plain text.

    Each part returned by ``decode_header`` that declares a charset is
    decoded with that charset; undecodable bytes are replaced rather than
    raising, and an unknown charset name falls back to UTF-8. The parts are
    then joined with single spaces.

    Args:
        s: Raw header value, e.g. ``"=?utf-8?b?...?="``.

    Returns:
        The decoded header as the interpreter's native ``str``: UTF-8 encoded
        bytes on Python 2, text on Python 3.
    """
    # The lowercase module name exists on both Python 2.5+ and Python 3.
    from email.header import decode_header

    native_is_bytes = str is bytes  # True only on Python 2
    pieces = []
    for raw, charset in decode_header(s):
        # On Python 2 parts without a charset are passed through untouched,
        # as before. On Python 3 such parts may arrive as bytes and must be
        # turned into text so they can be joined.
        if isinstance(raw, bytes) and (charset or not native_is_bytes):
            try:
                text = raw.decode(charset or 'utf-8', 'replace')
            except LookupError:
                # Charset name the codec registry does not know.
                text = raw.decode('utf-8', 'replace')
            raw = text.encode('utf8') if native_is_bytes else text
        pieces.append(raw)

    return " ".join(pieces)

def StringToHeaders(s):
    """Parse raw header text and return the resulting message object.

    The result is whatever ``Parser().parsestr`` produces; individual
    headers can be looked up on it by name.
    """
    header_parser = Parser()
    message = header_parser.parsestr(s)
    return message

def TrimFrom(s):
    """Normalise a raw "From" header value before it is decoded.

    A quote character sitting directly between the end of an encoded word
    ("?=") and the opening "<" of the address is turned into a space, and
    every remaining single or double quote is then removed.
    """
    separated = re.sub(r'''\?=["']<''', '?= <', s)
    return separated.replace('"', '').replace("'", '')

def MarkUnread(imap, uids):
    """Mark the given messages as unread by removing their Seen flag.

    Args:
        imap: Connection object exposing ``uid(command, *args)``.
        uids: List of message UID strings; nothing is sent when it is empty.

    Returns:
        None. The server's reply to the STORE command is printed.
    """
    if not uids:
        return

    uid_set = ','.join(uids)
    print("marking unread %s" % uid_set)
    # Backslash written explicitly: "\S" is not a valid string escape.
    typ, data = imap.uid('STORE', uid_set, '-FLAGS', '\\SEEN')
    print("%s %s" % (typ, data))
    
def Move(imap, ham_f, spam_f, uids):
    """Move messages out of the selected mailbox into a Gmail label.

    Implementation: add ``spam_f`` as a Gmail label to the messages, then
    flag them as deleted in the current mailbox and expunge. Each step only
    runs if the previous one was answered with ``OK``. Errors are printed
    rather than raised, so a failed move does not abort the caller.

    Args:
        imap: Connection object exposing ``uid(command, *args)`` and
            ``expunge()``.
        ham_f: Source mailbox name. Unused here; the messages are taken from
            whichever mailbox is currently selected on ``imap``.
        spam_f: Label to add to the messages.
        uids: List of message UID strings; nothing is sent when it is empty.

    Returns:
        None.
    """
    if not uids:
        # An empty UID set would be rejected by the server as a bad command.
        return

    label = '"%s"' % spam_f
    uid_set = ",".join(uids)
    print("moving spams: %s" % uid_set)

    try:
        typ, data = imap.uid('STORE', uid_set, '+X-GM-LABELS', label)
        print(data)
        if typ != 'OK':
            return

        typ, data = imap.uid('STORE', uid_set, '+FLAGS', r'(\Deleted)')
        print(data)
        if typ == 'OK':
            # Expunge only once the messages are known to be flagged.
            imap.expunge()
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(e)
        
# Script body: connect, classify the first LIMIT messages of HAM_F by their
# headers, print each verdict, and move the spam to SPAM_F.
g = imaplib.IMAP4_SSL(HOST, int(PORT))

spam = []
ham = []

try:
    try:
        r, info = g.login(USER, PAWD)
    except Exception as e:
        # Nothing below can work without a login; report it and stop.
        print(str(e))
        raise SystemExit(1)

    g.select(HAM_F)
    # Use 'UNSEEN' instead of 'ALL' to restrict the scan to unread mail.
    typ, msg_ids = g.search(None, 'ALL')

    if typ == 'OK':
        # split() without an argument yields [] for an empty result, where
        # split(' ') would yield [''] and trigger a bogus FETCH.
        ids = (msg_ids[0] or '').split()
        for msg_id in ids[:LIMIT]:

            # BODY.PEEK reads the headers without setting the Seen flag, so
            # the read/unread state of every message is left as it was.
            (r, msg) = g.fetch(msg_id, '(UID BODY.PEEK[HEADER.FIELDS (SUBJECT FROM TO MESSAGE-ID)])')
            if r != 'OK' or not msg or not isinstance(msg[0], tuple):
                print("skipping message %s: unexpected FETCH response" % msg_id)
                continue

            uid = GetUID(msg[0][0])
            if not uid:
                # Without a UID the message cannot be addressed later on.
                continue

            h = StringToHeaders(msg[0][1])

            names = ['From', 'To', 'Subject', 'Message-ID']
            header = {}
            for name in names:
                if name in h:
                    v = h[name]
                    if name == 'From':
                        v = TrimFrom(v)
                    header[name] = DecodeSingleHeader(v)

            if Filter(header):
                spam.append(uid)
                print("SPAM:")
            else:
                ham.append(uid)
                print("HAM:")
            for name in names:
                if name in header:
                    print("%s :  %s" % (name, header[name]))
            print("")

    if spam:
        Move(g, HAM_F, SPAM_F, spam)
    # No MarkUnread(g, ham) here: the PEEK fetch above never sets the Seen
    # flag, and clearing it would turn already-read ham into unread mail.
finally:
    g.logout()