hivefans
9/27/2016 - 11:34 PM

监控目录文件读写 (monitor and tail writes to files in a directory)

#!/usr/bin/python

import sys, os, glob, time
from stat import *

class WatchedFile:
  """Tracks one file being tailed: its path, inode, size and read state."""

  def __init__(self, path, inode, length):
    self.path = path
    self.inode = inode
    self.length = length
    # Binary mode keeps read offsets in bytes, matching os.stat().st_size
    # (text mode would translate newlines on some platforms), and the data
    # read is appended to a bytearray.
    self.fd = open(path, "rb")
    self.newdata = True      # assume unread bytes are available initially
    self.offset = 0          # bytes already consumed by the processor
    self.data = bytearray()  # unconsumed partial data (e.g. incomplete line)

  def close(self):
    """Release the underlying file handle."""
    self.fd.close()

  def reset(self):
    """Reopen the file from the beginning (used after truncation/rotation)."""
    self.close()
    self.fd = open(self.path, "rb")
    self.offset = 0
    self.length = 0
    self.data = bytearray()

  def __repr__(self):
    return "WatchedFile(path = %s, inode = %d, offset = %d, length = %d, newdata = '%s')"  \
        % (self.path, self.inode, self.offset, self.length, str(self.newdata))


def tail(pattern, processor):
  """Poll files matching glob `pattern` forever, feeding new bytes to `processor`.

  `processor(path, data)` receives the path and a bytearray of unconsumed
  data; it must return the number of bytes it consumed.  Loops until
  interrupted with Ctrl-C.
  """
  watchedFiles = {}
  while True:
    # Discover every item currently matching the pattern.
    for path in glob.iglob(pattern):
      try:
        stat = os.stat(path)
        # Only regular files are watched.
        if S_ISREG(stat.st_mode):
          if stat.st_ino in watchedFiles:
            # Already watched: detect growth (new data) or truncation.
            watchedFile = watchedFiles[stat.st_ino]
            if stat.st_size > watchedFile.length:
              watchedFile.newdata = True
            elif stat.st_size < watchedFile.length:
              # File shrank: assume truncate/rotate and start over.
              watchedFile.reset()
              if stat.st_size > 0:
                watchedFile.newdata = True
            watchedFile.length = stat.st_size
          else:
            watchedFiles[stat.st_ino] = WatchedFile(path, stat.st_ino, stat.st_size)
      except OSError:
        # Raised by os.stat or open (file may vanish between glob and stat).
        pass

    # Feed unread bytes to the processor and advance each file's offset.
    for watchedFile in watchedFiles.values():
      if watchedFile.newdata:
        length = watchedFile.length - watchedFile.offset
        if length > 0:
          data = watchedFile.fd.read(length)
          if data:
            watchedFile.data += bytearray(data)
            watchedFile.offset += processor(watchedFile.path, watchedFile.data)
            watchedFile.newdata = False

    # Drop watchers whose files no longer exist.  Snapshot the keys so
    # deleting entries does not disturb the iteration.
    for inode in list(watchedFiles.keys()):
      watchedFile = watchedFiles[inode]
      if not os.path.isfile(watchedFile.path):
        watchedFile.close()
        del watchedFiles[inode]

    try:
      time.sleep(1)
    except KeyboardInterrupt:
      sys.exit(0)

def line_processor(path, buff):
  """Print every complete line in `buff`, consuming it; return bytes consumed.

  `buff` is a bytearray; consumed lines (trailing newline included) are
  deleted from it in place, leaving any incomplete final line behind.

  Fixes a stall in the original: a newline at position 0 (an empty line)
  made `find` return 0, which failed both `> 0` tests, so the byte was
  never consumed and the tailer stopped making progress forever.
  """
  bytesRead = 0
  while True:
    newline = buff.find(b"\n")
    if newline < 0:
      break  # no complete line left
    end = newline + 1  # include the newline itself
    line = buff[:end]
    del buff[:end]
    print("%s = '%s'" % (path, line.strip().decode("utf-8", "replace")))
    bytesRead += end
  return bytesRead
  
tail(sys.argv[1], line_processor)
# ---------------------------------------------------------------------------
# Second variant of the tailer: persists read offsets to a status file so a
# restart can resume where it left off.  (Originally a separate script; the
# coding/shebang lines below are inert here since they are not at file top.)
# ---------------------------------------------------------------------------
#coding:utf-8
#!/usr/bin/python

import sys, os, glob, time
import json
import traceback
from stat import *
from urllib import unquote
import hashlib

def sha1_dict(d):
    """Return the SHA-1 hex digest of `d` serialized as canonical (sorted-key) JSON."""
    j = json.dumps(d, sort_keys=True)
    # Encode explicitly: hashlib digests require bytes (json.dumps emits
    # ASCII by default, so UTF-8 encoding is a safe no-op on Python 2).
    return hashlib.sha1(j.encode("utf-8")).hexdigest()

def parseLineLogic(line):
    """Parse one '##'-delimited access-log line and print it as a JSON record.

    Expected fields: logtime ## ip ## ? ## "METHOD URL HTTPVER" ## agent ## params.
    Lines without exactly six fields are silently skipped; other parse errors
    propagate to the caller (which treats this as best-effort).
    """
    fields = line.split('##')
    if len(fields) != 6:
        return
    record = {}  # renamed from `dict` to avoid shadowing the builtin
    record['logtime'] = fields[0]
    record['ip'] = fields[1]
    # fields[3] is the HTTP request line, e.g. "GET /path?x=1 HTTP/1.1".
    (record['type'], record['url'], _httpversion) = fields[3].split(' ')
    record['agent'] = fields[4]
    param = fields[5]
    if len(param) < 4:
        # No meaningful params field: fall back to the URL's query string.
        param = record['url'].split('?')[1]
    paramdict = {}
    for pair in param.split('&'):
        # Split on the first '=' only, so values containing '=' survive
        # (the original unconditional split crashed on such values).
        (key, value) = pair.split('=', 1)
        paramdict[key] = value
    if 'data' in paramdict:
        paramdict['data'] = json.loads(unquote(paramdict['data']))
    record['param'] = paramdict
    record['hashid'] = sha1_dict(record)
    print(json.dumps(record))

class WatchedFile:
    """Tracks one file being tailed: its path, inode, size and read state."""

    def __init__(self, path, inode, length):
        self.path = path
        self.inode = inode
        self.length = length
        # Binary mode keeps read offsets in bytes, matching os.stat().st_size
        # (text mode would translate newlines on some platforms), and the
        # data read is appended to a bytearray.
        self.fd = open(path, "rb")
        self.newdata = True      # assume unread bytes are available initially
        self.offset = 0          # bytes already consumed by the processor
        self.data = bytearray()  # unconsumed partial data (e.g. incomplete line)

    def close(self):
        """Release the underlying file handle."""
        self.fd.close()

    def reset(self):
        """Reopen the file from the beginning (used after truncation/rotation)."""
        self.close()
        self.fd = open(self.path, "rb")
        self.offset = 0
        self.length = 0
        self.data = bytearray()

    def __repr__(self):
        return "WatchedFile(path = %s, inode = %d, offset = %d, length = %d, newdata = '%s')"  \
        % (self.path, self.inode, self.offset, self.length, str(self.newdata))


statusFile = 'tail.status'

def loadWatchFilesStatus():
    """Restore the watched-file table from `statusFile`.

    Each status-file line is a JSON object with path/inode/offset/length.
    Entries whose file cannot be reopened (or whose JSON is malformed) are
    skipped individually instead of aborting the whole load, which the
    original bare `except:` around the entire loop did.  Returns a dict
    mapping inode -> WatchedFile.
    """
    fileStatus = {}
    try:
        statusF = open(statusFile, 'r')
    except (IOError, OSError):
        # No status file yet: start with an empty table.
        return fileStatus
    try:
        for line in statusF:
            try:
                obj = json.loads(line)
                watchFile = WatchedFile(path=obj['path'], inode=obj['inode'], length=obj['length'])
                watchFile.newdata = obj['offset'] < obj['length']
                watchFile.offset = obj['offset']
                # Resume reading where the previous run stopped.
                watchFile.fd.seek(watchFile.offset)
                fileStatus[obj['inode']] = watchFile
            except (ValueError, KeyError, IOError, OSError):
                # Bad record or vanished file: skip just this entry.
                continue
    finally:
        # The original leaked this handle.
        statusF.close()
    return fileStatus

def saveWatchedFilesStatus(Files):
    """Persist the watched-file table to `statusFile`, one JSON object per line.

    `Files` maps inode -> WatchedFile; only the resumable state (inode, path,
    offset, length, newdata) is written, matching what loadWatchFilesStatus reads.
    """
    # `with` guarantees the handle is closed even if a write fails
    # (the original leaked the handle on error).
    with open(statusFile, 'w') as statusF:
        for obFile in Files.values():
            dt = {
                'inode': obFile.inode,
                'path': obFile.path,
                'offset': obFile.offset,
                'length': obFile.length,
                'newdata': obFile.newdata,
            }
            statusF.write(json.dumps(dt) + '\n')

def tail(pattern, processor):
    """Poll files matching glob `pattern` forever, feeding new bytes to `processor`.

    The watched-file table is restored from / persisted to `statusFile`, so a
    restart resumes where the previous run stopped.  `processor(path, data)`
    receives a bytearray of unconsumed data and returns the bytes it consumed.

    Fixes from the original: a tab-indented line amid spaces (a TabError on
    Python 3), and the stale-file cleanup loop was mis-indented inside the
    per-file processing loop, so it could close and delete a WatchedFile that
    the enclosing loop was still about to read from.
    """
    watchedFiles = loadWatchFilesStatus()
    while True:
        # Discover every item currently matching the pattern.
        for path in glob.iglob(pattern):
            try:
                stat = os.stat(path)
                # Only regular files are watched.
                if S_ISREG(stat.st_mode):
                    if stat.st_ino in watchedFiles:
                        # Already watched: detect growth (new data) or truncation.
                        watchedFile = watchedFiles[stat.st_ino]
                        if stat.st_size > watchedFile.length:
                            watchedFile.newdata = True
                        elif stat.st_size < watchedFile.length:
                            # File shrank: assume truncate/rotate and start over.
                            watchedFile.reset()
                            if stat.st_size > 0:
                                watchedFile.newdata = True
                        watchedFile.length = stat.st_size
                    else:
                        watchedFiles[stat.st_ino] = WatchedFile(path, stat.st_ino, stat.st_size)
            except OSError:
                # Raised by os.stat or open (file may vanish between glob and stat).
                pass

        # Feed unread bytes to the processor and advance each file's offset.
        for watchedFile in watchedFiles.values():
            if watchedFile.newdata:
                length = watchedFile.length - watchedFile.offset
                if length > 0:
                    data = watchedFile.fd.read(length)
                    if data:
                        watchedFile.data += bytearray(data)
                        watchedFile.offset += processor(watchedFile.path, watchedFile.data)
                        watchedFile.newdata = False

        # Remove watchers whose files no longer exist.  Snapshot the keys so
        # deleting entries does not disturb the iteration.
        for inode in list(watchedFiles.keys()):
            watchedFile = watchedFiles[inode]
            if not os.path.isfile(watchedFile.path):
                watchedFile.close()
                del watchedFiles[inode]

        saveWatchedFilesStatus(watchedFiles)

        try:
            time.sleep(1)
        except KeyboardInterrupt:
            sys.exit(0)

def line_processor(path, buff):
    """Consume each complete line in `buff`, feeding it to parseLineLogic.

    `buff` is a bytearray; consumed lines (trailing newline included) are
    deleted from it in place.  Returns the number of bytes consumed.

    Fixes a stall in the original: a newline at position 0 (an empty line)
    made `find` return 0, which failed both `> 0` tests, so the byte was
    never consumed and the tailer stopped making progress forever.
    """
    bytesRead = 0
    while True:
        newline = buff.find(b"\n")
        if newline < 0:
            break  # no complete line left
        end = newline + 1  # include the newline itself
        line = buff[:end]
        del buff[:end]
        try:
            parseLineLogic(str(line))
        except Exception:
            # Best-effort: a malformed line must not kill the tailer, but
            # unlike the original bare except, KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            pass
        bytesRead += end
    return bytesRead


if __name__ == '__main__':
    # Usage: script <glob-pattern> [status-file]
    # The status-file argument is now optional (the original crashed with
    # IndexError without it); omitting it keeps the module default.
    if len(sys.argv) > 2:
        statusFile = sys.argv[2]
    tail(sys.argv[1], line_processor)