benfasoli
10/11/2017 - 4:35 PM

CR1000 Data Sync

Sync local database with remote CR1000 over TCP/IP

#! /usr/bin/env python
# Ben Fasoli

import pandas as pd
import os
from datetime import datetime, timedelta
from pycampbellcr1000 import CR1000

def fetch_cr1000(ip, port, table, t_start=None, t_end=None):
    """
    Pull data from remote CR1000 using TCP/IP

    Connects to device using tcp on the specified IP/port, downloads all data
    available in table during the period between t_start and t_end, and returns
    a pandas DataFrame.

    Args:
        ip (str): IP address
        port (str/int): connection port
        table (str): CR1000 table name containing desired data
        t_start (datetime): time to begin data collection query, passed
            through unchanged to CR1000.get_data (None by default)
        t_end (datetime): time to end data collection query, defaults to the
            current UTC time when None

    Returns:
        pandas.DataFrame: polled data, or None if the device does not answer
        the ping or the requested table does not exist on the device
    """
    d = CR1000.from_url('tcp:' + ip + ':' + str(port))
    if not d.ping_node():
        print('Unable to connect to device')
        return None
    # The device answered, so from here on always close the session with
    # bye() -- including on the early return below and if the query raises.
    try:
        tbls = d.list_tables()
        if table not in tbls:
            print('Table ' + table + ' not found')
            print('Available tables: ' + ', '.join(tbls))
            return None
        if t_end is None:
            t_end = datetime.utcnow()
        data = d.get_data(table, t_start, t_end)
    finally:
        d.bye()
    return pd.DataFrame(data)

def fetch_lgr(ip, port, path=None):
    """
    Pull data from remote LGR using TCP/IP

    Connects to device using tcp on the specified IP/port and syncs local data
    archive with remote /home/lgr/data/ directory using rsync.

    Args:
        ip (str): IP address
        port (str/int): Connection port
        path (str): path to local data archive. Required in practice: the
            previous default was built from a module-level `site` variable
            that does not exist when the function is defined, which made the
            whole module fail with NameError.

    Returns:
        bool: True if successful connection and local archive update, False
            otherwise.
    """
    import subprocess  # local import: not among the module-level imports

    if path is None:
        print('No local archive path given')
        return False

    # NOTE(review): the remote account name 'lgr' is inferred from the
    # documented /home/lgr/data/ directory, and the transport is assumed to be
    # rsync over ssh listening on `port` -- confirm against the instrument.
    remote = 'lgr@' + ip + ':/home/lgr/data/'
    # Argument list (no shell) so ip/port/path are never shell-interpreted.
    cmd = ['rsync', '-az', '-e', 'ssh -p ' + str(port), remote, path]
    try:
        status = subprocess.call(cmd)
    except OSError:
        # rsync executable missing or not runnable on this host
        print('Unable to run rsync')
        return False
    if status != 0:
        print('Unable to sync with device')
        return False
    return True

def find_last_file(site, path):
    """
    Determine last file in raw data archive

    Identify the last entry, in sorted name order, of the directory given by
    path and return the full path to it.

    Args:
        site (str): Abbreviation corresponding with site naming conventions and
            data directories. Currently unused; kept so existing callers
            continue to work.
        path (str): Directory to search, with or without a trailing separator

    Returns:
        str: Full path to the last file in the directory, or None if the
        directory is empty or cannot be listed.
    """
    try:
        names = os.listdir(path)
    except OSError:
        # Missing or unreadable directory counts as "no files found"
        return None
    if not names:
        return None
    # max() picks the same entry that sorting and taking the last would
    return os.path.join(path, max(names))

def find_last_time(site):
    """
    Find last time in sorted data archive

    Retrieves the first column of the last row of the last file in site's
    licor-6262 raw data archive and formats as datetime.

    Args:
        site (str): Abbreviation corresponding with site naming conventions and
            data directories

    Returns:
        datetime: time found on last line of file, or None if no file found,
        the file has no lines, or error converting data to datetime
    """
    f = find_last_file(site, '/projects/data/' + site + '/licor-6262/raw/')
    if f is None:
        return None
    lines = tail(f)
    # tail gives None for a missing file and an empty list for an empty file
    if not lines:
        return None
    # strip() so a single-column line does not carry its newline into strptime
    lt = lines[0].split(',')[0].strip()
    try:
        return datetime.strptime(lt, '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        return None

def lookup_ip(site):
    """
    Return the IP address registered for a site

    Args:
        site (str): Site abbreviation, as used in site naming conventions and
            data directories

    Returns:
        str: IP address for the site, or None when the site has no entry
    """
    addresses = {
        'dbk': '166.130.22.212',
        'heb': '166.130.69.244',
        'imc': '67.128.146.28',
        'lgn': '129.123.46.97',
        'rpk': '205.127.188.48',
        'sug': '166.130.89.167',
        'sun': '107.1.14.185',
    }
    # dict.get yields None for unknown sites
    return addresses.get(site)

def lookup_port(site):
    """
    Return the connection port registered for a site

    Args:
        site (str): Site abbreviation, as used in site naming conventions and
            data directories

    Returns:
        str: connection port for the site, or None when the site has no entry
    """
    ports = {
        'dbk': '6785',
        'heb': '3001',
        'imc': '6785',
        'lgn': '6785',
        'rpk': '6785',
        'sug': '3001',
        'sun': '6785',
    }
    # dict.get yields None for unknown sites
    return ports.get(site)

def tail(f, n=1):
    """
    UNIX-style file tail

    Returns the last n lines of file f by running the system `tail` utility.

    Args:
        f (str): Full path to file
        n (int): number of lines to return, defaults to 1

    Returns:
        list: strings for each line returned by tail (line endings are kept),
        or None if f is None or is not an existing file
    """
    import subprocess  # local import: not among the module-level imports

    if f is None or not os.path.isfile(f):
        print('File ' + str(f) + ' not found')
        return None
    # Pass an argument list instead of a shell string so paths containing
    # spaces or shell metacharacters are handled literally; '--' stops option
    # parsing in case the file name starts with '-'.
    cmd = ['tail', '-n', str(n), '--', f]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        return proc.stdout.readlines()
    finally:
        # Release the pipe and reap the child process
        proc.stdout.close()
        proc.wait()


if __name__ == '__main__':
    # Example run: pull everything newer than the last archived record for
    # one site from the 'Dat' table and show the most recent rows.
    site    = 'dbk'
    ip      = lookup_ip(site)
    port    = lookup_port(site)
    t_start = find_last_time(site)
    co2 = fetch_cr1000(ip, port, 'Dat', t_start)
    # fetch_cr1000 returns None when the device or table is unavailable, and
    # a bare co2.tail() expression shows nothing outside an interactive shell.
    if co2 is not None:
        print(co2.tail())
    # TODO: a 'PM' table pull was sketched here against a pull_data() helper
    # that does not exist in this file; presumably fetch_cr1000 is the
    # intended call -- confirm before enabling.
from flask import Flask, request, redirect
import requests

# Flask application object; the /fetch_cr1000 route is registered on it.
app = Flask(__name__)

@app.route('/fetch_cr1000', methods=['GET'])
def fetch_cr1000():
    """
    Redirect a query to the data-query URL of a remote CR1000

    Reads the `ip`, `table` and `t_start` query parameters and answers with a
    302 redirect to the device's `dataquery` command URL (TOA5 format,
    since-time mode). If any parameter is missing or empty, a plain-text usage
    message is returned with status 400 instead.
    """
    ip = request.args.get('ip')
    table = request.args.get('table')
    t_start = request.args.get('t_start')

    # Validate explicitly rather than relying on a TypeError from
    # concatenating None; this also rejects empty values.
    if not (ip and table and t_start):
        response = ['Invalid API query.',
                    '',
                    'Query requires the following three parameters',
                    'ip: IP address of remote CR1000',
                    'table: name of CR1000 table to query',
                    't_start: start time of data query, in format 2017-10-27T19:00:00',
                    '',
                    'Example:',
                    'air.utah.edu:9000/fetch_cr1000?ip=129.123.46.97&table=Dat&t_start=2017-10-26T20:00:00']
        # Served as plain text so the line breaks display as written; the
        # previous code wrapped the message in undefined `pre`/`post` names
        # and raised NameError.
        return ('\r\n'.join(response), 400,
                {'Content-Type': 'text/plain; charset=utf-8'})

    # NOTE(review): `ip` comes straight from the request, so this endpoint
    # redirects to any caller-chosen host -- consider restricting it to known
    # device addresses.
    uri = ('http://' + ip + '/?command=dataquery&uri=dl:' + table +
           '&format=TOA5&mode=since-time&p1=' + t_start)
    return redirect(uri, code=302)

if __name__ == '__main__':
    # Listen on all interfaces, port 9000. Debug mode is off because the
    # interactive debugger allows arbitrary code execution and must not be
    # reachable from other hosts.
    app.run('0.0.0.0', port=9000, debug=False)