#!/usr/bin/python -u 

"""
Takes in text data from a producer and inserts it
into a NilmDB stream. 

John Donnal 2016
"""

# Help text passed to argparse as the program description in main().
# This string is printed verbatim by --help (RawDescriptionHelpFormatter).
DESCRIPTION = """
Specify a configuration YAML file and one or more data files. 
Wildcards are accepted. If the files are not ordered by 
ascending timestamp, gaps will be inserted between any
non-monotonic files.
"""

# Sample configuration file, passed to argparse as the --help epilog in main().
# NOTE(review): the example does not list `fifo_path`, which Consumer reads
# from the config; main() only fills it in when data files are given on the
# command line -- confirm whether the example should mention it.
EXAMPLE_CONFIG = """
Example config.yml, copy and customize:

---
#configuration              description [default_value]
#
name: Custom Input          # name
abbreviation: custom        # optional abbreviation

buffer_size: 5              # ~= samples per second
db_path: /data/sensor1      # db path
ts_format: us               # timestamp: [us] ms string

# [schema] is a list of data fields
# each field must have a [name], other entries are optional
schema:                    # add an entry for each data column
 - name: sine              # each column must have a unique [name]
   default_min:            # leave blank for auto scale  
   default_max:            # leave blank for auto scale
   offset: 0               # y = (x-offset)*scale
   scale_factor: 1.0       # see above
   plottable: true         # set to False to hide data from Explorer
   discrete: false         # set to True to plot as ||| pipes

 - name: cosine             # ... next column

## end of example config file ##

"""

import argparse
import nilmdb.client
import nilmtools.filter
from nilmdb.utils.time import (timestamp_to_human,
                               seconds_to_timestamp,
                               parse_time,
                               now as time_now)
from auto_decimate import Decimator
import sys
import pdb
import numpy as np
import json
import yaml
import time
from db_file import DbFile
from db_stream import DbStream
from nilmdb.client.errors import ClientError

# Timestamp formats: the accepted values of the `ts_format` config entry.
# Consumer.parse_ts() converts each of them to an integer timestamp.
TS_MICROSECOND = 'us'
TS_MILLISECOND = 'ms'
TS_STRING = 'string'

# Largest tolerated jump, in microseconds (10*1e6 us == 10 s; note this is a
# float), between the previous end timestamp and the first timestamp of the
# next chunk of data. Consumer.run() starts a new interval when it is exceeded.
MAX_GAP = 10*1e6 #insert gap if timestamps skew by > 10 seconds between files

class Consumer(object):
    """Reads timestamped text samples and inserts them into a NilmDB stream.

    Each input line holds a timestamp followed by one value per data
    column, separated by single spaces.  The configuration mapping
    supplies:

      fifo_path    input file/FIFO used when run() gets no input_file
      buffer_size  number of samples inserted per chunk [1]
      ts_format    'us', 'ms' or 'string' [us]
      db_path      destination stream, must look like /group/file

    Typical use: Consumer(config), then initialize(), then run().
    """

    def __init__(self, config):
        """Store the configuration; nothing is contacted until initialize().

        config -- mapping loaded from the YAML configuration file
        """
        self.initialized = False
        self.config = config

        # initialize state variables:
        self.input_fifo = None
        self.buffer_size = None
        # built by run() whenever a new interval starts
        self.decimator = None

    def initialize(self):
        """Connect to NilmDB, validate the config and prepare the stream.

        Returns True on success.  On failure the reason is printed and
        False is returned; run() refuses to work until this succeeds.
        """
        self.client = nilmdb.client.numpyclient.\
                      NumpyClient("http://localhost/nilmdb")
        # setup the input stream
        if not self.__setup_input():
            return False
        self.dbFile = DbFile()
        # setup the destination stream.  The comparison against False is
        # deliberate: only an explicit False from buildFromConfig is
        # treated as failure (its success value is not visible here).
        if self.dbFile.buildFromConfig(self.config) == False:
            return False
        # create the stream if necessary
        if not self.__create_stream():
            return False  # error in create stream

        self.initialized = True
        return True

    def __setup_input(self):
        """Read and validate fifo_path, buffer_size and ts_format.

        Returns True when all three are usable, otherwise prints the
        problem and returns False.
        """
        try:
            self.input_fifo = self.config['fifo_path']
        except KeyError as e:
            print("config file missing %s" % e.args[0])
            return False

        try:
            self.buffer_size = int(self.__set('buffer_size', 1))
        except (TypeError, ValueError):
            # TypeError covers a blank YAML entry, which loads as None
            print("[ERROR]: buffer_size must be an integer")
            return False
        if self.buffer_size < 1:
            # run() reads buffer_size lines per chunk, so it must be positive
            print("[ERROR]: buffer_size must be at least 1")
            return False

        self.ts_format = self.__set('ts_format', TS_MICROSECOND)
        if self.ts_format not in (TS_MICROSECOND, TS_MILLISECOND, TS_STRING):
            print("[ERROR]: invalid ts_format [%s], "
                  "use 'us','ms', or 'string'" % (self.ts_format,))
            return False
        return True

    def __set(self, key, default):
        """Return self.config[key], or default when the key is absent."""
        try:
            return self.config[key]
        except KeyError:
            return default

    def __create_stream(self):
        """Make sure the destination stream (and its group) exists.

        Creates /<group>/info and the float32 data stream when they are
        missing; when the data stream already exists its column count
        must match the configured schema.  Stores the stream path in
        self.path.  Returns True on success, False after printing an
        error otherwise.
        """
        try:
            path = self.config['db_path']
        except KeyError:
            print("[ERROR]: missing db_path")
            return False
        # make sure the path is valid: a leading slash and two components
        if not path.startswith('/') or len(path[1:].split('/')) != 2:
            print("[ERROR]: invalid path %s, must be /group/file" % path)
            return False
        group = path[1:].split('/')[0]
        group_path = "/%s/info" % group
        num_cols = self.dbFile.numCols()
        # 1. create the group if necessary
        info = nilmtools.filter.get_stream_info(self.client, group_path)
        if not info:
            print("creating group [%s]" % group)
            self.client.stream_create(group_path, "uint8_1")
        # 2. check if the stream itself is present
        info = nilmtools.filter.get_stream_info(self.client, path)
        if info:
            # 2a: it exists, make sure the data type fits the configs
            if info.layout_count != num_cols:
                print("[ERROR]: config has %d fields, %s has %d" %
                      (num_cols, path, info.layout_count))
                return False
        else:
            # 2b: it doesn't exist, make it
            print("Creating stream")
            config = self.dbFile.getConfig()
            config_key = {"config_key__": json.dumps(config)}
            self.client.stream_create(path, 'float32_%d' % num_cols)
            self.client.stream_update_metadata(path, config_key)
        # 3. remember the destination path for run()
        self.path = path
        return True

    def parse_ts(self, ts):
        """Convert a timestamp token to an integer according to ts_format.

        'us' values are truncated to an integer, 'ms' values are scaled
        by 1000 (fractional milliseconds are kept, rounded to the nearest
        integer) and 'string' values are handed to parse_time().
        Raises ValueError when a numeric token cannot be parsed.
        """
        if self.ts_format == TS_MICROSECOND:
            return int(float(ts))
        elif self.ts_format == TS_MILLISECOND:
            # scale before converting so "1.5" ms becomes 1500, not 1000
            return int(round(float(ts) * 1000))
        else:  # self.ts_format == TS_STRING
            return parse_time(ts)

    def _read_chunk(self, fifo, ts_array, val_array):
        """Fill the buffers with up to buffer_size samples from fifo.

        Returns the number of rows filled; fewer than buffer_size means
        the end of the input was reached.  Raises ValueError when a line
        cannot be parsed or has the wrong number of values.
        """
        for i in range(self.buffer_size):
            line = fifo.readline()
            if line == '':  # readline() returns '' only at end of input
                return i
            vals = line.split(' ')
            ts_array[i] = self.parse_ts(vals[0])
            val_array[i] = [float(x) for x in vals[1:]]
        return self.buffer_size

    def _insert_chunk(self, data, last_ts):
        """Insert one chunk into the stream and feed it to the decimator.

        data    -- 2-D array, timestamp in column 0, values after it
        last_ts -- end of the previously inserted interval

        Returns the new end timestamp (last sample + 1), or None when the
        chunk's timestamps run backwards.
        """
        first_ts = int(data[0][0])
        # use the last_ts if it makes sense: the chunk starts after it
        # and within MAX_GAP of it
        contiguous = first_ts > last_ts and (first_ts - last_ts) < MAX_GAP
        if contiguous:
            start_ts = last_ts
        else:
            print("starting new interval")
            start_ts = first_ts

        end_ts = int(data[-1][0]) + 1
        if start_ts > end_ts:
            # report the problem instead of dropping into a debugger
            print("[ERROR]: timestamps run backwards (start %d > end %d)" %
                  (start_ts, end_ts))
            return None

        if not contiguous or self.decimator is None:
            # restart the decimators for the new interval (also covers a
            # first chunk that happens to look contiguous with last_ts)
            self.decimator = Decimator(4, self.path,
                                       width=self.dbFile.numCols())

        # insert the data into the database
        with self.client.stream_insert_numpy_context(self.path,
                                                     start=start_ts,
                                                     end=end_ts) as ctx:
            ctx.insert(data)
        # decimate the data
        self.decimator.process(data)
        return end_ts

    def run(self, input_file='', last_ts=0):
        """Read samples from input_file and insert them chunk by chunk.

        input_file -- file to read; '' means the configured fifo_path
        last_ts    -- end timestamp of the data inserted so far, used to
                      decide whether the new data continues that interval

        Returns the end timestamp after the last inserted chunk.  When a
        line cannot be parsed, the error is printed, reading stops and
        the end timestamp reached so far is returned.  Returns None when
        initialize() has not succeeded, when NilmDB rejects the data or
        when the timestamps run backwards.
        """
        if not self.initialized:
            print("error, call consumer.initialize() first")
            return

        # if input_file is specified use it, otherwise
        # read from the config file fifo parameter
        if input_file == '':
            input_file = self.input_fifo

        # buffers for one chunk of timestamped data from the source
        ts_array = np.empty((self.buffer_size, 1), dtype=np.uint64)
        val_array = np.empty((self.buffer_size,
                              self.dbFile.numCols()), dtype=np.float32)
        try:
            with open(input_file, 'r') as fifo:
                while True:
                    rows = self._read_chunk(fifo, ts_array, val_array)
                    if rows == 0:
                        break
                    # a short final chunk is still inserted, not dropped
                    data = np.hstack((ts_array[:rows], val_array[:rows]))
                    last_ts = self._insert_chunk(data, last_ts)
                    if last_ts is None:
                        return None
                    if rows < self.buffer_size:
                        break  # end of input reached inside this chunk
        except ValueError as e:
            print("ValueError: %s" % e)
        except ClientError as e:
            print("Data Error: %s" % e)
            return
        return last_ts
        
#            except :
#                print "Error processing input"
#                time.sleep(2)

        
def main():
    """Command-line entry point for nilm-consume.

    Loads the YAML configuration, builds and initializes a Consumer and
    then either processes every data file given on the command line in
    order, or, when none are given, reads from the configured fifo_path.
    Exits with status 1 when the configuration cannot be loaded, the
    consumer fails to initialize or a data file is rejected.
    """
    parser = argparse.ArgumentParser(
        prog="nilm-consume",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION,
        epilog=EXAMPLE_CONFIG)
    parser.add_argument("config_file", help="configuration file")
    parser.add_argument("files", nargs='*', help="data input files")
    args = parser.parse_args()
    # try to load the config file
    try:
        with open(args.config_file, 'r') as f:
            # safe_load: a config file only needs plain YAML types, and
            # yaml.load() without a Loader can construct arbitrary objects
            config = yaml.safe_load(f)
    except IOError:
        print("can't load configuration file at [%s], is it missing?" %
              args.config_file)
        sys.exit(1)
    except yaml.YAMLError as e:
        print("[ERROR]: can't parse configuration file [%s]: %s" %
              (args.config_file, e))
        sys.exit(1)
    # an empty file loads as None; anything but a mapping is unusable
    if not isinstance(config, dict):
        print("[ERROR]: configuration file [%s] must contain a YAML mapping" %
              args.config_file)
        sys.exit(1)
    # 1.) build the Consumer object
    # if data files were given, they replace the FIFO path
    if args.files:
        config['fifo_path'] = "--using input files--"
    consumer = Consumer(config)
    # 2.) initialize it and check for errors
    if not consumer.initialize():
        print("error starting the consumer, exiting")
        sys.exit(1)
    # 3.) run through all of the files
    if args.files:
        last_ts = 0
        for data_file in args.files:
            print("processing [%s]" % data_file)
            # carry the end timestamp over so consecutive files can
            # continue the same interval
            last_ts = consumer.run(data_file, last_ts)
            if last_ts is None:
                print("[ERROR]: bad data in input file [%s], exiting" %
                      data_file)
                sys.exit(1)
    else:
        consumer.run()  # FIFO process, shouldn't return

# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
