r"""SentimentTransformer uses TextBlob.sentiment to generate
    sentiment-based feature vectors (either lexical or Naive Bayes).

:Author:
    Matthias Manhertz
:Copyright:
    (c) Matthias Manhertz 2015
:Licence:
    MIT
"""

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from .feature_transformer import FeatureTransformer
from .combine_tweets import combine_tweets
from itertools import izip
from time import time


class SentimentTransformer(FeatureTransformer):
    r"""Transforms twitter .json files into sentiment features

        The SentimentTransformer is meant to turn .json files
        containing twitter-data (like the ones generated by
        twistml's filtering and / or preprocessing steps) into
        sentiment based feature vectors.

        Notes
        -----
        This class is currently a skeleton: __init__ ignores all
        keyword arguments and transform has no implementation yet,
        so it returns None.

        Example Usage
        -------------
        ::
            import twistml as tml

            filepaths = tml.find_files('c:/data/')
            snt = tml.features.SentimentTransformer()
            features = snt.transform(filepaths)

    """

    def __init__(self, **kwargs):
        r"""Initialize the SentimentTransformer

        Parameters
        ----------
        **kwargs
            Currently accepted but ignored; no keyword argument is
            read, stored or validated.

        """
        # NOTE: no attributes are set here and the __init__ of
        # FeatureTransformer is not invoked.
        # TODO: an earlier, disabled draft split kwargs between
        # CountVectorizer and combine_tweets and raised ValueError for
        # unknown keys; decide which options the sentiment variant
        # actually needs before re-introducing such a check.

    def transform(self, filepaths):
        r"""Transforms twitter data in files into a dict mapping
            datestamps to sentiment vectors.

            Not implemented yet: nothing is read from `filepaths`
            and the call currently returns None.

            Parameters
            ----------
            filepaths : list(str)
                A list of files that contain tweets in the typical
                format (dict[str, str]) as generated by the filtering
                and / or preprocessing functions in twistml.

            Returns
            -------
            daily_sents : dict[datetime, ndarray]
                Intended result: a dict mapping datestamps to
                sentiment vectors in numpy ndarray format. Until the
                method is implemented the actual return value is
                None.

        """
        # TODO: the earlier, disabled draft combined the tweets with
        # combine_tweets() and vectorized them with CountVectorizer /
        # TfidfTransformer; a sentiment based computation still has to
        # be written in its place.
        return None
