Source code for nltk.twitter.api

# -*- coding: utf-8 -*-
# Natural Language Toolkit: Twitter API
#
# Copyright (C) 2001-2017 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#         Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
This module provides an interface for TweetHandlers, and support for timezone
handling.
"""

from abc import ABCMeta, abstractmethod
from six import add_metaclass
from datetime import tzinfo, timedelta, datetime
from nltk.compat import UTC
import time as _time


class LocalTimezoneOffsetWithUTC(tzinfo):
    """
    Minimal ``tzinfo`` that reports the local timezone's offset from UTC.

    This is not intended to be a general purpose class for dealing with the
    local timezone. In particular:

    * it assumes that the date passed has been created using
      `datetime(..., tzinfo=Local)`, where `Local` is an instance of
      the object `LocalTimezoneOffsetWithUTC`;
    * for such an object, it returns the offset with UTC, used for date
      comparisons.

    Only `utcoffset` is implemented; `dst` and `tzname` are left to the
    `tzinfo` base class.

    Reference: https://docs.python.org/3/library/datetime.html
    """
    # `time.timezone` / `time.altzone` are expressed in seconds *west* of UTC,
    # hence the sign flip to get a conventional UTC offset.
    STDOFFSET = timedelta(seconds=-_time.timezone)

    if _time.daylight:
        DSTOFFSET = timedelta(seconds=-_time.altzone)
    else:
        # No DST rules defined for the local zone: both offsets coincide.
        DSTOFFSET = STDOFFSET

    def _isdst(self, dt):
        """
        Tell whether DST is in effect for the local wall-clock time `dt`.

        :param dt: a `datetime` (its own tzinfo is ignored), or `None`.
        :return: `True`/`False`, or `None` when it cannot be determined
            (`dt` is `None`, or the platform cannot convert the date).
        """
        if dt is None:
            return None
        try:
            # Strip the tzinfo first: `timetuple()` on an aware datetime would
            # call `dst()`, which this class does not implement.
            stamp = _time.mktime(dt.replace(tzinfo=None).timetuple())
            return _time.localtime(stamp).tm_isdst > 0
        except (OverflowError, ValueError, OSError):
            # Date outside the range supported by the platform C library.
            return None

    def utcoffset(self, dt):
        """
        Access the relevant time offset.

        :param dt: the local `datetime` whose offset is requested, or `None`.
        :return: `STDOFFSET` when `dt` falls in standard time, otherwise
            `DSTOFFSET` (also used as the fallback when the DST status of
            `dt` cannot be determined).
        :rtype: timedelta
        """
        if self._isdst(dt) is False:
            return self.STDOFFSET
        return self.DSTOFFSET
# Module-level tzinfo instance; passed as `tzinfo=LOCAL` when building
# date limits from user-supplied tuples.
LOCAL = LocalTimezoneOffsetWithUTC()


@add_metaclass(ABCMeta)
class BasicTweetHandler(object):
    """
    Bare-bones `TweetHandler`.

    Holds a Tweet counter and a limit, and tells the client when it should
    stop fetching. The counter is not advanced here; presumably subclasses
    or clients update it.
    """
    def __init__(self, limit=20):
        """
        :param int limit: value the counter is compared against in
            `do_continue`.
        """
        self.counter = 0
        self.limit = limit

        # Flag telling the client to stop fetching data given some
        # condition (e.g., reaching a date limit).
        self.do_stop = False

        # Id of the last fetched Tweet, used to handle pagination.
        self.max_id = None

    def do_continue(self):
        """
        Report whether fetching should go on.

        :return: a false value once the counter has reached the limit or
            the stop flag has been raised; `True` otherwise.
        """
        below_limit = self.counter < self.limit
        if not below_limit:
            return below_limit
        return not self.do_stop
class TweetHandlerI(BasicTweetHandler):
    """
    Interface class whose subclasses should implement a handle method that
    Twitter clients can delegate to.
    """
    def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
        """
        :param int limit: The number of data items to process in the current\
        round of processing.

        :param tuple upper_date_limit: Tweets dated later than this cause\
        collection to stop. This should be entered as a tuple which can\
        serve as the argument to `datetime.datetime`.\
        E.g. `upper_date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on April 1\
        2015. The tuple is interpreted in the local timezone.

        :param tuple lower_date_limit: Tweets dated earlier than this cause\
        collection to stop. See `upper_date_limit` for formatting.
        """
        BasicTweetHandler.__init__(self, limit)

        self.upper_date_limit = None
        self.lower_date_limit = None
        # The tuples are made timezone-aware so they can be compared with
        # the UTC-aware tweet dates built in `check_date_limit`.
        if upper_date_limit:
            self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL)
        if lower_date_limit:
            self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL)

        self.startingup = True

    @abstractmethod
    def handle(self, data):
        """
        Deal appropriately with data returned by the Twitter API
        """

    @abstractmethod
    def on_finish(self):
        """
        Actions when the tweet limit has been reached
        """

    def check_date_limit(self, data, verbose=False):
        """
        Validate date limits.

        Sets `self.do_stop` to `True` when the tweet's creation date falls
        after `upper_date_limit` or before `lower_date_limit`. Does nothing
        when neither limit is set.

        :param data: mapping with a `'created_at'` entry formatted like\
        `'Wed Apr 01 12:40:00 +0000 2015'`.
        :param bool verbose: if `True`, print which limit was crossed.
        :raises ValueError: if `data['created_at']` does not match the\
        expected format (raised by `datetime.strptime`).
        """
        if not (self.upper_date_limit or self.lower_date_limit):
            return

        date_fmt = '%a %b %d %H:%M:%S +0000 %Y'
        tweet_date = \
            datetime.strptime(data['created_at'], date_fmt).replace(tzinfo=UTC)

        # Report the limit that was actually violated: with both limits set,
        # a tweet older than the lower limit must not be blamed on the upper.
        if self.upper_date_limit and tweet_date > self.upper_date_limit:
            message = "earlier"
            date_limit = self.upper_date_limit
        elif self.lower_date_limit and tweet_date < self.lower_date_limit:
            message = "later"
            date_limit = self.lower_date_limit
        else:
            # Tweet is within the configured window.
            return

        if verbose:
            print("Date limit {0} is {1} than date of current tweet {2}".\
                  format(date_limit, message, tweet_date))
        self.do_stop = True