RSS feeds

RSS feeds Reader GUI

The last post mentions about retrieving RSS feeds. To allow easy viewing, a GUI is constructed. The GUI is built using wxpython and consists of few adjustable pane with scrolling enabled. The user can choose to display the different group (eg: “World” and “SG” news) in separate panels.

For live updates, a wx.timer function is added to the GUI so the data can update every x time specified by the users. This post highlights the use of wx MultiSplitterWindow, scrollable panels and wx.timer for feeds live updates.

feeds_watcher

import os, sys, re, time
import wx
from wx.lib.splitter import MultiSplitterWindow
from General_feed_extract import FeedsReader
import  wx.lib.scrolledpanel as scrolled

class SamplePane(scrolled.ScrolledPanel):
    """
    Just a simple test window to put into the splitter.
    Set to scrollable, set to word wrap
    """
    def __init__(self, parent, label):
        scrolled.ScrolledPanel.__init__(self, parent,style = wx.BORDER_SUNKEN)
        #self.SetBackgroundColour(colour)
        self.textbox = wx.TextCtrl(self, -1, label,style=wx.TE_MULTILINE )
        vbox = wx.BoxSizer(wx.VERTICAL)
        vbox.Add(self.textbox, 1, wx.ALIGN_LEFT | wx.ALL|wx.EXPAND, 5)
        self.SetSizer(vbox)
        self.SetAutoLayout(1)
        self.SetupScrolling()

        self.SetupScrolling()
    def SetOtherLabel(self, label):
        self.textbox.SetValue(label)
        self.SetupScrolling()

class MyPanel(wx.Panel):
    def __init__(self, parent):
        wx.Panel.__init__(self, parent, -1)
        self.parent = parent

        ## Add in the feeds parameters
        self.reader = FeedsReader()

        ## Add in timer
        self.timer = wx.Timer(self)
        self.Bind(wx.EVT_TIMER, self.on_timer_update_feeds, self.timer)
        self.timer.Start(30000) # start timer after a delay, time in milli sec

        splitter = MultiSplitterWindow(self, style=wx.SP_LIVE_UPDATE)
        self.splitter = splitter
        sizer = wx.BoxSizer(wx.HORIZONTAL)
        sizer.Add(splitter, 1, wx.EXPAND)
        self.SetSizer(sizer)

        self.world_news_panel = SamplePane(splitter, "Panel One")
        splitter.AppendWindow(self.world_news_panel, 140)

        self.SG_panel = SamplePane(splitter, "Panel Two")
        #self.SG_panel.SetMinSize(self.SG_panel.GetBestSize())
        splitter.AppendWindow(self.SG_panel, 180)

        self.others_panel = SamplePane(splitter,  "Panel Three")
        splitter.AppendWindow(self.others_panel, 105)

        ## Set the orientation
        self.splitter.SetOrientation(wx.VERTICAL)

        ## Updates the panel
        self.update_panels()

    def get_feeds(self):
        """ Run the get feeds class. Use for getting updates of the feeds.

        """
        self.reader.parse_rss_sites_by_cat()

    def update_panels(self):
        """ Update all the panels with the updated feeds.
            Can use the set other label method

        """
        self.get_feeds()
        self.update_SG_panel()
        self.update_world_panel()

    def update_world_panel(self):
        """ Update World_panel on the World news.

        """
        date_key = self.reader.set_last_desired_date(0)
        if self.reader.rss_results_dict_by_cat['World'].has_key(date_key):
            World_news_list = self.reader.rss_results_dict_by_cat['World'][date_key]
            World_news_str = '\n********************\n'.join(['\n'.join(n) for n in World_news_list])
            self.world_news_panel.SetOtherLabel(World_news_str)

    def update_SG_panel(self):
        """ Update SG_panel on the Singapore stock news.

        """
        date_key = self.reader.set_last_desired_date(0)
        if self.reader.rss_results_dict_by_cat['SG'].has_key(date_key):
            SG_news_list = self.reader.rss_results_dict_by_cat['SG'][date_key]
            SG_news_str = '\n********************\n'.join(['\n'.join(n) for n in SG_news_list])
            self.SG_panel.SetOtherLabel(SG_news_str)

    def on_timer_update_feeds(self,evt):
        """ Update feeds once timer reach.
        """
        print 'Updating....'
        self.update_panels()

    def SetLiveUpdate(self, enable):
        if enable:
            self.splitter.SetWindowStyle(wx.SP_LIVE_UPDATE)
        else:
            self.splitter.SetWindowStyle(0)

class MyFrame(wx.Frame):
    def __init__(self, parent, ID, title):      

        wx.Frame.__init__(self, parent, ID, title,pos=(150, 20), size=(850, 720))#size and position

        self.top_panel = MyPanel(self)

class MyApp(wx.App):
    def __init__(self):
        wx.App.__init__(self,redirect =False)
        self.frame= MyFrame(None,wx.ID_ANY, "Feeds Watcher")
        self.SetTopWindow(self.frame)

        self.frame.Show()

def run():
    try:
        app = MyApp()
        app.MainLoop()
    except Exception,e:
        print e
        del app

if __name__== "__main__":
    run()

The following links contains information on setting up scroll bars in wx and also working with wx.timers.

  1. wx scroll bar help
  2. wx timers

Get RSS feeds using python pattern

Python Pattern allows easy way to retrieve RSS feeds. The following script will act as a feeds reader and retrieve feeds from various sites, focusing on world news and related Singapore stock market in this example.

The pattern module has the NewsFeed() function that can take in RSS url and output the corresponding results.  The following is the description of the Newsfeed object from the pattern website “The Newsfeed object is a wrapper for Mark Pilgrim’s Universal Feed Parser. Newsfeed.search() takes the URL of an RSS or Atom news feed and returns a list of Result objects.”

This will return object that has the following attributes title, link and desc. The script below takes in a dict with the different categories as key. The value are the list of RSS url belonging to that category. The script will output results in the form of dict of categories and results of each category are segregated by date key. This script allows consolidation of different feeds from various RSS sources enabling the user to further process the feeds. The printing of the feeds can be limited by the set_last_desired_date() which display only results from a certain date.

import os, re, sys, time, datetime, copy, calendar
from pattern.web import URL, extension, cache, plaintext, Newsfeed

class FeedsReader(object):
    def __init__(self):

        #For grouping to various category
        self.rss_sites_by_category_dict = {
                                            'SG':   [
                                                        'http://feeds.theedgemarkets.com/theedgemarkets/sgtopstories.rss',
                                                        'http://feeds.theedgemarkets.com/theedgemarkets/sgmarkets.rss',
                                                        'http://feeds.theedgemarkets.com/theedgemarkets/sgproperty.rss',
                                                      ],
                                            'World':[
                                                        'http://www.ft.com/rss/home/asia',
                                                        'http://rss.cnn.com/rss/money_news_economy.rss',
                                                        'http://feeds.reuters.com/reuters/businessNews',
                                                      ],
                                            }
        self.rss_sites = []

        ## num of feeds to parse_per_site
        self.num_feeds_parse_per_site = 100

        ## individual group storage of feeds.
        self.rss_results_dict = {} # dict with date as key
        self.rss_title_list = []

        ## full results set consist of category
        self.rss_results_dict_by_cat ={} # dict of dict
        self.rss_title_list_by_cat = {}  # dict of list

    def set_rss_sites(self, rss_site_urls):
        """ Set to self.rss_sites.
            Args:
                rss_site_urls (list): list of rss site url for getting feeds.
        """
        self.rss_sites = rss_site_urls

    def convert_date_str_to_date_key(self, date_str):
        """ Convert the date str given by twiiter [created_at] to date key in format YYYY-MM-DD.
            Args:
                date_str (str): date str in format given by twitter. 'Mon Sep 29 07:00:10 +0000 2014'
            Returns:
                (int): date key in format YYYYMMDD
        """
        date_list = date_str.split()

        month_dict = {v: '0'+str(k) for k,v in enumerate(calendar.month_abbr) if k <10}
        month_dict.update({v:str(k) for k,v in enumerate(calendar.month_abbr) if k >=10})

        return int(date_list[3] + month_dict[date_list[2]] + date_list[1])

    def parse_rss_sites(self):
        """ Function to parse the RSS sites.
            Results are stored in self.rss_results_dict with date as key.
        """
        self.rss_results_dict = {}
        self.rss_title_list = []

        cache.clear()

        for rss_site_url in self.rss_sites:
            print "processing: ", rss_site_url
            for result in Newsfeed().search(rss_site_url)[:self.num_feeds_parse_per_site]:
                date_key = self.convert_date_str_to_date_key(result.date)
                self.rss_title_list.append(result.title)
                if self.rss_results_dict.has_key(date_key):
                    self.rss_results_dict[date_key].append([result.title,  plaintext(result.text)])
                else:
                    self.rss_results_dict[date_key] = [[result.title,  plaintext(result.text)]]
        print 'done'

    def parse_rss_sites_by_cat(self):
        """ Iterate over the list of categories and parse the list of rss sites.
        """
        self.rss_results_dict_by_cat ={} # dict of dict
        self.rss_title_list_by_cat = {}  # dict of list

        for cat in self.rss_sites_by_category_dict:
            print 'Processing Category: ', cat
            self.set_rss_sites(self.rss_sites_by_category_dict[cat])
            self.parse_rss_sites()
            self.rss_results_dict_by_cat[cat] = self.rss_results_dict
            self.rss_title_list_by_cat[cat] = self.rss_title_list

    def set_last_desired_date(self, num_days = 0):
        """ Return the last date in which the results will be displayed.
            It is set to be the current date - num of days as set by users.
            Affect only self.print_feeds function.
            Kwargs:
                num_days (int): num of days prior to the current date.
                Setting to 0 will only retrieve the current date
            Returns:
                (int): datekey as yyyyymmdd.
        """
        last_eff_date_list = list((datetime.date.today() - datetime.timedelta(num_days)).timetuple()[0:3])

        if len(str(last_eff_date_list[1])) == 1:
            last_eff_date_list[1] = '0' + str(last_eff_date_list[1])

        return int(str(last_eff_date_list[0]) + last_eff_date_list[1] + str(last_eff_date_list[2]))

    def print_feeds(self, rss_results_dict):
        """ Print the RSS data results. Required the self.rss_results_dict.
            Args:
                rss_results_dict (dict): dict containing date as key and title, desc as value.
        """
        for n in rss_results_dict.keys():
            print 'Results of date: ', n
            dataset = rss_results_dict[n]
            if int(n) >= self.set_last_desired_date():
                print '===='*10
                for title,desc in dataset:
                    print title
                    print desc
                    print '--'*5
                    print

    def print_feeds_for_all_cat(self):
        """ Print feeds for all the category specified by the self.rss_results_dict_by_cat

        """
        for cat in self.rss_results_dict_by_cat:
            print 'Printing Category: ', cat
            self.print_feeds(self.rss_results_dict_by_cat[cat])
            print
            print "####"*18

if __name__ == '__main__':
        f = FeedsReader()
        f.parse_rss_sites_by_cat()
        print '=='*19
        f.print_feeds_for_all_cat()

The results are as followed:

Processing Category: World
processing: http://www.ft.com/rss/home/asia
processing: http://rss.cnn.com/rss/money_news_economy.rss
processing: http://feeds.reuters.com/reuters/businessNews
done
Processing Category: SG
processing: http://feeds.theedgemarkets.com/theedgemarkets/sgtopstories.rss
processing: http://feeds.theedgemarkets.com/theedgemarkets/sgmarkets.rss
processing: http://feeds.theedgemarkets.com/theedgemarkets/sgproperty.rss
done
======================================

Printing Category: World
Results of date: 20150126
Results of date: 20150127
========================================
China seeks end to gold medal fixation
‘Blind pursuit’ of success condemned as sports administrator scraps rewards for victory
———-

Tsipras poised to unveil new Greek cabinet
Athens and international creditors dig in on Greek debt
———-

EU threatens Russia with more sanctions
Call comes as violence in eastern Ukraine escalates
———-

……..

Printing Category: SG
Results of date: 20150127
========================================
Singapore shares higher; ComfortDelGro shines on broker upgrade
SINGAPORE (Jan 27): Gains in most Asian markets helped lift Singapore shares, with much of the buying centred on penny stocks.

Gainers outnumbered decliners 267 to 187, with some 1.84 billion shares worth $1.49 billion shares changin…
———-

Job vacancies in Singapore up 8.9%
SINGAPORE (Jan 27): The number of job vacancies in Singapore swelled to 67,400 in the year to September 2014, from 61,900 the previous year, with the services industry in greatest need of workers.

The bulk of the vacancies was from c…