← Back to team overview

calibre-devs team mailing list archive

[Merge] lp:~miurahr/calibre/experimental-recipes into lp:calibre

 

Hiroshi Miura has proposed merging lp:~miurahr/calibre/experimental-recipes into lp:calibre.

Requested reviews:
  Kovid Goyal (kovid)


maintenance of recipes

  - Fix several minor bugs in Japanese recipes
  - Add Toyokeizai, an East Asian economics magazine

-- 
https://code.launchpad.net/~miurahr/calibre/experimental-recipes/+merge/43092
Your team calibre developers is subscribed to branch lp:~miurahr/calibre/experimental-recipes.
=== modified file 'resources/recipes/mainichi_it_news.recipe'
--- resources/recipes/mainichi_it_news.recipe	2010-12-02 15:43:59 +0000
+++ resources/recipes/mainichi_it_news.recipe	2010-12-08 15:06:26 +0000
@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class MainichiDailyITNews(BasicNewsRecipe):
     title          = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@@ -14,6 +15,7 @@
 
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags = [{'class':"RelatedArticle"}]
+    remove_tags_after = {'class':"Credit"}
 
     def parse_feeds(self):
 
@@ -29,4 +31,4 @@
                     index = curfeed.articles.index(d)
                     curfeed.articles[index:index+1] = []
 
-        return feeds   remove_tags_after = {'class':"Credit"}
+        return feeds

=== modified file 'resources/recipes/the_h.recipe'
--- resources/recipes/the_h.recipe	2010-11-23 16:04:13 +0000
+++ resources/recipes/the_h.recipe	2010-12-08 15:06:26 +0000
@@ -14,7 +14,7 @@
     oldest_article = 3
     description    = 'In association with Heise Online'
     publisher      = 'Heise Media UK Ltd.'
-    category       = 'news, technology, security'
+    category       = 'news, technology, security, OSS, internet'
     max_articles_per_feed = 100
     language       = 'en'
     encoding       = 'utf-8'
@@ -27,6 +27,12 @@
     feeds          = [
                       (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
                      ]
+    cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
+
+    remove_tags = [
+                        dict(id="logo"),
+                        dict(id="footer")
+                        ]
 
     def print_version(self, url):
         return url + '?view=print'

=== added file 'resources/recipes/toyokeizai.recipe'
--- resources/recipes/toyokeizai.recipe	1970-01-01 00:00:00 +0000
+++ resources/recipes/toyokeizai.recipe	2010-12-08 15:06:26 +0000
@@ -0,0 +1,68 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+www.toyokeizai.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Toyokeizai(BasicNewsRecipe):  # recipe that builds one 'news' feed from the member.toyokeizai.net news index
+    title          = u'ToyoKeizai News'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 1
+    max_articles_per_feed = 50
+    description    = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
+    publisher      = 'Toyokeizai Shinbun Sha'
+    category       = 'economy, magazine, japan'
+    language       = 'ja'
+    encoding       = 'euc-jp'
+    index          = 'http://member.toyokeizai.net/news/'  # page that parse_index() scrapes for the article list
+    remove_javascript = True
+    no_stylesheets = True
+    masthead_title = u'TOYOKEIZAI'
+    needs_subscription = True  # get_browser() below logs in when username and password are set
+    timefmt = '[%y/%m/%d]'
+    recursions = 5  # presumably the link-following depth used with match_regexps -- confirm against BasicNewsRecipe docs
+    match_regexps =[ r'page/\d+']  # presumably restricts followed links to 'page/<number>' continuation pages -- TODO confirm
+
+    keep_only_tags = [  # div classes kept as article content
+                      dict(name='div', attrs={'class':['news']}),
+                      dict(name='div', attrs={'class':["news_cont"]}),
+                      dict(name='div', attrs={'class':["news_con"]}),
+#                      dict(name='div', attrs={'class':["norightsMessage"]})
+                     ]
+    remove_tags = [{'class':"mt35 mgz"},  # elements stripped from the kept content, matched by class or id
+                            {'class':"mt20 newzia"},
+                            {'class':"mt20 fontS"},
+                            {'class':"bk_btn_m"},
+                            dict(id='newzia_connect_member')
+                            ]
+
+    def parse_index(self):  # returns [] or [('news', [article dicts])] built from the index page
+        feeds = []
+        soup   = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'list6'})  # first <ul class="list6"> holds the story list
+        if topstories:  # when the list is missing, the empty feeds list is returned
+            newsarticles = []
+            for itt in topstories.findAll('li'):  # one article per <li>
+                itema = itt.find('a',href=True)  # first link with an href: source of title, url and description
+                itemd = itt.find('span')  # first <span>: source of the date text
+                newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'       :re.compile(r"\- ").sub("",itemd.string)  # drops every "- " from the span text
+                                     ,'url'        :'http://member.toyokeizai.net' + itema['href']  # href is appended to the site root; presumably site-relative -- verify
+                                     ,'description':itema['title']  # the link's title attribute doubles as the description
+                                    })
+            feeds.append(('news', newsarticles))  # single feed named 'news'
+        return feeds
+
+    def get_browser(self):  # returns a browser, submitting the login form first when credentials are set
+        br = BasicNewsRecipe.get_browser()  # NOTE(review): called on the class with no instance -- confirm the base class allows this
+        if self.username is not None and self.password is not None:  # skip login unless both credentials are present
+            br.open('http://member.toyokeizai.net/norights/form/')
+            br.select_form(nr=0)  # the first form on the page is used as the login form
+            br['kaiin_id']   = self.username
+            br['password'] = self.password
+            res = br.submit()  # NOTE(review): res is never read afterwards
+        return br


Follow ups