calibre-devs team mailing list archive
-
calibre-devs team
-
Mailing list archive
-
Message #00186
[Merge] lp:~miurahr/calibre/experimental-recipes into lp:calibre
Hiroshi Miura has proposed merging lp:~miurahr/calibre/experimental-recipes into lp:calibre.
Requested reviews:
Kovid Goyal (kovid)
maintenance of recipes
- Fix several minor bugs in Japanese recipes
- Add Toyokeizai, an East Asia economics magazine
--
https://code.launchpad.net/~miurahr/calibre/experimental-recipes/+merge/43092
Your team calibre developers is subscribed to branch lp:~miurahr/calibre/experimental-recipes.
=== modified file 'resources/recipes/mainichi_it_news.recipe'
--- resources/recipes/mainichi_it_news.recipe 2010-12-02 15:43:59 +0000
+++ resources/recipes/mainichi_it_news.recipe 2010-12-08 15:06:26 +0000
@@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
+import re
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@@ -14,6 +15,7 @@
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
+ remove_tags_after = {'class':"Credit"}
def parse_feeds(self):
@@ -29,4 +31,4 @@
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
- return feeds remove_tags_after = {'class':"Credit"}
+ return feeds
=== modified file 'resources/recipes/the_h.recipe'
--- resources/recipes/the_h.recipe 2010-11-23 16:04:13 +0000
+++ resources/recipes/the_h.recipe 2010-12-08 15:06:26 +0000
@@ -14,7 +14,7 @@
oldest_article = 3
description = 'In association with Heise Online'
publisher = 'Heise Media UK Ltd.'
- category = 'news, technology, security'
+ category = 'news, technology, security, OSS, internet'
max_articles_per_feed = 100
language = 'en'
encoding = 'utf-8'
@@ -27,6 +27,12 @@
feeds = [
(u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
]
+ cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
+
+ remove_tags = [
+ dict(id="logo"),
+ dict(id="footer")
+ ]
def print_version(self, url):
return url + '?view=print'
=== added file 'resources/recipes/toyokeizai.recipe'
--- resources/recipes/toyokeizai.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/toyokeizai.recipe 2010-12-08 15:06:26 +0000
@@ -0,0 +1,68 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+www.toyokeizai.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Toyokeizai(BasicNewsRecipe):
+ title = u'ToyoKeizai News'
+ __author__ = 'Hiroshi Miura'
+ oldest_article = 1
+ max_articles_per_feed = 50
+ description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
+ publisher = 'Toyokeizai Shinbun Sha'
+ category = 'economy, magazine, japan'
+ language = 'ja'
+ encoding = 'euc-jp'
+ index = 'http://member.toyokeizai.net/news/'
+ remove_javascript = True
+ no_stylesheets = True
+ masthead_title = u'TOYOKEIZAI'
+ needs_subscription = True
+ timefmt = '[%y/%m/%d]'
+ recursions = 5
+ match_regexps =[ r'page/\d+']
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':['news']}),
+ dict(name='div', attrs={'class':["news_cont"]}),
+ dict(name='div', attrs={'class':["news_con"]}),
+# dict(name='div', attrs={'class':["norightsMessage"]})
+ ]
+ remove_tags = [{'class':"mt35 mgz"},
+ {'class':"mt20 newzia"},
+ {'class':"mt20 fontS"},
+ {'class':"bk_btn_m"},
+ dict(id='newzia_connect_member')
+ ]
+
+ def parse_index(self):
+ feeds = []
+ soup = self.index_to_soup(self.index)
+ topstories = soup.find('ul',attrs={'class':'list6'})
+ if topstories:
+ newsarticles = []
+ for itt in topstories.findAll('li'):
+ itema = itt.find('a',href=True)
+ itemd = itt.find('span')
+ newsarticles.append({
+ 'title' :itema.string
+ ,'date' :re.compile(r"\- ").sub("",itemd.string)
+ ,'url' :'http://member.toyokeizai.net' + itema['href']
+ ,'description':itema['title']
+ })
+ feeds.append(('news', newsarticles))
+ return feeds
+
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('http://member.toyokeizai.net/norights/form/')
+ br.select_form(nr=0)
+ br['kaiin_id'] = self.username
+ br['password'] = self.password
+ res = br.submit()
+ return br
Follow ups