calibre-devs team mailing list archive
-
calibre-devs team
-
Mailing list archive
-
Message #00188
[Merge] lp:~miurahr/calibre/experimental-recipes into lp:calibre
Hiroshi Miura has proposed merging lp:~miurahr/calibre/experimental-recipes into lp:calibre.
Requested reviews:
Kovid Goyal (kovid)
add more recipes
- experimental paper.li recipes (en)
* Wikileaks
* #osm
- Ajiajin tech news from Asia and Japan (en)
- National Geographic News (en)
- National Geographic Japanese news (ja)
- Blog: cute cat photo blog (ja)
fix some recipes
- Nikkei social (title typo)
--
https://code.launchpad.net/~miurahr/calibre/experimental-recipes/+merge/43470
Your team calibre developers is subscribed to branch lp:~miurahr/calibre/experimental-recipes.
=== added file 'resources/recipes/ajiajin.recipe'
--- resources/recipes/ajiajin.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/ajiajin.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,24 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+ajiajin.com/blog
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
class AjiajinBlog(BasicNewsRecipe):
    """Fetch the Asiajin blog feed -- English-language internet/tech
    news covering Japan and the rest of Asia.
    """
    title                 = u'Ajiajin blog'
    __author__            = 'Hiroshi Miura'
    description           = 'The next generation internet trends in Japan and Asia'
    publisher             = ''
    category              = 'internet, asia, japan'
    language              = 'en'
    encoding              = 'utf-8'
    publication_type      = 'blog'
    oldest_article        = 5
    max_articles_per_feed = 100

    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
+
+
=== added file 'resources/recipes/kahokushinpo.recipe'
--- resources/recipes/kahokushinpo.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/kahokushinpo.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+www.kahoku.co.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
class KahokuShinpoNews(BasicNewsRecipe):
    """Kahoku Shinpo -- a regional newspaper covering the Tohoku area
    of Japan (www.kahoku.co.jp).
    """
    title                 = u'\u6cb3\u5317\u65b0\u5831'
    __author__            = 'Hiroshi Miura'
    description           = 'Tohoku regional news paper in Japan'
    publisher             = 'Kahoku Shinpo Sha'
    category              = 'news, japan'
    language              = 'ja'
    encoding              = 'Shift_JIS'
    oldest_article        = 2
    max_articles_per_feed = 20
    no_stylesheets        = True

    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]

    # Keep only the title, photo and body sections of the article page.
    keep_only_tags = [
        dict(id="page_title"),
        dict(id="news_detail"),
        dict(id="bt_title"),
        {'class': "photoLeft"},
        dict(id="bt_body"),
    ]
    # Strip navigation buttons embedded in the article body.
    remove_tags = [{'class': "button"}]
+
=== added file 'resources/recipes/nationalgeographic.recipe'
--- resources/recipes/nationalgeographic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographic.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+nationalgeographic.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
class NationalGeographicNews(BasicNewsRecipe):
    """National Geographic News (English) from the main news feed.

    Advertisement entries routed through ads.pheedo.com are dropped
    from the feed after parsing.
    """
    title = u'National Geographic News'
    __author__ = 'Hiroshi Miura'  # added for consistency with the other recipes
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]

    remove_tags_before = dict(id='page_head')
    remove_tags_after = [dict(id='social_buttons'), {'class': 'aside'}]
    remove_tags = [
        {'class': 'hidden'},
    ]

    def parse_feeds(self):
        """Parse the feeds, then filter out pheedo advertisement links.

        Returns the list of Feed objects produced by the base class,
        with ad articles removed in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # Rebuild the article list in place (slice assignment keeps
            # the same list object) instead of the original two-pass
            # collect-then-delete-by-index loop, which was O(n^2).
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if not re.search(r'ads\.pheedo\.com', article.url)
            ]
        return feeds
=== added file 'resources/recipes/nationalgeographicjp.recipe'
--- resources/recipes/nationalgeographicjp.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographicjp.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,20 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+nationalgeographic.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
class NationalGeoJp(BasicNewsRecipe):
    """National Geographic news in Japanese (nationalgeographic.co.jp)."""
    title = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
    __author__ = 'Hiroshi Miura'  # added for consistency with the other recipes
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]

    def print_version(self, url):
        """Rewrite an article URL to its printer-friendly counterpart.

        Uses str.replace instead of re.sub: the pattern is a fixed
        string, and the original regex left the '.' unescaped (so it
        matched any character in that position).
        """
        return url.replace('news_article.php', 'news_printer_friendly.php')
+
=== modified file 'resources/recipes/nikkei_sub_shakai.recipe'
--- resources/recipes/nikkei_sub_shakai.recipe 2010-12-02 15:46:25 +0000
+++ resources/recipes/nikkei_sub_shakai.recipe 2010-12-12 13:39:53 +0000
@@ -10,8 +10,8 @@
from calibre.ptempfile import PersistentTemporaryFile
-class NikkeiNet_sub_life(BasicNewsRecipe):
- title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+ title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
=== added file 'resources/recipes/paperli.recipe'
--- resources/recipes/paperli.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,58 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
class paperli(BasicNewsRecipe):
    """Render a paper.li tag page (a curated daily 'newspaper') as a
    single-feed recipe.

    To follow a different tag, change ``paperli_tag`` (and ``title``
    to match) in the marked section below.
    """
#-------------------please change here ----------------
    paperli_tag = 'osm'
    title = u'The # osm Daily - paperli'
#-------------------------------------------------------------
    base_url = 'http://paper.li'
    index = '/tag/' + paperli_tag + '/~list'

    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page'
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    timefmt = '[%y/%m/%d]'

    def parse_index(self):
        """Walk the paginated tag listing and collect every article.

        Returns a single ('HEADLINE', articles) feed, following the
        'next' pagination link until there is none.
        """
        newsarticles = []
        page = self.index
        while True:
            soup = self.index_to_soup(''.join([self.base_url, page]))
            for itt in soup.findAll('div', attrs={'class': 'yui-u'}):
                itema = itt.find('a', href=True, attrs={'class': 'ts'})
                if itema is None:
                    continue
                itemd = itt.find('div', text=True, attrs={'class': 'text'})
                newsarticles.append({
                    'title': itema.string,
                    'date': strftime(self.timefmt),
                    'url': itema['href'],
                    # Some entries have no description block; the
                    # original crashed with AttributeError on those.
                    'description': itemd.string if itemd is not None else '',
                })

            # Guard: the pagination bar may be missing entirely, in
            # which case the original raised AttributeError on None.
            pagination = soup.find('div', attrs={'class': 'pagination_top'})
            nextpage = pagination.find('li', attrs={'class': 'next'}) if pagination is not None else None
            if nextpage is None:
                break
            page = nextpage.find('a', href=True)['href']

        return [('HEADLINE', newsarticles)]
+
=== added file 'resources/recipes/paperli_topic.recipe'
--- resources/recipes/paperli_topic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli_topic.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,59 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
class paperli_topics(BasicNewsRecipe):
    """Render a paper.li tag page with one feed per navigation topic.

    To follow a different tag, change ``paperli_tag`` (and ``title``
    to match) in the marked section below.
    """
#-------------------please change here ----------------
    paperli_tag = 'wikileaks'
    title = u'The # wikileaks Daily - paperli'
#-------------------------------------------------------------
    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page about ' + paperli_tag
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    masthead_title = u'The ' + paperli_tag + ' Daily'
    timefmt = '[%y/%m/%d]'
    base_url = 'http://paper.li'
    index = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        """Collect the topic tabs, then scrape each topic page.

        Returns a list of (topic title, articles) feeds, one per
        entry of the bottom navigation bar.
        """
        # Gather the topic navigation entries from the front page.
        topics = []
        soup = self.index_to_soup(self.index)
        topics_lists = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        # Guard: the nav bar may be absent; the original raised
        # AttributeError on None in that case.
        if topics_lists is not None:
            for item in topics_lists.findAll('li', attrs={'class': ""}):
                itema = item.find('a', href=True)
                topics.append({'title': itema.string, 'url': itema['href']})

        # Scrape each topic page into its own feed.
        feeds = []
        for topic in topics:
            newsarticles = []
            soup = self.index_to_soup(''.join([self.base_url, topic['url']]))
            for itt in soup.findAll('div', attrs={'class': 'yui-u'}):
                itema = itt.find('a', href=True, attrs={'class': 'ts'})
                if itema is None:
                    continue
                itemd = itt.find('div', text=True, attrs={'class': 'text'})
                newsarticles.append({
                    'title': itema.string,
                    'date': strftime(self.timefmt),
                    'url': itema['href'],
                    # Entries without a description block crashed the
                    # original with AttributeError.
                    'description': itemd.string if itemd is not None else '',
                })
            feeds.append((topic['title'], newsarticles))
        return feeds
+
=== added file 'resources/recipes/uninohimitu.recipe'
--- resources/recipes/uninohimitu.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/uninohimitu.recipe 2010-12-12 13:39:53 +0000
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@xxxxxxxxx>'
+'''
+http://ameblo.jp/sauta19/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
class UniNoHimituKichiBlog(BasicNewsRecipe):
    """'Uni secret base' -- a popular Japanese cat-photo blog hosted
    on Ameblo (http://ameblo.jp/sauta19/).

    Advertisement entries injected through rssad.jp are removed from
    the parsed feed.
    """
    title = u'Uni secret base'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    publication_type = 'blog'
    max_articles_per_feed = 20
    description = 'Japanese famous Cat blog'
    publisher = ''
    category = 'cat, pet, japan'
    language = 'ja'
    encoding = 'utf-8'

    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]

    def parse_feeds(self):
        """Parse the feeds, then drop rssad.jp advertisement entries.

        Returns the Feed objects from the base class with ad articles
        removed in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # The dot is escaped so only the literal host 'rssad.jp'
            # matches; the original pattern r'rssad.jp' matched any
            # character in that position. Slice assignment rebuilds the
            # list in place, replacing the O(n^2) delete-by-index loop.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if not re.search(r'rssad\.jp', article.url)
            ]
        return feeds
+
Follow ups