3 # Nyaa Comments RSS Generator
4 # Copyright (c) 2018 Adrian I Lam <me@adrianiainlam.tk>
6 # Permission is hereby granted, free of charge, to any person obtaining
7 # a copy of this software and associated documentation files (the
8 # "Software"), to deal in the Software without restriction, including
9 # without limitation the rights to use, copy, modify, merge, publish,
10 # distribute, sublicense, and/or sell copies of the Software, and to
11 # permit persons to whom the Software is furnished to do so, subject to
12 # the following conditions:
14 # The above copyright notice and this permission notice shall be included
15 # in all copies or substantial portions of the Software.
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn

import AdvancedHTMLParser
import requests
from feedgen.feed import FeedGenerator
class NyaaComments(BaseHTTPRequestHandler):
    """Serve Atom feeds built from the comments on Nyaa torrent pages.

    ``GET /`` returns a plain-text usage page.  ``GET /<number>`` fetches
    ``https://nyaa.si/view/<number>`` and ``GET /s<number>`` fetches
    ``https://sukebei.nyaa.si/view/<number>``; the comments found on that
    page are returned as an Atom document.  Any other path yields a 404.
    """

    # Public address of this service; used for the feed's rel="self" link.
    FEED_BASE_URL = 'https://nyaacomments.tk'

    # Browser-like User-Agent sent with every upstream request.
    USER_AGENT = ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) "
                  "Gecko/20100101 Firefox/58.0")

    # Seconds to wait for Nyaa before giving up, so that a stalled upstream
    # cannot block a worker thread forever.
    UPSTREAM_TIMEOUT = 30

    # NOTE(review): this text was recovered line by line; some lines appear
    # to be missing (e.g. the end of the Aureolin quote and the heading
    # before "Append 's' ...").  Restore them from the upstream repository.
    WELCOME_TEXT = '''
Welcome to Nyaa Comments RSS Generator.

DISCLAIMER: This site is not affiliated with Nyaa.si

- Get your torrent number
For example, https://nyaa.si/view/1002779 -> number is 1002779
- Your feed URL is https://nyaacomments.tk/1002779

Append 's' before the number
For example, https://nyaacomments.tk/s1002779

Bug reports welcome at <me@adrianiainlam.tk>, or on
<https://github.com/adrianiainlam/nyaa-comments-rss-generator>.

IMPORTANT: Please avoid updating your feeds too often. I don't mind having
my server flooded, but the nyaa.si guys certainly might.

#nyaa-dev@Rizon, 2018-02-08T23:22:36Z
<ail30> hi, nyaa devs / host. I'm the one who recently added a feature
request for comment RSS, which was rejected. I'm wondering if you
guys would be okay if I write my own (external) feed generator?
Or would you guys not be happy with a bot crawling your site?
<@Koala> generally speaking, if we don't notice the bot, we don't care.
Just make sure it doesn't spam the site with requests.
<~Aureolin> As long as you don't hit any sort of rate limiting we don't
<ail30> ok thank you :)
'''

    def do_GET(self):
        """Answer a GET request.

        Responses:
            200 text/plain            -- usage page, for the path ``/``.
            404 text/plain            -- the path is not ``/<digits>`` or
                                         ``/s<digits>``.
            502 text/plain            -- Nyaa could not be reached, did not
                                         answer with status 200, or returned
                                         a page that could not be parsed.
            200 application/atom+xml  -- the generated feed.
        """
        if self.path == '/':
            self._respond(200, 'text/plain', self.WELCOME_TEXT)
            return

        target = self._parse_path(self.path)
        if target is None:
            self._respond(404, 'text/plain',
                          'Error: Not a valid torrent number')
            return
        is_sukebei, nyaaid = target

        if is_sukebei:
            url = "https://sukebei.nyaa.si/view/" + str(nyaaid)
        else:
            url = "https://nyaa.si/view/" + str(nyaaid)

        try:
            req = requests.get(url,
                               headers={"user-agent": self.USER_AGENT},
                               timeout=self.UPSTREAM_TIMEOUT)
        except requests.RequestException as err:
            self.log_error("request to %s failed: %r", url, err)
            self._respond(502, 'text/plain', 'Error: Could not reach Nyaa')
            return

        if req.status_code != 200:
            # Report the upstream status in the body, but do not claim
            # success ourselves: feed readers should see this as a failure.
            self._respond(502, 'text/plain',
                          'Nyaa returns HTTP status ' + str(req.status_code))
            return

        try:
            feed_xml = self._build_feed(url, req.text)
        except (AttributeError, IndexError, TypeError, ValueError) as err:
            # The page did not have the structure the scraper expects
            # (missing <title>, author link, timestamp, ...).
            self.log_error("could not build feed for %s: %r", url, err)
            self._respond(502, 'text/plain',
                          'Error: Unexpected page format from Nyaa')
            return

        self._respond(200, 'application/atom+xml', feed_xml)

    def _respond(self, status, content_type, text):
        """Send a complete response whose body is *text* encoded as UTF-8."""
        self.send_response(status)
        self.send_header('Content-type', content_type)
        self.end_headers()
        self.wfile.write(text.encode('utf-8'))

    @staticmethod
    def _parse_path(path):
        """Return ``(is_sukebei, torrent_number)`` for *path*, else ``None``.

        The first character of *path* (normally ``/``) is ignored.  An
        optional ``s`` prefix selects sukebei; the remainder must consist of
        decimal digits only.
        """
        ident = path[1:]
        is_sukebei = ident.startswith('s')
        if is_sukebei:
            ident = ident[1:]
        # isdecimal() is False for '', signs, whitespace and underscores,
        # all of which a bare int() call would either accept or choke on.
        if not ident.isdecimal():
            return None
        try:
            return is_sukebei, int(ident)
        except ValueError:
            # int() can still refuse extremely long digit strings.
            return None

    def _build_feed(self, url, page_html):
        """Scrape the comments out of *page_html* and return Atom XML text.

        *url* is the Nyaa page the HTML came from; it becomes the feed id
        and its ``alternate`` link, and the base of every per-comment link.
        Raises AttributeError/IndexError/TypeError/ValueError when the page
        lacks an element the scraper relies on.
        """
        parser = AdvancedHTMLParser.AdvancedHTMLParser()
        parser.parseStr(page_html)

        fg = FeedGenerator()
        fg.id(url)  # Atom requires an id; the page URL is stable and unique.
        fg.link(href=url, rel='alternate')
        htmltitle = parser.getElementsByTagName('title')[0].innerHTML
        fg.title('Comments for ' + htmltitle)
        fg.link(href=self.FEED_BASE_URL + self.path, rel='self')

        # Comments are looked up by element id "com-1", "com-2", ...; the
        # first id that is absent ends the scan.
        # NOTE(review): start index and stop condition are reconstructed --
        # confirm against the upstream source.
        timestamp = None
        i = 1
        while True:
            cmt = parser.getElementById('com-' + str(i))
            if cmt is None:
                break

            authortag = cmt.filter(tagname='a', href__contains='/user/')[0]
            author = authortag.href.replace('/user/', '')
            link = url + "#com-" + str(i)

            # The permalink anchor's first child carries the post time as a
            # Unix timestamp in its data-timestamp attribute.
            tsanchor = cmt.getElementsByAttr('href', '#com-' + str(i))[0]
            timestamp = int(
                tsanchor.getChildren()[0].getAttribute('data-timestamp'))
            posted = datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc)
            content = cmt.getElementsByClassName(
                'comment-content')[0].innerHTML

            fe = fg.add_entry()
            fe.id(link)
            fe.title('Comment by ' + author + ' on ' + htmltitle)
            fe.author({'name': author})
            fe.pubdate(posted)
            fe.updated(posted)
            fe.link(href=link, rel='alternate')
            fe.content(content, type='html')

            i += 1

        # Set feed last update time to publish time of last comment.
        if timestamp is not None:
            fg.updated(datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc))

        return self._fix_entities(fg.atom_str(pretty=True).decode('utf-8'))

    @staticmethod
    def _fix_entities(xml_text):
        """Undo double-escaping in serialized feed text and mark line breaks.

        ``&amp;`` is collapsed to ``&`` so that entities which were already
        present in the scraped HTML are not escaped twice, and the newline
        entity ``&#10;`` becomes an escaped ``<br>``.

        NOTE(review): the reviewed copy showed these literals as
        ``'&' -> '&'`` and ``' ' -> '<br>'``, i.e. a no-op followed by
        replacing every space, which would corrupt the XML.  They look like
        HTML entities that were decoded in transit and are restored here on
        that assumption -- confirm against the upstream source.  The global
        ``&amp;`` replacement can still yield a bare ``&`` for text that was
        escaped only once.
        """
        return xml_text.replace('&amp;', '&').replace('&#10;', '&lt;br&gt;')
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTP server that handles each request in a separate thread.

    All behaviour comes from the two base classes.  ``ThreadingMixIn`` is
    listed first so that its ``process_request`` takes precedence over the
    one inherited through ``HTTPServer``.
    """
if __name__ == '__main__':
    # Serve the feed generator on localhost, port 2800.
    server = ThreadedHTTPServer(('localhost', 2800), NyaaComments)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the server; exit quietly.
        pass
    finally:
        # Release the listening socket however the loop ended.
        server.server_close()