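# NOTE(review): the five lines below look like page-header residue from a
# repository web view rather than program text -- confirm and remove.
# first commit
# [nyaa-comments-rss-generator.git] / nyaacomments.py
# CommitLineData
# 57b9114a
# AIL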
#!/usr/bin/env python3

# Nyaa Comments RSS Generator
# Copyright (c) 2018 Adrian I Lam <me@adrianiainlam.tk>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25
import datetime
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn

import AdvancedHTMLParser
import requests
from feedgen.feed import FeedGenerator
36
37
class NyaaComments(BaseHTTPRequestHandler):
    """HTTP request handler that turns a Nyaa torrent page's comments into an Atom feed.

    ``GET /`` answers with a plain-text usage page.
    ``GET /<number>`` fetches ``https://nyaa.si/view/<number>`` and
    ``GET /s<number>`` fetches ``https://sukebei.nyaa.si/view/<number>``;
    the comments found on the fetched page are returned as an Atom document.
    Anything else is answered with a 404 and a short plain-text message.
    """

    # Text served for "GET /".
    # NOTE(review): leading whitespace inside this literal is reproduced as it
    # appears in the reviewed copy, where indentation looked collapsed --
    # compare against the repository copy and restore if it differs.
    _HOME_PAGE = '''
Welcome to Nyaa Comments RSS Generator.

DISCLAIMER: This site is not affiliated with Nyaa.si

To use:
 For Nyaa:
 - Get your torrent number
 For example, https://nyaa.si/view/1002779 -> number is 1002779
 - Your feed URL is https://nyaacomments.tk/1002779
 For Sukebei.nyaa:
 Append 's' before the number
 For example, https://nyaacomments.tk/s1002779

Bug reports welcome at <me@adrianiainlam.tk>, or on
<https://github.com/adrianiainlam/nyaa-comments-rss-generator>.

IMPORTANT: Please avoid updating your feeds too often. I don't mind having
my server flooded, but the nyaa.si guys certainly might.

#nyaa-dev@Rizon, 2018-02-08T23:22:36Z
<ail30> hi, nyaa devs / host. I'm the one who recently added a feature
 request for comment RSS, which was rejected. I'm wondering if you
 guys would be okay if I write my own (external) feed generator?
 Or would you guys not be happy with a bot crawling your site?
<@Koala> generally speaking, if we don't notice the bot, we don't care.
 Just make sure it doesn't spam the site with requests.
<~Aureolin> As long as you don't hit any sort of rate limiting we don't
 care.
<ail30> ok thank you :)
'''

    # User-Agent header sent with the upstream request.
    _USER_AGENT = ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) "
                   "Gecko/20100101 Firefox/58.0")

    # Seconds to wait for the upstream site before giving up; without a
    # timeout a stalled upstream would block this handler thread forever.
    _UPSTREAM_TIMEOUT = 30

    def do_GET(self):
        """Answer a GET request with the usage page, an error page or an Atom feed."""
        if self.path == '/':
            self._send_text(200, self._HOME_PAGE)
            return

        target = self._parse_torrent_path(self.path)
        if target is None:
            self._send_text(404, 'Error: Not a valid torrent number')
            return

        sukebei, nyaaid = target
        if sukebei:
            url = "https://sukebei.nyaa.si/view/" + str(nyaaid)
        else:
            url = "https://nyaa.si/view/" + str(nyaaid)

        try:
            req = requests.get(url,
                               headers={"user-agent": self._USER_AGENT},
                               timeout=self._UPSTREAM_TIMEOUT)
        except requests.RequestException as err:
            # Connection failure or timeout: report it instead of letting the
            # exception escape and drop the client connection.
            self._send_text(
                502, 'Error: could not reach Nyaa (' + type(err).__name__ + ')')
            return

        if req.status_code != 200:
            # The upstream status is reported in the body; the response
            # status itself stays 200, as clients of this handler have
            # always received.
            self._send_text(
                200, 'Nyaa returns HTTP status ' + str(req.status_code))
            return

        fg = self._build_feed(url, req.text)

        self.send_response(200)
        self.send_header('Content-type', 'application/atom+xml')
        self.end_headers()

        # NOTE(review): the two replacements rewrite the serialised XML
        # ('&amp;' -> '&', '&#10;' -> '&lt;br&gt;').  Presumably they undo
        # double escaping of the comment HTML and turn newlines into <br>;
        # a bare '&' in a title or comment would make the output ill-formed
        # XML.  Left unchanged -- confirm intent before touching.
        atom = fg.atom_str(pretty=True).decode('utf-8')
        atom = atom.replace('&amp;', '&').replace('&#10;', '&lt;br&gt;')
        self.wfile.write(atom.encode('utf-8'))
        return

    def _send_text(self, status, text):
        """Send *text* as a complete ``text/plain`` response with *status*."""
        self.send_response(status)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(text.encode('utf-8'))

    @staticmethod
    def _parse_torrent_path(path):
        """Split a request path into ``(sukebei, number)``.

        The first character of *path* is skipped; an ``s`` right after it
        selects Sukebei.  Returns ``None`` when what remains is not a
        non-empty run of ASCII digits, which also covers paths too short to
        index (for example ``*``) and forms ``int()`` alone would accept
        (``+12``, ``-12``, ``1_0``).
        """
        number = path[1:]
        sukebei = number.startswith('s')
        if sukebei:
            number = number[1:]
        if not number or not set(number) <= set('0123456789'):
            return None
        return sukebei, int(number)

    def _build_feed(self, url, html):
        """Build the feed for the torrent page at *url* from its *html* text.

        Returns a ``FeedGenerator`` with one entry per comment element found
        on the page.
        """
        parser = AdvancedHTMLParser.AdvancedHTMLParser()
        parser.parseStr(html)

        fg = FeedGenerator()
        fg.link(href=url, rel='alternate')
        htmltitle = parser.getElementsByTagName('title')[0].innerHTML
        fg.title('Comments for ' + htmltitle)
        fg.id(url)
        fg.link(href='https://nyaacomments.tk' + self.path, rel='self')

        # Comments are looked up by element id: com-1, com-2, ... The scan
        # stops at the first id that is not present on the page.
        i = 1
        published = None
        while True:
            cmt = parser.getElementById('com-' + str(i))
            if cmt is None:
                break
            authortag = cmt.filter(tagname='a', href__contains='/user/')[0]
            author = authortag.href.replace('/user/', '')
            link = url + "#com-" + str(i)

            # The anchor pointing at this comment wraps an element whose
            # data-timestamp attribute is read as seconds since the epoch.
            tsanchor = cmt.getElementsByAttr('href', '#com-' + str(i))[0]
            timestamp = int(
                tsanchor.getChildren()[0].getAttribute('data-timestamp'))
            published = datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc)
            content = cmt.getElementsByClassName('comment-content')[0].innerHTML

            fe = fg.add_entry()
            fe.id(link)
            fe.title('Comment by ' + author + ' on ' + htmltitle)
            fe.author({'name': author})
            fe.pubdate(published)
            fe.updated(published)
            fe.link(href=link, rel='alternate')
            fe.content(content, type='html')

            i = i + 1

        # Set the feed's last-update time to the publish time of the last
        # comment seen; with no comments it is left as FeedGenerator set it.
        if published is not None:
            fg.updated(published)
        return fg
158
159
160
161
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """``HTTPServer`` combined with ``ThreadingMixIn``; adds nothing of its own.

    ``ThreadingMixIn`` is listed first so that its request-dispatch
    methods take precedence over the ones ``HTTPServer`` inherits.
    """
164
if __name__ == '__main__':
    # Bind to localhost port 2800 and serve until interrupted.
    server = ThreadedHTTPServer(('localhost', 2800), NyaaComments)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit quietly.
        pass
    finally:
        # Release the listening socket however the loop ended.
        server.server_close()