Commit | Line | Data |
---|---|---|
57b9114a AIL |
1 | #!/usr/bin/env python3 |
2 | ||
3 | # Nyaa Comments RSS Generator | |
4 | # Copyright (c) 2018 Adrian I Lam <me@adrianiainlam.tk> | |
5 | # | |
6 | # Permission is hereby granted, free of charge, to any person obtaining | |
7 | # a copy of this software and associated documentation files (the | |
8 | # "Software"), to deal in the Software without restriction, including | |
9 | # without limitation the rights to use, copy, modify, merge, publish, | |
10 | # distribute, sublicense, and/or sell copies of the Software, and to | |
11 | # permit persons to whom the Software is furnished to do so, subject to | |
12 | # the following conditions: | |
13 | # | |
14 | # The above copyright notice and this permission notice shall be included | |
15 | # in all copies or substantial portions of the Software. | |
16 | # | |
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
18 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
20 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
21 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
22 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
23 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
24 | ||
25 | ||
26 | from http.server import BaseHTTPRequestHandler, HTTPServer | |
27 | from socketserver import ThreadingMixIn | |
28 | import threading | |
29 | ||
30 | import datetime | |
31 | ||
32 | ||
33 | from feedgen.feed import FeedGenerator | |
34 | import AdvancedHTMLParser | |
35 | import requests | |
36 | ||
37 | ||
class NyaaComments(BaseHTTPRequestHandler):
    """HTTP handler that turns the comments of a Nyaa torrent page into an Atom feed.

    Routes (GET only):

    * ``/``          -- plain-text usage page.
    * ``/<number>``  -- feed for ``https://nyaa.si/view/<number>``.
    * ``/s<number>`` -- feed for ``https://sukebei.nyaa.si/view/<number>``.

    Anything else is answered with a 404 text page.  If the upstream page
    cannot be fetched, the reply is a text page carrying the upstream status
    code (or 502 when the request itself failed).
    """

    # Public base URL of this service; used for the feed's rel="self" link.
    _SELF_BASE = 'https://nyaacomments.tk'

    # Browser-like user agent sent with the upstream request.
    _USER_AGENT = ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) "
                   "Gecko/20100101 Firefox/58.0")

    # Seconds to wait for the upstream site, so that a stalled connection
    # cannot occupy a worker thread indefinitely.
    _UPSTREAM_TIMEOUT = 30

    # NOTE(review): the leading whitespace inside this text was not
    # recoverable from the copy of the source under review; the wording is
    # unchanged but the indentation below is reconstructed -- confirm layout.
    _HOME_PAGE = '''
Welcome to Nyaa Comments RSS Generator.

DISCLAIMER: This site is not affiliated with Nyaa.si

To use:
  For Nyaa:
    - Get your torrent number
      For example, https://nyaa.si/view/1002779 -> number is 1002779
    - Your feed URL is https://nyaacomments.tk/1002779
  For Sukebei.nyaa:
    Append 's' before the number
    For example, https://nyaacomments.tk/s1002779

Bug reports welcome at <me@adrianiainlam.tk>, or on
<https://github.com/adrianiainlam/nyaa-comments-rss-generator>.

IMPORTANT: Please avoid updating your feeds too often. I don't mind having
my server flooded, but the nyaa.si guys certainly might.

#nyaa-dev@Rizon, 2018-02-08T23:22:36Z
<ail30> hi, nyaa devs / host. I'm the one who recently added a feature
        request for comment RSS, which was rejected. I'm wondering if you
        guys would be okay if I write my own (external) feed generator?
        Or would you guys not be happy with a bot crawling your site?
<@Koala> generally speaking, if we don't notice the bot, we don't care.
         Just make sure it doesn't spam the site with requests.
<~Aureolin> As long as you don't hit any sort of rate limiting we don't
            care.
<ail30> ok thank you :)
'''

    def do_GET(self):
        """Serve the usage page for ``/`` and a comment feed for any other path."""
        if self.path == '/':
            self._send_text(200, self._HOME_PAGE)
            return

        try:
            sukebei, nyaaid = self._parse_path(self.path)
        except ValueError:
            self._send_text(404, 'Error: Not a valid torrent number')
            return

        host = 'sukebei.nyaa.si' if sukebei else 'nyaa.si'
        url = 'https://' + host + '/view/' + str(nyaaid)

        try:
            req = requests.get(url,
                               headers={'user-agent': self._USER_AGENT},
                               timeout=self._UPSTREAM_TIMEOUT)
        except requests.RequestException as err:
            # Upstream unreachable or timed out: report it as a gateway
            # error instead of letting the exception kill the request.
            self._send_text(
                502, 'Error: request to Nyaa failed (' + type(err).__name__ + ')')
            return

        if req.status_code != 200:
            # Relay the upstream status code so feed readers can tell a
            # failure apart from a valid (200) feed document.
            self._send_text(req.status_code,
                            'Nyaa returns HTTP status ' + str(req.status_code))
            return

        fg = self._build_feed(url, req.text)

        self.send_response(200)
        self.send_header('Content-type', 'application/atom+xml')
        self.end_headers()
        # The serializer's output is sent unmodified.
        # NOTE(review): the code under review post-processed the serialized
        # XML with str.replace('&', '&') (a no-op) and str.replace(' ', '<br>'),
        # which rewrites every space in the document and yields malformed
        # XML.  The search strings look like lost HTML entities; if comment
        # line breaks must become <br>, do that on the comment body before
        # it is handed to fe.content() -- confirm the intended behaviour.
        self.wfile.write(fg.atom_str(pretty=True))

    def _send_text(self, status, text):
        """Send *text* as a UTF-8 encoded ``text/plain`` response with *status*."""
        self.send_response(status)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(bytes(text, 'utf-8'))

    @staticmethod
    def _parse_path(path):
        """Return ``(sukebei, nyaaid)`` parsed from a request path.

        The first character of *path* (normally ``/``) is skipped.  A
        following ``s`` selects Sukebei; the remainder must be an integer.

        Raises:
            ValueError: if the remainder is not an integer.  This includes
                paths that are too short, which previously escaped as an
                uncaught IndexError.
        """
        ident = path[1:]
        sukebei = ident.startswith('s')
        if sukebei:
            ident = ident[1:]
        return sukebei, int(ident)

    def _build_feed(self, url, html):
        """Build a FeedGenerator from the comments found in *html*.

        *url* is the torrent page the HTML was fetched from; it is used as
        the feed id, the feed's alternate link and the base of each
        comment's permalink.  Comments are looked up by element id
        ``com-1``, ``com-2``, ... and the scan stops at the first id that is
        missing from the page.
        """
        parser = AdvancedHTMLParser.AdvancedHTMLParser()
        parser.parseStr(html)

        # Fall back to the URL if the page carries no <title> element.
        titles = parser.getElementsByTagName('title')
        htmltitle = titles[0].innerHTML if titles else url

        fg = FeedGenerator()
        fg.link(href=url, rel='alternate')
        fg.title('Comments for ' + htmltitle)
        fg.id(url)
        fg.link(href=self._SELF_BASE + self.path, rel='self')

        published = None
        i = 1
        while True:
            anchor = 'com-' + str(i)
            cmt = parser.getElementById(anchor)
            if cmt is None:
                break

            authortag = cmt.filter(tagname='a', href__contains='/user/')[0]
            author = authortag.href.replace('/user/', '')
            link = url + '#' + anchor

            # The permalink anchor's first child holds the comment time as
            # a Unix timestamp in its data-timestamp attribute.
            tsanchor = cmt.getElementsByAttr('href', '#' + anchor)[0]
            timestamp = int(
                tsanchor.getChildren()[0].getAttribute('data-timestamp'))
            published = datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc)
            content = cmt.getElementsByClassName('comment-content')[0].innerHTML

            fe = fg.add_entry()
            fe.id(link)
            fe.title('Comment by ' + author + ' on ' + htmltitle)
            fe.author({'name': author})
            fe.pubdate(published)
            fe.updated(published)
            fe.link(href=link, rel='alternate')
            fe.content(content, type='html')

            i += 1

        # Set the feed's last-update time to the publish time of the last
        # comment; with no comments it is left for the feed library to fill.
        if published is not None:
            fg.updated(published)
        return fg
158 | ||
159 | ||
160 | ||
161 | ||
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn so each request runs in its own thread."""
164 | ||
if __name__ == '__main__':
    # Listen on the loopback interface only, port 2800.
    # NOTE(review): presumably a reverse proxy exposes this publicly -- confirm.
    server = ThreadedHTTPServer(('localhost', 2800), NyaaComments)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the service; exit without a
        # traceback.
        pass
    finally:
        # Always release the listening socket, however the loop ended.
        server.server_close()