| 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | # Nyaa Comments RSS Generator |
| 4 | # Copyright (c) 2018 Adrian I Lam <me@adrianiainlam.tk> |
| 5 | # |
| 6 | # Permission is hereby granted, free of charge, to any person obtaining |
| 7 | # a copy of this software and associated documentation files (the |
| 8 | # "Software"), to deal in the Software without restriction, including |
| 9 | # without limitation the rights to use, copy, modify, merge, publish, |
| 10 | # distribute, sublicense, and/or sell copies of the Software, and to |
| 11 | # permit persons to whom the Software is furnished to do so, subject to |
| 12 | # the following conditions: |
| 13 | # |
| 14 | # The above copyright notice and this permission notice shall be included |
| 15 | # in all copies or substantial portions of the Software. |
| 16 | # |
| 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 18 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 20 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 21 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 22 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 23 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 24 | |
| 25 | |
| 26 | from http.server import BaseHTTPRequestHandler, HTTPServer |
| 27 | from socketserver import ThreadingMixIn |
| 28 | import threading |
| 29 | |
| 30 | import datetime |
| 31 | |
| 32 | |
| 33 | from feedgen.feed import FeedGenerator |
| 34 | import AdvancedHTMLParser |
| 35 | import requests |
| 36 | |
| 37 | |
class NyaaComments(BaseHTTPRequestHandler):
    """HTTP request handler that turns a Nyaa torrent page into an Atom feed.

    ``GET /`` returns a plain-text usage page.  ``GET /<number>`` fetches
    ``https://nyaa.si/view/<number>`` and ``GET /s<number>`` fetches
    ``https://sukebei.nyaa.si/view/<number>``; the comments found on that
    page (elements with ids ``com-1``, ``com-2``, ...) are returned as an
    Atom feed.
    """

    # Seconds to wait for the upstream site before giving up, so that a
    # stalled connection cannot block a worker thread indefinitely.
    UPSTREAM_TIMEOUT = 30

    # User-Agent header sent with every upstream request.
    USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0"

    # Public base URL of this service, used for the feed's rel="self" link.
    FEED_BASE_URL = 'https://nyaacomments.tk'

    # Body of the home page.  The text starts at column 0 on purpose: any
    # indentation here would become part of the response.
    HOME_PAGE = '''
Welcome to Nyaa Comments RSS Generator.

DISCLAIMER: This site is not affiliated with Nyaa.si

To use:
For Nyaa:
- Get your torrent number
For example, https://nyaa.si/view/1002779 -> number is 1002779
- Your feed URL is https://nyaacomments.tk/1002779
For Sukebei.nyaa:
Append 's' before the number
For example, https://nyaacomments.tk/s1002779

Bug reports welcome at <me@adrianiainlam.tk>, or on
<https://github.com/adrianiainlam/nyaa-comments-rss-generator>.

IMPORTANT: Please avoid updating your feeds too often. I don't mind having
my server flooded, but the nyaa.si guys certainly might.

#nyaa-dev@Rizon, 2018-02-08T23:22:36Z
<ail30> hi, nyaa devs / host. I'm the one who recently added a feature
request for comment RSS, which was rejected. I'm wondering if you
guys would be okay if I write my own (external) feed generator?
Or would you guys not be happy with a bot crawling your site?
<@Koala> generally speaking, if we don't notice the bot, we don't care.
Just make sure it doesn't spam the site with requests.
<~Aureolin> As long as you don't hit any sort of rate limiting we don't
care.
<ail30> ok thank you :)
'''

    def do_GET(self):
        """Serve the home page, an Atom feed, or a plain-text error.

        Responses:
          * 200 text/plain           -- home page (path ``/``)
          * 200 application/atom+xml -- feed for a valid torrent number
          * 404 text/plain           -- path is not a torrent number, or
                                        the upstream page does not exist
          * 502 text/plain           -- upstream unreachable or returned
                                        any other non-200 status
        """
        if self.path == '/':
            self._send_text(200, self.HOME_PAGE)
            return

        try:
            sukebei, nyaaid = self._parse_torrent_path(self.path)
        except ValueError:
            self._send_text(404, 'Error: Not a valid torrent number')
            return

        if sukebei:
            url = "https://sukebei.nyaa.si/view/" + str(nyaaid)
        else:
            url = "https://nyaa.si/view/" + str(nyaaid)

        try:
            req = requests.get(url,
                               headers={"user-agent": self.USER_AGENT},
                               timeout=self.UPSTREAM_TIMEOUT)
        except requests.RequestException as err:
            # Network failure or timeout: report it instead of letting the
            # exception escape and the client see a dropped connection.
            self._send_text(502, 'Error: could not fetch page from Nyaa ('
                            + type(err).__name__ + ')')
            return

        if req.status_code != 200:
            # Do not answer 200 for a failed fetch: pass a missing torrent
            # through as 404 and report everything else as a gateway error.
            status = 404 if req.status_code == 404 else 502
            self._send_text(status,
                            'Nyaa returns HTTP status ' + str(req.status_code))
            return

        fg = self._build_feed(url, req.text)

        self.send_response(200)
        self.send_header('Content-type', 'application/atom+xml')
        self.end_headers()
        # atom_str() already returns encoded bytes.  They are written as-is:
        # line breaks were converted per comment in _build_feed, and any
        # string replacement on the serialized XML would corrupt its markup.
        self.wfile.write(fg.atom_str(pretty=True))

    @staticmethod
    def _parse_torrent_path(path):
        """Split a request path into ``(sukebei, torrent_number)``.

        ``/123`` gives ``(False, 123)`` and ``/s123`` gives ``(True, 123)``.

        Raises:
            ValueError: if the part after the optional ``s`` is not an
                integer (including when it is empty).
        """
        # Slice rather than index so a one-character path (e.g. "*") ends
        # up as int('') -> ValueError instead of an uncaught IndexError.
        sukebei = path[1:2] == 's'
        number = path[2:] if sukebei else path[1:]
        return sukebei, int(number)

    @staticmethod
    def _linebreaks_to_html(text):
        """Return *text* with line breaks replaced by ``<br>`` tags.

        Both literal newlines and the ``&#10;`` character reference are
        handled.  NOTE(review): Nyaa appears to encode newlines in comment
        text as ``&#10;`` -- confirm against a live page.
        """
        return (text.replace('&#10;', '<br>')
                    .replace('\r\n', '<br>')
                    .replace('\n', '<br>'))

    def _send_text(self, status, text):
        """Send *text* as a complete ``text/plain`` response with *status*."""
        self.send_response(status)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(bytes(text, 'utf-8'))

    def _build_feed(self, url, html):
        """Build the feed for the torrent page *html* that lives at *url*.

        Returns a ``FeedGenerator`` with one entry per comment.  The feed's
        ``updated`` time is set to the timestamp of the last comment when
        the page has at least one comment.
        """
        parser = AdvancedHTMLParser.AdvancedHTMLParser()
        parser.parseStr(html)

        fg = FeedGenerator()
        fg.link(href=url, rel='alternate')
        htmltitle = parser.getElementsByTagName('title')[0].innerHTML
        fg.title('Comments for ' + htmltitle)
        fg.id(url)
        fg.link(href=self.FEED_BASE_URL + self.path, rel='self')

        last_published = None
        i = 1
        # Comments are looked up by consecutive ids; the first missing id
        # ends the scan.
        while True:
            cmt = parser.getElementById('com-' + str(i))
            if cmt is None:
                break

            authortag = cmt.filter(tagname='a', href__contains='/user/')[0]
            author = authortag.href.replace('/user/', '')
            link = url + "#com-" + str(i)

            # The comment's permalink anchor wraps an element whose
            # data-timestamp attribute holds the time in epoch seconds.
            tsanchor = cmt.getElementsByAttr('href', '#com-' + str(i))[0]
            timestamp = int(
                tsanchor.getChildren()[0].getAttribute('data-timestamp'))
            last_published = datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc)

            content = cmt.getElementsByClassName('comment-content')[0].innerHTML

            fe = fg.add_entry()
            fe.id(link)
            fe.title('Comment by ' + author + ' on ' + htmltitle)
            fe.author({'name': author})
            fe.published(last_published)
            fe.updated(last_published)
            fe.link(href=link, rel='alternate')
            # Convert line breaks here, before serialization, so the feed
            # library escapes the resulting markup correctly.
            fe.content(self._linebreaks_to_html(content), type='html')

            i = i + 1

        # Set feed last update time to publish time of last comment.
        if last_published is not None:
            fg.updated(last_published)

        return fg
| 158 | |
| 159 | |
| 160 | |
| 161 | |
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn; adds no behavior of its own."""
| 164 | |
if __name__ == '__main__':
    # Script entry point: bind to localhost on port 2800 and handle
    # requests with NyaaComments until the server is shut down.
    listen_address = ('localhost', 2800)
    httpd = ThreadedHTTPServer(listen_address, NyaaComments)
    httpd.serve_forever()