From 57b9114abe6c3e604e377706f4d13d401197c90b Mon Sep 17 00:00:00 2001 From: Adrian Iain Lam Date: Sat, 24 Feb 2018 22:46:03 +0000 Subject: [PATCH 1/1] first commit --- nyaacomments.py | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 nyaacomments.py diff --git a/nyaacomments.py b/nyaacomments.py new file mode 100644 index 0000000..af161aa --- /dev/null +++ b/nyaacomments.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 + +# Nyaa Comments RSS Generator +# Copyright (c) 2018 Adrian I Lam +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
import threading

import datetime
import re


from feedgen.feed import FeedGenerator
import AdvancedHTMLParser
import requests


# Plain-text page served at "/".
# NOTE(review): the "Bug reports welcome at , or on ." sentence and two of the
# IRC log lines look as if a contact address / URL / nickname were lost before
# this text reached review -- restore them from the author's copy; they are
# deliberately not guessed at here.
_HOME_PAGE = '''
Welcome to Nyaa Comments RSS Generator.

DISCLAIMER: This site is not affiliated with Nyaa.si

To use:
  For Nyaa:
    - Get your torrent number
      For example, https://nyaa.si/view/1002779 -> number is 1002779
    - Your feed URL is https://nyaacomments.tk/1002779
  For Sukebei.nyaa:
    Append 's' before the number
    For example, https://nyaacomments.tk/s1002779

Bug reports welcome at , or on
.

IMPORTANT: Please avoid updating your feeds too often. I don't mind having
my server flooded, but the nyaa.si guys certainly might.

#nyaa-dev@Rizon, 2018-02-08T23:22:36Z
 hi, nyaa devs / host. I'm the one who recently added a feature
 request for comment RSS, which was rejected. I'm wondering if you
 guys would be okay if I write my own (external) feed generator?
 Or would you guys not be happy with a bot crawling your site?
<@Koala> generally speaking, if we don't notice the bot, we don't care.
 Just make sure it doesn't spam the site with requests.
<~Aureolin> As long as you don't hit any sort of rate limiting we don't
 care.
 ok thank you :)
'''

# User-agent string sent with every upstream request.
_USER_AGENT = ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) "
               "Gecko/20100101 Firefox/58.0")

# Seconds to wait for the upstream site before giving up.  Without a timeout
# requests waits indefinitely, which would pin one handler thread per stalled
# connection.
_UPSTREAM_TIMEOUT = 30

# "/<digits>" selects nyaa.si, "/s<digits>" selects sukebei.nyaa.si.
# ASCII digits only: int() alone would also accept "+5", "-5" or "1_000".
_FEED_PATH = re.compile(r'/(s?)([0-9]+)')


def _linebreaks_to_br(markup):
    """Return *markup* with line breaks turned into ``<br>`` tags.

    NOTE(review): the original code post-processed the *serialised* feed with
    two ``str.replace`` calls which, as written, were a no-op followed by
    "replace every space with <br>" -- that breaks the XML itself.  The
    presumed intent was to keep line breaks inside comments visible in feed
    readers, so the conversion is applied to the comment body instead.
    Both a literal newline and the ``&#10;`` character reference are handled
    because it is not visible from here which of the two ``innerHTML``
    yields -- confirm against a real page.
    """
    return (markup.replace('&#10;', '<br>')
                  .replace('\r\n', '<br>')
                  .replace('\n', '<br>'))


class NyaaComments(BaseHTTPRequestHandler):
    """HTTP handler that turns the comments of a Nyaa torrent into an Atom feed.

    ``GET /`` returns a plain-text usage page.  ``GET /<id>`` and
    ``GET /s<id>`` fetch the torrent page from nyaa.si / sukebei.nyaa.si,
    scrape the comments and answer with an Atom document.
    """

    def do_GET(self):
        """Serve the usage page, an Atom feed, or a plain-text error.

        Responses:
            200 -- usage page (``text/plain``) or feed
                   (``application/atom+xml``).
            404 -- the path is not a torrent number, or upstream answered 404.
            502 -- upstream was unreachable, answered with another non-200
                   status, or returned a page that could not be scraped.
        """
        if self.path == '/':
            self._send_text(200, _HOME_PAGE)
            return

        matched = _FEED_PATH.fullmatch(self.path)
        if matched is None:
            self._send_text(404, 'Error: Not a valid torrent number')
            return

        host = 'sukebei.nyaa.si' if matched.group(1) else 'nyaa.si'
        # int() normalises leading zeros, as the original did.
        url = 'https://' + host + '/view/' + str(int(matched.group(2)))

        try:
            upstream = requests.get(url,
                                    headers={"user-agent": _USER_AGENT},
                                    timeout=_UPSTREAM_TIMEOUT)
        except requests.RequestException as err:
            # Answer the client instead of letting the exception kill the
            # handler with no response at all.
            self._send_text(
                502, 'Error: could not reach Nyaa (' + type(err).__name__ + ')')
            return

        if upstream.status_code != 200:
            # Report the failure with an error status so feed readers do not
            # mistake the message for a (broken) feed.
            status = 404 if upstream.status_code == 404 else 502
            self._send_text(
                status, 'Nyaa returns HTTP status ' + str(upstream.status_code))
            return

        try:
            feed = self._build_feed(url, upstream.text)
        except (IndexError, TypeError, ValueError):
            # Raised by the [0] lookups and the int() conversion in
            # _build_feed when the page lacks an element the scraper expects.
            self._send_text(502, 'Error: could not parse the Nyaa page')
            return

        # atom_str() returns the encoded document; it is written unmodified
        # so that feedgen's XML escaping stays intact.
        payload = feed.atom_str(pretty=True)
        self.send_response(200)
        self.send_header('Content-type', 'application/atom+xml')
        self.end_headers()
        self.wfile.write(payload)

    def _send_text(self, status, text):
        """Send *text* as a UTF-8 ``text/plain`` response with *status*."""
        self.send_response(status)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(text.encode('utf-8'))

    def _build_feed(self, url, html):
        """Scrape *html* (the torrent page at *url*) into a FeedGenerator.

        Comments are looked up by element id ``com-1``, ``com-2``, ... until
        the first id that is missing.

        Raises:
            IndexError, TypeError, ValueError: when an expected element or
                attribute is absent or malformed.
        """
        parser = AdvancedHTMLParser.AdvancedHTMLParser()
        parser.parseStr(html)

        fg = FeedGenerator()
        fg.link(href=url, rel='alternate')
        htmltitle = parser.getElementsByTagName('title')[0].innerHTML
        fg.title('Comments for ' + htmltitle)
        fg.id(url)
        fg.link(href='https://nyaacomments.tk' + self.path, rel='self')

        published = None
        index = 1
        while True:
            anchor = 'com-' + str(index)
            cmt = parser.getElementById(anchor)
            if cmt is None:
                break

            authortag = cmt.filter(tagname='a', href__contains='/user/')[0]
            author = authortag.href.replace('/user/', '')
            link = url + '#' + anchor

            # The permalink anchor's first child carries the comment time as
            # a "data-timestamp" attribute (seconds since the epoch).
            tsanchor = cmt.getElementsByAttr('href', '#' + anchor)[0]
            timestamp = int(
                tsanchor.getChildren()[0].getAttribute('data-timestamp'))
            published = datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc)
            content = cmt.getElementsByClassName('comment-content')[0].innerHTML

            fe = fg.add_entry()
            fe.id(link)
            fe.title('Comment by ' + author + ' on ' + htmltitle)
            fe.author({'name': author})
            fe.pubdate(published)
            fe.updated(published)
            fe.link(href=link, rel='alternate')
            fe.content(_linebreaks_to_br(content), type='html')

            index += 1

        # Set the feed's last-update time to the publish time of the last
        # comment; when there are no comments feedgen's own value is kept.
        if published is not None:
            fg.updated(published)

        return fg


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn (one thread per request)."""
    pass


if __name__ == '__main__':
    server = ThreadedHTTPServer(('localhost', 2800), NyaaComments)
    server.serve_forever()