Преглед на файлове

Merged #1 "Automatic ID3 tagging"

Deben Oldert преди 6 години
родител
ревизия
0cbba3c838

+ 13 - 2
sites/helper/download.py

@@ -1,4 +1,5 @@
 import console
+import sites.helper.tagging as tagging
 from .request import RawRequest
 from pyquery import PyQuery
 from settings import Settings
@@ -37,10 +38,10 @@ def download(item, type='GET', parameters=None, headers=None, cookies=None, stre
 
     size = file.headers.get('content-length')
 
-    savefileprogress(name, full_name, file, size)
+    savefileprogress(name, full_name, file, size, item)
 
 
-def savefileprogress(name, full_name, file, size):
+def savefileprogress(name, full_name, file, size, item):
     if size is not None:
         size = int(size)
         console.output('Size: {0}'.format(console.format_bytes(size)))
@@ -60,6 +61,16 @@ def savefileprogress(name, full_name, file, size):
             f.write(file.content)
 
         bar.destroy()
+
+        tags = tagging.search_tags(item)
+
+        if tags is not None:
+            item.link_tag_item(tags)
+
+            tagging.download_artwork(tags)
+
+            tagging.write_tags_to_file(f, item)
+
     console.output('Download of {0} completed!'.format(name))
 
 

+ 5 - 0
sites/helper/item.py

@@ -11,6 +11,8 @@ class Item:
         self.size = size
         self.artist = artist
 
+        self.tag_item = None
+
         self.url_formatted = False
 
     def format_original_url(self):
@@ -22,3 +24,6 @@ class Item:
     def set_download_url(self, url):
+        """Log *url* via ``console.output`` (with ``console.DBG_INFO``) and store it as ``download_url``."""
         console.output('Setting download url to: {0}'.format(url), console.DBG_INFO)
         self.download_url = url
+
+    def link_tag_item(self, tagitem):
+        """Attach *tagitem* to this item by storing it in ``tag_item``."""
+        self.tag_item = tagitem

+ 3 - 1
sites/helper/query.py

@@ -1,9 +1,11 @@
 class Query:
-    def __init__(self, site, method='GET', path=''):
+    def __init__(self, site, method='GET', path='', encoding='html'):
+        """Create a query description bound to *site*.
+
+        :param site: object exposing a ``url`` attribute.
+        :param method: request method name, ``'GET'`` by default.
+        :param path: path appended to the site url when building ``url_path``.
+        :param encoding: keyword encoding marker; ``Request.search`` compares
+            it against ``'+'``.
+        """
         self.site = site
         self.method = method
         self.query = ''
         self.path = path
+        self.encoding = encoding
+        # NOTE: url_path is a snapshot of site.url taken right here; changing
+        # site.url (or self.path) afterwards does not update it.
+        self.url_path = '{0}/{1}'.format(self.site.url, path)
 
     def add_parameter(self, key, value='{0}'):
         self.query += '{0}={1}&'.format(key, value)

+ 7 - 5
sites/helper/request.py

@@ -19,27 +19,29 @@ class Request:
         return self
 
     def search(self, keywords):
-
-        query = self.site.query.format_query(urllib.parse.quote(keywords))
+        if self.site.query.encoding == '+':
+            query = self.site.query.format_query(urllib.parse.quote(keywords.replace(' ', '+')))
+        else:
+            query = self.site.query.format_query(urllib.parse.quote(keywords))
 
         console.output('Request method: {0}'.format(self.site.query.method), level=console.DBG_INFO)
 
         if self.site.query.method == 'GET':
             request = requests.get(
-                '{0}?{1}'.format(self.site.url, query),
+                '{0}?{1}'.format(self.site.query.url_path, query),
                 headers=self.headers,
                 cookies=self.cookie,
                 allow_redirects=True)
         elif self.site.query.method == 'POST':
             request = requests.post(
-                self.site.url,
+                self.site.query.url_path,
                 data=query,
                 headers=self.headers,
                 cookies=self.cookie,
                 allow_redirects=True)
         else:  # FLAT
             request = requests.get(
-                '{0}/{1}/{2}'.format(self.site.url, self.site.query.path, query),
+                '{0}/{1}'.format(self.site.query.url_path, query),
                 headers=self.headers,
                 cookies=self.cookie,
                 allow_redirects=True)

+ 20 - 0
sites/helper/structure.py

@@ -13,6 +13,10 @@ class Structure:
         self.item_duration_path = None
         self.item_size_path = None
         self.item_artist_path = None
+        self.item_cover_path = None
+        self.item_album_path = None
+        self.item_genre_path = None
+        self.item_label_path = None
 
     def set_container_path(self, path):
         self.container_path = path
@@ -42,6 +46,22 @@ class Structure:
         self.item_size_path = path,
         return self
 
+    def set_cover_path(self, path):
+        """Store *path* as ``item_cover_path`` and return ``self`` so calls can be chained."""
+        self.item_cover_path = path
+        return self
+
+    def set_album_path(self, path):
+        """Store *path* as ``item_album_path`` and return ``self`` so calls can be chained."""
+        self.item_album_path = path
+        return self
+
+    def set_genre_path(self, path):
+        """Store *path* as ``item_genre_path`` and return ``self`` so calls can be chained."""
+        self.item_genre_path = path
+        return self
+
+    def set_label_path(self, path):
+        """Store *path* as ``item_label_path`` and return ``self`` so calls can be chained."""
+        self.item_label_path = path
+        return self
+
     def parse(self, html):
         pq = PyQuery(html)
 

+ 79 - 0
sites/helper/tagging.py

@@ -0,0 +1,79 @@
+import console
+from mutagen.id3 import ID3, ID3NoHeaderError
+from mutagen.id3 import ID3, TIT2, TALB, TPE1, TPE2, COMM, TCOM, TCON, TDRC, APIC
+
+from sites.helper.request import RawRequest
+from sites.tags import available
+
+
+def search_tags(item):
+    """Collect tag candidates for *item* from every tag site and let the user pick one.
+
+    Each site in ``available`` is searched with ``"<artist>+<title>"``. The
+    results of all sites are pooled and handed to ``console.option_picker``.
+
+    :param item: object exposing ``artist`` and ``title`` attributes.
+    :return: the picked entry of the pooled results, or ``None`` when no
+        result was found or the picker returned ``None``.
+    """
+    console.output('Searching for id3 tags')
+
+    candidates = []
+
+    for tag_site in available:
+        console.output('Searching {0}'.format(tag_site.url))
+        keywords = '{0}+{1}'.format(item.artist, item.title)
+        found = tag_site.perform_search(keywords)
+        console.output('\tFound {0} results'.format(len(found)))
+
+        candidates.extend(found)
+
+    if not candidates:
+        console.output('No matching tags found')
+        return None
+
+    # Attribute shown in each picker column, and the matching header/width.
+    columns = ['__id__', 'x.title', 'x.artist', 'x.album', 'x.label']
+    layout = [
+        ('ID', 2),
+        ('Title', 50),
+        ('Artist', 40),
+        ('Album', 50),
+        ('Label', 19),
+    ]
+
+    choice = console.option_picker('Pick the most matching tag',
+                                   candidates,
+                                   quit=True,
+                                   objects=columns,
+                                   table=layout)
+
+    return None if choice is None else candidates[choice]
+
+
+def write_tags_to_file(file, item):
+    """Write the tag data linked to *item* into the MP3 file behind *file*.
+
+    :param file: file object; only its ``name`` attribute (the path) is used.
+    :param item: downloaded item whose ``tag_item`` holds the chosen tag data.
+    :return: ``None``. Nothing is written when no tag item is linked.
+    """
+    tagitem = item.tag_item
+
+    if tagitem is None:
+        # No tag was linked to this item, so there is nothing to write.
+        return
+
+    try:
+        mp3 = ID3(file.name)
+    except ID3NoHeaderError:
+        # The file carries no ID3 header yet: start from an empty tag.
+        mp3 = ID3()
+
+    # encoding=3 selects UTF-8 for the text frames.
+    mp3['TIT2'] = TIT2(encoding=3, text=tagitem.title)
+    mp3['TALB'] = TALB(encoding=3, text=tagitem.album)
+    mp3['TPE1'] = TPE1(encoding=3, text=tagitem.artist)
+    mp3['COMM'] = COMM(encoding=3, text='LABEL:{0};'.format(tagitem.label))
+    mp3['TCON'] = TCON(encoding=3, text=tagitem.genre)
+
+    # cover_image stays None when the artwork download returned no data;
+    # only embed a picture frame when there are bytes to put into it.
+    if tagitem.cover_image:
+        mp3['APIC'] = APIC(encoding=3,
+                           mime='image/jpeg',
+                           type=3,
+                           desc='Cover',
+                           data=tagitem.cover_image)
+
+    mp3.save(file.name)
+
+
+def download_artwork(tag):
+    """Fetch the cover behind ``tag.cover_url`` and store it on *tag*.
+
+    :param tag: tag item exposing ``cover_url`` and ``set_cover_image``.
+    :return: ``True`` when the response had content and it was stored on the
+        tag, ``False`` when the response body was empty.
+    """
+    message = 'Downloading artwork from {0}'.format(tag.cover_url)
+    console.output(message, level=console.DBG_INFO)
+
+    response = RawRequest.get(tag.cover_url)
+    has_content = len(response.content) > 0
+
+    if has_content:
+        tag.set_cover_image(response.content)
+
+    return has_content

+ 3 - 0
sites/tags/__init__.py

@@ -0,0 +1,3 @@
+from .beatport import Beatport
+
+# Tag sites that sites.helper.tagging.search_tags iterates over; the site
+# objects are instantiated once, when this package is imported.
+available = [Beatport()]

+ 43 - 0
sites/tags/beatport.py

@@ -0,0 +1,43 @@
+from sites.tags.default import DefaultTagSite
+from sites.helper.query import Query
+from pyquery import PyQuery
+
+
+class Beatport(DefaultTagSite):
+    """Tag site definition for beatport.com search results."""
+
+    def __init__(self):
+        """Configure url, search query, result selectors and request headers."""
+        super().__init__()
+        self.url = 'https://beatport.com'
+        self.query = Query(self, 'GET', 'search', '+')\
+            .add_parameter('q', '{0}')\
+            .add_parameter('_pjax', '#pjax-inner-wrapper')
+        self.structure\
+            .set_container_path('div.bucket.tracks')\
+            .set_item_path('li.bucket-item')\
+            .set_title_path('p.buk-track-title')\
+            .set_artist_path('p.buk-track-artists')\
+            .set_album_path('span.buk-track-primary-title')\
+            .set_genre_path('p.buk-track-genre')\
+            .set_label_path('p.buk-track-labels')\
+            .set_cover_path('img.buk-track-artwork')
+        self.request\
+            .add_header('Referer', self.url + '/')\
+            .add_header('Origin', self.url)\
+            .add_header('X-PJAX', 'true')\
+            .add_header('X-PJAX-Container', '#pjax-inner-wrapper')\
+            .add_header('X-Requested-With', 'XMLHttpRequest')
+
+    def format_title(self, html):
+        """Build the track title, appending the remix name when one is present.
+
+        :param html: markup or selection of the title element.
+        :return: ``"<title>"`` or ``"<title> (<remix>)"``.
+        """
+        pq = PyQuery(html)
+
+        title = pq.find('span.buk-track-primary-title').text()
+
+        # find() always returns a selection object, never None, so an
+        # ``is not None`` test cannot detect a missing remix element. Test
+        # the extracted text instead, otherwise every plain track is
+        # rendered as "<title> ()".
+        remix = pq.find('span.buk-track-remixed').text()
+
+        if remix:
+            title = '{title} ({remix})'.format(title=title, remix=remix)
+
+        return title
+
+    def format_cover_url(self, html):
+        """Return the artwork url, swapping the 95x95 size for 250x250.
+
+        :param html: selection of the artwork ``img`` element.
+        :return: the rewritten url, or the missing/empty ``data-src`` value
+            unchanged when the element carries no url.
+        """
+        url = html.attr['data-src']
+
+        if not url:
+            # No data-src attribute: there is nothing to rewrite.
+            return url
+
+        return url.replace('95x95', '250x250')

+ 32 - 0
sites/tags/default.py

@@ -0,0 +1,32 @@
+from sites.tags.structure import TagStructure
+from sites.helper.request import Request
+from sites.helper.query import Query
+
+
+class DefaultTagSite:
+    """Base class for sites that are searched for tag metadata.
+
+    It wires up a ``TagStructure``, a ``Request`` and a ``Query`` for the site
+    and provides default formatters that subclasses may override.
+    """
+
+    def __init__(self):
+        """Create the site with no url and default helper objects."""
+        self.url = None
+        self.structure = TagStructure(self)
+        self.request = Request(self)
+        self.query = Query(self)
+
+    @staticmethod
+    def _element_text(html):
+        """Return the text content of the selected element(s)."""
+        return html.text()
+
+    def perform_search(self, query):
+        """Run *query* through this site's request object and return its result."""
+        outcome = self.request.search(query)
+        return outcome
+
+    def format_title(self, html):
+        """Return the title as the plain text of *html*."""
+        return self._element_text(html)
+
+    def format_artist(self, html):
+        """Return the artist as the plain text of *html*."""
+        return self._element_text(html)
+
+    def format_album(self, html):
+        """Return the album as the plain text of *html*."""
+        return self._element_text(html)
+
+    def format_genre(self, html):
+        """Return the genre as the plain text of *html*."""
+        return self._element_text(html)
+
+    def format_label(self, html):
+        """Return the label as the plain text of *html*."""
+        return self._element_text(html)
+
+    def format_cover_url(self, html):
+        """Return the value of the ``data-src`` attribute of *html*."""
+        source = html.attr['data-src']
+        return source

+ 38 - 0
sites/tags/structure.py

@@ -0,0 +1,38 @@
+from sites.helper.structure import Structure
+from pyquery import PyQuery
+
+from sites.tags.tagitem import TagItem
+
+
+class TagStructure(Structure):
+    """Structure that turns a tag site's result page into ``TagItem`` objects."""
+
+    @staticmethod
+    def _select(result, path):
+        """Return the elements below *result* matching *path*, or ``None`` when no path is configured."""
+        return result.find(path) if path is not None else None
+
+    @staticmethod
+    def _format(formatter, selection, default):
+        """Apply *formatter* to a non-empty *selection*; return *default* otherwise."""
+        return formatter(selection) if selection else default
+
+    def parse(self, html):
+        """Extract one ``TagItem`` per result entry found in *html*.
+
+        Entries without a title or an artist element are skipped. Optional
+        fields whose selector is not configured, or matches nothing, are
+        left empty instead of being passed to the site's formatters.
+
+        :param html: markup of the search result page.
+        :return: list of ``TagItem`` objects, possibly empty.
+        """
+        pq = PyQuery(html)
+
+        pq.make_links_absolute(base_url=self.site.url)
+
+        results = pq(self.container_path).find(self.item_path)
+
+        items = []
+
+        for result in results:
+            result = pq(result)
+            title = self._select(result, self.item_title_path)
+            artist = self._select(result, self.item_artist_path)
+
+            # find() never returns None, so an ``is not None`` test accepts
+            # every entry; an empty selection is falsy and marks a field
+            # that is really missing.
+            if not title or not artist:
+                continue
+
+            album = self._select(result, self.item_album_path)
+            cover = self._select(result, self.item_cover_path)
+            genre = self._select(result, self.item_genre_path)
+            label = self._select(result, self.item_label_path)
+
+            item = TagItem()
+
+            item.set_title(self.site.format_title(title))
+            item.set_artist(self.site.format_artist(artist))
+            item.set_album(self._format(self.site.format_album, album, ''))
+            item.set_cover_url(self._format(self.site.format_cover_url, cover, None))
+            item.set_genre(self._format(self.site.format_genre, genre, ''))
+            item.set_label(self._format(self.site.format_label, label, ''))
+
+            items.append(item)
+
+        return items

+ 30 - 0
sites/tags/tagitem.py

@@ -0,0 +1,30 @@
+class TagItem:
+    """Plain container for the tag metadata of one search result."""
+
+    def __init__(self):
+        """Create an item with every field unset (``None``)."""
+        # A trailing comma here would store the tuple ``(None,)`` instead of
+        # ``None``, so ``title is None`` checks would fail on a fresh item.
+        self.title = None
+        self.artist = None
+        self.album = None
+        self.genre = None
+        self.label = None
+        self.cover_url = None
+        self.cover_image = None
+
+    def set_title(self, title):
+        """Store the track title."""
+        self.title = title
+
+    def set_album(self, album):
+        """Store the album name."""
+        self.album = album
+
+    def set_artist(self, artist):
+        """Store the artist name."""
+        self.artist = artist
+
+    def set_genre(self, genre):
+        """Store the genre."""
+        self.genre = genre
+
+    def set_label(self, label):
+        """Store the label."""
+        self.label = label
+
+    def set_cover_url(self, url):
+        """Store the url the cover artwork can be downloaded from."""
+        self.cover_url = url
+
+    def set_cover_image(self, image):
+        """Store the downloaded cover artwork data."""
+        self.cover_image = image