Implementación del modo sin conexión para Yandex.Music

Introducción



Hoy consideraremos un servicio de música tan conocido como Yandex.Music. Un buen servicio en general, pero con un inconveniente importante: la imposibilidad de trabajar sin conexión. Intentaremos corregir este molesto malentendido utilizando las herramientas disponibles.



Herramientas



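Así que necesitamos:

  • Python 3 con asyncio,
  • urllib y HTMLParser para la autorización,
  • aiohttp para las solicitudes al servicio,
  • BeautifulSoup (con lxml) para analizar el html,
  • aiofile para escribir los archivos,
  • tqdm para mostrar el progreso,
  • argparse para los argumentos de la línea de comandos.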





Autorización



Los usuarios no autorizados del servicio solo pueden acceder a fragmentos de canciones de hasta 30 segundos de duración. Claramente, esto no es suficiente para escuchar música con comodidad. Iniciaremos sesión de la forma más natural: a través del formulario web, y así recibiremos las cookies. Para ello nos ayudarán un opener (urllib.request.build_opener) para realizar las solicitudes y HTMLParser para analizar los formularios.



def resolve_cookie(login: str, password: str) -> str:
    """Log in through the Yandex passport web form and return the cookies.

    The form is submitted twice: first with the login only, then the form
    returned by that submission is sent with both the login and the password.

    Args:
        login: Account login.
        password: Account password.

    Returns:
        The cookies of the ``.yandex.ru`` domain as ``name=value`` pairs
        joined with ``"; "``.

    Raises:
        Exception: If no ``yandex_login`` cookie was received.
    """
    jar = CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(jar),
        urllib.request.HTTPRedirectHandler(),
    )

    def submit_form(page, fields):
        # Parse the form found on ``page``, overlay ``fields`` and post it.
        form = FormParser()
        form.feed(page.read().decode("utf-8"))
        form.close()
        for key, value in fields:
            form.params[key] = value
        payload = urllib.parse.urlencode(form.params).encode("utf-8")
        # Use the page's own URL when the parser reports a falsy url.
        return opener.open(form.url or page.url, payload)

    login_page = opener.open("https://passport.yandex.ru")
    password_page = submit_form(login_page, [("login", login)])
    submit_form(password_page, [("login", login), ("passwd", password)])

    # Only cookies of the .yandex.ru domain are kept.
    received = {c.name: c.value for c in jar if c.domain == ".yandex.ru"}
    if "yandex_login" not in received:
        keys = ", ".join(received.keys())
        raise Exception(f"Invalid cookie_data {keys}")
    return "; ".join(f"{name}={value}" for name, value in received.items())


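En la primera solicitud a https://passport.yandex.ru obtenemos el formulario y rellenamos el campo login. Al enviarlo recibimos un segundo formulario, en el que indicamos de nuevo el login y, además, la contraseña (passwd). Tras el segundo envío revisamos las cookies del dominio .yandex.ru: si entre ellas está yandex_login, la autorización se realizó correctamente; en caso contrario se lanza una excepción.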



Yandex Music (HTML) API



Las solicitudes al servicio las haremos de forma asíncrona con aiohttp. El html recibido lo analizaremos con BeautifulSoup.



class YandexMusicApi:
    """Client for the Yandex.Music web site, built on page scraping."""

    host = "music.yandex.ru"
    base_url = f"https://{host}"

    def __init__(self, cookie: str):
        """Create the ``Headers`` helper from the host and the given cookie."""
        self.headers = Headers(self.host, cookie)

    async def _request(self, end_point: str):
        """GET ``{base_url}/{end_point}`` and return the raw response body."""
        async with aiohttp.ClientSession() as session:
            url = f"{self.base_url}/{end_point}"
            async with session.request(method="GET", url=url) as response:
                return await response.read()

    async def get_favorite_artists(self, login: str) -> List[Artist]:
        """Return the artists listed on the ``users/{login}/artists`` page.

        Args:
            login: Login of the user whose artists page is read.

        Returns:
            One ``Artist(id, title)`` per ``div.artist`` element on the page.

        Raises:
            Exception: If the page has no ``page-users__artists`` block. The
                message is the page caption when one is present, otherwise a
                generic "not found" message.
        """
        body = await self._request(f"users/{login}/artists")
        soup = BeautifulSoup(body, "lxml")
        artists_soup = soup.find("div", class_="page-users__artists")
        if artists_soup is None:
            caption = soup.find("div", class_="page-users__caption")
            if caption:
                raise Exception(caption.contents[0])
            # Without this guard the loop below would fail with an opaque
            # AttributeError on None.
            raise Exception(f"Favorite artists block not found for user {login}")
        result = []
        for artist_soup in artists_soup.find_all("div", class_="artist"):
            title_soup = artist_soup.find("div", class_="artist__name")
            title = title_soup.attrs["title"]
            title_href_soup = title_soup.find("a")
            # The artist id is the last path segment of the link address.
            id_ = int(title_href_soup.attrs["href"].split("/")[-1])
            result.append(Artist(id_, title))
        return result


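Aquí todo es sencillo: en la página https://music.yandex.ru/users/<login>/artists buscamos el bloque page-users__artists, que contiene los artistas favoritos. El nombre del artista se toma del atributo title del bloque artist__name. El Id se obtiene con split a partir de la dirección del enlace.

De forma análoga se obtienen los álbumes de un artista y las pistas de un álbum.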





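Pasemos a lo más interesante: obtener el enlace directo al archivo de una pista. En la pestaña Network del navegador se ve que el archivo se solicita en una dirección de la forma https://{host}/get-mp3/{sign}/{ts}/{path}, donde sign es una firma. La firma es el md5 de la cadena formada por la sal (XGRlBW9FXlekgbPrRHuSiA), el valor path sin su primer carácter y el valor s. Los valores host, path, s y ts llegan en la respuesta json de la segunda solicitud.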



    async def get_track_url(self, album_id: int, track_id: int) -> str:
        """Resolve the ``get-mp3`` URL of a track.

        Two GET requests are made inside one session: the download handler
        answers with a ``src`` address, and that address answers with the
        ``host``, ``path``, ``s`` and ``ts`` values used to build the result.

        Args:
            album_id: Id of the album the track is requested from.
            track_id: Id of the track.

        Returns:
            A URL of the form ``https://{host}/get-mp3/{sign}/{ts}/{path}``.
        """
        async with aiohttp.ClientSession() as session:
            handler_url = (
                f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/"
                f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
            )
            page = f"album/{album_id}"
            handler_headers = self.headers.build(page)
            async with session.request(method="GET", url=handler_url, headers=handler_headers) as handler_response:
                handler_body = await handler_response.json()
                src = handler_body["src"]
                src += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
                src_parts = parse.urlparse(src)
                storage_headers = self.headers.build(page, {
                    ":authority": "storage.mds.yandex.net",
                    ":method": "GET",
                    ":path": f"{src_parts.path}/{src_parts.query}",
                    ":scheme": "https",
                }, True)
                async with session.request(method="GET", url=src, headers=storage_headers) as storage_response:
                    info = await storage_response.json()
                    host = info["host"]
                    path = info["path"]
                    s = info["s"]
                    ts = info["ts"]
                    # Signature: md5 of the fixed salt, the path without its
                    # first character, and the "s" value.
                    salt = "XGRlBW9FXlekgbPrRHuSiA"
                    digest = md5(f"{salt}{path[1:]}{s}".encode("utf-8"))
                    sign = digest.hexdigest()
                    return f"https://{host}/get-mp3/{sign}/{ts}/{path}"


, .





Para descargar el archivo usaremos, como antes, aiohttp, y para escribirlo en disco, aiofile.



    async def download_file(cls, url: str, filename: str):
        """Download ``url`` and write the response body to ``filename``.

        Args:
            url: Address to fetch with a GET request.
            filename: Path of the file to create or overwrite.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status; nothing is written in that case.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(method="GET", url=url) as response:
                # Fail loudly on an error status instead of saving the error
                # body under the target name as if it were the real file.
                response.raise_for_status()
                data = await response.read()
        async with AIOFile(filename, "wb") as afp:
            await afp.write(data)
            await afp.fsync()


. , , mp3, , , . , , . , . .





, . , , . , , . (- mp3 ).



    async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
        """Download the albums of ``artist`` below ``self.target_dir``.

        Files are laid out as ``<artist>/<year> - <album>/<NN>. <track>.mp3``
        with an optional ``cover.jpg`` per album. Two tqdm bars report the
        progress: one per artist (albums) and one per album (tracks + cover).

        Args:
            artist: Artist whose albums are requested from ``self.api``.
            depth: How far existing files are re-checked. Below
                ``Depth.ALBUMS`` an album whose directory exists is skipped;
                from ``Depth.TRACKS`` on, existing track files are downloaded
                again and overwritten.
        """
        artist_bar = tqdm(total=0, desc=artist.title, position=1, ascii=True)
        albums = await self.api.get_artist_albums(artist.id)
        artist_bar.total = len(albums)
        artist_bar.refresh()

        for album in albums:
            album_dir = os.path.join(
                self.target_dir,
                normalize(artist.title),
                f"{album.year} - {normalize(album.title)}",
            )
            # Shallow mode: an existing album directory counts as done.
            if depth < Depth.ALBUMS and os.path.exists(album_dir):
                artist_bar.update()
                continue

            album_bar = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
            tracks = await self.api.get_album_tracks(album.id)
            album_bar.total = len(tracks)
            album_bar.refresh()
            os.makedirs(album_dir, exist_ok=True)

            if album.cover:
                # The cover counts as one extra step of the album bar.
                album_bar.total += 1
                cover_path = os.path.join(album_dir, "cover.jpg")
                if not os.path.exists(cover_path):
                    await self.download_file(album.cover, cover_path)
                album_bar.update()

            for track in tracks:
                track_path = os.path.join(album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3")
                if depth >= Depth.TRACKS or not os.path.exists(track_path):
                    track_url = await self.api.get_track_url(track.album_id, track.id)
                    await self.download_file(track_url, track_path)
                    tags = {
                        "title": track.title,
                        "tracknumber": str(track.num),
                        "artist": artist.title,
                        "album": album.title,
                        "date": str(album.year),
                    }
                    self.write_tags(track_path, tags)
                album_bar.update()

            album_bar.close()
            artist_bar.update()

        artist_bar.close()


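Hay que tener en cuenta que algunos nombres, por ejemplo AC/DC, contienen caracteres que no pueden usarse en una ruta de archivo. Para sustituirlos usamos la función normalize: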



def normalize(name: str) -> str:
    """Return ``name`` with every ``/`` replaced by ``-``."""
    slash_to_dash = {ord("/"): "-"}
    return name.translate(slash_to_dash)


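En este ejemplo los álbumes y las pistas se descargan de forma secuencial (uno tras otro). Si se quiere paralelizar la descarga, se puede hacer con asyncio.Semaphore y asyncio.gather.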

, .





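Para no iniciar sesión en cada ejecución, el login y la contraseña se leen del archivo .credentials, y la cookie obtenida se guarda en el archivo .cookie, de donde se toma en las ejecuciones siguientes.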



def resolve_cookie() -> str:
    """Return the session cookie, logging in only when none is cached.

    Both files live in the parent directory of this module's directory.
    An existing ``.cookie`` file is returned verbatim. Otherwise the login
    and password are read from the ``[yandex]`` section of ``.credentials``,
    ``auth.resolve_cookie`` is called, and its result is written to
    ``.cookie`` before being returned.

    Raises:
        Exception: If neither ``.cookie`` nor ``.credentials`` exists; the
            message shows the expected ``.credentials`` content.
    """
    module_dir = os.path.dirname(__file__)
    base_dir = os.path.normpath(os.path.join(module_dir, os.path.pardir))

    cookie_file = os.path.join(base_dir, ".cookie")
    if os.path.exists(cookie_file):
        with open(cookie_file, "rt") as cached:
            return cached.read()

    credentials_file = os.path.join(base_dir, ".credentials")
    if not os.path.exists(credentials_file):
        raise Exception(f"""Create \"{credentials_file}\" with content

[yandex]
login=<user_login>
password=<user_password>
""")

    config = configparser.ConfigParser()
    config.read(credentials_file)
    section = config["yandex"]
    login = section["login"]
    password = section["password"]

    cookie = auth.resolve_cookie(login, password)
    with open(cookie_file, "wt") as cached:
        cached.write(cookie)
    return cookie


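Por último, reunimos todo en un punto de entrada. Para analizar los argumentos de la línea de comandos usamos argparse.

Argumentos disponibles: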



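  • -a (--artist), Id del artista; si no se indica, se descargan los artistas favoritos del usuario,
  • -o (--output), directorio de destino; por defecto, el directorio Music de la carpeta personal del usuario.
  • -d (--depth), profundidad de la comprobación de los archivos existentes,

    • El valor 0 (NORMAL) omite los álbumes cuyo directorio ya existe.
    • El valor 1 (ALBUMS) recorre todas las pistas del álbum y descarga las que faltan.
    • El valor 2 (TRACKS) descarga y sobrescribe las pistas incluso si ya están presentes en el sistema de archivos.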


async def main():
    """Parse the command line and download one artist or the favorites.

    With ``-a/--artist`` the given artist is downloaded. Without it, the
    login is taken from the ``yandex_login`` entry of the cookie string and
    that user's favorites are downloaded.

    Raises:
        Exception: If no artist is given and the cookie string has no
            ``yandex_login`` entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--artist", help="Artist ID")
    parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
                        help=f"Output directory, default {Path.home()}/Music")
    parser.add_argument("-d", "--depth", default=0, type=int,
                        help=f"Exists files check depth, {enum_print(Depth)}")
    args = parser.parse_args()
    cookie = resolve_cookie()
    api = YandexMusicApi(cookie)
    agent = YandexMusicAgent(api, args.output)
    if args.artist:
        artist = await api.get_artist(args.artist)
        await agent.download_artist(artist, args.depth)
    else:
        # The cookie string is "name=value" pairs joined with "; ", so the
        # last pair has no trailing ";". Match up to the next ";" or the end
        # of the string, and only at the start of a pair.
        found = re.search(r"(?:^|;\s*)yandex_login=([^;]*)", cookie)
        if found is None:
            raise Exception("yandex_login not found in cookie")
        login = found.group(1).strip()
        await agent.download_favorites(login, args.depth)


Y ahora, finalmente, podemos ejecutarlo todo:



# Script entry point: run the asynchronous main() only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())


Gracias por su atención. Ahora ya sabe cómo implementar una API que no existe, descargar lo no descargable y convertirse en el orgulloso propietario de su propia colección de música.



El resultado se puede ver en el repositorio yandex.music.agent




All Articles