diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e73a4c6 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +S3_ACCESS_KEY="" +S3_SECRET_KEY="" +S3_BUCKET_NAME="" +S3_ENDPOINT_URL="" +S3_REGION_NAME="" +S3_SIGNATURE_VERSION="" diff --git a/app/utils/uploader.py b/app/utils/uploader.py index dcc173d..9f4a070 100644 --- a/app/utils/uploader.py +++ b/app/utils/uploader.py @@ -1,5 +1,9 @@ +import os from abc import ABC, abstractmethod +import boto3 +from botocore.client import Config + class ChunkUploadBackend(ABC): @abstractmethod @@ -11,7 +15,7 @@ class ChunkUploadBackend(ABC): """Загрузка очередного чанка""" @abstractmethod - def finish(self) -> None: + def finish(self) -> any: """Завершение загрузки""" @abstractmethod @@ -21,24 +25,96 @@ class ChunkUploadBackend(ABC): class DiskChunkUploadBackend(ChunkUploadBackend): def __init__(self, base_path: str): - import os - - self.os = os - self.base_path = base_path - self.os.makedirs(self.base_path, exist_ok=True) + os.makedirs(self.base_path, exist_ok=True) self._file = None def start(self, filename): - self._file = open(self.os.path.join(self.base_path, filename), "wb") + self._file = open(os.path.join(self.base_path, filename), "wb") def upload_chunk(self, chunk: bytes): self._file.write(chunk) def finish(self): + if self._file is None: + return self._file.close() + return os.path.join(self.base_path, self._file.name) def abort(self): if self._file: self._file.close() self._file = None + + +class S3ChunkUploadBackend(ChunkUploadBackend): + def __init__(self, key_prefix: str = ""): + from dotenv import load_dotenv + + load_dotenv(".env") + + self.s3 = boto3.client( + service_name="s3", + aws_access_key_id=os.getenv("S3_ACCESS_KEY"), + aws_secret_access_key=os.getenv("S3_SECRET_KEY"), + endpoint_url=os.getenv("S3_ENDPOINT_URL"), + region_name=os.getenv("S3_REGION_NAME"), + use_ssl=True, + config=Config(signature_version=os.getenv("S3_SIGNATURE_VERSION")), + ) + self.bucket = os.getenv("S3_BUCKET_NAME") + self.key_prefix = key_prefix + self.upload_id = None + self.parts = [] + self.part_number = 1 + self.buffer = bytearray() + self.multipart_threshold = 5 * 1024 * 1024 # 5MB + self.key = None + + def start(self, filename: str) -> None: + self.key = f"{self.key_prefix}{filename}" + response = self.s3.create_multipart_upload(Bucket=self.bucket, Key=self.key) + self.upload_id = response["UploadId"] + + def upload_chunk(self, chunk: bytes) -> None: + self.buffer.extend(chunk) + + if len(self.buffer) >= self.multipart_threshold: + self._flush_part() + + def _flush_part(self): + response = self.s3.upload_part( + Bucket=self.bucket, + Key=self.key, + PartNumber=self.part_number, + UploadId=self.upload_id, + Body=bytes(self.buffer), + ) + + self.parts.append({"PartNumber": self.part_number, "ETag": response["ETag"]}) + + self.part_number += 1 + self.buffer.clear() + + def finish(self): + if self.buffer: + self._flush_part() + # Сбрасываем счетчик + self.part_number = 1 + response = self.s3.complete_multipart_upload( + Bucket=self.bucket, + Key=self.key, + UploadId=self.upload_id, + MultipartUpload={"Parts": self.parts}, + ) + # Сбрасываем части + self.parts = [] + return response + + def abort(self) -> None: + if self.upload_id: + self.s3.abort_multipart_upload( + Bucket=self.bucket, Key=self.key, UploadId=self.upload_id + ) + self.part_number = 1 + self.parts = [] diff --git a/app/utils/youtube.py b/app/utils/youtube.py index a5a4199..2e9a35f 100644 --- a/app/utils/youtube.py +++ b/app/utils/youtube.py @@ -46,25 +46,38 @@ class YtDlpManager: "--no-warnings", "-o", "-", - self.url + self.url, ] - print("Start processing") - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=0) - print("Write filename to upload backend") - self.backend.start(self.title + ".mp4") - print("Start write chunk to upload backend") - chunk_size = 1024 ** 2 + return self._processing(command, self.title + ".mp4") + + def download_audio(self): + command = [ + "yt-dlp", + "-f", + "bestaudio", + "--no-part", + "--quiet", + "--no-warnings", + "-o", + "-", + self.url, + ] + return self._processing(command, self.title + ".m4a") + + def _processing(self, command: list[str], filename: str, chunk_size: int = 1024**2): + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=0 + ) + self.backend.start(filename) length = 0 while True: chunk = process.stdout.read(chunk_size) if not chunk: break length += chunk_size - print("Write chunk to backend", length) self.backend.upload_chunk(chunk) - print("End writing to backend") ret = process.wait() - print("Check ret status") if ret != 0: self.backend.abort() raise RuntimeError(f"yt-dlp failed, status code: {ret}") + return self.backend.finish() diff --git a/pyproject.toml b/pyproject.toml index 90b4b59..85b7963 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,10 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.14" dependencies = [ + "boto3>=1.42.52", "fastapi[standard]>=0.129.0", "httpx[http2]>=0.28.1", + "python-dotenv>=1.2.1", "tqdm>=4.67.3", "yt-dlp[default]>=2026.2.4", ] diff --git a/test.py b/test.py index 2a974fc..4aaddeb 100644 --- a/test.py +++ b/test.py @@ -1,22 +1,19 @@ -from app.utils.downloader import HttpStreamingDownloader from app.utils.uploader import DiskChunkUploadBackend +from app.utils.uploader import S3ChunkUploadBackend from app.utils.youtube import YtDlpManager def download(url: str): - upload_backend = DiskChunkUploadBackend("trash_holder") + # upload_backend = DiskChunkUploadBackend("trash_holder") + upload_backend = S3ChunkUploadBackend("2") youtube = YtDlpManager(url, upload_backend) - youtube.download_video(360) - # downloader = HttpStreamingDownloader(upload_backend) - # youtube = YtDlpInfo(url) - # video = youtube.get_video_url("480p") - # video_name = youtube.title + ".mp4" - # audio = youtube.get_audio_url() - # audio_name = youtube.title + ".m4a" - - # downloader.download(video.url, video_name, video.headers, video.chunk_size) - # downloader.download(audio.url, audio_name, audio.headers, audio.chunk_size) - + print("Download Video") + res = youtube.download_video(360) + print(res) + print("Download Audio") + res = youtube.download_audio() + print(res) + print("Success") def main(): url = "https://youtu.be/OSAOh4L41Wg" diff --git a/uv.lock b/uv.lock index 84b65d7..3d60088 100644 --- a/uv.lock +++ b/uv.lock @@ -32,6 +32,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] +[[package]] +name = "boto3" +version = "1.42.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/41/ed/8eacb8ec7bf264079608be5f9a2a57e31e7fed7a791bb3b15500ca9274a5/boto3-1.42.52.tar.gz", hash = "sha256:ff4a4afb832f63a1358e11fe6eb321da0f4767979c6721dd32fb02e6eabcebf5", size = 112811, upload-time = "2026-02-18T21:54:57.804Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/2a/de34ad6c43c56fe6dd5824bff2cd7fdef5edd9de0617cbd217040318ba97/boto3-1.42.52-py3-none-any.whl", hash = "sha256:7b3e0c4bfd8815a3df64fbe98fc9f87dfb12bd7a783cf63dfc2f166c66798c9d", size = 140556, upload-time = "2026-02-18T21:54:56.609Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/37/7044e09d416ff746d23c7456e8c30ddade1154ecd08814b17ab7e2c20fb0/botocore-1.42.52.tar.gz", hash = "sha256:3bdef10aee4cee13ff019b6a1423a2ce3ca17352328d9918157a1829e5cc9be1", size = 14917923, upload-time = "2026-02-18T21:54:48.06Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/67/bbd723d489b25ff9f94a734e734986bb8343263dd024a3846291028c26d0/botocore-1.42.52-py3-none-any.whl", hash = "sha256:c3a0b7138a4c5a534da0eb2444c19763b4d03ba2190c0602c49315e54efd7252", size = 14588731, upload-time = "2026-02-18T21:54:45.532Z" }, +] + [[package]] name = "brotli" version = "1.2.0" @@ -390,6 +418,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -573,6 +610,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -697,6 +746,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "sentry-sdk" version = "2.53.0" @@ -719,6 +780,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = "starlette" version = "0.52.1" @@ -898,16 +968,20 @@ name = "youtube-microservice" version = "0.1.0" source = { virtual = "." } dependencies = [ + { name = "boto3" }, { name = "fastapi", extra = ["standard"] }, { name = "httpx", extra = ["http2"] }, + { name = "python-dotenv" }, { name = "tqdm" }, { name = "yt-dlp", extra = ["default"] }, ] [package.metadata] requires-dist = [ + { name = "boto3", specifier = ">=1.42.52" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.129.0" }, { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "tqdm", specifier = ">=4.67.3" }, { name = "yt-dlp", extras = ["default"], specifier = ">=2026.2.4" }, ]