Добавил возможность загружать файлы сразу в S3

This commit is contained in:
Viner Abubakirov
2026-02-19 13:52:02 +05:00
parent 1981cb7da3
commit 8ac132e503
6 changed files with 198 additions and 30 deletions

6
.env.example Normal file
View File

@@ -0,0 +1,6 @@
S3_ACCESS_KEY=""
S3_SECRET_KEY=""
S3_BUCKET_NAME=""
S3_ENDPOINT_URL=""
S3_REGION_NAME=""
S3_SIGNATURE_VERSION=""

View File

@@ -1,5 +1,9 @@
import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import boto3
from botocore.client import Config
class ChunkUploadBackend(ABC): class ChunkUploadBackend(ABC):
@abstractmethod @abstractmethod
@@ -11,7 +15,7 @@ class ChunkUploadBackend(ABC):
"""Загрузка очередного чанка""" """Загрузка очередного чанка"""
@abstractmethod @abstractmethod
def finish(self) -> None: def finish(self) -> any:
"""Завершение загрузки""" """Завершение загрузки"""
@abstractmethod @abstractmethod
@@ -21,24 +25,96 @@ class ChunkUploadBackend(ABC):
class DiskChunkUploadBackend(ChunkUploadBackend):
    """Chunk upload backend that writes the incoming stream to a local file.

    Files are created inside ``base_path``; the directory is created on
    construction if it does not exist yet.
    """

    def __init__(self, base_path: str):
        """Remember the target directory and make sure it exists.

        Args:
            base_path: Directory that receives the uploaded files.
        """
        self.base_path = base_path
        os.makedirs(self.base_path, exist_ok=True)
        # Handle of the file currently being written; None when idle.
        self._file = None

    def start(self, filename):
        """Open ``filename`` inside ``base_path`` for binary writing.

        Args:
            filename: Name of the file to create (an existing file is truncated).
        """
        # NOTE(review): filename is joined as-is; if callers pass names derived
        # from external data (e.g. a video title containing "/"), the path can
        # point outside base_path or into a missing directory -- confirm.
        if self._file is not None:
            # Do not leak the handle of an upload that was never finished.
            self._file.close()
        self._file = open(os.path.join(self.base_path, filename), "wb")

    def upload_chunk(self, chunk: bytes):
        """Append ``chunk`` to the file opened by :meth:`start`."""
        self._file.write(chunk)

    def finish(self):
        """Close the current file and return its path.

        Returns:
            The path the file was opened with (``base_path`` joined with the
            filename), or ``None`` when no upload is in progress.
        """
        if self._file is None:
            return None
        # ``name`` is the exact path passed to open() in start(), so it already
        # contains base_path; joining base_path again would duplicate a
        # relative directory ("dir/dir/file").
        path = self._file.name
        self._file.close()
        self._file = None
        return path

    def abort(self):
        """Close the current file, if any, without reporting a result.

        The partially written file is left on disk.
        """
        if self._file:
            self._file.close()
            self._file = None
class S3ChunkUploadBackend(ChunkUploadBackend):
    """Chunk upload backend that streams data to S3 as a multipart upload.

    Incoming chunks are collected in an in-memory buffer and sent as one
    multipart part whenever the buffer reaches ``multipart_threshold`` bytes.
    Connection settings are read from the ``S3_*`` environment variables
    (loaded from ``.env`` when that file is present).

    One instance can be reused for several uploads in sequence: every
    :meth:`start` begins from a clean per-upload state.
    """

    def __init__(self, key_prefix: str = ""):
        """Create the S3 client and initialise the per-upload state.

        Args:
            key_prefix: String prepended verbatim to every object key. No
                separator is inserted, so include a trailing "/" if the prefix
                is meant to act as a folder.
        """
        from dotenv import load_dotenv

        load_dotenv(".env")
        self.s3 = boto3.client(
            service_name="s3",
            aws_access_key_id=os.getenv("S3_ACCESS_KEY"),
            aws_secret_access_key=os.getenv("S3_SECRET_KEY"),
            endpoint_url=os.getenv("S3_ENDPOINT_URL"),
            region_name=os.getenv("S3_REGION_NAME"),
            use_ssl=True,
            config=Config(signature_version=os.getenv("S3_SIGNATURE_VERSION")),
        )
        self.bucket = os.getenv("S3_BUCKET_NAME")
        self.key_prefix = key_prefix
        # S3 requires every part except the last one to be at least 5 MiB.
        self.multipart_threshold = 5 * 1024 * 1024  # 5MB
        self.key = None
        self._reset_state()

    def _reset_state(self) -> None:
        """Drop everything that belongs to a single multipart upload."""
        self.upload_id = None
        self.parts = []
        self.part_number = 1
        self.buffer = bytearray()

    def start(self, filename: str) -> None:
        """Begin a new multipart upload for ``key_prefix + filename``.

        Args:
            filename: Object name appended to ``key_prefix`` to form the key.
        """
        # Never carry buffered bytes or part numbers over from an earlier
        # upload on the same instance.
        self._reset_state()
        self.key = f"{self.key_prefix}{filename}"
        response = self.s3.create_multipart_upload(Bucket=self.bucket, Key=self.key)
        self.upload_id = response["UploadId"]

    def upload_chunk(self, chunk: bytes) -> None:
        """Buffer ``chunk`` and send a part once enough data has accumulated."""
        self.buffer.extend(chunk)
        if len(self.buffer) >= self.multipart_threshold:
            self._flush_part()

    def _flush_part(self):
        """Upload the buffered bytes as the next part and record its ETag."""
        response = self.s3.upload_part(
            Bucket=self.bucket,
            Key=self.key,
            PartNumber=self.part_number,
            UploadId=self.upload_id,
            Body=bytes(self.buffer),
        )
        self.parts.append({"PartNumber": self.part_number, "ETag": response["ETag"]})
        self.part_number += 1
        self.buffer.clear()

    def finish(self):
        """Send the remaining data and complete the upload.

        Returns:
            The response of ``complete_multipart_upload``; the response of
            ``put_object`` when the stream was empty; ``None`` when no upload
            is in progress.

        If a request fails, the exception propagates and the upload state is
        kept so that :meth:`abort` can still clean up on the S3 side.
        """
        if self.upload_id is None:
            return None
        if self.buffer:
            self._flush_part()
        if self.parts:
            response = self.s3.complete_multipart_upload(
                Bucket=self.bucket,
                Key=self.key,
                UploadId=self.upload_id,
                MultipartUpload={"Parts": self.parts},
            )
        else:
            # A multipart upload cannot be completed with an empty part list,
            # so store an empty object with a plain PUT instead.
            self.s3.abort_multipart_upload(
                Bucket=self.bucket, Key=self.key, UploadId=self.upload_id
            )
            response = self.s3.put_object(Bucket=self.bucket, Key=self.key, Body=b"")
        # Reset the counter, the part list and the upload id only after success.
        self._reset_state()
        return response

    def abort(self) -> None:
        """Cancel the upload in progress, if any, and discard buffered data."""
        if self.upload_id is None:
            return
        try:
            self.s3.abort_multipart_upload(
                Bucket=self.bucket, Key=self.key, UploadId=self.upload_id
            )
        finally:
            # Clear the local state even when the request fails, so stale
            # bytes can never end up in a later upload.
            self._reset_state()

View File

@@ -46,25 +46,38 @@ class YtDlpManager:
"--no-warnings", "--no-warnings",
"-o", "-o",
"-", "-",
self.url self.url,
] ]
print("Start processing") return self._processing(command, self.title + ".mp4")
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=0)
def download_audio(self):
    """Download the audio track of ``self.url`` through the upload backend.

    Runs yt-dlp with the ``bestaudio`` format selector and ``-o -`` and hands
    the command to ``_processing``, which reads the child's stdout and feeds
    it to the backend.

    Returns:
        Whatever ``_processing`` returns.
    """
    # NOTE(review): the object is always named "<title>.m4a"; "bestaudio" may
    # presumably select a different container -- confirm the extension.
    yt_dlp_options = [
        "-f", "bestaudio",
        "--no-part",
        "--quiet",
        "--no-warnings",
        "-o", "-",
    ]
    command = ["yt-dlp", *yt_dlp_options, self.url]
    target_name = self.title + ".m4a"
    return self._processing(command, target_name)
def _processing(self, command: list[str], filename: str, chunk_size: int = 1024**2):
    """Run ``command`` and stream its stdout into the upload backend.

    Args:
        command: Argument list for the child process (yt-dlp writing to stdout).
        filename: Name passed to ``self.backend.start``.
        chunk_size: Maximum number of bytes read from the pipe per iteration.

    Returns:
        The value returned by ``self.backend.finish()``.

    Raises:
        RuntimeError: If the child process exits with a non-zero status.
        Exception: Any error raised by the backend or while reading the pipe
            is re-raised after the child is killed and the upload is aborted.
    """
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=0
    )
    try:
        self.backend.start(filename)
        # With bufsize=0 a read may return fewer than chunk_size bytes;
        # an empty result means the child closed its stdout.
        while chunk := process.stdout.read(chunk_size):
            self.backend.upload_chunk(chunk)
        ret = process.wait()
    except BaseException:
        # Do not leave the child running or the upload half-open when the
        # backend fails or the transfer is interrupted.
        process.kill()
        process.wait()
        self.backend.abort()
        raise
    finally:
        process.stdout.close()

    if ret != 0:
        self.backend.abort()
        raise RuntimeError(f"yt-dlp failed, status code: {ret}")

    try:
        return self.backend.finish()
    except Exception:
        # A failed finish would otherwise leave the upload open in the backend.
        self.backend.abort()
        raise

View File

@@ -5,8 +5,10 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.14" requires-python = ">=3.14"
dependencies = [ dependencies = [
"boto3>=1.42.52",
"fastapi[standard]>=0.129.0", "fastapi[standard]>=0.129.0",
"httpx[http2]>=0.28.1", "httpx[http2]>=0.28.1",
"python-dotenv>=1.2.1",
"tqdm>=4.67.3", "tqdm>=4.67.3",
"yt-dlp[default]>=2026.2.4", "yt-dlp[default]>=2026.2.4",
] ]

23
test.py
View File

@@ -1,22 +1,19 @@
from app.utils.downloader import HttpStreamingDownloader
from app.utils.uploader import DiskChunkUploadBackend from app.utils.uploader import DiskChunkUploadBackend
from app.utils.uploader import S3ChunkUploadBackend
from app.utils.youtube import YtDlpManager from app.utils.youtube import YtDlpManager
def download(url: str):
    """Download the video and then the audio of ``url`` into S3, printing results.

    Manual smoke test: the return value of each download is printed as-is.
    """
    # Use DiskChunkUploadBackend("trash_holder") here to write to local disk.
    backend = S3ChunkUploadBackend("2")
    manager = YtDlpManager(url, backend)

    print("Download Video")
    video_result = manager.download_video(360)
    print(video_result)

    print("Download Audio")
    audio_result = manager.download_audio()
    print(audio_result)

    print("Success")
def main(): def main():
url = "https://youtu.be/OSAOh4L41Wg" url = "https://youtu.be/OSAOh4L41Wg"

74
uv.lock generated
View File

@@ -32,6 +32,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
] ]
[[package]]
name = "boto3"
version = "1.42.52"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "botocore" },
{ name = "jmespath" },
{ name = "s3transfer" },
]
sdist = { url = "https://files.pythonhosted.org/packages/41/ed/8eacb8ec7bf264079608be5f9a2a57e31e7fed7a791bb3b15500ca9274a5/boto3-1.42.52.tar.gz", hash = "sha256:ff4a4afb832f63a1358e11fe6eb321da0f4767979c6721dd32fb02e6eabcebf5", size = 112811, upload-time = "2026-02-18T21:54:57.804Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/2a/de34ad6c43c56fe6dd5824bff2cd7fdef5edd9de0617cbd217040318ba97/boto3-1.42.52-py3-none-any.whl", hash = "sha256:7b3e0c4bfd8815a3df64fbe98fc9f87dfb12bd7a783cf63dfc2f166c66798c9d", size = 140556, upload-time = "2026-02-18T21:54:56.609Z" },
]
[[package]]
name = "botocore"
version = "1.42.52"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jmespath" },
{ name = "python-dateutil" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c2/37/7044e09d416ff746d23c7456e8c30ddade1154ecd08814b17ab7e2c20fb0/botocore-1.42.52.tar.gz", hash = "sha256:3bdef10aee4cee13ff019b6a1423a2ce3ca17352328d9918157a1829e5cc9be1", size = 14917923, upload-time = "2026-02-18T21:54:48.06Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/67/bbd723d489b25ff9f94a734e734986bb8343263dd024a3846291028c26d0/botocore-1.42.52-py3-none-any.whl", hash = "sha256:c3a0b7138a4c5a534da0eb2444c19763b4d03ba2190c0602c49315e54efd7252", size = 14588731, upload-time = "2026-02-18T21:54:45.532Z" },
]
[[package]] [[package]]
name = "brotli" name = "brotli"
version = "1.2.0" version = "1.2.0"
@@ -390,6 +418,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
] ]
[[package]]
name = "jmespath"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
]
[[package]] [[package]]
name = "markdown-it-py" name = "markdown-it-py"
version = "4.0.0" version = "4.0.0"
@@ -573,6 +610,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
] ]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
]
[[package]] [[package]]
name = "python-dotenv" name = "python-dotenv"
version = "1.2.1" version = "1.2.1"
@@ -697,6 +746,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" }, { url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" },
] ]
[[package]]
name = "s3transfer"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "botocore" },
]
sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" },
]
[[package]] [[package]]
name = "sentry-sdk" name = "sentry-sdk"
version = "2.53.0" version = "2.53.0"
@@ -719,6 +780,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
] ]
[[package]]
name = "six"
version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
]
[[package]] [[package]]
name = "starlette" name = "starlette"
version = "0.52.1" version = "0.52.1"
@@ -898,16 +968,20 @@ name = "youtube-microservice"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "boto3" },
{ name = "fastapi", extra = ["standard"] }, { name = "fastapi", extra = ["standard"] },
{ name = "httpx", extra = ["http2"] }, { name = "httpx", extra = ["http2"] },
{ name = "python-dotenv" },
{ name = "tqdm" }, { name = "tqdm" },
{ name = "yt-dlp", extra = ["default"] }, { name = "yt-dlp", extra = ["default"] },
] ]
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "boto3", specifier = ">=1.42.52" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.129.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.129.0" },
{ name = "httpx", extras = ["http2"], specifier = ">=0.28.1" }, { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" },
{ name = "python-dotenv", specifier = ">=1.2.1" },
{ name = "tqdm", specifier = ">=4.67.3" }, { name = "tqdm", specifier = ">=4.67.3" },
{ name = "yt-dlp", extras = ["default"], specifier = ">=2026.2.4" }, { name = "yt-dlp", extras = ["default"], specifier = ">=2026.2.4" },
] ]