diff --git a/.gitignore b/.gitignore index cb81a46..ab9955b 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,4 @@ reportlog.json .ruff_cache/ .pdm.toml requirements.txt +src/huesoporro/tts_files/ diff --git a/Makefile b/Makefile index 7b1af53..daabaf9 100644 --- a/Makefile +++ b/Makefile @@ -7,3 +7,9 @@ tests: uv run pytest --cov=halig -vv tests --report-log reportlog.json uv run coverage html uv run coverage xml + +compile: + uv run pyinstaller markovbot.spec + +clean-compile: + uv run pyinstaller markovbot.spec --clean diff --git a/README.md b/README.md index e69de29..22880de 100644 --- a/README.md +++ b/README.md @@ -0,0 +1 @@ +# huesoporro diff --git a/pyproject.toml b/pyproject.toml index 207f2de..ff92116 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,24 @@ [project] -name = "markovbot-gui" +name = "huesoporro" version = "0.1.2" -description = "Markov Chain Bot GUI" +description = "Misc Twitch bots" readme = "README.md" authors = [ - { name = "tomaarsen" }, { name = "185504a9", email = "catalin@roboces.dev" } ] requires-python = ">=3.11" dependencies = [ - "kivy[base]>=2.3.0", "nltk>=3.9.1", - "pillow>=10.4.0", "platformdirs>=4.3.6", "pydantic>=2.9.2", "pydantic-settings>=2.6.0", "pyinstaller>=6.11.0", "twitchwebsocket>=1.2.1", "loguru>=0.7.2", + "ffmpeg>=1.4", + "ffmpeg-python>=0.2.0", + "gtts>=2.5.4", + "litestar[standard]>=2.13.0", ] [tool.uv] @@ -29,22 +30,13 @@ dev-dependencies = [ [[tool.mypy.overrides]] module = [ - "kivy", - "kivy.uix.widget", - "kivy.uix.popup", - "kivy.uix.button", - "kivy.uix.boxlayout", - "kivy.uix.textinput", - "kivy.uix.label", - "kivy.metrics", - "kivy.app", - "kivy.clock", "nltk", "nltk.tokenize", "nltk.tokenize.treebank", "nltk.tokenize.destructive", "TwitchWebsocket", - "tokenizer" + "tokenizer", + "gtts" ] ignore_missing_imports = true diff --git a/src/markovbot_gui/__init__.py b/src/huesoporro/__init__.py similarity index 100% rename from src/markovbot_gui/__init__.py rename to src/huesoporro/__init__.py diff --git a/src/markovbot_gui/libs/LICENSE b/src/huesoporro/libs/LICENSE similarity index 100% rename from src/markovbot_gui/libs/LICENSE rename to src/huesoporro/libs/LICENSE diff --git a/src/markovbot_gui/libs/README.md b/src/huesoporro/libs/README.md similarity index 100% rename from src/markovbot_gui/libs/README.md rename to src/huesoporro/libs/README.md diff --git a/src/markovbot_gui/libs/__init__.py b/src/huesoporro/libs/__init__.py similarity index 100% rename from src/markovbot_gui/libs/__init__.py rename to src/huesoporro/libs/__init__.py diff --git a/src/markovbot_gui/libs/db.py b/src/huesoporro/libs/db.py similarity index 99% rename from src/markovbot_gui/libs/db.py rename to src/huesoporro/libs/db.py index f547a81..1ee5484 100644 --- a/src/markovbot_gui/libs/db.py +++ b/src/huesoporro/libs/db.py @@ -191,10 +191,11 @@ class Database: fetch=True, ): logger.info("Creating backup before updating Database...") + # Connect to both the new and backup, backup, and close both def progress(status, remaining, total): - logging.debug(f"Copied {total-remaining} of {total} pages...") + logging.debug(f"Copied {total - remaining} of {total} pages...") conn = sqlite3.connect(f"MarkovChain_{channel.replace('#', '').lower()}.db") back_conn = sqlite3.connect( @@ -265,7 +266,7 @@ class Database: ); """) self.add_execute_queue( - f'INSERT INTO MarkovGrammar{first_char}{second_char} SELECT * FROM MarkovGrammar{first_char} WHERE word2 LIKE "{second_char}%";', # noqa: S608 + f'INSERT INTO MarkovGrammar{first_char}{second_char} SELECT * FROM MarkovGrammar{first_char} WHERE word2 LIKE "{second_char}%";', ) self.add_execute_queue( f'DELETE FROM MarkovGrammar{first_char} WHERE word2 LIKE "{second_char}%";', # noqa: S608 @@ -356,7 +357,7 @@ class Database: from nltk import ngrams - from src.markovbot_gui.libs.tokenizer import tokenize + from src.huesoporro.libs.tokenizer import tokenize channel = channel.replace("#", "").lower() copyfile( diff --git a/src/markovbot_gui/libs/markov_chain_bot.py b/src/huesoporro/libs/markov_chain_bot.py similarity index 95% rename from src/markovbot_gui/libs/markov_chain_bot.py rename to src/huesoporro/libs/markov_chain_bot.py index 2e661b5..5801cc5 100644 --- a/src/markovbot_gui/libs/markov_chain_bot.py +++ b/src/huesoporro/libs/markov_chain_bot.py @@ -6,10 +6,10 @@ from loguru import logger from nltk.tokenize import sent_tokenize from TwitchWebsocket import Message, TwitchWebsocket -from src.markovbot_gui.libs.db import Database -from src.markovbot_gui.libs.settings import Settings -from src.markovbot_gui.libs.timer import LoopingTimer -from src.markovbot_gui.libs.tokenizer import detokenize, tokenize +from src.huesoporro.libs.db import Database +from src.huesoporro.libs.settings import Settings +from src.huesoporro.libs.timer import LoopingTimer +from src.huesoporro.libs.tokenizer import detokenize, tokenize class Commands(StrEnum): @@ -249,6 +249,29 @@ class MarkovChain: def message_handler(self, message: Message): # noqa: C901, PLR0911, PLR0912 try: + breakpoint() + + tts_message = { + "badge-info": "subscriber/4", + "badges": "vip/1,subscriber/3,sub-gifter/5", + "color": "#F79AC6", + "custom-reward-id": "8c454446-73b0-480f-946e-d6b5f5c5e331", + "display-name": "robosap1ens__", + "emotes": "", + "first-msg": "0", + "flags": "", + "id": "6cbd37eb-49ae-41f5-b073-345275c91a07", + "mod": "0", + "returning-chatter": "0", + "room-id": "600944302", + "subscriber": "1", + "tmi-sent-ts": "1733252657689", + "turbo": "0", + "user-id": "713968248", + "user-type": "", + "vip": "1", + } + if not message.user or message.user in self.s.denied_users: logger.debug(f"User {message.user} can't send messages") return diff --git a/src/markovbot_gui/libs/settings.py b/src/huesoporro/libs/settings.py similarity index 100% rename from src/markovbot_gui/libs/settings.py rename to src/huesoporro/libs/settings.py diff --git a/src/markovbot_gui/libs/timer.py b/src/huesoporro/libs/timer.py similarity index 100% rename from src/markovbot_gui/libs/timer.py rename to src/huesoporro/libs/timer.py diff --git a/src/markovbot_gui/libs/tokenizer.py b/src/huesoporro/libs/tokenizer.py similarity index 100% rename from src/markovbot_gui/libs/tokenizer.py rename to src/huesoporro/libs/tokenizer.py diff --git a/src/huesoporro/main.py b/src/huesoporro/main.py new file mode 100644 index 0000000..31ed551 --- /dev/null +++ b/src/huesoporro/main.py @@ -0,0 +1,254 @@ +import asyncio +import os +from collections import deque +from functools import lru_cache +from hashlib import sha512 +from pathlib import Path + +import uvicorn +from gtts import gTTS +from litestar import Litestar, MediaType, WebSocket, get +from litestar.connection import ASGIConnection +from litestar.di import Provide +from litestar.exceptions import HTTPException +from litestar.handlers import BaseRouteHandler, WebsocketListener +from litestar.static_files import StaticFilesConfig +from litestar.types import Scope +from loguru import logger +from pydantic import Field, SecretStr +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + api_key: SecretStr + port: int = 8000 + host: str = "0.0.0.0" + static_files_path: Path = Field( + default_factory=lambda: Path(__file__).parent / "static" + ) + tts_cache_path: Path = Field( + default_factory=lambda: Path(__file__).parent / "tts_files" + ) + db_filepath: Path = Field( + default_factory=lambda: Path(__file__).parent / "huesoporro.db" + ) + + @staticmethod + @lru_cache(maxsize=1) + def get(): + return Settings() + + +class TTSManager: + TEXT_MAX_LENGTH: int = 400 + + def __init__(self, base_dir="tts_files", max_queue_size=10): + # Create directory for TTS files + self.base_dir = base_dir + os.makedirs(base_dir, exist_ok=True) + + # TTS Queue + self.queue = deque(maxlen=max_queue_size) + + # Connected WebSocket clients + self._clients: list[WebSocket] = [] + + # Currently playing audio + self.current_audio = None + + # Lock to prevent race conditions + self._lock = asyncio.Lock() + + def generate_tts(self, text, language="pt", tld="com.br"): + # Generate unique filename + text = text[0 : self.TEXT_MAX_LENGTH] + filename = ( + Path(__file__).parent + / "tts_files" + / f"{sha512(text.lower().encode()).hexdigest()}.mp3" + ) + if filename.exists(): + return { + "filename": filename.name, + "text": text, + "filepath": os.path.join(self.base_dir, filename), + "language": language, + "tld": tld, + } + logger.info(f"Generating TTS for '{text}'") + # Generate TTS + tts = gTTS(text=text, lang=language, tld=tld) + tts.save(str(filename)) + + # Create audio info + audio_info = { + "filename": filename.name, + "text": text, + "filepath": filename, + "language": language, + "tld": tld, + } + + return audio_info + + @property + def clients(self): + return self._clients + + def add_client(self, socket: WebSocket): + self._clients.append(socket) + + def remove_client(self, socket: WebSocket): + self._clients.remove(socket) + + async def add_to_queue(self, text, language="pt", tld="com.br"): + """Add TTS request to queue and start processing if not already running""" + async with self._lock: + # Generate TTS file + audio_info = self.generate_tts(text, language, tld) + + # Add to queue + self.queue.append(audio_info) + + # If this is the only item, start processing + if len(self.queue) == 1: + asyncio.create_task(self.process_queue()) + + return audio_info + + async def process_queue(self): + """Process queue and stream audio to connected clients""" + while True: + async with self._lock: + # Check if queue is empty + if not self.queue: + return + + # Get next audio file + audio_info = self.queue[0] + + try: + # Read the entire audio file + with open(audio_info["filepath"], "rb") as audio_file: + # Log file details + # file_size = os.path.getsize(audio_info["filepath"]) + file_size = Path(audio_info["filepath"]).stat().st_size + logger.info( + f"Streaming file: {audio_info['filename']}, Size: {file_size} bytes" + ) + + # Stream audio to all connected clients + for client in self._clients: + try: + # Reset file pointer to beginning + audio_file.seek(0) + + # Send file size first (as a header) + await client.send_text(f"FILE_HEADER:{file_size}") + + # Stream file in chunks + chunk = audio_file.read(128) # Larger chunk size + chunk_count = 0 + while chunk: + logger.info(f"Streamed {chunk_count} chunks") + chunk_count += 1 + await client.send_bytes(chunk) + chunk = audio_file.read(128) + + # Send file footer + await client.send_text("FILE_FOOTER") + + except Exception: # noqa: BLE001 + logger.error( + f"Error streaming to client {client.client}. Removing it." + ) + if client in self._clients: + self.remove_client(client) + + except Exception as e: # noqa: BLE001 + logger.error(f"Error processing audio file: {e}") + + # Remove the processed item from the queue + async with self._lock: + if self.queue and self.queue[0] == audio_info: + self.queue.popleft() + + +def authenticate(connection: ASGIConnection, route_handler: BaseRouteHandler) -> None: + """If the client didn't send the API key, deny access""" + api_key = Settings.get().api_key.get_secret_value() + + client_api_key = connection.headers.get("X-API-KEY") or connection.query_params.get( + "api_key" + ) + + if not client_api_key or client_api_key != api_key: + raise HTTPException(status_code=401, detail="Unauthorized") + + +class WebsocketHandler(WebsocketListener): + path = "/ws" + guards = [authenticate] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.tts_manager = TTSManager() + + async def on_accept(self, socket: WebSocket) -> None: + # Add client to the list of connected clients + self.tts_manager.add_client(socket) + logger.info( + f"Connection accepted from {socket.client.host}:{socket.client.port}" # type: ignore[union-attr] + ) + + async def on_disconnect(self, socket: WebSocket) -> None: + # Remove client from the list + if socket in self.tts_manager.clients: + self.tts_manager.remove_client(socket) + logger.info(f"Connection closed by {socket.client.host}:{socket.client.port}") # type: ignore[union-attr] + + async def on_receive(self, data: str) -> None: + # When text is received, add it to TTS queue + await self.tts_manager.add_to_queue(data) + + +@get( + "/overlay", + media_type=MediaType.HTML, + dependencies={"s": Provide(Settings.get)}, + guards=[authenticate], +) +def get_overlay(s: Settings) -> str: + with (s.static_files_path / "overlay.html").open() as f: + return f.read() + + +async def after_exception_handler(exc: Exception, scope: Scope) -> None: + """Hook function that will be invoked after each exception.""" + state = scope["app"].state + if not hasattr(state, "error_count"): + state.error_count = 1 + else: + state.error_count += 1 + + logger.info( + f"an exception of type {type(exc).__name__} has occurred for requested path {scope['path']} and the application error count is {state.error_count}.", + ) + + +def create_app(): + return Litestar( + route_handlers=[get_overlay, WebsocketHandler], + static_files_config=( + StaticFilesConfig( + path="/tts_files", directories=[Settings.get().tts_cache_path] + ), + ), + after_exception=[after_exception_handler], + ) + + +if __name__ == "__main__": + settings = Settings.get() + app = create_app() + uvicorn.run(app, host=settings.host, port=settings.port) diff --git a/src/huesoporro/static/index.html b/src/huesoporro/static/index.html new file mode 100644 index 0000000..d32d19b --- /dev/null +++ b/src/huesoporro/static/index.html @@ -0,0 +1,31 @@ + + + + + +
+ + + + + + + +