This commit is contained in:
cătălin 2024-12-11 16:54:21 +01:00
commit 06137806d3
No known key found for this signature in database
20 changed files with 800 additions and 2865 deletions

19
.gitignore vendored
View file

@ -110,3 +110,22 @@ reportlog.json
.ruff_cache/
.pdm.toml
requirements.txt
src/huesoporro/tts_files/
# Devenv
.devenv*
devenv.local.nix
# direnv
.direnv
# pre-commit
.pre-commit-config.yaml
# Devenv
.devenv*
devenv.local.nix
# direnv
.direnv
# pre-commit
.pre-commit-config.yaml

View file

@ -1,3 +1,5 @@
files: src|tests
exclude: ^$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
@ -16,21 +18,38 @@ repos:
- id: mixed-line-ending
args: [ --fix=lf ]
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.6.4
hooks:
- id: ruff
args:
- --fix
- --exit-non-zero-on-fix
- id: ruff-format
- repo: local
hooks:
- id: mypy
name: mypy
entry: uv run mypy
entry: uv run mypy --check-untyped-defs
language: system
types: [ python ]
exclude: LICENSE|helm
exclude_types:
- markdown
- css
- html
- id: ruff-format
name: ruff format
language: system
entry: ruff format .
exclude: LICENSE|helm
exclude_types:
- markdown
- css
- html
- id: ruff-check
name: ruff check
language: system
entry: ruff check . --fix --exit-non-zero-on-fix
exclude: LICENSE|helm
exclude_types:
- markdown
- css
- html

View file

@ -1,9 +1,18 @@
PROJECT_NAME := "huesoporro"
PROJECT_TAG := "latest"
PROJECT_TARGET := "serve"
fmt:
uvx pre-commit run --all-files --color always
.PHONY: tests
tests:
uv run pytest --cov=halig -vv tests --report-log reportlog.json
uv run coverage html
uv run coverage xml
serve:
uv run python -m src.huesoporro.main
build:
docker build . -t git.roboces.dev/catalin/$(PROJECT_NAME):$(PROJECT_TAG) --target $(PROJECT_TARGET)

View file

@ -0,0 +1 @@
# huesoporro

View file

@ -1,38 +0,0 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec file: builds a single-file Windows executable for the
# Kivy GUI.  Run with `pyinstaller markovbot.spec`.
from kivy_deps import sdl2, glew

# Dependency analysis starting from the GUI entry point.
a = Analysis(
    ['src\\markovbot_gui\\main.py'],
    pathex=[],
    binaries=[],
    datas=[],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)
# Bundle pure-Python modules into a compressed archive.
pyz = PYZ(a.pure)
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    # Kivy's SDL2/GLEW runtime DLLs must be shipped alongside the app.
    *[Tree(p) for p in (sdl2.dep_bins + glew.dep_bins)],
    name='markovbot',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    # runtime_tmpdir=None => one-file build unpacks to a temp directory.
    runtime_tmpdir=None,
    console=True,  # keep a console window for log output
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

View file

@ -1,50 +1,41 @@
[project]
name = "markovbot-gui"
version = "0.1.2"
description = "Markov Chain Bot GUI"
name = "huesoporro"
version = "0.2.0"
description = "Misc Twitch bots"
readme = "README.md"
authors = [
{ name = "tomaarsen" },
{ name = "185504a9", email = "catalin@roboces.dev" }
]
requires-python = ">=3.11"
dependencies = [
"kivy[base]>=2.3.0",
"nltk>=3.9.1",
"pillow>=10.4.0",
"platformdirs>=4.3.6",
"pydantic>=2.9.2",
"pydantic-settings>=2.6.0",
"pyinstaller>=6.11.0",
"twitchwebsocket>=1.2.1",
"loguru>=0.7.2",
"ffmpeg>=1.4",
"ffmpeg-python>=0.2.0",
"gtts>=2.5.4",
"litestar[standard]>=2.13.0",
"httpx>=0.28.0",
]
[tool.uv]
dev-dependencies = [
"mypy>=1.13.0",
"pyright>=1.1.387",
"ruff>=0.7.0",
]
[[tool.mypy.overrides]]
module = [
"kivy",
"kivy.uix.widget",
"kivy.uix.popup",
"kivy.uix.button",
"kivy.uix.boxlayout",
"kivy.uix.textinput",
"kivy.uix.label",
"kivy.metrics",
"kivy.app",
"kivy.clock",
"nltk",
"nltk.tokenize",
"nltk.tokenize.treebank",
"nltk.tokenize.destructive",
"TwitchWebsocket",
"tokenizer"
"tokenizer",
"gtts"
]
ignore_missing_imports = true

View file

@ -1,154 +0,0 @@
import queue
import threading
from pathlib import Path
from traceback import print_exc
from kivy.clock import Clock
from kivy.metrics import dp
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.textinput import TextInput
from loguru import logger
from src.markovbot_gui.libs.markov_chain_bot import MarkovChain
from src.markovbot_gui.libs.settings import Settings
class QueueHandler:
    """Minimal file-like sink that routes every write into a queue.

    Implements just the ``write``/``flush`` stream interface expected by
    ``logger.add``, so log records can be consumed on another thread
    (e.g. by a GUI polling the queue).
    """

    def __init__(self, queue):
        # Destination queue; each written message is put here verbatim.
        self.queue = queue

    def write(self, message):
        """Forward *message* unchanged to the underlying queue."""
        self.queue.put(message)

    def flush(self):
        """No-op: writes are delivered immediately, nothing is buffered."""
class BotRunner(BoxLayout):
    """Kivy panel that runs the Markov bot on a background thread.

    Shows a read-only log view fed from a queue-backed loguru sink, plus
    Start / Stop / Clear Log buttons.  The bot runs in a daemon thread so
    the Kivy event loop stays responsive; log text is drained back onto
    the UI thread by a Clock timer.
    """

    def __init__(self, settings_path: Path, **kwargs):
        super().__init__(**kwargs)
        self.settings_path = settings_path
        self.orientation = "vertical"
        self.spacing = dp(10)
        self.padding = dp(20)
        # Background thread running the bot; created lazily in start_bot().
        self.bot_thread = None
        # Log messages are pushed here by the loguru sink and drained by
        # update_log() on the UI thread (Kivy widgets are not thread-safe).
        self.log_queue: queue.Queue = queue.Queue()
        self.settings = Settings.read(self.settings_path)
        self.queue_handler = QueueHandler(self.log_queue)
        # Replace loguru's default stderr sink with the queue-backed one.
        logger.remove()
        logger.add(
            self.queue_handler,
            format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
            level=self.settings.log_level,
        )
        self.log_display = TextInput(
            multiline=True,
            readonly=True,
            size_hint=(1, 1),
            background_color=[0.1, 0.1, 0.1, 1],  # Dark background
            foreground_color=[0.9, 0.9, 0.9, 1],  # Light text
        )
        self.add_widget(self.log_display)
        # Create button layout
        button_layout = BoxLayout(
            orientation="horizontal",
            size_hint=(1, None),
            height=dp(40),
            spacing=dp(10),
        )
        # Create start button
        self.start_button = Button(
            text="Start Bot",
            size_hint=(None, None),
            size=(dp(100), dp(40)),
        )
        self.start_button.bind(on_release=self.start_bot)
        button_layout.add_widget(self.start_button)
        # Create stop button (disabled until the bot has been started)
        self.stop_button = Button(
            text="Stop Bot",
            size_hint=(None, None),
            size=(dp(100), dp(40)),
            disabled=True,
        )
        self.stop_button.bind(on_release=self.stop_bot)
        button_layout.add_widget(self.stop_button)
        # Create clear log button
        self.clear_button = Button(
            text="Clear Log",
            size_hint=(None, None),
            size=(dp(100), dp(40)),
        )
        self.clear_button.bind(on_release=self.clear_log)
        button_layout.add_widget(self.clear_button)
        self.add_widget(button_layout)
        # Poll the log queue ten times per second on the UI thread.
        Clock.schedule_interval(self.update_log, 0.1)

    def start_bot(self, instance=None):
        """Launch the bot in a daemon thread and toggle the buttons."""
        try:
            # Create and start bot thread
            self.bot_thread = threading.Thread(target=self.run_bot_thread, daemon=True)
            self.bot_thread.start()
            self.start_button.disabled = True
            self.stop_button.disabled = False
            logger.info("Starting bot...")
        except Exception as e:  # noqa: BLE001
            logger.error(f"Failed to start bot: {e}")

    def run_bot_thread(self):
        """Thread target: construct the bot and run it until it stops."""
        try:
            self.bot = MarkovChain(self.settings)
            self.bot.run_bot()
        except Exception:  # noqa: BLE001
            logger.exception("Bot error")
        finally:
            # Widget state must be touched on the UI thread; schedule it.
            Clock.schedule_once(lambda dt: self.reset_button_states(), 0)

    def stop_bot(self, _=None):
        """Stop the bot and wait briefly for its thread to exit.

        NOTE(review): assumes start_bot() ran first — ``self.bot`` is only
        assigned in run_bot_thread(); the Stop button starting disabled
        should guarantee that ordering.  Confirm against the UI flow.
        """
        self.bot.stop_bot()
        # Wait for thread to finish
        if self.bot_thread and self.bot_thread.is_alive():
            self.bot_thread.join(timeout=3.0)
        logger.info("Bot stopped")
        self.reset_button_states()

    def reset_button_states(self):
        """Return buttons to idle state: Start enabled, Stop disabled."""
        self.start_button.disabled = False
        self.stop_button.disabled = True

    def clear_log(self, instance=None):
        """Empty the log display widget."""
        self.log_display.text = ""
        logger.info("Log cleared")

    def update_log(self, dt):
        """Drain queued log messages into the display (UI-thread timer)."""
        try:
            while not self.log_queue.empty():
                message = self.log_queue.get_nowait()
                if message.strip():  # Only add non-empty messages
                    self.log_display.text += message
                    # Keep only the last 1000 lines to prevent memory issues
                    lines = self.log_display.text.split("\n")
                    if len(lines) > 1000:  # noqa: PLR2004
                        self.log_display.text = "\n".join(lines[-1000:]) + "\n"
                    # Auto-scroll to bottom
                    self.log_display.cursor = (0, len(self.log_display.text))
        except queue.Empty:
            pass
        except Exception:  # noqa: BLE001
            print_exc()

View file

@ -1,161 +0,0 @@
from pathlib import Path
from kivy.clock import Clock
from kivy.metrics import dp
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.label import Label
from kivy.uix.popup import Popup
from kivy.uix.textinput import TextInput
from src.markovbot_gui.libs.settings import Settings
from src.markovbot_gui.libs.timer import logger
class ConfigWindow(BoxLayout):
    """Kivy form for viewing and editing the bot's settings file.

    Loads ``Settings`` from *config_path* (seeding the file from defaults
    on first run) and exposes editable fields for the channel, nickname,
    OAuth token and the automatic-generation timer.
    """

    def __init__(self, config_path: Path, **kwargs):
        super().__init__(**kwargs)
        self.config_path = config_path
        self.orientation = "vertical"
        self.spacing = dp(10)
        self.padding = dp(20)
        # Defaults used to seed the settings file the first time the app runs.
        default_config = {
            "Host": "irc.chat.twitch.tv",
            "Port": 6667,
            "Channel": "#<channel>",
            "Nickname": "<name>",
            "Authentication": "oauth:<auth>",
            "DeniedUsers": ["StreamElements", "Nightbot", "Moobot", "Marbiebot"],
            "Cooldown": 20,
            "KeyLength": 2,
            "MaxSentenceWordAmount": 25,
            "MinSentenceWordAmount": -1,
            "HelpMessageTimer": 60 * 60 * 5,  # 18000 seconds, 5 hours
            "AutomaticGenerationTimer": -1,
            "WhisperCooldown": True,
            "EnableGenerateCommand": True,
            "SentenceSeparator": " - ",
            "AllowGenerateParams": True,
        }
        if config_path.exists():
            self.s = Settings.read(config_path)
        else:
            self.s = Settings(**default_config)  # type: ignore[arg-type]
            self.s.write(config_path)
        # Create widgets
        # Channel input
        channel_layout = BoxLayout(
            orientation="horizontal",
            size_hint_y=None,
            height=dp(40),
        )
        channel_label = Label(text="Channel:", size_hint_x=0.3)
        self.channel_input = TextInput(
            multiline=False,
            size_hint_x=0.7,
            text=self.s.channel,
        )
        channel_layout.add_widget(channel_label)
        channel_layout.add_widget(self.channel_input)
        # Nickname input
        nickname_layout = BoxLayout(
            orientation="horizontal",
            size_hint_y=None,
            height=dp(40),
        )
        nickname_label = Label(text="Nickname:", size_hint_x=0.3)
        self.nickname_input = TextInput(
            multiline=False,
            size_hint_x=0.7,
            text=self.s.nickname,
        )
        nickname_layout.add_widget(nickname_label)
        nickname_layout.add_widget(self.nickname_input)
        # Authentication input (masked, holds the OAuth token)
        auth_layout = BoxLayout(
            orientation="horizontal",
            size_hint_y=None,
            height=dp(40),
        )
        auth_label = Label(text="Auth:", size_hint_x=0.3)
        self.auth_input = TextInput(
            multiline=False,
            size_hint_x=0.7,
            password=True,
            text=self.s.authentication,
        )
        auth_layout.add_widget(auth_label)
        auth_layout.add_widget(self.auth_input)
        # Automatic generation timer input
        automatic_generation_label = Label(text="Automatic generation (seconds): ")
        self.automatic_generation_input = TextInput(
            multiline=False,
            size_hint_x=0.7,
            text=str(self.s.automatic_generation_timer),
        )
        automatic_generation_layout = BoxLayout(
            orientation="horizontal",
            size_hint_y=None,
            height=dp(40),
        )
        automatic_generation_layout.add_widget(automatic_generation_label)
        automatic_generation_layout.add_widget(self.automatic_generation_input)
        # Save button
        save_button = Button(
            text="Save",
            size_hint=(None, None),
            size=(dp(100), dp(40)),
            pos_hint={"center_x": 0.5},
        )
        save_button.bind(on_release=self.save_config)
        # Add all widgets to the layout
        self.add_widget(channel_layout)
        self.add_widget(nickname_layout)
        self.add_widget(auth_layout)
        self.add_widget(automatic_generation_layout)
        self.add_widget(save_button)

    def save_config(self, instance):
        """Validate the form fields and persist them to ``self.config_path``.

        Shows a short-lived success popup on success, or an error popup
        (with the exception text) on failure.

        Raises nothing: all errors are caught, shown and logged.
        """
        try:
            self.s.channel = self.channel_input.text.strip()
            self.s.nickname = self.nickname_input.text.strip()
            self.s.authentication = self.auth_input.text.strip()
            self.s.automatic_generation_timer = int(
                self.automatic_generation_input.text
            )
            # FIX: was `< 29`, which silently accepted 29 even though the
            # error message requires at least 30 seconds.
            if 0 < self.s.automatic_generation_timer < 30:  # noqa: PLR2004
                raise ValueError(
                    "Value for 'Automatic generation' must be at least 30 seconds, "  # noqa: EM101
                    "or a negative number for no automatic generations."
                )
            self.s.write(self.config_path)
            # Show success message
            success_popup = Popup(
                title="Success",
                content=Label(text="Configuration saved successfully"),
                size_hint=(None, None),
                size=(dp(250), dp(100)),
            )
            success_popup.open()
            Clock.schedule_once(success_popup.dismiss, 1)
        except Exception as e:  # noqa: BLE001
            # FIX: removed call to undefined self.show_error_message, which
            # raised AttributeError here and prevented the error popup below
            # from ever being shown.
            error_popup = Popup(
                title="Error",
                content=Label(text=f"Failed to save configuration:\n{e!s}"),
                size_hint=(None, None),
                size=(dp(400), dp(150)),
            )
            error_popup.open()
            logger.exception("Failed to save configuration")

View file

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2019 CubieDev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,314 +0,0 @@
# TwitchMarkovChain
Twitch Bot for generating messages based on what it learned from chat
---
## Explanation
When the bot has started, it will start listening to chat messages in the channel listed in the `settings.json` file. Any chat message not sent by a denied user will be learned from. Whenever someone then requests a message to be generated, a [Markov Chain](https://en.wikipedia.org/wiki/Markov_chain) will be used with the learned data to generate a sentence. **Note that the bot is unaware of the meaning of any of its inputs and outputs. This means it can use bad language if it was taught to use bad language by people in chat. You can add a list of banned words it should never learn or say. Use at your own risk.**
Whenever a message is deleted from chat, its contents will be unlearned at 5 times the rate a normal message is learned from.
The bot will avoid learning from commands, or from messages containing links.
---
## How it works
### Sentence Parsing
To explain how the bot works, I will provide an example situation with two messages that are posted in Twitch chat. The messages are:
> Curly fries are the worst kind of fries
> Loud people are the reason I don't go to the movies anymore
Let's start with the first sentence and parse it like the bot will. To do so, we will split up the sentence in sections of `keyLength + 1` words. As `keyLength` has been set to `2` in the [Settings](#settings) section, each section has `3` words.
```txt
Curly fries are the worst kind of fries
[Curly fries:are]
[fries are:the]
[are the:worst]
[the worst:kind]
[worst kind:of]
[kind of:fries]
```
For each of these sections of three words, the last word is considered the output, while all other words are considered inputs.
These words are then turned into a variation of a [Grammar](https://en.wikipedia.org/wiki/Formal_grammar):
```txt
"Curly fries" -> "are"
"fries are" -> "the"
"are the" -> "worst"
"the worst" -> "kind"
"worst kind" -> "of"
"kind of" -> "fries"
```
This can be considered a mathematical function that, when given input "the worst", will output "kind".
In order for the program to know where sentences begin, we also add the first `keyLength` words to a separate Database table, where a list of possible starts of sentences resides.
This exact same process is applied to the second sentence as well. After doing so, the resulting grammar (and our corresponding database table) looks like:
```txt
"Curly fries" -> "are"
"fries are" -> "the"
"are the" -> "worst" | "reason"
"the worst" -> "kind"
"worst kind" -> "of"
"kind of" -> "fries"
"Loud people" -> "are"
"people are" -> "the"
"the reason" -> "I"
"reason I" -> "don't"
"I don't" -> "go"
"don't go" -> "to"
"go to" -> "the"
"to the" -> "movies"
"the movies" -> "anymore"
```
and in the database table for starts of sentences:
```txt
"Curly fries"
"Loud people"
```
Note that the | is considered to be _"or"_. In the case of the bold text above, it could be read as: if the given input is "are the", then the output is either _"worst"_ **or** _"reason"_.
In practice, more frequent phrases will have higher precedence. The more often a phrase is said, the more likely it is to be generated.
---
### Generation
When a message is generated with `!generate`, a random start of a sentence is picked from the database table of starts of sentences. In our example the randomly picked start is _"Curly fries"_.
Now, in a loop:
- The output for the input is generated via the grammar.
- And the input for the next iteration in the loop is shifted:
- Remove the first word from the input.
- Add the new output word to the end of the input.
So, the input starts as _"Curly Fries"_. The output for this input is generated via the grammar, which gives us _"are"_. Then, the input is updated. _"Curly"_ is removed, and _"are"_ is added to the input. The new input for the next iteration will be _"Fries are"_ as a result. This process repeats until no more words can be generated, or if a word limit is reached.
A more programmatic example of this would be this:
```python
# This initial sentence is either from the database for starts of sentences,
# or from words passed in Twitch chat
sentence = ["Curly", "fries"]
for i in range(sentence_length):
# Generate a word using last 2 words in the partial sentence,
# and append it to the partial sentence
sentence.append(generate(sentence[-2:]))
```
It's common for an input sequence to have multiple possible outputs, as we can see in the bold part of the previous grammar. This allows learned information from multiple messages to be merged into one message. For instance, some potential outputs from the given example are
> Curly fries are the reason I don't go to the movies anymore
or
> Loud people are the worst kind of fries
---
## Commands
Chat members can generate chat-like messages using the following commands (Note that they are aliases):
```txt
!generate [words]
!g [words]
```
Example:
```txt
!g Curly
```
Result (for example):
```txt
Curly fries are the reason I don't go to the movies anymore
```
- The bot will, when given this command, try to complete the start of the sentence which was given.
- If it cannot, an appropriate error message will be sent to chat.
- Any number of words may be given, including none at all.
- Everyone can use it.
Furthermore, chat members can find a link to [How it works](#how-it-works) by using one of the following commands:
```txt
!ghelp
!genhelp
!generatehelp
```
The use of this command makes the bot post this message in chat:
> Learn how this bot generates sentences here: <https://github.com/CubieDev/TwitchMarkovChain#how-it-works>
---
### Streamer commands
All of these commands can be whispered to the bot account, or typed in chat.
To disable the bot from generating messages, while still learning from regular chat messages:
```txt
!disable
```
After disabling the bot, it can be re-enabled using:
```txt
!enable
```
Changing the cooldown between generations is possible with one of the following two commands:
```txt
!setcooldown <seconds>
!setcd <seconds>
```
Example:
```txt
!setcd 30
```
Which sets the cooldown between generations to 30 seconds.
---
### Moderator commands
All of these commands must be whispered to the bot account.
Moderators (and the broadcaster) can modify the blacklist to prevent the bot learning words it shouldn't.
To add `word` to the blacklist, a moderator can whisper the bot:
```txt
!blacklist <word>
```
Similarly, to remove `word` from the blacklist, a moderator can whisper the bot:
```txt
!whitelist <word>
```
And to check whether `word` is already on the blacklist or not, a moderator can whisper the bot:
```txt
!check <word>
```
---
## Settings
This bot is controlled by a `settings.json` file, which has the following structure:
```json
{
"Host": "irc.chat.twitch.tv",
"Port": 6667,
"Channel": "#<channel>",
"Nickname": "<name>",
"Authentication": "oauth:<auth>",
"DeniedUsers": ["StreamElements", "Nightbot", "Moobot", "Marbiebot"],
"AllowedUsers": [],
"Cooldown": 20,
"KeyLength": 2,
"MaxSentenceWordAmount": 25,
"MinSentenceWordAmount": -1,
"HelpMessageTimer": 18000,
"AutomaticGenerationTimer": -1,
"WhisperCooldown": true,
"EnableGenerateCommand": true,
"SentenceSeparator": " - ",
"AllowGenerateParams": true,
"GenerateCommands": ["!generate", "!g"]
}
```
| **Parameter** | **Meaning** | **Example** |
| -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- |
| `Host` | The URL that will be used. Do not change. | `"irc.chat.twitch.tv"` |
| `Port` | The Port that will be used. Do not change. | `6667` |
| `Channel` | The Channel that will be connected to. | `"#CubieDev"` |
| `Nickname` | The Username of the bot account. | `"CubieB0T"` |
| `Authentication` | The OAuth token for the bot account. | `"oauth:pivogip8ybletucqdz4pkhag6itbax"` |
| `DeniedUsers` | The list of (bot) accounts whose messages should not be learned from. The bot itself is automatically added to this. | `["StreamElements", "Nightbot", "Moobot", "Marbiebot"]` |
| `AllowedUsers` | A list of users with heightened permissions. Gives these users the same power as the channel owner, allowing them to bypass cooldowns, set cooldowns, disable or enable the bot, etc. | `["Michelle", "Cubie"]` |
| `Cooldown` | A cooldown in seconds between successful generations. If a generation fails (eg inputs it can't work with), then the cooldown is not reset and another generation can be done immediately. | `20` |
| `KeyLength` | A technical parameter which, in my previous implementation, would affect how closely the output matches the learned inputs. In the current implementation the database structure does not allow this parameter to be changed. Do not change. | `2` |
| `MaxSentenceWordAmount` | The maximum number of words that can be generated. Prevents absurdly long and spammy generations. | `25` |
| `MinSentenceWordAmount` | The minimum number of words that can be generated. Might generate multiple sentences, separated by the value from `SentenceSeparator`. Prevents very short generations. -1 to disable. | `-1` |
| `HelpMessageTimer` | The amount of seconds between sending help messages that links to [How it works](#how-it-works). -1 for no help messages. Defaults to once every 5 hours. | `18000` |
| `AutomaticGenerationTimer` | The amount of seconds between automatically sending a generated message, as if someone wrote `!g`. -1 for no automatic generations. | `-1` |
| `WhisperCooldown` | Allows the bot to whisper a user the remaining cooldown after that user has attempted to generate a message. | `true` |
| `EnableGenerateCommand` | Globally enables/disables the generate command. | `true` |
| `SentenceSeparator` | The separator between multiple sentences. Only relevant if `MinSentenceWordAmount` > 0, as only then can multiple sentences be generated. Sensible values for this might be `", "`, `". "`, `" - "` or `" "`. | `" - "` |
| `AllowGenerateParams` | Allow chat to supply a partial sentence which the bot finishes, e.g. `!generate hello, I am`. If `false`, all values after the generation command will be ignored. | `true` |
| `GenerateCommands` | The generation commands that the bot will listen for. Defaults to `["!generate", "!g"]`. Useful if your chat is used to commands with `~`, `-`, `/`, etc. | `["!generate", "!g"]` |
_Note that the example OAuth token is not an actual token, but merely a generated string to give an indication what it might look like._
I got my real OAuth token from <https://twitchapps.com/tmi/>.
---
### Blacklist
You may add words to a blacklist by adding them on a separate line in `blacklist.txt`. Each word is case insensitive. By default, this file only contains `<start>` and `<end>`, which are required for the current implementation.
Words can also be added or removed from the blacklist via whispers, as is described in the [Moderator Command](#moderator-commands) section.
---
## Requirements
- [Python 3.6+](https://www.python.org/downloads/)
- [Module requirements](requirements.txt)
- Install these modules using `pip install -r requirements.txt` in the commandline.
Among these modules is my own [TwitchWebsocket](https://github.com/tomaarsen/TwitchWebsocket) wrapper, which makes making a Twitch chat bot a lot easier.
This repository can be seen as an implementation using this wrapper.
---
### Contributors
My gratitude is extended to the following contributors who've decided to help out.
* [@DoctorInsano](https://github.com/DoctorInsano) - Several small fixes and improvements in [v1.0](https://github.com/tomaarsen/TwitchMarkovChain/releases/tag/v1.0).
* [@justinrusso](https://github.com/justinrusso) - Several features, refactors and fixes, that represent the core of [v2.0](https://github.com/tomaarsen/TwitchMarkovChain/releases/tag/v2.0) and [v2.1](https://github.com/tomaarsen/TwitchMarkovChain/releases/tag/v2.1).
---
## Other Twitch Bots
- [TwitchAIDungeon](https://github.com/CubieDev/TwitchAIDungeon)
- [TwitchGoogleTranslate](https://github.com/CubieDev/TwitchGoogleTranslate)
- [TwitchCubieBotGUI](https://github.com/CubieDev/TwitchCubieBotGUI)
- [TwitchCubieBot](https://github.com/CubieDev/TwitchCubieBot)
- [TwitchRandomRecipe](https://github.com/CubieDev/TwitchRandomRecipe)
- [TwitchUrbanDictionary](https://github.com/CubieDev/TwitchUrbanDictionary)
- [TwitchRhymeBot](https://github.com/CubieDev/TwitchRhymeBot)
- [TwitchWeather](https://github.com/CubieDev/TwitchWeather)
- [TwitchDeathCounter](https://github.com/CubieDev/TwitchDeathCounter)
- [TwitchSuggestDinner](https://github.com/CubieDev/TwitchSuggestDinner)
- [TwitchPickUser](https://github.com/CubieDev/TwitchPickUser)
- [TwitchSaveMessages](https://github.com/CubieDev/TwitchSaveMessages)
- [TwitchMMLevelPickerGUI](https://github.com/CubieDev/TwitchMMLevelPickerGUI) (Mario Maker 2 specific bot)
- [TwitchMMLevelQueueGUI](https://github.com/CubieDev/TwitchMMLevelQueueGUI) (Mario Maker 2 specific bot)
- [TwitchPackCounter](https://github.com/CubieDev/TwitchPackCounter) (Streamer specific bot)
- [TwitchDialCheck](https://github.com/CubieDev/TwitchDialCheck) (Streamer specific bot)
- [TwitchSendMessage](https://github.com/CubieDev/TwitchSendMessage) (Meant for debugging purposes)

View file

@ -1,936 +0,0 @@
import logging
import random
import sqlite3
import string
from typing import Any
import platformdirs
from loguru import logger
class Database:
"""
The database created is called `MarkovChain_{channel}.db`,
and populated with 27 + 27^2 = 756 tables. Firstly, 27 tables with the structure of
"MarkovStart{char}", i.e. called:
> MarkovStartA
> MarkovStartB
> ...
> MarkovStartZ
> MarkovStart_
These tables store the first two words of a sentence, alongside a "count" frequency.
The suffix of the table name is the first character of the first word in the entry.
For example, from a sentence "I am the developer of this bot", "I am" is learned by creating
or updating an entry in MarkovStartI where the first word is "I", the second word is "am",
and the "count" value increments every time the sequence "I am" was learned.
If instead we learn, "[he said hello]", then "[he said" is learned by creating or updating
an entry in MarkovStart_.
Alongside the MarkovStart... tables, there are 729 tables called "MarkovGrammar{char}{char}",
i.e. called:
> MarkovGrammarAA
> MarkovGrammarAB
> ...
> MarkovGrammarAZ
> MarkovGrammarA_
> MarkovGrammarBA
> MarkovGrammarBB
> ...
> MarkovGrammar_Z
> MarkovGrammar__
These tables store 3-grams, alongside a "count" frequency of this 3-gram. The suffix of the
table name is the first character of the first word in the 3-gram, with the first character
of the second word in the 3-gram.
If we revisit the example of "I am the developer of this bot", we learn the following 3-grams:
> "I am the"
> "am the developer"
> "the developer of"
> "developer of this"
> "of this bot"
> "this bot <END>"
The 3-gram "am the developer" will be placed in MarkovGrammarAT, by creating or updating an entry
where the first word is "am", the second is "the", and the third "developer", while the "count"
frequency is incremented every time the 3-gram "am the developer" is learned.
The core of the knowledge base are the MarkovGrammar tables, which can be used to create
functions that take a certain number of words as input, and then generate a new word. For example:
Given "I am", we can use the MarkovGrammarIA table to look for entries that have "I" as the first word,
and "am" as the second word. If there are multiple options, we can use the "count" frequency as
weights to pick an appropriate "next word".
Important notes:
- Learning is *case sensitive*. The 3-gram "YOU ARE A" will become a different entry than "you are a".
This is most important when learning emotes, where the distinction between "Kappa" and "kappa" truly is important.
- Generating is *case insensitive*. Generating when using "YOU ARE" as the previous words to use in e.g. self.get_next()
will get the same results as generating using "you are".
- Learning and generating is *punctuation insensitive*. Each sentence is tokenized to split commas, dots, apostrophes, etc.
As a result, the sentence "Hello, I'm Tom!" is tokenized to: ["Hello", ",", "I", "'m", "Tom", "!"]. Then, 3-grams of this
is learned.
- Both learning and generating is *punctuation sensitive*. "Hello, how are" will learn and generate differently than
"Hello how are", as the first word is taken as "Hello,", which differs from "Hello".
A solution is to completely remove punctuation. Before learning, before generating, etc.
Essentially ignore that it exists.
However, this is not entirely desirable. In a perfect world, we would like to learn "hello,"
and "hello" differently, just like "HELLO" and "hello", but allow generating from "hello"
to both get results from "hello" and "hello,".
"""
def __init__(self, channel: str):
    """Open (or create) the per-channel SQLite knowledge base.

    Args:
        channel: Twitch channel name; a leading ``#`` is stripped and the
            name lowercased to build the ``MarkovChain_{channel}.db``
            filename inside the per-user data directory.
    """
    # Per-user data directory (created on demand) holding the database.
    self.user_data_path = platformdirs.user_data_path(
        "markovbot_gui",
        ensure_exists=True,
    )
    self.db_path = (
        self.user_data_path / f"MarkovChain_{channel.replace('#', '').lower()}.db"
    )
    # SQL statements queued via add_execute_queue() and flushed in one
    # commit by execute_commit().
    self._execute_queue: list = []
    if self.db_path.is_file():
        # Ensure the database is updated to the newest version
        self.update_v1(channel)
        self.update_v2()
        self.update_v3(channel)
        self.update_v4()
    # Create database tables: one MarkovStart table per first character
    # (A-Z plus "_" for everything else), and one MarkovGrammar table per
    # pair of first characters.
    for first_char in [*list(string.ascii_uppercase), "_"]:
        self.add_execute_queue(
            f"""
            CREATE TABLE IF NOT EXISTS MarkovStart{first_char} (
            word1 TEXT COLLATE NOCASE,
            word2 TEXT COLLATE NOCASE,
            count INTEGER,
            PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY)
            );
            """,
            auto_commit=False,
        )
        for second_char in [*list(string.ascii_uppercase), "_"]:
            self.add_execute_queue(
                f"""
                CREATE TABLE IF NOT EXISTS MarkovGrammar{first_char}{second_char} (
                word1 TEXT COLLATE NOCASE,
                word2 TEXT COLLATE NOCASE,
                word3 TEXT COLLATE NOCASE,
                count INTEGER,
                PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY, word3 COLLATE BINARY)
                );
                """,
                auto_commit=False,
            )
    # Users who opted out of whisper notifications.
    sql = """
    CREATE TABLE IF NOT EXISTS WhisperIgnore (
    username TEXT,
    PRIMARY KEY (username)
    );
    """
    self.add_execute_queue(sql)
    # Add a version entry
    sql = """
    CREATE TABLE IF NOT EXISTS Version (
    version INTEGER
    );
    """
    self.add_execute_queue(sql)
    self.add_execute_queue("DELETE FROM Version;")
    # NOTE(review): update_v4() runs above, yet the recorded schema version
    # is 3 — confirm whether this should be 4.
    self.add_execute_queue("INSERT INTO Version (version) VALUES (3);")
    self.execute_commit()
    # Used for randomly picking a Markov Grammar if only one word is given
    # Index 0 is for "A", 1 for "B", etc. Then, 26 is for "_"
    # (approximate first-letter frequencies, in percent).
    self.word_frequency = [
        11.6,
        4.4,
        5.2,
        3.1,
        2.8,
        4,
        1.6,
        4.2,
        7.3,
        0.5,
        0.8,
        2.4,
        3.8,
        2.2,
        7.6,
        4.3,
        0.2,
        2.8,
        6.6,
        15.9,
        1.1,
        0.8,
        5.5,
        0.1,
        0.7,
        0.1,
        0.5,
    ]
def update_v1(self, channel: str):
    """Update the Database structure from a deprecated version to a newer one.

    Detects the legacy layout by the presence of a `MarkovGrammarA` table.
    If found: backs up the database file via sqlite3's online backup API,
    renames the `...Other` tables to `..._`, merges the digit-suffixed tables
    into `..._`, and finally splits each `MarkovGrammarX` table into
    `MarkovGrammarXY` tables keyed on the first letter of the second word.

    Args:
        channel (str): The name of the Twitch channel on which the bot is running.
    """
    # If an old version of the Database is used, update the database
    if ("MarkovGrammarA",) in self.execute(
        "SELECT name FROM sqlite_master WHERE type='table';",
        fetch=True,
    ):
        logger.info("Creating backup before updating Database...")

        # Connect to both the new and backup, backup, and close both
        def progress(status, remaining, total):
            # NOTE(review): this uses the stdlib `logging` module while the
            # rest of this class logs through loguru's `logger` — confirm
            # this is intentional and that `logging` is imported/configured.
            logging.debug(f"Copied {total-remaining} of {total} pages...")

        conn = sqlite3.connect(f"MarkovChain_{channel.replace('#', '').lower()}.db")
        back_conn = sqlite3.connect(
            f"MarkovChain_{channel.replace('#', '').lower()}_backup.db",
        )
        with back_conn:
            # Online backup; `progress` is invoked after each 1000-page chunk.
            conn.backup(back_conn, pages=1000, progress=progress)
        conn.close()
        back_conn.close()
        logger.info("Created backup before updating Database...")

        logger.info("Updating Database to new version for improved efficiency...")
        # Rename ...Other to ..._
        self.add_execute_queue("""
        CREATE TABLE IF NOT EXISTS MarkovStart_ (
            word1 TEXT COLLATE NOCASE,
            word2 TEXT COLLATE NOCASE,
            occurances INTEGER,
            PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY)
        );
        """)
        self.add_execute_queue("""
        CREATE TABLE IF NOT EXISTS MarkovGrammar_ (
            word1 TEXT COLLATE NOCASE,
            word2 TEXT COLLATE NOCASE,
            word3 TEXT COLLATE NOCASE,
            occurances INTEGER,
            PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY, word3 COLLATE BINARY)
        );
        """)
        self.execute_commit()
        # Copy data from Other to _ and remove Other
        self.add_execute_queue(
            "INSERT INTO MarkovGrammar_ SELECT * FROM MarkovGrammarOther;",
        )
        self.add_execute_queue(
            "INSERT INTO MarkovStart_ SELECT * FROM MarkovStartOther;",
        )
        self.add_execute_queue("DROP TABLE MarkovGrammarOther")
        self.add_execute_queue("DROP TABLE MarkovStartOther")
        self.execute_commit()
        # Copy all data from MarkovGrammarx where x is some digit to MarkovGrammar_,
        # Same with MarkovStart.
        for character in list(string.digits):
            self.add_execute_queue(
                f"INSERT INTO MarkovGrammar_ SELECT * FROM MarkovGrammar{character}",  # noqa: S608
            )
            self.add_execute_queue(f"DROP TABLE MarkovGrammar{character}")
            self.add_execute_queue(
                f"INSERT INTO MarkovStart_ SELECT * FROM MarkovStart{character}",  # noqa: S608
            )
            self.add_execute_queue(f"DROP TABLE MarkovStart{character}")
        self.execute_commit()
        # Split up MarkovGrammarA into MarkovGrammarAA, MarkovGrammarAB, etc.
        for first_char in [*list(string.ascii_uppercase), "_"]:
            for second_char in list(string.ascii_uppercase):
                self.add_execute_queue(f"""
                CREATE TABLE IF NOT EXISTS MarkovGrammar{first_char}{second_char} (
                    word1 TEXT COLLATE NOCASE,
                    word2 TEXT COLLATE NOCASE,
                    word3 TEXT COLLATE NOCASE,
                    occurances INTEGER,
                    PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY, word3 COLLATE BINARY)
                );
                """)
                # Move the rows whose second word starts with `second_char`...
                self.add_execute_queue(
                    f'INSERT INTO MarkovGrammar{first_char}{second_char} SELECT * FROM MarkovGrammar{first_char} WHERE word2 LIKE "{second_char}%";',  # noqa: S608
                )
                # ...and delete them from the source table.
                self.add_execute_queue(
                    f'DELETE FROM MarkovGrammar{first_char} WHERE word2 LIKE "{second_char}%";',  # noqa: S608
                )
            # Whatever remains (second word not starting with A-Z) goes to {first_char}_.
            self.add_execute_queue(f"""
            CREATE TABLE IF NOT EXISTS MarkovGrammar{first_char}_ (
                word1 TEXT COLLATE NOCASE,
                word2 TEXT COLLATE NOCASE,
                word3 TEXT COLLATE NOCASE,
                occurances INTEGER,
                PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY, word3 COLLATE BINARY)
            );
            """)
            self.add_execute_queue(
                f"INSERT INTO MarkovGrammar{first_char}_ SELECT * FROM MarkovGrammar{first_char};",  # noqa: S608
            )
            self.add_execute_queue(f"DROP TABLE MarkovGrammar{first_char}")
        self.execute_commit()
        logger.info("Finished Updating Database to new version.")
def update_v2(self):
    """Update the Database structure from a deprecated version to a newer one.

    This update fixes a typo: the `occurances` column is renamed to `count`
    on every MarkovGrammarXY and MarkovStartX table. The old layout is
    detected by probing MarkovGrammarAA for a column named `occurances`.
    """
    # Resolve typo in Database
    if self.execute(
        "SELECT * FROM PRAGMA_TABLE_INFO('MarkovGrammarAA') WHERE name='occurances';",
        fetch=True,
    ):
        logger.info("Updating Database to new version...")
        for first_char in [*list(string.ascii_uppercase), "_"]:
            for second_char in [*list(string.ascii_uppercase), "_"]:
                self.execute(
                    f"ALTER TABLE MarkovGrammar{first_char}{second_char} RENAME COLUMN occurances TO count;",
                )
            self.execute(
                f"ALTER TABLE MarkovStart{first_char} RENAME COLUMN occurances TO count;",
            )
        logger.info("Finished Updating Database to new version.")
def update_v3(self, channel: str) -> None:  # noqa: C901, PLR0915
    """Update the Database structure to mark punctuation as a separate word.

    Previously, "Hello," was a valid single word. Now, it would be split as "Hello" and ",".
    This allows people to generate "!g hello", and have the bot generate "hello, how are you?",
    or have "!g it" result in "it's a wonderful day".

    This first copies `MarkovChain_{channel}.db` to `MarkovChain_{channel}_modified.db`.
    This new copy is then modified. The original is never changed, to avoid issues when the
    update is interrupted. As a result, running the program again will just re-attempt the
    update.

    Upon completing the update, the original database is renamed to
    `MarkovChain_{channel}_backup.db`, while the newly modified `MarkovChain_{channel}_modified.db`
    is renamed to `MarkovChain_{channel}.db`.
    *This `MarkovChain_{channel}_backup.db` file can safely be deleted, as it is NOT used*

    This function also adds a `Version` table, and sets the version to 3.

    Args:
        channel (str): The name of the Twitch channel on which the bot is running.
    """
    # Get Database version. Throws OperationalError if the Version table does not exist,
    # in which case we definitely want to upgrade.
    try:
        version = self.execute(
            "SELECT version FROM Version ORDER BY version DESC LIMIT 1;",
            fetch=True,
        )
    except sqlite3.OperationalError:
        version = []
    # Whether to upgrade
    if not version or version[0][0] < 3:  # noqa: PLR2004
        logger.info(
            "Updating Database to new version - supports better punctuation handling.",
        )
        from shutil import copyfile

        from nltk import ngrams

        from src.markovbot_gui.libs.tokenizer import tokenize

        channel = channel.replace("#", "").lower()
        copyfile(
            self.db_path,
            self.user_data_path / f"MarkovChain_{channel}_modified.db",
        )
        logger.info(
            f'Created a copy of the database called "MarkovChain_{channel}_modified.db". The update will modify this file.',
        )
        # Temporarily point self.db_path at the modified copy, so every query
        # below runs against it instead of the original.
        self.db_path = self.user_data_path / f"MarkovChain_{channel}_modified.db"
        # Create the destination `_modified` tables.
        for first_char in [*list(string.ascii_uppercase), "_"]:
            table = f"MarkovStart{first_char}"
            self.add_execute_queue(
                f"""
                CREATE TABLE IF NOT EXISTS {table}_modified (
                    word1 TEXT COLLATE NOCASE,
                    word2 TEXT COLLATE NOCASE,
                    count INTEGER,
                    PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY)
                );
                """,
                auto_commit=False,
            )
            for second_char in [*list(string.ascii_uppercase), "_"]:
                table = f"MarkovGrammar{first_char}{second_char}"
                self.add_execute_queue(
                    f"""
                    CREATE TABLE IF NOT EXISTS {table}_modified (
                        word1 TEXT COLLATE NOCASE,
                        word2 TEXT COLLATE NOCASE,
                        word3 TEXT COLLATE NOCASE,
                        count INTEGER,
                        PRIMARY KEY (word1 COLLATE BINARY, word2 COLLATE BINARY, word3 COLLATE BINARY)
                    );
                    """,
                    auto_commit=False,
                )
        self.execute_commit()

        def modify_start(table_name: str) -> None:
            """Read all data from `table_name`, re-tokenize it, distribute the new first 2 tokens to _modified tables, and drop `table`.

            Args:
                table_name (str): The name of the table to work on.
            """
            data = self.execute(f"SELECT * FROM {table_name};", fetch=True)  # noqa: S608
            for tup in data:
                # Remove "count" from tup for now
                count = tup[-1]
                tup = tup[:-1]  # noqa: PLW2901
                raw_string = " ".join(tup)
                tokenized = tokenize(raw_string)
                two_gram = tokenized[:2]
                # In case there was some issue in the previous Database
                if len(two_gram) < 2:  # noqa: PLR2004
                    continue
                # Merge into the _modified table, carrying over the old count.
                self.add_execute_queue(
                    f"""
                    INSERT OR REPLACE INTO MarkovStart{self.get_suffix(two_gram[0][0])}_modified (word1, word2, count)
                    VALUES (?, ?, coalesce (
                        (
                            SELECT count + {count} FROM MarkovStart{self.get_suffix(two_gram[0][0])}_modified
                            WHERE word1 = ? COLLATE BINARY
                            AND word2 = ? COLLATE BINARY
                        ),
                        1
                    )
                    )""",  # noqa: S608
                    values=two_gram + two_gram,
                    auto_commit=False,
                )
            self.execute(f"DROP TABLE {table_name};")

        def modify_grammar(table_name: str) -> None:
            """Read all data from `table_name`, re-tokenize it, distribute the new 3-grams to _modified tables, and drop `table`.

            Args:
                table_name (str): The name of the table to work on.
            """
            data = self.execute(f"SELECT * FROM {table_name};", fetch=True)  # noqa: S608
            for tup in data:
                # Remove "count" from tup for now
                count = tup[-1]
                tup = tup[:-1]  # noqa: PLW2901
                # If ends on "<END>", ignore that in in the tuple, as we don't want it to get
                # tokenized.
                end = False
                if tup[-1] == "<END>":
                    end = True
                    tup = tup[:-1]  # noqa: PLW2901
                raw_string = " ".join(tup)
                tokenized = tokenize(raw_string)
                # Re-add "<END>"
                if end:
                    tokenized.append("<END>")
                for ngram in ngrams(tokenized, 3):
                    # Filter out recursive case.
                    if self.check_equal(ngram):
                        continue
                    self.add_execute_queue(
                        f"""
                        INSERT OR REPLACE INTO MarkovGrammar{self.get_suffix(ngram[0][0])}{self.get_suffix(ngram[1][0])}_modified (word1, word2, word3, count)
                        VALUES (?, ?, ?, coalesce (
                            (
                                SELECT count + {count} FROM MarkovGrammar{self.get_suffix(ngram[0][0])}{self.get_suffix(ngram[1][0])}_modified
                                WHERE word1 = ? COLLATE BINARY
                                AND word2 = ? COLLATE BINARY
                                AND word3 = ? COLLATE BINARY
                            ),
                            1
                        )
                        )""",  # noqa: S608
                        values=ngram + ngram,
                        auto_commit=False,
                    )
            self.execute(f"DROP TABLE {table_name};")

        # Modify all tables
        i = 0
        total = 27 * 27 + 27  # The number of tables to convert
        for first_char in [*list(string.ascii_uppercase), "_"]:
            table = f"MarkovStart{first_char}"
            modify_start(table)
            i += 1
            for second_char in [*list(string.ascii_uppercase), "_"]:
                table = f"MarkovGrammar{first_char}{second_char}"
                modify_grammar(table)
                i += 1
            logger.debug(
                f"[{i / total * 100:.2f}%] Scheduled updates for the tables for words starting in {first_char}.",
            )
        logger.info("Starting executing table update...")
        self.execute_commit()
        logger.info("Finished executing table update.")
        # Rename the _modified tables to normal tables again
        for first_char in [*list(string.ascii_uppercase), "_"]:
            table = f"MarkovStart{first_char}"
            self.add_execute_queue(
                f"ALTER TABLE {table}_modified RENAME TO {table};",
                auto_commit=False,
            )
            for second_char in [*list(string.ascii_uppercase), "_"]:
                table = f"MarkovGrammar{first_char}{second_char}"
                self.add_execute_queue(
                    f"ALTER TABLE {table}_modified RENAME TO {table};",
                    auto_commit=False,
                )
        self.execute_commit()
        # Turn the non-modified, old version of the Database into a "_backup.db" file,
        # and turn the modified file into the new main file.
        # BUG FIX: previously this called `self.db_path.rename(self.db_path / ...)`,
        # but `self.db_path` already pointed at the *modified* file and was
        # treated as a directory, so the wrong file was renamed to an invalid
        # path. Rename the untouched original to `_backup.db` instead, then
        # promote the modified copy.
        original_db = self.user_data_path / f"MarkovChain_{channel}.db"
        original_db.rename(self.user_data_path / f"MarkovChain_{channel}_backup.db")
        (self.user_data_path / f"MarkovChain_{channel}_modified.db").rename(original_db)
        # Revert to using .db instead of _modified.db
        self.db_path = self.user_data_path / f"MarkovChain_{channel}.db"
        # Add a version entry
        self.execute("""CREATE TABLE IF NOT EXISTS Version (
            version INTEGER
        );""")
        self.execute("DELETE FROM Version;")
        self.execute("INSERT INTO Version (version) VALUES (3);")
        logger.info(
            f'Renamed original database file "MarkovChain_{channel}.db" to "MarkovChain_{channel}_backup.db". This file is *not* used, and can safely be deleted.',
        )
        logger.info(
            f'Renamed updated database file "MarkovChain_{channel}_modified.db" to "MarkovChain_{channel}.db".',
        )
        logger.info(
            f'This updated "MarkovChain_{channel}.db" will be used to drive the Twitch bot.',
        )
def update_v4(self):
    """Migrate the schema to v4: add the `quotes` table.

    Each row stores an autoincrement id, the (unique) quote text, and its
    author. The migration only runs when the recorded version is below 4.
    """
    # Get Database version. Throws OperationalError if the Version table does
    # not exist, in which case we definitely want to upgrade.
    try:
        rows = self.execute(
            "SELECT version FROM Version ORDER BY version DESC LIMIT 1;",
            fetch=True,
        )
    except sqlite3.OperationalError:
        rows = []
    if rows and rows[0][0] >= 4:  # noqa: PLR2004
        # Already up to date.
        return
    logger.info("Updating db to the v4 version")
    self.execute("""CREATE TABLE IF NOT EXISTS quotes (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        quote TEXT UNIQUE,
        author TEXT
    );""")
def add_execute_queue(
self,
sql: str,
values: tuple[Any] | list[Any] | None = None,
auto_commit: bool = True,
) -> None:
"""Add query and corresponding values to a queue, to be executed all at once.
This entire queue can be executed with `self.execute_commit`,
and the queue is automatically executed if there are more than 25 waiting queries.
Args:
sql (str): The SQL query to add, potentially with "?" for where
a value ought to be filled in.
values ([tuple[Any]], optional): Optional tuple of values to replace "?" in SQL queries.
Defaults to None.
"""
if values is not None:
self._execute_queue.append((sql, values))
else:
self._execute_queue.append((sql,))
# Commit these executes if there are more than 25 queries
if auto_commit and len(self._execute_queue) > 25: # noqa: PLR2004
self.execute_commit()
def execute_commit(self, fetch: bool = False) -> Any:
    """Execute the SQL queries added to the queue with `self.add_execute_queue`.

    All queued statements run inside a single explicit transaction, then the
    queue is cleared.

    Args:
        fetch (bool, optional): Whether to return the fetchall() of the SQL queries.
            Defaults to False.

    Returns:
        Any: The returned values from the SQL queries if `fetch` is true, otherwise None.
    """
    if self._execute_queue:
        with sqlite3.connect(self.db_path) as conn:
            cur = conn.cursor()
            # Open an explicit transaction so the whole queue commits atomically.
            cur.execute("begin")
            for sql in self._execute_queue:
                # Each queue entry is (sql,) or (sql, values).
                cur.execute(*sql)
            self._execute_queue.clear()
            cur.execute("commit")
            if fetch:
                # NOTE(review): the cursor's last statement is "commit", so
                # this fetchall() always appears to be empty — confirm whether
                # any caller actually relies on `fetch=True` here.
                return cur.fetchall()
    return None
def execute(self, sql: str, values: tuple[Any] | None = None, fetch: bool = False):
"""Execute the SQL query with the corresponding values, potentially returning a result.
Args:
sql (str): The SQL query to add, potentially with "?" for where
a value ought to be filled in.
values ([tuple[Any]], optional): Optional tuple of values to replace "?" in SQL queries.
Defaults to None.
fetch (bool, optional): Whether to return the fetchall() of the SQL queries.
Defaults to False.
Returns:
Any: The returned values from the SQL queries if `fetch` is true, otherwise None.
"""
with sqlite3.connect(self.db_path) as conn:
cur = conn.cursor()
if values is None:
cur.execute(sql)
else:
cur.execute(sql, values)
conn.commit()
if fetch:
return cur.fetchall()
return None
@staticmethod
def get_suffix(character: str) -> str:
"""Transform a character into a member of string.ascii_lowercase or "_".
Args:
character (str): The character to normalize.
Returns:
str: The normalized character
"""
if character.lower() in string.ascii_lowercase:
return character.upper()
return "_"
def add_whisper_ignore(self, username: str) -> None:
    """Record that `username` does not wish to receive whispers.

    Args:
        username (str): The username of the user who no longer wants to be whispered.
    """
    # INSERT OR IGNORE keeps this idempotent if the user is already listed.
    sql = """
        INSERT OR IGNORE INTO WhisperIgnore(username)
        SELECT ?;"""
    self.execute(sql, values=(username,))
def check_whisper_ignore(self, username: str) -> list[tuple[str]]:
    """Look up whether `username` opted out of whispers.

    Used as `if not check_whisper_ignore(user): whisper(user)`.

    Args:
        username (str): The username of the user to check.

    Returns:
        list[tuple[str]]: Empty when the user may be whispered; otherwise a
            single-row result such as [('test_user',)].
    """
    sql = """
        SELECT username FROM WhisperIgnore
        WHERE username = ?;"""
    return self.execute(sql, values=(username,), fetch=True)
def remove_whisper_ignore(self, username: str) -> None:
    """Remove `username` from the whisper opt-out list so they get whispers again.

    Args:
        username (str): The username of the user who wants to be whispered again.
    """
    sql = """
        DELETE FROM WhisperIgnore
        WHERE username = ?;"""
    self.execute(sql, values=(username,))
@staticmethod
def check_equal(items: list) -> bool:
"""True if `items` consists of items that are all identical
Useful for checking if we're learning that a sequence of the same words leads to the same word,
which can cause infinite loops when generating.
Args:
items (list): The list of objects for which we want to check if they are all identical.
Returns:
bool: True if `l` consists of items that are all identical
"""
return items[0] * len(items) == items
def get_next(self, index: int, words: list | tuple) -> str | None:
    """Generate the next word in the sentence using learned data, given the previous 2 words.

    Args:
        index (int): The index of this new word in the sentence.
        words (list[str]): The previous 2 words.

    Returns:
        Optional[str]: The next word, or None when nothing has been learned
            for this 2-gram.
    """
    suffix = self.get_suffix(words[0][0]) + self.get_suffix(words[1][0])
    rows = self.execute(
        f"""
        SELECT word3, count FROM MarkovGrammar{suffix}
        WHERE word1 = ? AND word2 = ?;""",  # noqa: S608
        values=words,  # type: ignore[arg-type]
        fetch=True,
    )
    if not rows:
        return None
    # Weighted pick using the stored counts.
    return self.pick_word(rows, index)
def get_next_initial(self, index: int, words) -> str | None:
    """Generate the next word given the previous 2 words, never yielding "<END>".

    Identical to `get_next` except that "<END>" is excluded from candidates,
    so a sentence cannot terminate immediately.

    Args:
        index (int): The index of this new word in the sentence.
        words (list[str]): The previous 2 words.

    Returns:
        Optional[str]: The next word, or None when nothing matches.
    """
    suffix = self.get_suffix(words[0][0]) + self.get_suffix(words[1][0])
    rows = self.execute(
        f"""
        SELECT word3, count FROM MarkovGrammar{suffix}
        WHERE word1 = ? AND word2 = ? AND word3 != '<END>';""",  # noqa: S608
        values=words,
        fetch=True,
    )
    if not rows:
        return None
    return self.pick_word(rows, index)
def get_next_single_initial(self, index: int, word: str) -> list[str] | None:
    """Generate a follow-up word for `word` by sampling a grammar table.

    The second word's first character is picked randomly, weighted by
    `self.word_frequency`, to select which MarkovGrammar table to query.

    Args:
        index (int): The index of this new word in the sentence.
        word (str): The previous word.

    Returns:
        Optional[list[str]]: `[word, generated_word]`, or None when nothing matches.
    """
    # Weighted random choice of the second word's starting character (A-Z or "_").
    second_char = random.choices(  # noqa: S311
        string.ascii_uppercase + "_",
        weights=self.word_frequency,
    )[0]
    rows = self.execute(
        f"""
        SELECT word2, count FROM MarkovGrammar{self.get_suffix(word[0])}{second_char}
        WHERE word1 = ? AND word2 != '<END>';""",  # noqa: S608
        values=(word,),
        fetch=True,
    )
    if not rows:
        return None
    return [word, self.pick_word(rows, index)]
def get_next_single_start(self, word: str) -> list[str] | None:
    """Generate the second word of a sentence given its first word.

    Args:
        word (str): The first word in the sentence.

    Returns:
        Optional[list[str]]: `[word, generated_word]`, or None when `word`
            was never seen starting a sentence.
    """
    rows = self.execute(
        f"""
        SELECT word2, count FROM MarkovStart{self.get_suffix(word[0])}
        WHERE word1 = ?;""",  # noqa: S608
        values=(word,),
        fetch=True,
    )
    if not rows:
        return None
    # Weighted pick of the follow-up word using the stored counts.
    return [word, self.pick_word(rows)]
@staticmethod
def pick_word(data: list[tuple[str, int]], index: int = 0) -> str:
"""Randomly pick a word from `data` with word frequency as the weight.
`index` is further used to decrease the weight of the <END> token for the first 15 words
in the sequence, and then increase the weight after the 15th index.
Args:
data ([type]): A list of word - frequency pairs, e.g.
[('"the', 1), ('long', 1), ('well', 5), ('an', 2), ('a', 3), ('much', 1)]
index (int, optional): The index of the newly generated word in the sentence.
Used for modifying how often the <END> token occurs. Defaults to 0.
Returns:
str: The pseudo-randomly picked word.
"""
return random.choices( # noqa: S311
data,
weights=[
tup[-1] * ((index + 1) / 15) if tup[0] == "<END>" else tup[-1]
for tup in data
],
)[0][0]
def get_start(self) -> list[str]:
    """Get a random 2-gram that has been learned as the start of a sentence.

    The MarkovStart table to sample is chosen by weighing each starting
    character with `self.word_frequency`.

    Returns:
        list[str]: Two starting words, e.g. ["I", "am"], or [] when nothing
            has ever been learned.
    """
    # Pick which per-character start table to sample from.
    first_char = random.choices(  # noqa: S311
        [*list(string.ascii_lowercase), "_"],
        weights=self.word_frequency,
        k=1,
    )[0]
    # Rows are (word1, word2, count) triples.
    rows = self.execute(f"SELECT * FROM MarkovStart{first_char};", fetch=True)  # noqa: S608
    # If nothing has ever been said
    if not rows:
        return []
    chosen = random.choices(rows, weights=[row[-1] for row in rows], k=1)[0]  # noqa: S311
    # Drop the trailing count, keep the two words.
    return list(chosen[:-1])
def add_rule_queue(self, item: list[str]) -> None:
    """Queue a 3-gram grammar rule for insertion into the knowledge base.

    Given e.g. ['How', 'are', 'you'], this upserts into MarkovGrammarHA the
    fact that ["How", "are"] can be followed by "you", incrementing its count
    so outputs can be weighted. Queued via `self.add_execute_queue`, which
    flushes automatically once enough queries are waiting.

    A 3-gram of three identical words is skipped entirely, since learning it
    could cause infinite recursion during generation; items containing an
    empty string are skipped with a warning.

    Args:
        item (list[str]): A 3-gram, e.g. ['How', 'are', 'you'].
    """
    # Filter out recursive case.
    if self.check_equal(item):
        return
    if "" in item:
        # Prevent adding invalid rules. Ideally this wouldn't trigger,
        # but it seems to happen rarely.
        logger.warning(
            f"Failed to add item to rules. Item contains empty string: {item!r}",
        )
        return
    table = f"MarkovGrammar{self.get_suffix(item[0][0])}{self.get_suffix(item[1][0])}"
    self.add_execute_queue(
        f"""
        INSERT OR REPLACE INTO {table} (word1, word2, word3, count)
        VALUES (?, ?, ?, coalesce(
            (
                SELECT count + 1 FROM {table}
                WHERE word1 = ? COLLATE BINARY AND word2 = ? COLLATE BINARY AND word3 = ? COLLATE BINARY
            ),
            1)
        )""",  # noqa: S608
        values=item + item,
    )
def add_start_queue(self, item: list[str]) -> None:
    """Queue a sentence-start 2-gram for insertion into the knowledge base.

    Given e.g. ['How', 'are'], this upserts into MarkovStartH so the pair can
    later be picked (weighted by count) as the start of a generated sentence.
    Queued via `self.add_execute_queue`, which flushes automatically once
    enough queries are waiting.

    Args:
        item (list[str]): A 2-gram, e.g. ['How', 'are'].
    """
    table = f"MarkovStart{self.get_suffix(item[0][0])}"
    self.add_execute_queue(
        f"""
        INSERT OR REPLACE INTO {table} (word1, word2, count)
        VALUES (?, ?, coalesce(
            (
                SELECT count + 1 FROM {table}
                WHERE word1 = ? COLLATE BINARY AND word2 = ? COLLATE BINARY
            ),
            1)
        )""",  # noqa: S608
        values=item + item,
    )

View file

@ -1,520 +0,0 @@
import string
import time
from enum import StrEnum
from loguru import logger
from nltk.tokenize import sent_tokenize
from TwitchWebsocket import Message, TwitchWebsocket
from src.markovbot_gui.libs.db import Database
from src.markovbot_gui.libs.settings import Settings
from src.markovbot_gui.libs.timer import LoopingTimer
from src.markovbot_gui.libs.tokenizer import detokenize, tokenize
class Commands(StrEnum):
    """Chat command prefixes recognized by the bot's message handler."""

    SET_COOLDOWN = "!setcd"  # Mod-only: set the generation cooldown.
    GENERATE = "!g"  # Generate a sentence, optionally seeded with words.
    BLACKLIST = "!blacklist"  # Mod-only: add a user to the deny list.
    GENERATE_HELP = "!ghelp"  # Send the help/explanation message.
    QUOTE = "!q"  # Send a random stored quote.
    QUOTE_ADD = "!qadd"  # Mod-only: store a new quote.
class MarkovChain:
end_tag = "<END>"
def __init__(self, settings: Settings | None = None):
    """Set up the bot: settings, database, periodic timers, and websocket.

    Args:
        settings (Settings | None): Pre-loaded settings; read from disk when None.

    Raises:
        ValueError: If the help-message timer is enabled but below 300 seconds,
            or the automatic-generation timer is enabled but below 30 seconds.
    """
    self.s = settings or Settings.read()
    # Timestamp of the last successful generation, drives the !g cooldown.
    self.prev_message_t = 0.0
    # Master switch consulted by the periodic and command handlers.
    self._enabled = True
    self.db = Database(self.s.channel_name)

    # Set up daemon Timer to send periodic help messages (<= 0 disables it).
    if self.s.help_message_timer > 0:
        if self.s.help_message_timer < 300:  # noqa: PLR2004
            raise ValueError(
                'Value for "HelpMessageTimer" in must be at least 300 seconds, '  # noqa: EM101
                "or a negative number for no help messages.",
            )
        t = LoopingTimer(self.s.help_message_timer, self._command_help)
        t.start()

    # Set up daemon Timer to send automatic generation messages
    if self.s.automatic_generation_timer > 0:
        if self.s.automatic_generation_timer < 30:  # noqa: PLR2004
            raise ValueError(
                'Value for "Automatic_generation_message" must be at least 30 seconds, or a negative number for no '  # noqa: EM101
                "automatic generations.",
            )
        logger.info(
            f"Automatic generation enabled, will send messages every {self.s.automatic_generation_timer} seconds"
        )
        t = LoopingTimer(
            self.s.automatic_generation_timer,
            self._command_automatic_generation,
        )
        t.start()

    # All chat traffic is delivered to `self.message_handler`.
    self.ws = TwitchWebsocket(
        host=self.s.host,
        port=self.s.port,
        chan=self.s.channel_name,
        nick=self.s.nickname,
        auth=self.s.authentication,
        callback=self.message_handler,
        capability=["commands", "tags"],
        live=True,
    )
def run_bot(self):
    """Start the underlying Twitch websocket client."""
    self.ws.start_bot()
def stop_bot(self):
    """Leave the configured channel and shut down the websocket client."""
    self.ws.leave_channel(self.s.channel_name)
    self.ws.stop()
def _command_help(self) -> None:
    """Send a Help message to the connected chat, as long as the bot wasn't disabled."""
    if not self._enabled:
        return
    logger.info("Help message sent.")
    try:
        self.ws.send_message(
            "Learn how this bot generates sentences here: https://github.com/CubieDev/TwitchMarkovChain#how-it-works",
        )
    except OSError as error:
        # Transient socket failures shouldn't kill the timer thread.
        logger.warning(
            f"[OSError: {error}] upon sending help message. Ignoring.",
        )
def _command_set_cooldown(self, username: str, split_message: list[str]):
    """Handle "!setcd <seconds>": persist a new !g cooldown and confirm via whisper.

    Args:
        username (str): The user who issued the command (whisper target).
        split_message (list[str]): The whitespace-split command message.
    """
    # Silently ignore anything that isn't exactly "!setcd <value>".
    if len(split_message) != 2:  # noqa: PLR2004
        return
    try:
        cooldown = int(split_message[1])
    except ValueError:
        self.ws.send_whisper(
            username,
            "The parameter must be an integer amount, eg: !setcd 30",
        )
        return
    self.s.cooldown = cooldown
    self.s.write()
    self.ws.send_whisper(
        username,
        f"The !generate cooldown has been set to {cooldown} seconds.",
    )
def _command_blacklist(self, username: str, split_message: list[str]):
    """Handle "!blacklist <username>": add a user to the persisted deny list.

    Args:
        username (str): The user who issued the command (unused beyond dispatch).
        split_message (list[str]): The whitespace-split command message.
    """
    # Silently ignore anything that isn't exactly "!blacklist <username>".
    if len(split_message) != 2:  # noqa: PLR2004
        return
    # Cleanup: the previous try/except ValueError around plain list indexing
    # was dead code — indexing a list never raises ValueError.
    self.s.denied_users.append(split_message[1])
    self.s.write()
def _command_generate(self, username: str, message: str):
    """Handle "!g [words...]": generate and send a sentence, respecting the cooldown.

    Args:
        username (str): The user who issued the command (whisper target on cooldown).
        message (str): The full chat message, possibly containing seed words.
    """
    cur_time = time.time()
    if self.prev_message_t + self.s.cooldown >= cur_time:
        if not self.db.check_whisper_ignore(username):
            self.send_whisper(
                username,
                f"Cooldown hit: {self.prev_message_t + self.s.cooldown - cur_time:0.2f} out of {self.s.cooldown:.0f}s remaining. !nopm to stop these cooldown pm's.",
            )
        logger.info(
            f"Cooldown hit with {self.prev_message_t + self.s.cooldown - cur_time:0.2f}s remaining.",
        )
        # BUG FIX: previously execution fell through here and generated a
        # sentence anyway, defeating the purpose of the cooldown.
        return
    # Optionally seed generation with the words after "!g".
    params = tokenize(message)[2:] if self.s.allow_generate_params else None
    # Generate an actual sentence
    sentence, success = self.generate(params)
    if success:
        # Reset cooldown if a message was actually generated
        self.prev_message_t = time.time()
        logger.info(sentence)
        self.ws.send_message(sentence)
    self.store_sentence(message)
def _command_automatic_generation(self) -> None:
    """Send an automatic generation message to the connected chat.

    As long as the bot wasn't disabled, just like if someone typed "!g" in chat.
    """
    if not self._enabled:
        return
    logger.debug("Automatically generating message")
    sentence, success = self.generate()
    if not success:
        logger.info(
            "Attempted to output automatic generation message, but there is not enough learned information yet.",
        )
        return
    logger.info(
        f"Created '{sentence}'. Cooling down for {self.s.automatic_generation_timer} seconds before regenerating",
    )
    try:
        self.ws.send_message(sentence)
    except OSError as error:
        # Transient socket failures shouldn't kill the timer thread.
        logger.warning(
            f"[OSError: {error}] upon sending automatic generation message. Ignoring.",
        )
def _command_quote(self):
    """Retrieve a random quote from the `quotes` table and format it as

    > «<quote>» - <author>

    Does nothing when no quotes are stored yet.
    """
    rows = self.db.execute(
        "SELECT quote, author FROM quotes ORDER BY RANDOM() LIMIT 1;", fetch=True
    )
    if not rows:
        return
    quote, author = rows[0]
    self.ws.send_message(f"«{quote}» - {author}")
def _command_add_quote(self, message: str):
    """Add a quote to the quotes table. The message should follow the format:

        !qadd quote author

    The last word will be parsed as the author and anything in between !qadd and
    the author will be considered as the quote itself. Duplicate quotes are
    rejected with a chat message.

    Args:
        message (str): The full "!qadd ..." chat message.
    """
    # Split the message into quote and author
    parts = message.split()
    if len(parts) < 3:  # noqa: PLR2004
        # Robustness fix: "!qadd" alone or with a single word previously
        # produced an empty quote and/or treated the quote as the author.
        self.ws.send_message("Usage: !qadd <quote> <author>")
        return
    author = parts[-1]
    quote = " ".join(parts[1:-1])
    data = self.db.execute(
        "SELECT 1 FROM quotes WHERE quote = ?", (quote,), fetch=True
    )
    if data:
        self.ws.send_message(f"Quote «{quote}» was already added.")
        return
    self.db.execute(
        "INSERT INTO quotes (quote, author) VALUES (?, ?)",
        (quote, author),  # type: ignore[arg-type]
    )
    self.ws.send_message(f"Quote «{quote}» by {author} added.")
def store_sentence(self, message: str):
    """Learn from a chat message: split it into sentences and store start 2-grams
    and grammar 3-grams in the database.

    Args:
        message (str): The raw chat message to learn from.
    """
    logger.info(f"Processing {message} in order to store it")
    stripped_message = message.strip()
    try:
        sentences = sent_tokenize(stripped_message)
    except LookupError:
        # First run: fetch nltk's sentence tokenizer data, then retry.
        logger.debug("Downloading required punkt resource...")
        import nltk

        nltk.download("punkt")
        logger.debug("Downloaded required punkt resource.")
        sentences = sent_tokenize(stripped_message)
    for sentence in sentences:
        words = tokenize(sentence)
        # Double spaces will lead to invalid rules. We remove empty words here
        if "" in words:
            words = [word for word in words if word]
        # If the sentence is too short, ignore it and move on to the next.
        if len(words) <= self.s.key_length:
            continue
        # Add a new starting point for a sentence to the <START>.
        # BUG FIX: previously `words` itself was overwritten with just its
        # first `key_length` elements here, so the grammar-learning loop
        # below never saw the rest of the sentence and no 3-gram rules
        # beyond the start were ever learned.
        start = [words[x] for x in range(self.s.key_length)]
        logger.debug(f"Adding {start} to start queue")
        self.db.add_start_queue(start)
        # Create Key variable which will be used as a key in the Dictionary for the grammar
        key: list[str] = []
        for word in words:
            # Set up key for first use
            if len(key) < self.s.key_length:
                key.append(word)
                continue
            logger.debug(f"Adding {key}[{word}] to rule queue")
            self.db.add_rule_queue([*key, word])
            # Remove the first word, and add the current word,
            # so that the key is correct for the next word.
            key.pop(0)
            key.append(word)
        logger.debug(f"Adding {key} to rule queue")
        # Add <END> at the end of the sentence
        self.db.add_rule_queue([*key, self.end_tag])
def message_handler(self, message: Message):  # noqa: C901, PLR0911, PLR0912
    """Dispatch an incoming Twitch chat message.

    Known "!" commands are routed to their `_command_*` handlers (with mod
    checks where required); any other message is learned via `store_sentence`.
    Denied users, empty messages, and bit (cheer) messages are ignored.
    All errors are caught and logged so the websocket callback never raises.

    Args:
        message (Message): The parsed Twitch IRC message to handle.
    """
    try:
        # Ignore messages from denied users or with no sender.
        if not message.user or message.user in self.s.denied_users:
            logger.debug(f"User {message.user} can't send messages")
            return
        msgs = message.message.split()
        if not msgs:
            logger.debug("Message is empty")
            return
        # Skip bit (cheer) messages entirely.
        if "bits" in message.tags:
            return
        if "emotes" in message.tags:
            # Replace modified emotes with normal versions,
            # as the bot will never have the modified emotes unlocked at the time.
            for modifier in self.extract_modifiers(message.tags["emotes"]):
                message.message = message.message.replace(modifier, "")
        logger.debug(f"Received {msgs[0]} command from {message.user}")
        # Dispatch on the first token of the message.
        match msgs[0]:
            case Commands.GENERATE_HELP:
                logger.debug("Executing _command_help()")
                self._command_help()
            case Commands.SET_COOLDOWN:
                # Mod-only command.
                if self.is_mod(message.user, message.channel):
                    logger.debug(
                        f"User {message.user} is mod, executing _command_set_cooldown()",
                    )
                    self._command_set_cooldown(
                        split_message=msgs,
                        username=message.user,
                    )
            case Commands.BLACKLIST:
                # Mod-only command.
                if self.is_mod(message.user, message.channel):
                    logger.debug(
                        f"User {message.user} is a mod, executing _command_blacklist()",
                    )
                    self._command_blacklist(
                        split_message=msgs,
                        username=message.user,
                    )
            case Commands.GENERATE:
                if not self._enabled:
                    logger.info("Bot not enabled, skipping")
                    return
                if message.user not in self.s.denied_users:
                    logger.info(
                        f"User {message.user} allowed to generate, executing _command_generate()",
                    )
                    self._command_generate(
                        message=message.message,
                        username=message.user,
                    )
            case Commands.QUOTE:
                if not self._enabled:
                    logger.info("Bot not enabled, skipping")
                    return
                if message.user not in self.s.denied_users:
                    logger.info(
                        f"User {message.user} allowed to generate, executing _command_quote()",
                    )
                    self._command_quote()
            case Commands.QUOTE_ADD:
                # Mod-only command; non-mods get a chat refusal instead.
                if self.is_mod(message.user, message.channel):
                    logger.info(
                        f"User {message.user} allowed to create quote, executing _command_quote()",
                    )
                    self._command_add_quote(message.message)
                    return
                self.ws.send_message(
                    f"@{message.user} you're not in the modlist, you can't add quotes"
                )
            case _:
                # Not a command: learn from it, except for type "366"
                # (successful channel JOIN confirmation).
                logger.debug(
                    f"Not a command: {msgs[0]}. Storing into db as a plain message",
                )
                if message.type == "366":
                    logger.info(f"Successfully joined channel: #{message.channel}")
                    return
                self.store_sentence(message.message)
    except Exception:  # noqa: BLE001
        # Never let the websocket callback raise; log and carry on.
        logger.exception(f"Could not process message {message}")
def generate(self, params: list[str] | None = None) -> tuple[str, bool]:  # noqa: C901, PLR0912
    """Given an input sentence, generate the remainder of the sentence using the learned data.

    Walks the learned Markov chain in ``self.db`` key-by-key (a key is the
    last ``self.s.key_length`` words) until the "<END>" sentinel, the maximum
    sentence length, or an iteration cap is reached.

    Args:
        params (list[str]): A list of words to use as an input to use as the start of generating.

    Returns:
        tuple[str, bool]: A tuple of a sentence as the first value, and a boolean indicating
            whether the generation succeeded as the second value.
    """
    params = params or []
    # List of sentences that will be generated. In some cases, multiple sentences will be generated,
    # e.g. when the first sentence has less words than self.min_sentence_length.
    sentences: list[list | list[str]] = [[]]
    # Check for commands or recursion, eg: !generate !generate
    if len(params) > 0 and self.is_command(params[0]):
        return "You can't make me do commands, you madman!", False
    # Get the starting key and starting sentence.
    # If there is more than 1 param, get the last 2 as the key.
    # Note that self.s.key_length is fixed to 2 in this implementation
    if len(params) > 1:
        key = params[-self.s.key_length :]
        # Copy the entire params for the sentence
        sentences[0] = params.copy()
    elif len(params) == 1:
        # First we try to find if this word was once used as the first word in a sentence:
        key = self.db.get_next_single_start(params[0])  # type: ignore[assignment]
        if key is None:
            # If this failed, we try to find the next word in the grammar as a whole
            key = self.db.get_next_single_initial(0, params[0])
        if key is None:
            # Return a message that this word hasn't been learned yet
            return f'I haven\'t extracted "{params[0]}" from chat yet.', False
        # Copy this for the sentence
        sentences[0] = key.copy()
    else:  # if there are no params
        # Get starting key
        key = self.db.get_start()
        if key:
            # Copy this for the sentence
            sentences[0] = key.copy()
        else:
            # If nothing's ever been said
            return "There is not enough learned information yet.", False
    # Counter to prevent infinite loops (i.e. constantly generating <END> while below the
    # minimum number of words to generate)
    i = 0
    while (
        self.get_sentence_length(sentences) < self.s.max_sentence_length
        and i < self.s.max_sentence_length * 2
    ):
        # Use key to get next word
        if i == 0:
            # Prevent fetching <END> on the first word
            word = self.db.get_next_initial(i, key)
        else:
            word = self.db.get_next(i, key)
        i += 1
        if word == "<END>" or word is None:
            # Break, unless we are before the min_sentence_length
            if i < self.s.min_sentence_length:
                key = self.db.get_start()
                # Ensure that the key can be generated. Otherwise, we still stop.
                if key:
                    # Start a new sentence, seeded with the fresh key
                    sentences.append([])
                    for entry in key:
                        sentences[-1].append(entry)
                    continue
            break
        # Otherwise add the word
        sentences[-1].append(word)
        # Shift the key so on the next iteration it gets the next item
        key.pop(0)
        key.append(word)
    # If there were params, but the sentence resulting is identical to the params
    # Then the params did not result in an actual sentence
    # If so, restart without params
    if len(params) > 0 and params == sentences[0]:
        return "I haven't learned what to do with \"" + detokenize(
            params[-self.s.key_length :],
        ) + '" yet.', False
    # Each sub-sentence is detokenized separately, then joined with the
    # configured separator (default " - ").
    return self.s.sentence_separator.join(
        detokenize(sentence) for sentence in sentences
    ), True
@staticmethod
def get_sentence_length(sentences: list[list[str]]) -> int:
    """Count the words across a list of tokenized sentences.

    Args:
        sentences (List[List[str]]): List of lists of tokens that make up a sentence,
            where a token is a word or punctuation. For example:
            [['Hello', ',', 'you', "'re", 'Tom', '!'], ['Yes', ',', 'I', 'am', '.']]
            This would return 6.

    Returns:
        int: The number of words in the sentence.
    """
    def _is_word(token: str) -> bool:
        # Punctuation tokens and apostrophe-led contraction pieces
        # (e.g. "'re", "'m") do not count as words.
        return token not in string.punctuation and token[0] != "'"

    return sum(
        1
        for sentence in sentences
        for token in sentence
        if _is_word(token)
    )
@staticmethod
def extract_modifiers(emotes: str) -> list[str]:
    """Extract emote modifiers from emotes such as the horizontal flip.

    Args:
        emotes (str): String containing all emotes used in the message.

    Returns:
        list[str]: List of strings that show modifiers, such as "_HZ" for horizontal flip.
    """
    modifiers: list[str] = []
    remaining = emotes
    try:
        # Each modifier span runs from an "_" up to the next ":".
        while remaining:
            start = remaining.index("_")
            end = remaining.index(":", start)
            modifiers.append(remaining[start:end])
            # Keep scanning from the ":" onwards for further modifiers.
            remaining = remaining[end:]
    except ValueError:
        # No further "_" (or no ":" after it) — scanning is complete.
        pass
    return modifiers
def send_whisper(self, user: str, message: str) -> None:
    """Optionally send a whisper, only if "WhisperCooldown" is True.

    Args:
        user (str): The user to potentially whisper.
        message (str): The message to potentially whisper
    """
    # Whispers are suppressed entirely when the setting is disabled.
    if not self.s.whisper_cooldown:
        return
    self.ws.send_whisper(user, message)
@staticmethod
def is_command(message: str) -> bool:
    """True if `message` is exactly one of the bot's known `Commands` values.

    Used to avoid learning from and generating command invocations
    (e.g. `generate()` rejects params whose first word is a command).

    NOTE(review): this tests exact membership in the `Commands` enum only.
    It does NOT treat arbitrary messages starting with '!', '/' or '.' as
    commands, despite what an earlier docstring claimed — confirm that
    exact-match behavior is intended.

    Args:
        message (str): The message to check.

    Returns:
        bool: True if the message equals one of the `Commands` members.
    """
    return message in list(Commands)
def is_mod(self, username: str, channel: str) -> bool:
    """True if the user is a moderator.

    Args:
        username (str): The name of the user to check
        channel (str): The name of the channel

    Returns:
        bool: True if the user is a moderator.
    """
    # The broadcaster counts as a moderator of their own channel.
    if username == channel:
        return True
    return username in self.s.mods
# Script entry point: instantiating MarkovChain starts the bot
# (presumably connects and runs its loop from __init__ — side effects
# live in the constructor; confirm against the class definition).
if __name__ == "__main__":
    MarkovChain()

View file

@ -1,118 +0,0 @@
import json
from pathlib import Path
from typing import Literal
import platformdirs
from loguru import logger
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Bot configuration, loaded from and saved to ``settings.json``.

    Field aliases use the PascalCase keys found in the JSON file, so the
    settings round-trip unchanged through `read` and `write`.
    """

    # Twitch IRC connection parameters.
    host: str = Field("irc.chat.twitch.tv", alias="Host", serialization_alias="Host")
    port: int = Field(6667, alias="Port", serialization_alias="Port")
    channel: str = Field(..., alias="Channel", serialization_alias="Channel")
    nickname: str = Field(..., alias="Nickname", serialization_alias="Nickname")
    authentication: str = Field(
        ...,
        alias="Authentication",
        serialization_alias="Authentication",
    )
    # Users the bot ignores; defaults cover common third-party chat bots.
    denied_users: list[str] = Field(
        [
            "StreamElements",
            "Nightbot",
            "Moobot",
            "Marbiebot",
        ],
        alias="DeniedUsers",
        serialization_alias="DeniedUsers",
    )
    banned_words: list[str] = Field(
        default_factory=list,
        alias="BannedWords",
        serialization_alias="BannedWords",
    )
    # Usernames allowed to run moderator-only commands (the broadcaster
    # is treated as a mod separately by `is_mod`).
    mods: list[str] = Field(
        default_factory=list,
        alias="Mods",
        serialization_alias="Mods",
    )
    # Seconds between allowed generations.
    cooldown: int = Field(210, alias="Cooldown", serialization_alias="Cooldown")
    # Markov-chain key length (number of words per key).
    key_length: int = Field(2, alias="KeyLength", serialization_alias="KeyLength")
    max_sentence_length: int = Field(
        25,
        alias="MaxSentenceWordAmount",
        serialization_alias="MaxSentenceWordAmount",
    )
    # -1 effectively disables the minimum-length requirement.
    min_sentence_length: int = Field(
        -1,
        alias="MinSentenceWordAmount",
        serialization_alias="MinSentenceWordAmount",
    )
    # Seconds between automatic help messages (default: every 5 hours).
    help_message_timer: int = Field(
        60 * 60 * 5,
        alias="HelpMessageTimer",
        serialization_alias="HelpMessageTimer",
    )
    # Seconds between automatic generations; -1 disables the timer.
    automatic_generation_timer: int = Field(
        -1,
        alias="AutomaticGenerationTimer",
        serialization_alias="AutomaticGenerationTimer",
    )
    whisper_cooldown: bool = Field(
        True,
        alias="WhisperCooldown",
        serialization_alias="WhisperCooldown",
    )
    enable_generate_command: bool = Field(
        True,
        alias="EnableGenerateCommand",
        serialization_alias="EnableGenerateCommand",
    )
    # Separator inserted between multiple generated sub-sentences.
    sentence_separator: str = Field(
        " - ",
        alias="SentenceSeparator",
        serialization_alias="SentenceSeparator",
    )
    allow_generate_params: bool = Field(
        True,
        alias="AllowGenerateParams",
        serialization_alias="AllowGenerateParams",
    )
    # "TRACE" is a loguru-specific level, hence not stdlib logging's set.
    log_level: Literal[
        "CRITICAL",
        "ERROR",
        "WARNING",
        "INFO",
        "DEBUG",
        "TRACE",
    ] = Field("DEBUG", alias="LogLevel")

    model_config = SettingsConfigDict(extra="ignore")

    @property
    def channel_name(self) -> str:
        """The channel as a bare lowercase name, with any leading "#" removed."""
        return self.channel.replace("#", "").lower()

    @staticmethod
    def _default_path() -> Path:
        """Return the default settings.json path in the per-user config dir."""
        return (
            platformdirs.user_config_path("markovbot_gui", ensure_exists=True)
            / "settings.json"
        )

    @classmethod
    def read(cls, filepath: Path | None = None) -> "Settings":
        """Load settings from `filepath`, defaulting to the user config dir.

        Raises:
            FileNotFoundError: If the settings file does not exist.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        if not filepath:
            filepath = cls._default_path()
        with filepath.open("r") as f:
            data = json.load(f)
        # Instantiate via cls (not Settings) so subclasses read back
        # as their own type.
        return cls(**data)

    def write(self, filepath: Path | None = None) -> None:
        """Serialize the current settings (by alias) as JSON to `filepath`."""
        if not filepath:
            filepath = self._default_path()
        with filepath.open("w") as f:
            logger.info(f"Writing current settings to {filepath}")
            json.dump(self.model_dump(by_alias=True), f, indent=4)

View file

@ -1,32 +0,0 @@
import logging
import threading
from collections.abc import Callable
logger = logging.getLogger(__name__)
class LoopingTimer(threading.Thread):
    """Daemon thread that repeatedly runs ``target(*args, **kwargs)``.

    The target is invoked every `interval` seconds until `stop()` is called
    or the interpreter exits (the thread is a daemon, so it never blocks
    shutdown).
    """

    def __init__(
        self,
        interval: float,
        target: Callable[[], None],
        *args,
        **kwargs,
    ) -> None:
        """Create the timer thread (call ``start()`` to begin looping).

        Args:
            interval: Seconds to wait between invocations of `target`.
            target: Callable to invoke on each tick.
            *args: Positional arguments forwarded to `target`.
            **kwargs: Keyword arguments forwarded to `target`.
        """
        super().__init__()
        self.interval = interval
        self.target = target
        self.args = args
        self.kwargs = kwargs
        # The Event doubles as the sleep mechanism and the stop signal:
        # wait() returns True as soon as the event is set, ending the loop.
        # (Previously nothing ever set this event, so the loop could only
        # end with the process.)
        self.stopped = threading.Event()
        self.daemon = True

    def run(self) -> None:
        # Event.wait returns False on timeout (keep looping) and True once
        # stop() has set the event (exit).
        while not self.stopped.wait(self.interval):
            self.target(*self.args, **self.kwargs)

    def stop(self) -> None:
        """Signal the loop to exit; run() returns at the next wake-up."""
        self.stopped.set()

View file

@ -1,132 +0,0 @@
import re
from typing import ClassVar
from nltk.tokenize.destructive import NLTKWordTokenizer
from nltk.tokenize.treebank import TreebankWordDetokenizer
class MarkovChainTokenizer(NLTKWordTokenizer):
    """NLTK word tokenizer with chat-friendly regex tables.

    Overrides `NLTKWordTokenizer`'s quote and punctuation tables; notably
    the "@" character was removed from the punctuation set (see the
    "Custom for MarkovChain" entry below) so tokens like @mentions are
    not split.
    """

    # Starting quotes.
    STARTING_QUOTES: ClassVar[list] = [
        (re.compile("([«“‘„]|[`]+)", re.UNICODE), r" \1 "),  # noqa: RUF001
        (re.compile(r"(``)"), r" \1 "),
        (re.compile(r"([ \(\[{<])(\"|\'{2})"), r"\1 '' "),
        (re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.UNICODE), r"\1 \2"),
    ]
    PUNCTUATION: ClassVar[list] = [
        # NOTE(review): this pattern renders as an empty string here; in
        # upstream NLTK this entry matches a curly apostrophe (RUF001 flags
        # ambiguous unicode, suggesting the character was lost) — confirm
        # against repository history.
        (re.compile(r""), r"'"),  # noqa: RUF001
        (
            re.compile(r'([^\.])(\.)([\]\)}>"\'' "»”’ " r"]*)\s*$", re.UNICODE),  # noqa: RUF001
            r"\1 \2 \3 ",
        ),
        (re.compile(r"([:,])([^\d])"), r" \1 \2"),
        (re.compile(r"([:,])$"), r" \1 "),
        # See https://github.com/nltk/nltk/pull/2322
        (re.compile(r"\.{2,}", re.UNICODE), r" \g<0> "),
        # Custom for MarkovChain: Removed the "@"
        (re.compile(r"[;#$%&]"), r" \g<0> "),
        (
            re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'),
            r"\1 \2\3 ",
        ),  # Handles the final period.
        (re.compile(r"[?!]"), r" \g<0> "),
        (re.compile(r"([^'])' "), r"\1 ' "),
        # See https://github.com/nltk/nltk/pull/2322
        (re.compile(r"[*]", re.UNICODE), r" \g<0> "),
    ]
EMOTICON_RE = re.compile(
r"""
(
[<>]?
[:;=8] # eyes
[\-o\*\']? # optional nose
[\)\]\(\[dDpP/\:\}\{@\|\\] # mouth
|
[\)\]\(\[dDpP/\:\}\{@\|\\] # mouth
[\-o\*\']? # optional nose
[:;=8] # eyes
[<>]?
|
<3 # heart
)""",
re.VERBOSE | re.IGNORECASE | re.UNICODE,
)
_tokenize = MarkovChainTokenizer().tokenize
_detokenize = TreebankWordDetokenizer().tokenize
def tokenize(sentence: str) -> list[str]:
    """Word tokenize, separating commas, dots, apostrophes, etc.

    Uses nltk's `NLTKWordTokenizer`, but does not consider "@" to be punctuation.
    Also doesn't convert "hello" to ``hello'', but to ''hello''.
    Furthermore, doesn't split emoticons, i.e. "<3" or ":)"

    Args:
        sentence (str): Input sentence.

    Returns:
        list[str]: Tokenized output of the sentence.
    """
    tokens: list[str] = []
    # Peel off one emoticon at a time: word-tokenize the text before it,
    # keep the emoticon itself as a single token, then continue scanning
    # the remainder of the sentence.
    while (emoticon := EMOTICON_RE.search(sentence)) is not None:
        tokens.extend(_tokenize(sentence[: emoticon.start()].strip()))
        tokens.append(emoticon.group())
        sentence = sentence[emoticon.end() :].strip()
    # Whatever is left contains no emoticons; tokenize it normally.
    tokens.extend(_tokenize(sentence))
    return tokens
def detokenize(tokenized: list[str]) -> str:
    """Detokenize a tokenized list of words and punctuation.

    Converted in a less naïve way than `" ".join(tokenized)`

    Preprocess tokenized by placing spaces before the 1st, 3rd, 5th, etc. quote,
    and by placing spaces after the 2nd, 4th, 6th, etc. quote.
    Then, ["He", "said", "''", "heya", "!", "''", "yesterday", "."] will be detokenized to
    > He said ''heya!'' yesterday.
    instead of
    > He said''heya!''yesterday.

    Args:
        tokenized (List[str]): Input tokens, e.g. ["Hello", ",", "I", "'m", "Tom"]

    Returns:
        str: The correct string sentence, e.g. "Hello, I'm Tom"
    """
    # Positions of every quote token; alternating entries are treated as
    # opening (even) and closing (odd) quotes below.
    indices = [
        index for index, token in enumerate(tokenized) if token in ("''", "'", '"')
    ]
    # Replace '' with ", works better with more recent NLTK versions
    tokenized_copy = [token if token != "''" else '"' for token in tokenized]  # noqa: S105
    # We get the reverse of the enumerate, as we modify the list we took the indices from
    # (merging from the end keeps earlier indices valid).
    enumerated = list(enumerate(indices))
    for i, index in enumerated[::-1]:
        # Opening quote
        if i % 2 == 0:
            # If there is another word, merge with that word and prepend a space
            if len(tokenized) > index + 1:
                tokenized_copy[index : index + 2] = [
                    "".join(tokenized_copy[index : index + 2]),
                ]
        # Closing quote
        elif index > 0:
            # Merge the closing quote onto the preceding word.
            tokenized_copy[index - 1 : index + 1] = [
                "".join(tokenized_copy[index - 1 : index + 1]),
            ]
    return _detokenize(tokenized_copy).strip()

View file

@ -1,10 +0,0 @@
import logging
class LogHandler(logging.Handler):
    """Logging handler that forwards formatted records onto a queue.

    The queue consumer (e.g. a GUI thread) drains the formatted messages.
    """

    def __init__(self, log_queue):
        self.log_queue = log_queue
        super().__init__()

    def emit(self, record):
        formatted = self.format(record)
        self.log_queue.put(formatted)

View file

@ -1,75 +0,0 @@
import platformdirs
from kivy.app import App
from kivy.metrics import dp
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.popup import Popup
from kivy.uix.widget import Widget
from src.markovbot_gui.bot_runner import BotRunner
from src.markovbot_gui.config_window import ConfigWindow
class BotApp(App):
    """Kivy front-end offering two actions: run the bot or edit its config."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Per-user locations for the settings file and the bot's data.
        self.config_path = (
            platformdirs.user_config_path("markovbot_gui") / "settings.json"
        )
        self.data_path = platformdirs.user_data_path("markovbot_gui")

    def run_bot(self, instance):
        """Open a modal popup hosting the bot runner widget."""
        runner_popup = Popup(
            title=f"Bot runner, database available at {self.data_path}",
            content=BotRunner(settings_path=self.config_path),
            size_hint=(None, None),
            size=(dp(600), dp(600)),
            auto_dismiss=False,
        )
        runner_popup.open()

    def run_config(self, instance):
        """Open a modal popup hosting the configuration editor."""
        editor = ConfigWindow(config_path=self.config_path)
        config_popup = Popup(
            title=f"Bot configuration, available at {self.config_path}",
            content=editor,
            size_hint=(None, None),
            size=(dp(600), dp(400)),
            auto_dismiss=False,
        )
        # The popup is not auto-dismissable, so give it an explicit close button.
        dismiss_button = Button(
            text="Close",
            size_hint=(None, None),
            size=(dp(100), dp(40)),
            pos_hint={"center_x": 0.5},
        )
        dismiss_button.bind(on_release=config_popup.dismiss)
        editor.add_widget(dismiss_button)
        config_popup.open()

    def build(self):
        """Build the root layout: a spacer above a bottom row of two buttons."""
        spacer = Widget()
        button_row = BoxLayout(size_hint=(1, None), height=50)
        for label, handler in (
            ("Run bot", self.run_bot),
            ("Open config", self.run_config),
        ):
            button = Button(text=label)
            button.bind(on_release=handler)
            button_row.add_widget(button)
        root = BoxLayout(orientation="vertical")
        root.add_widget(spacer)
        root.add_widget(button_row)
        return root
# GUI entry point: starts the kivy application event loop.
if __name__ == "__main__":
    BotApp().run()

1089
uv.lock generated

File diff suppressed because it is too large Load diff