refactor: clean the search command a bit
This commit is contained in:
parent
eeb1573f99
commit
701d79583d
4 changed files with 93 additions and 141 deletions
|
|
@ -1,4 +1,5 @@
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
from halig.settings import Settings
|
from halig.settings import Settings
|
||||||
|
|
||||||
|
|
@ -12,3 +13,6 @@ class ICommand(ABC):
|
||||||
class BaseCommand(ICommand):
|
class BaseCommand(ICommand):
|
||||||
def __init__(self, settings: Settings, *args, **kwargs):
|
def __init__(self, settings: Settings, *args, **kwargs):
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
|
|
||||||
|
def traverse_notebooks(self, callback_on_item: Callable):
|
||||||
|
"""Traverse root_path"""
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,6 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from collections.abc import Generator
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import platformdirs
|
import platformdirs
|
||||||
|
|
@ -14,156 +12,105 @@ from halig.settings import Settings
|
||||||
|
|
||||||
|
|
||||||
class SearchCommand(BaseCommand):
    """Full-text search over encrypted notes, backed by a SQLite FTS5 index.

    The database lives in the user cache directory
    (``<cache>/halig/halig.db``) and uses a single virtual table::

        CREATE VIRTUAL TABLE notes
        USING fts5(name, last_timestamp, hash, filepath, body);
    """

    def __init__(self, term: str, index: bool, settings: Settings, *args, **kwargs):
        """Set up the encryptor and open the cache database connection.

        Args:
            term: the full-text search term.
            index: when True, (re)index the notebooks before searching.
            settings: halig settings; provides ``notebooks_root_path``.
        """
        super().__init__(settings, *args, **kwargs)
        self.encryptor = Encryptor(settings)
        self.term = term
        self.index = index
        self.cache_path = platformdirs.user_cache_path(
            "halig",
            ensure_exists=True,
        )
        self.db_path = self.cache_path / "halig.db"
        self.db_conn = sqlite3.connect(self.db_path)

    def _create_schema(self):
        """Create the FTS5 ``notes`` table if it does not exist yet."""
        with self.db_conn:
            self.db_conn.execute(
                """CREATE VIRTUAL TABLE IF NOT EXISTS notes
                USING fts5(name, last_timestamp, hash, filepath, body);""",
            )

    def _search_note_in_db_by_path(self, path: Path) -> tuple[str | None, str | None]:
        """Return ``(hash, last_timestamp)`` for an indexed note path.

        Returns ``(None, None)`` when the path has never been indexed.

        NOTE(review): ``last_timestamp`` is inserted as a float (``st_mtime``),
        so the declared ``str | None`` may not match what SQLite hands back —
        confirm against the actual column affinity.
        """
        with self.db_conn:
            cursor = self.db_conn.execute(
                "SELECT hash, last_timestamp FROM notes where filepath = ?",
                (str(path),),
            )
        results = cursor.fetchall()
        if not results:
            return None, None
        return results[0]  # type: ignore[no-any-return]

    def _index_note(
        self,
        updated_at: float,
        body_hash: str,
        note_path: Path,
        body: str,
    ):
        """Insert a brand-new note row into the index."""
        with self.db_conn:
            self.db_conn.execute(
                # BUG FIX: the statement opened with `""""` (four quotes),
                # which made the SQL text start with a stray `"` and fail
                # with a syntax error at runtime.
                """INSERT INTO notes (name, last_timestamp, hash, filepath, body)
                VALUES (?, ?, ?, ?, ?);""",
                (note_path.name, updated_at, body_hash, str(note_path), body),
            )

    def _update_index_note(
        self,
        updated_at: float,
        body_hash: str,
        note_path: Path,
        body: str,
    ):
        """Refresh the indexed row for an already-known note path."""
        with self.db_conn:
            self.db_conn.execute(
                """UPDATE notes SET
                last_timestamp = (?),
                hash = (?),
                body = (?)
                WHERE
                filepath = (?);
                """,
                (updated_at, body_hash, body, str(note_path)),
            )

    def _index_notebooks(self):
        """Walk every ``*.age`` note, decrypt it, and insert/update its row."""
        for note_path in self.settings.notebooks_root_path.glob("./**/*.age"):
            updated_at = note_path.stat().st_mtime
            with note_path.open("rb") as f:
                body = self.encryptor.decrypt(f.read())
            body_hash = hashlib.sha512(body).hexdigest()
            original_hash, _last_timestamp = self._search_note_in_db_by_path(note_path)
            if not original_hash:
                self._index_note(updated_at, body_hash, note_path, body.decode())
                continue
            # BUG FIX: this compared the *builtin* `hash` function against
            # `original_hash` (always unequal), so every unchanged note was
            # rewritten on each run; compare the fresh digest instead.
            if body_hash != original_hash:
                self._update_index_note(updated_at, body_hash, note_path, body.decode())

    def _search(self):
        """Run the FTS query and print grep-style results with the term highlighted."""
        with self.db_conn:
            cursor = self.db_conn.execute(
                "SELECT filepath, body FROM notes WHERE body MATCH ? ORDER BY rank;",
                (f"{self.term}*",),
            )
        results = cursor.fetchall()
        console = Console()
        search_regex = re.compile(re.escape(self.term), re.IGNORECASE)
        for filepath, body in results:
            for lineno, line in enumerate(body.split("\n"), start=1):
                if search_regex.search(line):
                    content_line = search_regex.sub("[bold red]\\g<0>[/bold red]", line)
                    console.print(f"{filepath}:{lineno}: {content_line}")

    def run(self):
        """`halig search` entrypoint: ensure schema, optionally reindex, query."""
        self._create_schema()
        if self.index:
            self._index_notebooks()
        self._search()
        self.db_conn.close()
||||||
|
|
@ -117,12 +117,13 @@ def import_unencrypted(
|
||||||
|
|
||||||
@app.command()
def search(
    term: str,
    index: bool = False,
):
    # Load settings from disk, build the search command, and execute it
    # in one expression.
    SearchCommand(
        term=term,
        index=index,
        settings=load_from_file(),
    ).run()
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ def settings_file_path(halig_path: Path, notebooks_path: Path) -> Path:
|
||||||
yaml_file.touch()
|
yaml_file.touch()
|
||||||
s = Settings(notebooks_root_path=notebooks_path)
|
s = Settings(notebooks_root_path=notebooks_path)
|
||||||
# `.dict()` doesn't serialize some fields that yaml doesn't understand
|
# `.dict()` doesn't serialize some fields that yaml doesn't understand
|
||||||
serialized = json.loads(s.json())
|
serialized = json.loads(s.model_dump_json())
|
||||||
with yaml_file.open("w") as f:
|
with yaml_file.open("w") as f:
|
||||||
yaml.safe_dump(serialized, f)
|
yaml.safe_dump(serialized, f)
|
||||||
return yaml_file
|
return yaml_file
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue