<?php
// FILE: 1_generate_embeddings.php
// PURPOSE: To read content from database tables, generate vector embeddings using the OpenAI API,
// and store them back in the database.
// USAGE: Run this script from the command line: `php 1_generate_embeddings.php`
// It's designed to be run once for existing content, and then periodically as new content is added.

// --- SETUP & CONFIGURATION ---
// Report every class of error and print it to the console, including errors
// raised while PHP itself is starting up.
error_reporting(E_ALL);
ini_set('display_startup_errors', 1);
ini_set('display_errors', 1);

// Remove the execution time limit: the batch can run for a long time.
set_time_limit(0);

// Include the central database connection and configuration file.
require_once 'db_connect.php';

// --- CORE EMBEDDING FUNCTION ---

/**
 * Requests a vector embedding for a text string from the OpenAI embeddings API.
 *
 * Sends a JSON POST to https://api.openai.com/v1/embeddings using the
 * `text-embedding-3-small` model. Every failure mode — empty input, a request
 * body that cannot be JSON-encoded, a transport error or timeout, a non-200
 * status, or a response without a usable embedding — is written to the PHP
 * error log and reported to the caller as null.
 *
 * @param string $text The text to embed.
 * @param string $apiKey The OpenAI API key.
 * @return array|null The embedding vector as an array, or null on failure.
 */
function getEmbedding(string $text, string $apiKey): ?array
{
    // Use a cost-effective and high-performance model
    $model = 'text-embedding-3-small';
    $apiUrl = 'https://api.openai.com/v1/embeddings';

    // The API rejects empty input, so skip the round trip entirely.
    if ($text === '') {
        error_log('OpenAI API Error: Refusing to request an embedding for empty text.');
        return null;
    }

    // json_encode() returns false for e.g. invalid UTF-8; never send that as the body.
    $payload = json_encode([
        'input' => $text,
        'model' => $model,
    ]);
    if ($payload === false) {
        error_log('OpenAI API Error: Could not JSON-encode the request body: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init($apiUrl);
    if ($ch === false) {
        error_log('OpenAI API Error: Could not initialise a cURL handle.');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $apiKey,
        ],
        // The calling script disables PHP's own time limit, so without these
        // a stalled connection would block the whole run indefinitely.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 60,
    ]);

    $response = curl_exec($ch);
    $transportError = ($response === false) ? curl_error($ch) : '';
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false when no response was received at all
    // (DNS failure, refused connection, timeout, TLS error, ...).
    if ($response === false) {
        error_log("OpenAI API Error: Request failed before a response was received: $transportError");
        return null;
    }

    if ($httpcode !== 200) {
        error_log("OpenAI API Error: Failed to get embedding. HTTP Code: $httpcode. Response: $response");
        return null;
    }

    $responseData = json_decode($response, true);
    $embedding = is_array($responseData)
        ? ($responseData['data'][0]['embedding'] ?? null)
        : null;

    // Guard the declared return type: anything other than a non-empty array is a failure.
    if (!is_array($embedding) || $embedding === []) {
        error_log('OpenAI API Error: Response did not contain an embedding vector.');
        return null;
    }

    return $embedding;
}


// --- DATABASE PROCESSING FUNCTION ---

/**
 * Generates and stores embeddings for every row of a table that lacks one.
 *
 * Selects the rows whose `vector_embedding` column IS NULL, joins the
 * tag-stripped text of the given columns with newlines, requests an embedding
 * through getEmbedding(), and writes the JSON-encoded vector back to the row.
 * Rows whose combined text is empty are skipped and stay NULL. Progress is
 * echoed to standard output. The function pauses one second after each API
 * call to stay under rate limits.
 *
 * Database errors raised as PDOException (when the connection uses exception
 * mode) are not caught here and propagate to the caller.
 *
 * @param PDO $pdo The database connection object.
 * @param string $apiKey The OpenAI API key.
 * @param string $tableName The name of the table to process.
 * @param string $pkColumn The name of the primary key column (e.g., 'id').
 * @param array $textColumns An array of column names containing the text to be embedded.
 * @throws InvalidArgumentException If $textColumns is empty.
 */
function processTable(PDO $pdo, string $apiKey, string $tableName, string $pkColumn, array $textColumns): void
{
    // An empty column list would otherwise produce invalid SQL ("SELECT `id`, `` FROM ...").
    if ($textColumns === []) {
        throw new InvalidArgumentException("No text columns given for table `$tableName`; at least one is required.");
    }

    // Identifiers cannot be bound as parameters, so quote them and escape any
    // embedded backtick by doubling it.
    $quote = static function (string $identifier): string {
        return '`' . str_replace('`', '``', $identifier) . '`';
    };
    $table = $quote($tableName);
    $pk = $quote($pkColumn);
    $selectList = implode(', ', array_map($quote, array_merge([$pkColumn], array_values($textColumns))));

    echo "--- Processing table: `$tableName` ---\n";

    // Select rows that have not yet been embedded
    $stmt = $pdo->query("SELECT $selectList FROM $table WHERE `vector_embedding` IS NULL");
    if ($stmt === false) {
        // Only reachable when PDO is not configured to throw exceptions.
        echo "Could not read rows from `$tableName`; skipping this table.\n\n";
        return;
    }

    // Request associative rows explicitly so $row[$column] works whatever the
    // connection's default fetch mode is.
    $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

    if (empty($rows)) {
        echo "No new content to embed in `$tableName`.\n\n";
        return;
    }

    echo "Found " . count($rows) . " new rows to process in `$tableName`.\n";

    // Prepare the UPDATE once and reuse it for every row.
    $updateStmt = $pdo->prepare("UPDATE $table SET `vector_embedding` = ? WHERE $pk = ?");
    if ($updateStmt === false) {
        echo "Could not prepare the update statement for `$tableName`; skipping this table.\n\n";
        return;
    }

    $processedCount = 0;

    foreach ($rows as $row) {
        // Concatenate the content from all specified text columns.
        // NULL columns are treated as empty text; strip_tags cleans up HTML.
        $parts = [];
        foreach ($textColumns as $column) {
            $parts[] = strip_tags((string) ($row[$column] ?? ''));
        }
        $contentToEmbed = trim(implode("\n", $parts));

        // Compare against '' rather than using empty(): the text "0" is real content.
        if ($contentToEmbed === '') {
            echo "  - Skipped row ID: " . $row[$pkColumn] . " (no text to embed)\n";
            continue;
        }

        // Get the embedding from the API
        $embedding = getEmbedding($contentToEmbed, $apiKey);

        if ($embedding === null || $embedding === []) {
            echo "  - FAILED to get embedding for row ID: " . $row[$pkColumn] . "\n";
        } elseif ($updateStmt->execute([json_encode($embedding), $row[$pkColumn]]) === false) {
            // Only reachable when PDO is not configured to throw exceptions.
            echo "  - FAILED to save embedding for row ID: " . $row[$pkColumn] . "\n";
        } else {
            // The JSON-encoded vector is now stored in the database
            $processedCount++;
            echo "  - Embedded and saved vector for row ID: " . $row[$pkColumn] . "\n";
        }

        // Avoid hitting API rate limits
        sleep(1);
    }

    echo "Finished processing `$tableName`. Embedded $processedCount new items.\n\n";
}


// --- SCRIPT EXECUTION ---

// $pdo and $config are expected to be defined by db_connect.php (required above).
// Validate both before doing any work: the connection must be a PDO instance
// and the API key a non-blank string.
$openAiApiKey = $config['openai']['api_key'] ?? null;

if (!(($pdo ?? null) instanceof PDO) || !is_string($openAiApiKey) || trim($openAiApiKey) === '') {
    // Report on stderr and exit non-zero so cron jobs and shell callers can
    // detect the failure; die("message") would exit with status 0.
    file_put_contents(
        'php://stderr',
        "Error: Database connection or OpenAI API key not found. Check your db_connect.php file.\n"
    );
    exit(1);
}

echo "Starting embedding generation process...\n\n";

// Each entry: table name, primary key column, text columns to embed.
$embeddingTargets = [
    ['projects', 'id', ['name', 'description']],
    ['news_articles', 'id', ['title', 'content']],
    ['knowledge_base', 'id', ['title', 'content_text']],
];

foreach ($embeddingTargets as [$targetTable, $targetPk, $targetColumns]) {
    processTable($pdo, $openAiApiKey, $targetTable, $targetPk, $targetColumns);
}

echo "All tables have been processed.\n";

// The closing PHP tag is intentionally omitted: in a PHP-only file it is
// unnecessary, and omitting it avoids emitting stray trailing whitespace.
