first commit

This commit is contained in:
2026-01-07 17:48:40 +01:00
commit e93a510c61
58 changed files with 11547 additions and 0 deletions

View File

@@ -0,0 +1,494 @@
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\App;
use Illuminate\Support\Facades\File;
use App\Services\HfsqlDataConverter;
use Symfony\Component\Console\Command\Command as CommandAlias;
/**
 * Artisan command that copies one table from the "hfsql" connection into the
 * default connection, which is driven with PostgreSQL-specific statements.
 *
 * Tables with at least RECORDS_THRESHOLD_FOR_COPY rows are written to
 * tab-delimited CSV files and loaded with server-side COPY; smaller tables
 * are loaded with upserts.
 */
class ImportHfsqlDataCommand extends Command
{
    /** Minimum source row count at which the CSV + COPY path replaces the upsert path. */
    public const int RECORDS_THRESHOLD_FOR_COPY = 10000;

    /** Cap on the number of rows processed when the application environment is "local". */
    public const int MAX_RECORDS_FOR_LOCAL_TEST = 1000000;

    /** Rows written to one CSV file before it is loaded and a new file is started. */
    public const int MAX_RECORDS_PER_FILE = 2000000;

    /** Rows requested from HFSQL per query; large to reduce query overhead. */
    public const int CHUNK_SIZE = 200000;

    /** PostgreSQL carries the bind-parameter count of a statement in 16 bits. */
    public const int MAX_BIND_PARAMETERS = 65535;

    // Command properties
    protected $signature = 'db:import-hfsql
        {table : The name of the table to migrate}
        {--no-convert : Skip data conversion for faster import}';

    protected $description = 'Imports HFSQL data using COPY for large tables and UPSERT for small ones.';

    /** Directory for the intermediate CSV files; COPY reads them server-side. */
    protected string $baseTempDir = 'C:\Temp\pg_imports';

    protected HfsqlDataConverter $converter;

    // Accumulated per-phase timings, reported by displayDetailedDuration().
    protected float $totalFetchTime = 0.0;
    protected float $totalConvertTime = 0.0;
    protected float $totalCsvWriteTime = 0.0;
    protected float $totalCopyTime = 0.0;

    /**
     * Entry point: counts the source rows, runs the matching import strategy
     * and prints timing metrics.
     *
     * Session settings are restored on every exit path, including the
     * "nothing to import" exit and failures.
     *
     * @return int Command exit code (SUCCESS or FAILURE).
     */
    public function handle(): int
    {
        // Raises the memory ceiling to 2 GB (it is not unlimited).
        ini_set('memory_limit', '2048M');
        set_time_limit(0); // No time limit

        $tableName = $this->argument('table');
        $this->converter = new HfsqlDataConverter();
        $startTime = microtime(true);

        $this->ensureTempDirectory();
        $this->optimizePostgresForBulkImport();

        try {
            $totalRecords = $this->calculateRecordsToProcess($tableName);
            if ($totalRecords === 0) {
                $this->info("No records found. Finishing.");

                return CommandAlias::SUCCESS;
            }

            $result = $this->importData($tableName, $totalRecords);
        } catch (\Exception $e) {
            $this->error("Critical error importing table {$tableName}: " . $e->getMessage());

            // The failure may have happened while triggers/constraints were off.
            // A second failure here must not hide the original error.
            try {
                $this->enablePostgresConstraints();
            } catch (\Exception $cleanupError) {
                $this->warn("Could not re-enable constraints: " . $cleanupError->getMessage());
            }

            return CommandAlias::FAILURE;
        } finally {
            $this->restorePostgresSettings();
        }

        $this->displayDetailedDuration($startTime, $totalRecords);

        return $result;
    }

    /**
     * Chooses the import strategy from the number of rows to process.
     *
     * @return int Exit code of the selected strategy.
     */
    protected function importData(string $tableName, int $totalRecords): int
    {
        if ($totalRecords >= self::RECORDS_THRESHOLD_FOR_COPY) {
            $this->warn("Mode: COPY FROM MULTIPLE CSV FILES (Max " . self::MAX_RECORDS_PER_FILE . " per file)");

            return $this->runCopyFromCsv($tableName, $totalRecords);
        }

        $this->warn("Mode: CHUNKED UPSERT");

        return $this->runChunkedUpsert($tableName, $totalRecords);
    }

    /**
     * Streams the source table in primary-key order into CSV files and loads
     * each file with COPY.
     *
     * Pagination is keyset based: every query asks for rows whose key is
     * greater than the last key already seen.
     *
     * @throws \RuntimeException When the key of the last fetched row cannot be
     *                           read although more rows remain to be fetched.
     */
    protected function runCopyFromCsv(string $tableName, int $totalRecords): int
    {
        $this->disablePostgresConstraints();
        $context = $this->initializeCopyContext($tableName);

        $progressBar = $this->output->createProgressBar($totalRecords);
        $progressBar->start();

        $pk = strtoupper($context['pkColumn']);
        $pkLookupKey = strtolower($context['pkColumn']);
        $lastPk = null;
        $recordsFetched = 0;
        $fetchStart = microtime(true);

        while ($recordsFetched < $totalRecords) {
            $chunkSize = min(self::CHUNK_SIZE, $totalRecords - $recordsFetched);

            $query = DB::connection('hfsql')
                ->table($tableName)
                ->select($context['columns'])
                ->orderBy($pk)
                ->limit($chunkSize);

            if ($lastPk !== null) {
                $query->where($pk, '>', $lastPk);
            }

            $records = $query->get();
            if ($records->isEmpty()) {
                break;
            }

            $this->totalFetchTime += microtime(true) - $fetchStart;

            $this->processChunkForCopy($records, $context, $tableName);

            $fetchedNow = count($records);
            $progressBar->advance($fetchedNow);
            $recordsFetched += $fetchedNow;

            // The query orders by the upper-cased name while the column list
            // carries the PostgreSQL spelling, so read the key without
            // depending on the case the driver used for result keys.
            $lastRow = array_change_key_case((array) $records->last(), CASE_LOWER);
            $lastPk = $lastRow[$pkLookupKey] ?? null;

            // Without a key the next query would have no lower bound and would
            // return the first chunk again, duplicating rows.
            if ($lastPk === null && $recordsFetched < $totalRecords) {
                throw new \RuntimeException(
                    "Cannot paginate {$tableName}: key column {$context['pkColumn']} is missing or NULL in the fetched rows."
                );
            }

            if (gc_enabled()) {
                gc_collect_cycles();
            }

            $fetchStart = microtime(true);
        }

        $progressBar->finish();
        $this->line('');

        // Load whatever is left in the file that is still open.
        $this->copyAndCleanupFile($context, $tableName);
        $this->enablePostgresConstraints();
        $this->resetPostgresSequences($tableName);

        $this->info("Import by COPY completed ({$context['totalImported']} records).");

        return Command::SUCCESS;
    }

    /**
     * Hands one fetched chunk to the CSV writer in micro-batches of 1000 rows.
     *
     * @param iterable $records Rows fetched from HFSQL.
     * @param array    $context Copy state created by initializeCopyContext(); updated in place.
     */
    protected function processChunkForCopy(iterable $records, array &$context, string $tableName): void
    {
        $skipConversion = (bool) $this->option('no-convert');
        $batch = [];

        foreach ($records as $record) {
            $batch[] = $record;

            if (count($batch) >= 1000) {
                $this->writeBatchToCsv($batch, $context, $tableName, $skipConversion);
                $batch = [];
            }
        }

        if (!empty($batch)) {
            $this->writeBatchToCsv($batch, $context, $tableName, $skipConversion);
        }
    }

    /**
     * Converts each row of a batch and appends it to the current CSV file,
     * rotating to a new file once MAX_RECORDS_PER_FILE rows were written.
     *
     * NOTE(review): fputcsv doubles embedded quote characters while COPY is
     * told ESCAPE '\' — confirm that values containing quotes or backslashes
     * survive the round trip.
     *
     * @param array $batch          Rows to write.
     * @param array $context        Copy state; counters and file handle are updated in place.
     * @param bool  $skipConversion When true, values are written as fetched.
     *
     * @throws \RuntimeException When a row cannot be written to the CSV file.
     */
    protected function writeBatchToCsv(array $batch, array &$context, string $tableName, bool $skipConversion): void
    {
        foreach ($batch as $record) {
            // Rotate: load the full file (no-op when none is open) and open the next one.
            if ($context['fileHandle'] === null || $context['recordsInFile'] >= self::MAX_RECORDS_PER_FILE) {
                $this->copyAndCleanupFile($context, $tableName);
                $this->openNewCsvFile($context, $tableName);
            }

            // Measurement: conversion
            $startConvert = microtime(true);
            if ($skipConversion) {
                $dataArray = array_values((array) $record);
            } else {
                $dataArray = $this->converter->convertToPostgres(
                    $tableName,
                    (array) $record,
                    $context['columns']
                );
            }
            $this->totalConvertTime += (microtime(true) - $startConvert);

            // Measurement: CSV writing
            $startCsvWrite = microtime(true);
            $written = fputcsv($context['fileHandle'], $dataArray, "\t", '"', "\\");
            $this->totalCsvWriteTime += (microtime(true) - $startCsvWrite);

            // A failed write (e.g. disk full) would otherwise drop the row
            // while it is still counted as imported.
            if ($written === false) {
                throw new \RuntimeException("Failed to write a row to CSV file {$context['filePath']}.");
            }

            $context['recordsInFile']++;
            $context['totalImported']++;
        }
    }

    /**
     * Closes the current CSV file, loads it with COPY, deletes it and resets
     * the file-related part of the context. Does nothing when no file is open.
     *
     * When COPY fails the exception propagates and the file stays on disk.
     */
    protected function copyAndCleanupFile(array &$context, string $tableName): void
    {
        if (!$context['fileHandle'] || !$context['filePath']) {
            return;
        }

        fclose($context['fileHandle']);
        // Never keep a closed handle around, even if COPY throws below.
        $context['fileHandle'] = null;

        // Measurement: COPY
        $startCopy = microtime(true);

        // Forward slashes are accepted in the COPY path; doubled single quotes
        // keep the path a valid SQL string literal.
        $pgPath = str_replace("'", "''", str_replace('\\', '/', $context['filePath']));
        $quotedTable = $this->quoteIdentifier($tableName);

        $sql = "COPY {$quotedTable} ({$context['columnList']})
            FROM '{$pgPath}'
            WITH (FORMAT csv, DELIMITER E'\t', ENCODING 'UTF-8', QUOTE '\"', ESCAPE '\\')";
        DB::statement($sql);

        $this->totalCopyTime += microtime(true) - $startCopy;

        unlink($context['filePath']);

        $context['filePath'] = null;
        $context['recordsInFile'] = 0;
    }

    /**
     * Creates the next CSV file for the table and stores its path and handle
     * in the context.
     *
     * @throws \RuntimeException When the file cannot be opened for writing.
     */
    protected function openNewCsvFile(array &$context, string $tableName): void
    {
        $csvFileName = sprintf(
            '%s_part_%d_%d.csv',
            $tableName,
            $context['fileCounter'],
            time()
        );
        $filePath = $this->baseTempDir . DIRECTORY_SEPARATOR . $csvFileName;

        $handle = fopen($filePath, 'w');
        if ($handle === false) {
            throw new \RuntimeException("Unable to open CSV file for writing: {$filePath}");
        }

        $context['filePath'] = $filePath;
        $context['fileHandle'] = $handle;
        $context['fileCounter']++;
    }

    /**
     * Imports a small table through upserts keyed on the primary-key column.
     *
     * Each fetched chunk is split so that one statement never binds more than
     * MAX_BIND_PARAMETERS values (rows x columns).
     */
    protected function runChunkedUpsert(string $tableName, int $totalRecords): int
    {
        $this->disablePostgresConstraints();

        $pkColumn = $this->getPkColumn($tableName);
        $allColumns = $this->getTableColumns($tableName);
        $updateColumns = array_values(array_diff($allColumns, [$pkColumn]));
        $rowsPerStatement = max(1, intdiv(self::MAX_BIND_PARAMETERS, max(1, count($allColumns))));
        $totalImported = 0;

        DB::connection('hfsql')
            ->table($tableName)
            ->orderBy(strtoupper($pkColumn))
            ->limit($totalRecords)
            ->chunk(self::CHUNK_SIZE, function ($records) use (
                $tableName,
                &$totalImported,
                $pkColumn,
                $updateColumns,
                $allColumns,
                $rowsPerStatement
            ) {
                $recordsToInsert = $this->prepareRecordsForUpsert($records, $tableName, $allColumns);

                foreach (array_chunk($recordsToInsert, $rowsPerStatement) as $statementRows) {
                    DB::table($tableName)->upsert(
                        $statementRows,
                        [$pkColumn],
                        $updateColumns
                    );
                }

                $totalImported += count($recordsToInsert);
            });

        $this->enablePostgresConstraints();
        $this->resetPostgresSequences($tableName);

        $this->info("Import by UPSERT completed ({$totalImported} records).");

        return Command::SUCCESS;
    }

    /**
     * Converts fetched rows into column => value arrays for upsert().
     *
     * @param array $columns Target column names, in the order the converter emits values.
     *
     * @return array List of associative rows.
     */
    protected function prepareRecordsForUpsert(iterable $records, string $tableName, array $columns): array
    {
        $result = [];

        foreach ($records as $record) {
            $dataArray = $this->converter->convertToPostgres($tableName, (array) $record, $columns);
            $result[] = array_combine($columns, $dataArray);
        }

        return $result;
    }

    /**
     * Returns the table's column names in ordinal order, keeping their
     * original case.
     *
     * NOTE(review): the lookup is not restricted to a schema; a table of the
     * same name in another schema would contribute its columns too — confirm.
     *
     * @return array List of column names.
     */
    protected function getTableColumns(string $tableName): array
    {
        $query = "
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = ?
            ORDER BY ordinal_position
        ";
        $columns = DB::select($query, [$tableName]);

        return array_map(fn ($col) => $col->column_name, $columns);
    }

    /**
     * Builds the mutable state shared by the COPY helpers: key column, column
     * names, quoted column list, current file and counters.
     *
     * @throws \RuntimeException When the target table exposes no columns.
     */
    protected function initializeCopyContext(string $tableName): array
    {
        $pkColumn = $this->getPkColumn($tableName);
        $columns = $this->getTableColumns($tableName);

        if ($columns === []) {
            // An empty list would produce "COPY t ()" which PostgreSQL rejects.
            throw new \RuntimeException("No columns found for table {$tableName} in the target database.");
        }

        $columnList = implode(', ', array_map(fn ($c) => $this->quoteIdentifier($c), $columns));

        return [
            'pkColumn' => $pkColumn,
            'columns' => $columns,
            'columnList' => $columnList,
            'fileHandle' => null,
            'filePath' => null,
            'fileCounter' => 1,
            'recordsInFile' => 0,
            'totalImported' => 0,
        ];
    }

    /**
     * Counts the source rows and, in the "local" environment, caps the result
     * at MAX_RECORDS_FOR_LOCAL_TEST.
     *
     * @return int Number of rows to import (0 when the table is empty).
     */
    protected function calculateRecordsToProcess(string $tableName): int
    {
        $realTotal = DB::connection('hfsql')->table($tableName)->count();
        if ($realTotal === 0) {
            return 0;
        }

        if (App::environment('local') && $realTotal > self::MAX_RECORDS_FOR_LOCAL_TEST) {
            $this->warn(
                "Testing mode: Processing " . self::MAX_RECORDS_FOR_LOCAL_TEST . " of {$realTotal} records."
            );

            return self::MAX_RECORDS_FOR_LOCAL_TEST;
        }

        return $realTotal;
    }

    /**
     * Creates the temporary CSV directory (recursively) when it is missing.
     */
    protected function ensureTempDirectory(): void
    {
        if (!File::isDirectory($this->baseTempDir)) {
            File::makeDirectory($this->baseTempDir, 0777, true);
        }
    }

    /**
     * Prints a table with the time spent per phase, each phase's share of the
     * total run time, and the overall throughput.
     *
     * @param float $startTime    microtime(true) value taken when the command started.
     * @param int   $totalRecords Number of rows the run was asked to process.
     */
    protected function displayDetailedDuration(float $startTime, int $totalRecords): void
    {
        $totalDuration = microtime(true) - $startTime;

        // Guard the divisions against a zero-length run.
        $share = static fn (float $seconds): float => $totalDuration > 0
            ? round(($seconds / $totalDuration) * 100, 2)
            : 0.0;
        $recordsPerSecond = $totalDuration > 0 ? round($totalRecords / $totalDuration) : 0.0;

        $this->info("\n--- IMPORT METRICS (Total: {$totalRecords} records) ---");

        $otherTime = $totalDuration
            - ($this->totalFetchTime + $this->totalConvertTime + $this->totalCsvWriteTime + $this->totalCopyTime);

        $metrics = [
            'HFSQL Data Fetch Time' => [$this->totalFetchTime, $share($this->totalFetchTime)],
            'Conversion Time (to Postg. Data)' => [$this->totalConvertTime, $share($this->totalConvertTime)],
            'CSV Write Time (fputcsv)' => [$this->totalCsvWriteTime, $share($this->totalCsvWriteTime)],
            'COPY Execution Time (Postgres)' => [$this->totalCopyTime, $share($this->totalCopyTime)],
            'Other (Overhead, I/O, Init, etc.)' => [$otherTime, $share($otherTime)],
        ];

        $this->table(
            ['Phase', 'Time (s)', 'Percentage (%)'],
            array_map(fn ($name, $data) => [
                $name,
                number_format($data[0], 4),
                number_format($data[1], 2),
            ], array_keys($metrics), $metrics)
        );

        $this->info("\nProcess finished in **" . number_format($totalDuration, 3) . "s** ({$recordsPerSecond} records/second).");
    }

    /**
     * Applies session-level settings intended to speed up the bulk load.
     *
     * Every setting is applied on its own so that one rejected setting does
     * not prevent the remaining ones from being applied.
     */
    protected function optimizePostgresForBulkImport(): void
    {
        $statements = [
            "SET maintenance_work_mem = '512MB'",
            "SET work_mem = '256MB'",
            "SET synchronous_commit = OFF",
            "SET commit_delay = 100000",
            "SET temp_buffers = '256MB'",
        ];

        $failures = [];
        foreach ($statements as $statement) {
            try {
                DB::statement($statement);
            } catch (\Exception $e) {
                $failures[] = $e->getMessage();
            }
        }

        if ($failures === []) {
            $this->info("✓ PostgreSQL optimized for bulk import.");

            return;
        }

        $this->warn("Could not set all optimization parameters: " . implode('; ', $failures));
    }

    /**
     * Resets the settings changed by optimizePostgresForBulkImport() to their
     * defaults. Best effort: failures are ignored, per setting.
     */
    protected function restorePostgresSettings(): void
    {
        $statements = [
            // DEFAULT rather than ON, so a server configured differently is respected.
            "SET synchronous_commit = DEFAULT",
            "SET maintenance_work_mem = DEFAULT",
            "SET work_mem = DEFAULT",
            "SET commit_delay = DEFAULT",
            "SET temp_buffers = DEFAULT",
        ];

        foreach ($statements as $statement) {
            try {
                DB::statement($statement);
            } catch (\Exception $e) {
                // Deliberately silent: restoring is best effort.
            }
        }
    }

    /**
     * Returns the column used as primary key for ordering and upserts: the
     * first sequence-backed column, or "id" followed by the table name (case
     * kept) when the table has none.
     */
    protected function getPkColumn(string $tableName): string
    {
        $serialColumns = $this->getSerialColumns($tableName);

        return empty($serialColumns)
            ? 'id' . $tableName
            : $serialColumns[0]['column'];
    }

    /**
     * Switches the session to replication role "replica".
     */
    protected function disablePostgresConstraints(): void
    {
        DB::statement("SET session_replication_role = 'replica'");
    }

    /**
     * Switches the session back to replication role "origin".
     */
    protected function enablePostgresConstraints(): void
    {
        DB::statement("SET session_replication_role = 'origin'");
    }

    /**
     * Sets every sequence owned by a column of the table to that column's
     * current maximum (1 when the column has no values).
     */
    protected function resetPostgresSequences(string $tableName): void
    {
        $serialColumns = $this->getSerialColumns($tableName);

        if (empty($serialColumns)) {
            $this->warn("No serial columns found for {$tableName}. Sequence reset skipped.");

            return;
        }

        $quotedTable = $this->quoteIdentifier($tableName);

        foreach ($serialColumns as $columnInfo) {
            // Double quotes preserve the identifier case.
            $quotedColumn = $this->quoteIdentifier($columnInfo['column']);

            DB::statement(
                "SELECT setval(:seq, COALESCE((SELECT MAX({$quotedColumn}) FROM {$quotedTable}), 1))",
                ['seq' => $columnInfo['sequence']]
            );
        }
    }

    /**
     * Lists the table's columns that own a sequence.
     *
     * @return array List (0-indexed) of ['column' => string, 'sequence' => string].
     */
    protected function getSerialColumns(string $tableName): array
    {
        $sql = "
            SELECT
                a.attname AS column_name,
                pg_get_serial_sequence(:quoted_table, a.attname) AS sequence_name
            FROM pg_attribute a
            JOIN pg_class c ON c.oid = a.attrelid
            WHERE c.relname = :table_name
                AND a.attnum > 0
                AND NOT a.attisdropped
                AND (
                    pg_get_serial_sequence(:quoted_table, a.attname) IS NOT NULL
                    OR a.attidentity = 'a'
                )
        ";

        $results = DB::select($sql, [
            'quoted_table' => $this->quoteIdentifier($tableName),
            'table_name' => $tableName, // case is kept on purpose
        ]);

        $withSequence = array_filter($results, fn ($r) => $r->sequence_name);

        // array_filter keeps the original keys; callers read index 0, so reindex.
        return array_values(array_map(fn ($r) => [
            'column' => $r->column_name,
            'sequence' => $r->sequence_name,
        ], $withSequence));
    }

    /**
     * Wraps an identifier in double quotes, doubling embedded double quotes,
     * so its case is preserved and it cannot terminate the quoted name.
     */
    protected function quoteIdentifier(string $identifier): string
    {
        return '"' . str_replace('"', '""', $identifier) . '"';
    }
}

View File

@@ -0,0 +1,153 @@
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\File;
/**
 * Artisan command that re-runs the server-side COPY for the most recent CSV
 * file left in the temporary import directory for a table.
 */
class RetryCopyCommand extends Command
{
    protected $signature = 'db:re-copy {table : The name of the table to retry the native COPY command for.}';

    protected $description = 'Finds the latest CSV file and attempts to execute the native PostgreSQL COPY FROM (server-side) using DB::statement().';

    // Base directory, MUST BE ACCESSIBLE by the PostgreSQL service user.
    protected $baseTempDir = 'C:\Temp\pg_imports';

    /**
     * Locates the newest CSV file of the table, loads it with COPY, resets the
     * table's sequences and deletes the file.
     *
     * @return int Command exit code (SUCCESS or FAILURE).
     */
    public function handle()
    {
        $tableName = $this->argument('table');

        $this->info("===============================================================");
        $this->info("🔄 ATTEMPTING NATIVE DB::statement() COPY for **{$tableName}**");
        $this->info("===============================================================");

        if (!File::isDirectory($this->baseTempDir)) {
            $this->error("❌ ERROR: Base directory not found: {$this->baseTempDir}");

            return Command::FAILURE;
        }

        // 1. Find the latest CSV file
        $latestFile = $this->findLatestCsvFile($tableName);
        if (!$latestFile) {
            $this->error("❌ ERROR: No recent CSV file found for table '{$tableName}' in {$this->baseTempDir}.");

            return Command::FAILURE;
        }

        $fullPath = $this->baseTempDir . DIRECTORY_SEPARATOR . $latestFile;
        // PostgreSQL requires forward slashes or escaped backslashes in the path string.
        $pgPath = str_replace('\\', '/', $fullPath);
        $this->line("✅ Using file path: **{$pgPath}**");

        // 2. Read the column list (original case) BEFORE touching session
        //    state, so a failing lookup cannot leave constraints disabled.
        $columns = $this->getTableColumns($tableName);
        if ($columns === []) {
            // An empty list would produce "COPY t ()" which PostgreSQL rejects.
            $this->error("❌ ERROR: No columns found for table '{$tableName}'.");

            return Command::FAILURE;
        }
        $columnList = implode(', ', array_map(fn ($c) => $this->quoteIdentifier($c), $columns));

        $this->disablePostgresConstraints();
        $this->warn('Constraints temporarily disabled.');

        try {
            // 3. Execute COPY FROM using DB::statement(); identifiers are
            //    double-quoted to keep their case, the path is a SQL literal.
            $quotedTable = $this->quoteIdentifier($tableName);
            $pathLiteral = str_replace("'", "''", $pgPath);
            $sql = "COPY {$quotedTable} ({$columnList}) FROM '{$pathLiteral}' WITH (DELIMITER E'\t', FORMAT CSV, ENCODING 'UTF-8', QUOTE '\"', ESCAPE '\\')";

            $this->line("Executing SQL: COPY \"{$tableName}\" FROM '{$pgPath}'...");
            DB::statement($sql);

            // 4. Finalization
            $this->enablePostgresConstraints();
            $this->resetPostgresSequences($tableName);

            // Re-check the file exists before deleting, just in case
            if (file_exists($fullPath)) {
                unlink($fullPath);
                $this->warn("Temporary file deleted: {$latestFile}");
            }

            $this->info("🎉 SUCCESS! Data copied successfully using DB::statement().");

            return Command::SUCCESS;
        } catch (\Exception $e) {
            $this->error("❌ COPY failed via DB::statement(). Error: " . $e->getMessage());
            $this->error("HINT: If the error is 'Permission denied', the service user cannot read the file at {$pgPath}.");

            // A second failure here must not hide the original error.
            try {
                $this->enablePostgresConstraints();
            } catch (\Exception $cleanupError) {
                $this->warn("Could not re-enable constraints: " . $cleanupError->getMessage());
            }

            return Command::FAILURE;
        }
    }

    /**
     * Returns the table's column names in ordinal order, keeping their
     * original case.
     *
     * @return array List of column names.
     */
    protected function getTableColumns(string $tableName): array
    {
        $query = "
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = ?
            ORDER BY ordinal_position
        ";
        $columns = DB::select($query, [$tableName]);

        return array_map(fn ($col) => $col->column_name, $columns);
    }

    /**
     * Finds the most recently modified CSV file that belongs to the table.
     *
     * Two naming schemes are accepted: "<table>_import_temp_<n>.csv" and
     * "<table>_part_<n>_<timestamp>.csv" (the names written by the
     * db:import-hfsql command).
     *
     * @return string|null File name (without directory) or null when none matches.
     */
    protected function findLatestCsvFile(string $tableName): ?string
    {
        $latestFile = null;
        $latestTime = 0;

        // preg_quote() neutralises regex metacharacters in the table name.
        $pattern = '/^' . preg_quote($tableName, '/') . '_(?:import_temp_\d+|part_\d+_\d+)\.csv$/';

        foreach (File::files($this->baseTempDir) as $file) {
            $fileName = $file->getFilename();
            if (preg_match($pattern, $fileName) !== 1) {
                continue;
            }

            $fileTime = $file->getMTime();
            if ($fileTime > $latestTime) {
                $latestTime = $fileTime;
                $latestFile = $fileName;
            }
        }

        return $latestFile;
    }

    // Auxiliary methods

    /**
     * Returns the first sequence-backed column of the table, or "id" followed
     * by the table name when the table has none.
     */
    protected function getPkColumn(string $tableName): string
    {
        $serialColumns = $this->getSerialColumns($tableName);

        return empty($serialColumns)
            ? 'id' . $tableName
            : $serialColumns[0]['column'];
    }

    /**
     * Switches the session to replication role "replica".
     */
    protected function disablePostgresConstraints(): void
    {
        DB::statement("SET session_replication_role = 'replica'");
    }

    /**
     * Switches the session back to replication role "origin".
     */
    protected function enablePostgresConstraints(): void
    {
        DB::statement("SET session_replication_role = 'origin'");
    }

    /**
     * Sets every sequence owned by a column of the table to that column's
     * current maximum (1 when the column has no values).
     */
    protected function resetPostgresSequences(string $tableName): void
    {
        $serialColumns = $this->getSerialColumns($tableName);

        if (empty($serialColumns)) {
            $this->warn("No serial columns found for {$tableName}. Sequence reset skipped.");

            return;
        }

        $quotedTable = $this->quoteIdentifier($tableName);

        foreach ($serialColumns as $columnInfo) {
            $quotedColumn = $this->quoteIdentifier($columnInfo['column']);

            DB::statement(
                "SELECT setval(:seq, COALESCE((SELECT MAX({$quotedColumn}) FROM {$quotedTable}), 1))",
                ['seq' => $columnInfo['sequence']]
            );
        }
    }

    /**
     * Lists the table's columns that own a sequence.
     *
     * @return array List (0-indexed) of ['column' => string, 'sequence' => string].
     */
    protected function getSerialColumns(string $tableName): array
    {
        $sql = "
            SELECT
                a.attname AS column_name,
                pg_get_serial_sequence(:quoted_table, a.attname) AS sequence_name
            FROM pg_attribute a
            JOIN pg_class c ON c.oid = a.attrelid
            WHERE c.relname = :table_name
                AND a.attnum > 0
                AND NOT a.attisdropped
                AND (
                    pg_get_serial_sequence(:quoted_table, a.attname) IS NOT NULL
                    OR a.attidentity = 'a'
                )
        ";

        $results = DB::select($sql, [
            'quoted_table' => $this->quoteIdentifier($tableName),
            'table_name' => $tableName,
        ]);

        $withSequence = array_filter($results, fn ($r) => $r->sequence_name);

        // array_filter keeps the original keys; callers read index 0, so reindex.
        return array_values(array_map(fn ($r) => [
            'column' => $r->column_name,
            'sequence' => $r->sequence_name,
        ], $withSequence));
    }

    /**
     * Wraps an identifier in double quotes, doubling embedded double quotes,
     * so its case is preserved and it cannot terminate the quoted name.
     */
    protected function quoteIdentifier(string $identifier): string
    {
        return '"' . str_replace('"', '""', $identifier) . '"';
    }
}

View File

@@ -0,0 +1,8 @@
<?php
namespace App\Http\Controllers;
/**
 * Abstract base class in the App\Http\Controllers namespace.
 *
 * It declares no members of its own.
 */
abstract class Controller
{
//
}

48
app/Models/User.php Normal file
View File

@@ -0,0 +1,48 @@
<?php
namespace App\Models;
// use Illuminate\Contracts\Auth\MustVerifyEmail;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Foundation\Auth\User as Authenticatable;
use Illuminate\Notifications\Notifiable;
/**
 * Application user model built on the framework's authenticatable base class.
 */
class User extends Authenticatable
{
    /** @use HasFactory<\Database\Factories\UserFactory> */
    use HasFactory;
    use Notifiable;

    /**
     * Attributes that may be filled through mass assignment.
     *
     * @var list<string>
     */
    protected $fillable = ['name', 'email', 'password'];

    /**
     * Attributes left out when the model is serialized.
     *
     * @var list<string>
     */
    protected $hidden = ['password', 'remember_token'];

    /**
     * Attribute => cast type map applied by the model.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        $casts = [];
        $casts['email_verified_at'] = 'datetime';
        $casts['password'] = 'hashed';

        return $casts;
    }
}

View File

@@ -0,0 +1,24 @@
<?php
namespace App\Providers;
use Illuminate\Support\ServiceProvider;
/**
 * Application service provider.
 *
 * Both hooks are currently empty: nothing is registered or bootstrapped here.
 */
class AppServiceProvider extends ServiceProvider
{
/**
 * Register any application services.
 *
 * Intentionally empty at the moment.
 */
public function register(): void
{
//
}
/**
 * Bootstrap any application services.
 *
 * Intentionally empty at the moment.
 */
public function boot(): void
{
//
}
}

View File

@@ -0,0 +1,199 @@
<?php
namespace App\Services;
use DateTimeImmutable;
use Illuminate\Support\Facades\DB; // <-- Added DB facade for schema introspection
/**
 * Converts rows read from HFSQL into values PostgreSQL accepts, using the
 * target table's column types as the conversion guide.
 */
class HfsqlDataConverter
{
    /**
     * Cache of PostgreSQL column types.
     *
     * @var array<string, array<string, string>> tableName => [lower-cased column => lower-cased data type]
     */
    protected $pgSchemaCache = [];

    /**
     * Converts a single HFSQL record into a numerically indexed array ready
     * for fputcsv, in the requested column order.
     *
     * NULL and empty-string inputs both become '' — an unquoted empty CSV
     * field, which COPY in CSV format reads as NULL.
     *
     * @param string $tableName     The target PostgreSQL table name.
     * @param array  $hfsqlRecord   The associative HFSQL record (keys in any case).
     * @param array  $pgColumnOrder Column names in the order required for COPY.
     *
     * @return array The numerically indexed list of converted values.
     */
    public function convertToPostgres(string $tableName, array $hfsqlRecord, array $pgColumnOrder): array
    {
        $pgSchemaTypes = $this->getPostgresSchema($tableName);

        // Lower-cased keys make the lookup independent of the source key case.
        $hfsqlRecord = array_change_key_case($hfsqlRecord, CASE_LOWER);

        $postgresRecordValues = [];

        foreach ($pgColumnOrder as $pgColumn) {
            $pgColumnLower = strtolower($pgColumn);

            // Falls back to 'text' when the column is unknown to the schema map.
            $pgType = $pgSchemaTypes[$pgColumnLower] ?? 'text';
            $value = $hfsqlRecord[$pgColumnLower] ?? null;

            // Stage 1: sanitization
            if (is_string($value)) {
                $value = $this->sanitizeEncoding(trim($value));
            }

            // Stage 2: NULL / empty handling, stage 3: type-specific conversion
            $postgresRecordValues[] = ($value === null || $value === '')
                ? ''
                : $this->convertValue($value, $pgType);
        }

        return $postgresRecordValues;
    }

    /**
     * Fetches the column types of the target table from information_schema
     * and caches them per table name (looked up with its exact case).
     *
     * @return array [columnName => dataType], both lower-cased.
     */
    protected function getPostgresSchema(string $tableName): array
    {
        if (isset($this->pgSchemaCache[$tableName])) {
            return $this->pgSchemaCache[$tableName];
        }

        $results = DB::select("
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = ?
            ORDER BY ordinal_position",
            [$tableName]
        );

        $schemaMap = [];
        foreach ($results as $column) {
            $schemaMap[strtolower($column->column_name)] = strtolower($column->data_type);
        }

        return $this->pgSchemaCache[$tableName] = $schemaMap;
    }

    /**
     * Converts a non-null, non-empty value according to the destination type.
     *
     * Only the first word of the type is considered, so
     * 'timestamp without time zone' is handled as 'timestamp' and
     * 'double precision' as 'double'. Unlisted types are cast to string.
     *
     * @param mixed  $value  The source value.
     * @param string $pgType Data type string as reported by information_schema.
     *
     * @return string The converted value; '' when it cannot be converted.
     */
    protected function convertValue($value, string $pgType): string
    {
        $type = explode(' ', $pgType)[0];

        return match ($type) {
            'timestamp' => $this->convertDateTime($value, 'Y-m-d H:i:s'),
            'date' => $this->convertDateTime($value, 'Y-m-d'),
            'numeric', 'double', 'real', 'money' => $this->convertDecimal($value),
            'integer', 'smallint', 'bigint' => $this->convertInteger($value),
            'boolean' => $this->convertBoolean($value),
            default => (string) $value,
        };
    }

    /**
     * Normalizes a decimal value to dot-decimal notation without thousands
     * separators, e.g. "1,234.56" and "1.234,56" both become "1234.56".
     *
     * The digits are returned as text (no float round-trip), so values with
     * more digits than a float can hold keep their precision.
     *
     * NOTE(review): a value containing only commas ("12,50") still has the
     * comma removed, as before — confirm whether the source uses decimal commas.
     *
     * @return string The normalized number, or '' when it is not numeric.
     */
    protected function convertDecimal($value): string
    {
        if (is_int($value)) {
            return (string) $value;
        }
        if (is_float($value)) {
            // Stripping characters from "1.0E+25" would corrupt it; cast directly.
            return is_finite($value) ? (string) $value : '';
        }

        $text = (string) $value;
        $lastDot = strrpos($text, '.');
        $lastComma = strrpos($text, ',');

        // When both separators occur and the comma comes last, the comma is
        // the decimal mark and the dots group thousands.
        if ($lastDot !== false && $lastComma !== false && $lastComma > $lastDot) {
            $text = str_replace(['.', ','], ['', '.'], $text);
        }

        // Remove everything that is not a digit, a decimal point or a minus sign.
        $clean = preg_replace('/[^0-9\.\-]/', '', $text);

        return is_numeric($clean) ? $clean : '';
    }

    /**
     * Converts a value to an integer string, truncating any fractional part
     * ("12.00" becomes "12").
     *
     * @return string The integer, or '' when the value is not numeric.
     */
    protected function convertInteger($value): string
    {
        if (is_int($value)) {
            return (string) $value;
        }
        if (is_float($value)) {
            return is_finite($value) ? (string) (int) $value : '';
        }

        $text = (string) $value;
        if (is_numeric($text)) {
            // Must run before the strip below, which would delete the
            // decimal point and turn "12.00" into "1200".
            return (string) (int) $text;
        }

        // Fallback for formatted input such as "1,234" or "1 234".
        $clean = preg_replace('/[^0-9\-\+]/', '', $text);

        return is_numeric($clean) ? (string) (int) $clean : '';
    }

    /**
     * Maps a truthy token to 't' and everything else to 'f'.
     *
     * Accepted truthy tokens (case-insensitive): true, t, y, yes, on, oui and
     * any numeric string equal to 1.
     */
    protected function convertBoolean($value): string
    {
        $token = strtolower((string) $value);

        if (in_array($token, ['true', 't', 'y', 'yes', 'on', 'oui'], true)) {
            return 't';
        }

        // Matches "1" as well as "1.0" or "01".
        return (is_numeric($token) && (float) $token == 1.0) ? 't' : 'f';
    }

    /**
     * Converts a date/time input into the given output format.
     *
     * Recognized inputs, in order:
     *  - DateTimeInterface instances;
     *  - compact digit strings YYYYMMDD, YYYYMMDDHHMMSS and
     *    YYYYMMDDHHMMSSCCC (trailing three digits are ignored);
     *  - positive numeric values, taken as Unix timestamps;
     *  - any other string the DateTimeImmutable parser understands.
     *
     * @return string The formatted value, or '' (NULL for COPY) when the
     *                input is not a valid date/time.
     */
    protected function convertDateTime($value, string $format): string
    {
        try {
            if ($value instanceof \DateTimeInterface) {
                return $value->format($format);
            }

            // Compact digit strings are numeric, so they must be recognized
            // before the Unix-timestamp branch or "20240115" ends up in 1970.
            $compact = '/^(\d{4})(\d{2})(\d{2})(?:(\d{2})(\d{2})(\d{2})(\d{3})?)?$/';
            if (is_string($value) && preg_match($compact, $value, $parts) === 1) {
                $year = (int) $parts[1];
                $month = (int) $parts[2];
                $day = (int) $parts[3];
                $hour = (int) ($parts[4] ?? 0);
                $minute = (int) ($parts[5] ?? 0);
                $second = (int) ($parts[6] ?? 0);

                // Rejects placeholders such as "00000000" and overflowing parts.
                if (!checkdate($month, $day, $year) || $hour > 23 || $minute > 59 || $second > 59) {
                    return '';
                }

                return (new DateTimeImmutable())
                    ->setDate($year, $month, $day)
                    ->setTime($hour, $minute, $second)
                    ->format($format);
            }

            if (is_numeric($value) && $value > 0) {
                return (new DateTimeImmutable())->setTimestamp((int) $value)->format($format);
            }

            return (new DateTimeImmutable((string) $value))->format($format);
        } catch (\Exception $e) {
            // Unparseable input becomes NULL in COPY.
            return '';
        }
    }

    /**
     * Returns the string as UTF-8: valid UTF-8 is passed through, anything
     * else is converted from Windows-1252 (assumed source encoding).
     *
     * Falls back to the unchanged input when the conversion fails.
     */
    protected function sanitizeEncoding(string $value): string
    {
        if (mb_check_encoding($value, 'UTF-8')) {
            return $value;
        }

        // @ keeps iconv's notice about dropped bytes out of the output;
        // //IGNORE discards bytes that have no UTF-8 equivalent.
        $cleaned = @iconv('Windows-1252', 'UTF-8//IGNORE', $value);

        return ($cleaned !== false) ? $cleaned : $value;
    }
}