Files
postgresql-migration-laravel/app/Services/HfsqlDataConverter.php
2026-01-07 17:48:40 +01:00

200 lines
7.8 KiB
PHP

<?php
namespace App\Services;
use DateTimeImmutable;
use Illuminate\Support\Facades\DB; // <-- Added DB facade for schema introspection
/**
 * Converts HFSQL records into value lists that can be written with fputcsv()
 * and loaded into PostgreSQL with COPY.
 *
 * Every value is converted according to the data type that PostgreSQL itself
 * reports for the destination column. Values that cannot be interpreted for
 * their destination type are emitted as an empty string, which this class
 * treats as NULL throughout (an unquoted empty CSV field).
 */
class HfsqlDataConverter
{
    /**
     * Per-table cache of PostgreSQL column types.
     *
     * @var array<string, array<string, string>> Table name => [lowercased column name => lowercased data type].
     */
    protected $pgSchemaCache = [];

    /**
     * Converts one HFSQL record into a numerically indexed array ready for fputcsv().
     *
     * Values are looked up by column name (case-insensitively), so the order of
     * keys in $hfsqlRecord is irrelevant; the output follows $pgColumnOrder.
     *
     * @param string $tableName     The target PostgreSQL table name.
     * @param array  $hfsqlRecord   The associative HFSQL record (column name => value).
     * @param array  $pgColumnOrder Column names in the order expected by COPY.
     * @return array The converted values, one string per entry of $pgColumnOrder.
     */
    public function convertToPostgres(string $tableName, array $hfsqlRecord, array $pgColumnOrder): array
    {
        $pgSchemaTypes = $this->getPostgresSchema($tableName);

        // Lowercase the record keys once so that every lookup below is case-insensitive.
        $hfsqlRecord = array_change_key_case($hfsqlRecord, CASE_LOWER);

        $postgresRecordValues = [];

        foreach ($pgColumnOrder as $pgColumn) {
            $pgColumnLower = strtolower($pgColumn);

            // Columns missing from the schema map are handled as plain text.
            $pgType = $pgSchemaTypes[$pgColumnLower] ?? 'text';
            $value = $hfsqlRecord[$pgColumnLower] ?? null;

            // Stage 1: sanitisation (strings only).
            if (is_string($value)) {
                $value = $this->sanitizeEncoding(trim($value));
            }

            // Stage 2: NULL and empty strings are both emitted as an empty field.
            // Stage 3: everything else is converted for its destination type.
            $postgresRecordValues[] = ($value === null || $value === '')
                ? ''
                : $this->convertValue($value, $pgType);
        }

        return $postgresRecordValues;
    }

    /**
     * Fetches the column types of a table from information_schema and caches them.
     *
     * NOTE(review): the lookup is not restricted by table_schema, so same-named
     * tables living in different schemas would be merged into a single map.
     * Confirm that only one schema holds the target tables.
     *
     * @param string $tableName The table name, matched exactly as given (case preserved).
     * @return array<string, string> Map of lowercased column name => lowercased data type.
     */
    protected function getPostgresSchema(string $tableName): array
    {
        if (isset($this->pgSchemaCache[$tableName])) {
            return $this->pgSchemaCache[$tableName];
        }

        $results = DB::select(
            "\n"
            . "SELECT column_name, data_type\n"
            . "FROM information_schema.columns\n"
            . "WHERE table_name = ?\n"
            . 'ORDER BY ordinal_position',
            [$tableName]
        );

        $schemaMap = [];
        foreach ($results as $column) {
            // Both the key and the type are lowercased so later comparisons are case-insensitive.
            $schemaMap[strtolower($column->column_name)] = strtolower($column->data_type);
        }

        return $this->pgSchemaCache[$tableName] = $schemaMap;
    }

    /**
     * Converts a non-null, non-empty value for the given destination type.
     *
     * Only the first word of the type is used for dispatching, so
     * 'timestamp without time zone' is handled as 'timestamp' and
     * 'double precision' as 'double'. Unlisted types are returned as text.
     *
     * @param mixed  $value  The value to convert (never null or '').
     * @param string $pgType The lowercased data type from information_schema.
     * @return string The converted value, or '' when it cannot be represented.
     */
    protected function convertValue($value, string $pgType): string
    {
        $type = explode(' ', $pgType)[0];

        switch ($type) {
            case 'timestamp':
                return $this->convertDateTime($value, 'Y-m-d H:i:s');

            case 'date':
                return $this->convertDateTime($value, 'Y-m-d');

            case 'numeric':
            case 'double':
            case 'real':
            case 'money':
                return $this->normalizeDecimal($value);

            case 'integer':
            case 'smallint':
            case 'bigint':
                return $this->normalizeInteger($value);

            case 'boolean':
                return $this->normalizeBoolean($value);

            case 'text':
            case 'character':
            case 'varchar':
            default:
                return (string) $value;
        }
    }

    /**
     * Normalises a decimal number to a dot-decimal literal without grouping.
     *
     * The digits are kept as a string: a float round-trip would drop digits that
     * a PHP float cannot hold and may switch to exponent notation.
     *
     * Separator rules for human-formatted input:
     * - both '.' and ',' present: the right-most one is the decimal mark
     *   ("1.234,56" and "1,234.56" both become "1234.56");
     * - commas only: a well-formed grouping such as "1,234" or "1,234,567" has
     *   its commas removed; otherwise a single comma is the decimal mark ("12,5");
     * - dots only: a single dot is the decimal mark; two or more well-formed
     *   groups ("1.234.567") are treated as grouping.
     *
     * @param mixed $value The raw value.
     * @return string The normalised literal, or '' when no number can be recognised.
     */
    private function normalizeDecimal($value): string
    {
        $text = trim((string) $value);

        // Machine-formatted input (plain or scientific notation) needs no guessing.
        if (is_numeric($text)) {
            return $text;
        }

        // Drop currency symbols, spaces and any other decoration.
        $text = (string) preg_replace('/[^0-9.,\-]/', '', $text);

        $hasDot = strpos($text, '.') !== false;
        $hasComma = strpos($text, ',') !== false;

        if ($hasDot && $hasComma) {
            if (strrpos($text, ',') > strrpos($text, '.')) {
                // Dots are grouping, comma is the decimal mark. str_replace works
                // through the pairs in order, so the new dot is not removed again.
                $text = str_replace(['.', ','], ['', '.'], $text);
            } else {
                $text = str_replace(',', '', $text);
            }
        } elseif ($hasComma) {
            $isGrouping = preg_match('/^-?[1-9]\d{0,2}(?:,\d{3})+$/', $text) === 1;
            $text = (!$isGrouping && substr_count($text, ',') === 1)
                ? str_replace(',', '.', $text)
                : str_replace(',', '', $text);
        } elseif (preg_match('/^-?[1-9]\d{0,2}(?:\.\d{3}){2,}$/', $text) === 1) {
            $text = str_replace('.', '', $text);
        }

        return preg_match('/^-?(?:\d+(?:\.\d*)?|\.\d+)\z/', $text) === 1 ? $text : '';
    }

    /**
     * Normalises a value for an integer column.
     *
     * The value is first interpreted as a decimal number, so "12.00" yields "12"
     * rather than "1200". Values with a fractional part are rounded with round()
     * (half away from zero). Plain digit strings never pass through a PHP int or
     * float, so large bigint values are not clamped.
     *
     * @param mixed $value The raw value.
     * @return string The integer literal, or '' when the value is not a usable number.
     */
    private function normalizeInteger($value): string
    {
        $decimal = $this->normalizeDecimal($value);
        if ($decimal === '') {
            return '';
        }

        if (preg_match('/^([+-]?)0*(\d+)\z/', $decimal, $parts) === 1) {
            // Strip a redundant plus sign and zero padding; avoid emitting "-0".
            $isNegative = $parts[1] === '-' && $parts[2] !== '0';

            return ($isNegative ? '-' : '') . $parts[2];
        }

        $rounded = round((float) $decimal);

        // A float outside the native int range cannot be cast reliably.
        if (!is_finite($rounded) || $rounded >= PHP_INT_MAX || $rounded <= PHP_INT_MIN) {
            return '';
        }

        return (string) (int) $rounded;
    }

    /**
     * Normalises a value for a boolean column.
     *
     * Recognised true spellings (case-insensitive): true, t, y, yes, on, 1, oui,
     * vrai, plus any numeric spelling of one such as "1.0". Everything else is
     * reported as false.
     *
     * @param mixed $value The raw value.
     * @return string 't' or 'f'.
     */
    private function normalizeBoolean($value): string
    {
        $text = strtolower(trim((string) $value));

        if (in_array($text, ['true', 't', 'y', 'yes', 'on', '1', 'oui', 'vrai'], true)) {
            return 't';
        }

        if (is_numeric($text) && (float) $text === 1.0) {
            return 't';
        }

        return 'f';
    }

    /**
     * Converts a date/time input into the given output format.
     *
     * Inputs are tried in this order:
     * 1. any DateTimeInterface instance;
     * 2. digit-only compact stamps of 8, 14 or 17 digits that form a valid
     *    calendar date (YYYYMMDD, YYYYMMDDHHMMSS, YYYYMMDDHHMMSSmmm);
     * 3. any other positive number, taken as a Unix timestamp;
     * 4. anything else, handed to the DateTimeImmutable parser.
     *
     * A digit-only value made solely of zeros is treated as "no date".
     *
     * @param mixed  $value  The raw value.
     * @param string $format A date() compatible output format.
     * @return string The formatted date, or '' when the value cannot be parsed.
     */
    protected function convertDateTime($value, string $format): string
    {
        try {
            if ($value instanceof \DateTimeInterface) {
                return $value->format($format);
            }

            $text = trim((string) $value);

            if (preg_match('/^\d+\z/', $text) === 1) {
                if (trim($text, '0') === '') {
                    return '';
                }

                // Must run before the timestamp branch: "20240115" is numeric too
                // and would otherwise be read as a point in 1970.
                $compact = $this->formatCompactStamp($text, $format);
                if ($compact !== '') {
                    return $compact;
                }
            }

            if (is_numeric($value) && $value > 0) {
                $date = (new DateTimeImmutable())->setTimestamp((int) $value);
            } else {
                $date = new DateTimeImmutable($text);
            }

            return $date->format($format);
        } catch (\Exception $e) {
            // Unparseable input becomes an empty field.
            return '';
        }
    }

    /**
     * Formats a compact, digit-only date stamp.
     *
     * NOTE(review): the 14 and 17 digit layouts are assumed to be the compact
     * date-time forms of the source system; confirm against real exports.
     * Milliseconds of the 17 digit form are ignored.
     *
     * @param string $digits A string consisting of digits only.
     * @param string $format A date() compatible output format.
     * @return string The formatted date, or '' when $digits is not a valid stamp.
     */
    private function formatCompactStamp(string $digits, string $format): string
    {
        $length = strlen($digits);
        if ($length !== 8 && $length !== 14 && $length !== 17) {
            return '';
        }

        $year = (int) substr($digits, 0, 4);
        $month = (int) substr($digits, 4, 2);
        $day = (int) substr($digits, 6, 2);

        // checkdate() rejects impossible dates instead of rolling them over.
        if (!checkdate($month, $day, $year)) {
            return '';
        }

        $hour = 0;
        $minute = 0;
        $second = 0;

        if ($length > 8) {
            $hour = (int) substr($digits, 8, 2);
            $minute = (int) substr($digits, 10, 2);
            $second = (int) substr($digits, 12, 2);

            if ($hour > 23 || $minute > 59 || $second > 59) {
                return '';
            }
        }

        $stamp = sprintf('%04d-%02d-%02d %02d:%02d:%02d', $year, $month, $day, $hour, $minute, $second);

        return (new DateTimeImmutable($stamp))->format($format);
    }

    /**
     * Ensures a string is valid UTF-8, converting from Windows-1252 when it is not.
     *
     * Strings that already are valid UTF-8 are returned untouched. Anything else
     * is assumed to be Windows-1252 (the assumed source encoding of the HFSQL
     * data) and converted with mbstring, which always produces valid UTF-8.
     *
     * @param string $value The raw string.
     * @return string The string as UTF-8.
     */
    protected function sanitizeEncoding(string $value): string
    {
        if (mb_check_encoding($value, 'UTF-8')) {
            return $value;
        }

        $converted = mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        return is_string($converted) ? $converted : $value;
    }
}