200 lines
7.8 KiB
PHP
200 lines
7.8 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use DateTimeImmutable;
|
|
use Illuminate\Support\Facades\DB; // <-- Added DB facade for schema introspection
|
|
|
|
/**
 * Converts HFSQL records into value lists suitable for a PostgreSQL
 * `COPY ... CSV` import written with fputcsv().
 *
 * Every value is sanitized (trimmed, NUL bytes removed, re-encoded to UTF-8)
 * and then converted according to the data type that PostgreSQL reports for
 * the destination column in information_schema.columns.
 */
class HfsqlDataConverter
{
    /**
     * Per-table cache of PostgreSQL column types.
     *
     * @var array<string, array<string, string>> tableName => [lowercase column name => lowercase data type]
     */
    protected $pgSchemaCache = [];

    /**
     * Converts a single HFSQL record into a numerically indexed array ready
     * for fputcsv(), in the column order expected by COPY.
     *
     * NULL values and empty strings are both emitted as '' because an unquoted
     * empty CSV field is read as NULL by COPY. Values that cannot be converted
     * to the destination type are emitted as '' (NULL) as well.
     *
     * @param string $tableName     The target PostgreSQL table name.
     * @param array  $hfsqlRecord   The associative HFSQL record (column name => value).
     *                              Column names are matched case-insensitively.
     * @param array  $pgColumnOrder The column names in the order required by COPY.
     *
     * @return array The converted values, one string per entry of $pgColumnOrder.
     */
    public function convertToPostgres(string $tableName, array $hfsqlRecord, array $pgColumnOrder): array
    {
        // Column types of the destination table (cached after the first lookup).
        $pgSchemaTypes = $this->getPostgresSchema($tableName);

        // Lowercase the keys so the lookup below is case-insensitive.
        $hfsqlRecord = array_change_key_case($hfsqlRecord, CASE_LOWER);

        $postgresRecordValues = [];

        foreach ($pgColumnOrder as $pgColumn) {
            $pgColumnLower = strtolower($pgColumn);

            // Columns missing from the schema map are handled as plain text.
            $pgType = $pgSchemaTypes[$pgColumnLower] ?? 'text';

            // Values are read by name, never by position in the source record.
            $value = $hfsqlRecord[$pgColumnLower] ?? null;

            if (is_string($value)) {
                $value = $this->sanitizeEncoding(trim($value));
            }

            $postgresRecordValues[] = ($value === null || $value === '')
                ? ''
                : $this->convertValue($value, $pgType);
        }

        return $postgresRecordValues;
    }

    /**
     * Fetches the PostgreSQL column types of a table and caches the result.
     *
     * NOTE(review): the lookup filters on table_name only. If the same table
     * name exists in more than one schema, the columns of all of them end up
     * in one map and the last row read wins. Confirm whether a table_schema
     * filter is needed for this deployment.
     *
     * @param string $tableName The table name, matched with its exact case.
     *
     * @return array<string, string> [lowercase column name => lowercase data type];
     *                               empty when the table has no visible columns.
     */
    protected function getPostgresSchema(string $tableName): array
    {
        if (isset($this->pgSchemaCache[$tableName])) {
            return $this->pgSchemaCache[$tableName];
        }

        $results = DB::select(
            'SELECT column_name, data_type
               FROM information_schema.columns
              WHERE table_name = ?
              ORDER BY ordinal_position',
            [$tableName]
        );

        $schemaMap = [];
        foreach ($results as $column) {
            // Both sides are lowercased so later comparisons are case-insensitive.
            $schemaMap[strtolower($column->column_name)] = strtolower($column->data_type);
        }

        return $this->pgSchemaCache[$tableName] = $schemaMap;
    }

    /**
     * Converts a non-null, non-empty value according to the destination type.
     *
     * Only the first word of the type is inspected, so
     * 'timestamp without time zone' is handled as 'timestamp',
     * 'double precision' as 'double' and 'character varying' as 'character'.
     *
     * @param mixed  $value  The sanitized source value.
     * @param string $pgType The data_type string from information_schema, in lowercase.
     *
     * @return string The COPY-ready text, or '' (NULL) when the value cannot be converted.
     */
    protected function convertValue($value, string $pgType): string
    {
        $type = explode(' ', $pgType)[0];

        switch ($type) {
            case 'timestamp':
                return $this->convertDateTime($value, 'Y-m-d H:i:s');

            case 'date':
                return $this->convertDateTime($value, 'Y-m-d');

            case 'numeric':
            case 'double':
            case 'real':
            case 'money':
                return $this->toDecimalLiteral($value);

            case 'integer':
            case 'smallint':
            case 'bigint':
                return $this->toIntegerLiteral($value);

            case 'boolean':
                return $this->toBooleanLiteral($value);

            default:
                // text, character (varying) and every type without a dedicated rule.
                return (string) $value;
        }
    }

    /**
     * Normalizes a number to a plain decimal literal with '.' as decimal point.
     *
     * The digits are kept as text instead of being cast to float, so large or
     * high-precision values are not rounded. Separator rules:
     *  - '.' and ',' both present: the one that appears last is the decimal
     *    separator ("1,234.56" and "1.234,56" both give "1234.56");
     *  - only ',': a decimal comma ("12,5" gives "12.5"), unless the value
     *    looks like thousands grouping ("1,234" or "1,234,567");
     *  - several '.' in a grouping pattern ("1.234.567"): grouping.
     * Any other character (spaces, currency symbols, letters) is dropped.
     *
     * @param mixed $value The source value.
     *
     * @return string The decimal literal, or '' when no valid number remains.
     */
    private function toDecimalLiteral($value): string
    {
        if (is_int($value)) {
            return (string) $value;
        }

        if (is_float($value)) {
            // NAN and INF have no representation in the targeted column types.
            return is_finite($value) ? (string) $value : '';
        }

        $raw = trim((string) $value);

        // Scientific notation ("1e5") would be mangled by the clean-up below.
        if (is_numeric($raw) && stripos($raw, 'e') !== false) {
            $asFloat = (float) $raw;

            return is_finite($asFloat) ? (string) $asFloat : '';
        }

        $clean = preg_replace('/[^0-9.,+\-]/', '', $raw);
        if ($clean === null || $clean === '') {
            return '';
        }

        $lastDot = strrpos($clean, '.');
        $lastComma = strrpos($clean, ',');

        if ($lastDot !== false && $lastComma !== false) {
            if ($lastComma > $lastDot) {
                // European style: '.' groups thousands, ',' is the decimal separator.
                $clean = str_replace(',', '.', str_replace('.', '', $clean));
            } else {
                // English style: ',' groups thousands.
                $clean = str_replace(',', '', $clean);
            }
        } elseif ($lastComma !== false) {
            $isGrouping = preg_match('/^[+-]?[1-9]\d{0,2}(?:,\d{3})+$/', $clean) === 1;
            $clean = str_replace(',', $isGrouping ? '' : '.', $clean);
        } elseif (preg_match('/^[+-]?[1-9]\d{0,2}(?:\.\d{3}){2,}$/', $clean) === 1) {
            $clean = str_replace('.', '', $clean);
        }

        if (preg_match('/^([+-]?)(\d*)(?:\.(\d*))?$/', $clean, $parts) !== 1) {
            return '';
        }

        $integerPart = $parts[2];
        $fractionPart = $parts[3] ?? '';
        if ($integerPart === '' && $fractionPart === '') {
            return '';
        }

        $sign = $parts[1] === '-' ? '-' : '';

        return $sign
            . ($integerPart === '' ? '0' : $integerPart)
            . ($fractionPart === '' ? '' : '.' . $fractionPart);
    }

    /**
     * Normalizes a number to an integer literal without a decimal point.
     *
     * Integral values are converted on their digits, so "12.00" gives "12" and
     * values beyond PHP_INT_MAX are not clamped. Values with a non-zero
     * fractional part are rounded half away from zero.
     *
     * @param mixed $value The source value.
     *
     * @return string The integer literal, or '' when the value is not a number.
     */
    private function toIntegerLiteral($value): string
    {
        if (is_int($value)) {
            return (string) $value;
        }

        $decimal = $this->toDecimalLiteral($value);
        if ($decimal === '') {
            return '';
        }

        $isPlainDecimal = preg_match('/^(-?)0*(\d+)(?:\.(\d+))?$/', $decimal, $parts) === 1;
        if ($isPlainDecimal && trim($parts[3] ?? '', '0') === '') {
            // Integral value: keep the digits, drop leading zeros and avoid "-0".
            return ($parts[2] === '0' ? '' : $parts[1]) . $parts[2];
        }

        // Fractional value or exponent notation: round numerically.
        return number_format(round((float) $decimal), 0, '.', '');
    }

    /**
     * Maps a truthy/falsy source value to the PostgreSQL literals 't' and 'f'.
     *
     * Recognized true values (case-insensitive): true, t, y, yes, on, 1, oui,
     * vrai, and any numeric text equal to 1. Everything else is false.
     *
     * @param mixed $value The source value.
     *
     * @return string 't' or 'f'.
     */
    private function toBooleanLiteral($value): string
    {
        if (is_bool($value)) {
            return $value ? 't' : 'f';
        }

        $text = strtolower(trim((string) $value));

        if (in_array($text, ['true', 't', 'y', 'yes', 'on', '1', 'oui', 'vrai'], true)) {
            return 't';
        }

        return (is_numeric($text) && (float) $text === 1.0) ? 't' : 'f';
    }

    /**
     * Converts a date/time input into the given output format.
     *
     * Accepted inputs, checked in this order:
     *  1. any DateTimeInterface object;
     *  2. all-digit HFSQL compact values: YYYYMMDD, YYYYMMDDHHMMSS or
     *     YYYYMMDDHHMMSSCCC (milliseconds are dropped). An all-zero value is
     *     treated as "no date";
     *  3. any other positive number, read as a Unix timestamp and formatted
     *     in the default time zone;
     *  4. any string understood by the DateTimeImmutable constructor.
     *
     * An all-digit value that forms a valid calendar date is always read as a
     * compact date, never as a Unix timestamp.
     *
     * @param mixed  $value  The source value.
     * @param string $format A date() format string for the output.
     *
     * @return string The formatted date, or '' (NULL in COPY) when parsing fails.
     */
    protected function convertDateTime($value, string $format): string
    {
        try {
            if ($value instanceof \DateTimeInterface) {
                return $value->format($format);
            }

            $text = trim((string) $value);
            if ($text === '') {
                return '';
            }

            if (ctype_digit($text)) {
                if (trim($text, '0') === '') {
                    return '';
                }

                $compact = $this->parseCompactDate($text);
                if ($compact !== null) {
                    return $compact->format($format);
                }
            }

            if (is_numeric($text) && (float) $text > 0) {
                $date = (new DateTimeImmutable())->setTimestamp((int) $text);
            } else {
                $date = new DateTimeImmutable($text);
            }

            return $date->format($format);
        } catch (\Throwable $e) {
            // Also covers values that cannot be cast to string (Error, not Exception).
            return '';
        }
    }

    /**
     * Parses an all-digit YYYYMMDD, YYYYMMDDHHMMSS or YYYYMMDDHHMMSSCCC value.
     *
     * @param string $digits A string made of digits only.
     *
     * @return DateTimeImmutable|null The parsed date, or null when the length
     *                                or the calendar/clock fields are invalid.
     */
    private function parseCompactDate(string $digits)
    {
        $length = strlen($digits);
        if (!in_array($length, [8, 14, 17], true)) {
            return null;
        }

        $year = (int) substr($digits, 0, 4);
        $month = (int) substr($digits, 4, 2);
        $day = (int) substr($digits, 6, 2);
        if (!checkdate($month, $day, $year)) {
            return null;
        }

        $hour = 0;
        $minute = 0;
        $second = 0;
        if ($length >= 14) {
            $hour = (int) substr($digits, 8, 2);
            $minute = (int) substr($digits, 10, 2);
            $second = (int) substr($digits, 12, 2);
            if ($hour > 23 || $minute > 59 || $second > 59) {
                return null;
            }
        }

        $parsed = DateTimeImmutable::createFromFormat(
            '!Y-m-d H:i:s',
            sprintf('%04d-%02d-%02d %02d:%02d:%02d', $year, $month, $day, $hour, $minute, $second)
        );

        return $parsed === false ? null : $parsed;
    }

    /**
     * Returns the value as valid UTF-8 text that PostgreSQL can store.
     *
     * NUL bytes are removed because PostgreSQL text values cannot contain
     * them. Text that is already valid UTF-8 is otherwise returned unchanged;
     * anything else is assumed to be Windows-1252 and converted.
     *
     * @param string $value The raw source string.
     *
     * @return string The sanitized string.
     */
    protected function sanitizeEncoding(string $value): string
    {
        $value = str_replace("\0", '', $value);

        if (mb_check_encoding($value, 'UTF-8')) {
            return $value;
        }

        // Unlike iconv(), this does not fail on bytes that Windows-1252 leaves undefined.
        $converted = mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        return is_string($converted) ? $converted : $value;
    }
}
|