dataType])
    protected $pgSchemaCache = [];

    /**
     * Converts one HFSQL record into a numerically indexed array ready for
     * fputcsv, with values ordered like $pgColumnOrder and formatted for the
     * PostgreSQL column type of each target column.
     *
     * NULL, missing and empty (after trimming) values are emitted as '' so
     * that COPY in CSV mode loads them as NULL.
     *
     * @param string $tableName     The target PostgreSQL table name.
     * @param array  $hfsqlRecord   The associative HFSQL record (column => value).
     * @param array  $pgColumnOrder Column names in the order expected by COPY.
     * @return array The numerically indexed list of converted values.
     */
    public function convertToPostgres(string $tableName, array $hfsqlRecord, array $pgColumnOrder): array
    {
        // Destination column types, fetched once per table and cached.
        $pgSchemaTypes = $this->getPostgresSchema($tableName);

        // Lowercase the record keys so the lookups below are case-insensitive.
        $hfsqlRecord = array_change_key_case($hfsqlRecord, CASE_LOWER);

        $postgresRecordValues = [];

        foreach ($pgColumnOrder as $pgColumn) {
            $pgColumnLower = strtolower($pgColumn);

            // Unknown columns fall back to 'text' (pass-through conversion).
            $pgType = $pgSchemaTypes[$pgColumnLower] ?? 'text';

            // Read by name rather than by position: the record order is not trusted.
            $value = $hfsqlRecord[$pgColumnLower] ?? null;

            // Stage 1: sanitize strings (trim first, then fix the encoding).
            if (is_string($value)) {
                $value = $this->sanitizeEncoding(trim($value));
            }

            // Stage 2: NULL / empty string -> unquoted empty CSV field (NULL for COPY).
            if ($value === null || $value === '') {
                $postgresRecordValues[] = '';
                continue;
            }

            // Stage 3: type-specific conversion driven by the real column type.
            $postgresRecordValues[] = $this->convertValue($value, $pgType);
        }

        return $postgresRecordValues;
    }

    /**
     * Fetches the PostgreSQL column types of the target table from
     * information_schema and caches the result per table name.
     *
     * NOTE(review): the query does not filter on table_schema, so if the same
     * table name exists in several schemas their columns are merged into one
     * map (last row wins per column). Confirm this cannot happen in the target
     * database, or add a schema filter.
     *
     * @param string $tableName The table name, matched exactly (case-sensitive).
     * @return array [columnName => dataType] map; keys and values are lowercase.
     */
    protected function getPostgresSchema(string $tableName): array
    {
        if (isset($this->pgSchemaCache[$tableName])) {
            return $this->pgSchemaCache[$tableName];
        }

        // The table name is passed as-is: its original case is kept on purpose.
        $results = DB::select(
            "SELECT column_name, data_type
               FROM information_schema.columns
              WHERE table_name = ?
              ORDER BY ordinal_position",
            [$tableName]
        );

        $schemaMap = [];
        foreach ($results as $column) {
            // Lowercase both sides, e.g. 'createdat' => 'timestamp without time zone'.
            $schemaMap[strtolower($column->column_name)] = strtolower($column->data_type);
        }

        return $this->pgSchemaCache[$tableName] = $schemaMap;
    }

    /**
     * Converts a non-null, non-empty value to the textual form expected by the
     * destination column type.
     *
     * Only the first word of the information_schema type is used for dispatch,
     * so 'timestamp without time zone' is handled as 'timestamp' and
     * 'double precision' as 'double'. Types without a dedicated branch are
     * cast to string unchanged.
     *
     * @param mixed  $value  The source value (expected non-null and non-empty).
     * @param string $pgType The lowercase data_type from information_schema.
     * @return string The converted value, or '' when it cannot be converted
     *                (which COPY then loads as NULL).
     */
    protected function convertValue($value, string $pgType): string
    {
        $type = explode(' ', $pgType)[0];

        switch ($type) {
            case 'timestamp':
                // Covers both 'timestamp with time zone' and 'without time zone'.
                return $this->convertDateTime($value, 'Y-m-d H:i:s');

            case 'date':
                return $this->convertDateTime($value, 'Y-m-d');

            case 'numeric':
            case 'double':
            case 'real':
            case 'money':
                return $this->normalizeNumber($value);

            case 'integer':
            case 'smallint':
            case 'bigint':
                return $this->convertInteger($value);

            case 'boolean':
                return $this->convertBoolean($value);

            case 'text':
            case 'character':
            case 'varchar':
            default:
                return (string) $value;
        }
    }

    /**
     * Normalizes a number to a dot-decimal string without thousands separators.
     *
     * Values that PHP already considers numeric (including exponent notation)
     * are returned untouched, so no precision is lost through a float cast.
     * Otherwise decoration is stripped and the separators are interpreted:
     *  - both '.' and ',' present: the right-most one is the decimal separator
     *    ("1,234.56" and "1.234,56" both give "1234.56");
     *  - only commas: a single comma is a decimal comma ("12,5" => "12.5"),
     *    except for the ambiguous "1,234" shape, which keeps its historical
     *    reading as a thousands separator; several commas are thousands separators;
     *  - only dots: several well-formed groups ("1.234.567") are thousands separators.
     *
     * @param mixed $value The source value.
     * @return string The normalized number, or '' when the value is not a number.
     */
    private function normalizeNumber($value): string
    {
        $text = trim((string) $value);

        if (is_numeric($text)) {
            return $text;
        }

        // Drop currency symbols, spaces, plus signs and any other decoration.
        $text = (string) preg_replace('/[^0-9.,\-]/', '', $text);

        $hasDot = strpos($text, '.') !== false;
        $hasComma = strpos($text, ',') !== false;

        if ($hasDot && $hasComma) {
            if (strrpos($text, ',') > strrpos($text, '.')) {
                // European style: dots group thousands, comma is the decimal mark.
                $text = str_replace(',', '.', str_replace('.', '', $text));
            } else {
                // English style: commas group thousands.
                $text = str_replace(',', '', $text);
            }
        } elseif ($hasComma) {
            $looksLikeThousands = substr_count($text, ',') > 1
                || preg_match('/^-?[1-9]\d{0,2},\d{3}\z/', $text) === 1;

            $text = $looksLikeThousands
                ? str_replace(',', '', $text)
                : str_replace(',', '.', $text);
        } elseif ($hasDot && preg_match('/^-?\d{1,3}(?:\.\d{3}){2,}\z/', $text) === 1) {
            $text = str_replace('.', '', $text);
        }

        return is_numeric($text) ? $text : '';
    }

    /**
     * Converts a value to an integer literal.
     *
     * Plain integer strings are cast directly. Values with a fractional part
     * or an exponent are rounded half away from zero instead of having their
     * decimal point stripped (which used to turn "12.00" into 1200).
     *
     * @param mixed $value The source value.
     * @return string The integer as a string, or '' when the value is not a number.
     */
    private function convertInteger($value): string
    {
        if (is_int($value)) {
            return (string) $value;
        }

        $number = $this->normalizeNumber($value);
        if ($number === '') {
            return '';
        }

        if (preg_match('/^[+-]?\d+\z/', $number) === 1) {
            return (string) (int) $number;
        }

        return (string) (int) round((float) $number);
    }

    /**
     * Converts a value to the PostgreSQL boolean literals 't' / 'f'.
     *
     * Recognized "true" tokens are the ones PostgreSQL accepts ('true', 't',
     * 'yes', 'y', 'on', '1') plus the French 'oui' and 'vrai'. Anything else,
     * including unrecognized text, maps to 'f'.
     *
     * @param mixed $value The source value.
     * @return string 't' or 'f'.
     */
    private function convertBoolean($value): string
    {
        if (is_bool($value)) {
            return $value ? 't' : 'f';
        }

        $token = strtolower(trim((string) $value));
        $truthy = ['true', 't', 'yes', 'y', 'on', '1', 'oui', 'vrai'];

        return in_array($token, $truthy, true) ? 't' : 'f';
    }

    /**
     * Converts a date/time input into the given output format.
     *
     * Accepted inputs, checked in this order:
     *  1. any DateTimeInterface object (formatted directly);
     *  2. compact digit strings YYYYMMDD, YYYYMMDDHHMMSS and
     *     YYYYMMDDHHMMSSmmm (presumably the native HFSQL date/datetime
     *     layout - confirm against the source driver); milliseconds are dropped;
     *  3. positive numeric values, read as Unix timestamps;
     *  4. anything else, handed to the DateTimeImmutable string parser.
     *
     * Blank and all-zero inputs return '' (they are assumed to mean "no date").
     *
     * @param mixed  $value  The source value.
     * @param string $format The date() format to emit, e.g. 'Y-m-d H:i:s'.
     * @return string The formatted date, or '' when parsing fails
     *                (which COPY then loads as NULL).
     */
    protected function convertDateTime($value, string $format): string
    {
        try {
            if ($value instanceof \DateTimeInterface) {
                return $value->format($format);
            }

            $text = trim((string) $value);

            // '' would be parsed as "now"; all zeros is treated as an unset date.
            if ($text === '' || trim($text, '0') === '') {
                return '';
            }

            // Must run before the timestamp branch: "20240115" is numeric too.
            if (is_string($value) || is_int($value)) {
                $compact = $this->parseCompactDateTime($text);
                if ($compact !== null) {
                    return $compact->format($format);
                }
            }

            if (is_numeric($value) && $value > 0) {
                $date = (new \DateTimeImmutable())->setTimestamp((int) $value);
            } else {
                $date = new \DateTimeImmutable($text);
            }

            return $date->format($format);
        } catch (\Exception $e) {
            // Unparseable input becomes '' (NULL in COPY).
            return '';
        }
    }

    /**
     * Parses a compact all-digit date (8 digits) or date-time (14 or 17
     * digits) and validates it as a real calendar value.
     *
     * @param string $digits The trimmed candidate string.
     * @return \DateTimeImmutable|null The parsed date, or null when the string
     *                                 has another shape or is not a valid date.
     */
    private function parseCompactDateTime(string $digits)
    {
        if (preg_match('/^(?:\d{8}|\d{14}|\d{17})\z/', $digits) !== 1) {
            return null;
        }

        $dateOnly = strlen($digits) === 8;
        $layout = $dateOnly ? 'Ymd' : 'YmdHis';
        $significant = $dateOnly ? $digits : substr($digits, 0, 14);

        // '!' resets every field that is not parsed (e.g. the time for Ymd).
        $date = \DateTimeImmutable::createFromFormat('!' . $layout, $significant);

        // createFromFormat silently rolls over values such as Feb 30;
        // the round-trip comparison rejects them.
        if ($date === false || $date->format($layout) !== $significant) {
            return null;
        }

        return $date;
    }

    /**
     * Returns the string as valid UTF-8 without NUL bytes.
     *
     * Strings that are already valid UTF-8 are kept as they are; anything else
     * is assumed to be Windows-1252 (assumed to be the usual HFSQL encoding on
     * Windows - confirm) and converted. NUL bytes are always removed because
     * PostgreSQL text values cannot contain them.
     *
     * @param string $value The raw string.
     * @return string The sanitized string.
     */
    protected function sanitizeEncoding(string $value): string
    {
        if (!mb_check_encoding($value, 'UTF-8')) {
            $fromEncoding = 'Windows-1252';

            // '@' is deliberate: iconv raises a notice on bytes it has to skip.
            $converted = @iconv($fromEncoding, 'UTF-8//IGNORE', $value);

            if ($converted === false) {
                // Returning the raw bytes would make COPY fail, so try mbstring
                // before giving up.
                $converted = mb_convert_encoding($value, 'UTF-8', $fromEncoding);
            }

            if (is_string($converted)) {
                $value = $converted;
            }
        }

        return str_replace("\0", '', $value);
    }
}