import pandas as pd
import numpy as np

# Load the CSV file into a pandas DataFrame.
try:
    df = pd.read_csv('poster.csv')
except FileNotFoundError:
    # Abort with a failure status so shells/CI can detect the missing input.
    # SystemExit with a string argument prints the message to stderr and
    # exits with status 1. The bare exit() helper is injected by the `site`
    # module for interactive use, is not guaranteed to exist, and would
    # report success (status 0) here.
    raise SystemExit("오류: 'poster.csv' 파일을 찾을 수 없습니다.")

# Columns compared against each other within a single row: the
# `<slot>_<role>_chara_face_xywh` fields for slots 1-6, listing every
# `main` column first and then every `sub` column.
columns_to_check = [
    f'{slot}_{role}_chara_face_xywh'
    for role in ('main', 'sub')
    for slot in range(1, 7)
]

# Function to find duplicates in the specified columns of a single row
def find_duplicates_in_row(row: pd.Series, columns=None) -> bool:
    """Return True when two or more checked cells of *row* hold the same value.

    Missing cells (NaN/None/NaT) and blank or whitespace-only strings are
    skipped, so several empty cells are never reported as duplicates of
    each other.

    Args:
        row: One record; it must contain every checked column label.
        columns: Column labels to compare. Defaults to the module-level
            ``columns_to_check`` list when omitted.

    Returns:
        True if any non-empty value occurs more than once, otherwise False.

    Raises:
        KeyError: If a checked column is absent from *row*.
        TypeError: If a checked cell holds an unhashable value.
    """
    selected = columns_to_check if columns is None else columns

    # pd.notna() treats '' as a present value, so blank strings have to be
    # filtered explicitly to honour the "ignore empty values" contract.
    values = [
        val
        for val in row[selected]
        if pd.notna(val) and not (isinstance(val, str) and not val.strip())
    ]

    # A set collapses equal values; a shorter set means something repeated.
    # With fewer than two values the lengths are always equal (-> False).
    return len(values) != len(set(values))

# Evaluate every row with find_duplicates_in_row; the resulting boolean mask
# is True exactly for the rows that contain repeated values (defective records).
is_defective_mask = df.apply(find_duplicates_in_row, axis='columns')

# Keep only the rows flagged by the mask.
defective_records = df[is_defective_mask]

# Report the outcome.
if not defective_records.empty:
    print(f"총 {len(defective_records)}개의 결함이 있는 레코드를 찾았습니다:")
    # Limit the printed table to the file name plus the columns that were
    # compared, which keeps the output readable.
    display_columns = ['FileName', *columns_to_check]
    print(defective_records.loc[:, display_columns].to_markdown(index=False))
else:
    print("결함이 있는 레코드를 찾지 못했습니다.")