Initial commit
This commit is contained in:
64
compare_gst_excel.py
Normal file
64
compare_gst_excel.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# import pandas as pd
|
||||
|
||||
# def normalize_row(row):
|
||||
# return tuple(
|
||||
# str(cell).strip().replace(".0", "") if isinstance(cell, float) and cell.is_integer() else str(cell).strip()
|
||||
# for cell in row
|
||||
# )
|
||||
|
||||
# def find_unmatched_rows(sheet1_df, sheet2_df):
|
||||
# # Ensure column names are clean
|
||||
# sheet1_df.columns = sheet1_df.columns.str.strip()
|
||||
# sheet2_df.columns = sheet2_df.columns.str.strip()
|
||||
|
||||
# # Normalize rows for comparison
|
||||
# sheet1_normalized = sheet1_df.apply(normalize_row, axis=1)
|
||||
# sheet2_normalized = sheet2_df.apply(normalize_row, axis=1)
|
||||
|
||||
# # Find unmatched rows
|
||||
# unmatched_in_sheet1 = sheet1_df[~sheet1_normalized.isin(sheet2_normalized)]
|
||||
# unmatched_in_sheet2 = sheet2_df[~sheet2_normalized.isin(sheet1_normalized)]
|
||||
|
||||
# # Mark source
|
||||
# unmatched_in_sheet1["Source"] = "Sheet1"
|
||||
# unmatched_in_sheet2["Source"] = "Sheet2"
|
||||
|
||||
# # Combine
|
||||
# unmatched_combined = pd.concat([unmatched_in_sheet1, unmatched_in_sheet2], ignore_index=True)
|
||||
# return unmatched_combined
|
||||
import pandas as pd
|
||||
|
||||
def normalize_row(row):
    """Convert a row of cell values into a tuple of comparable strings.

    Every cell is rendered with ``str`` and stripped of surrounding
    whitespace. A float holding a whole number is rendered as that integer,
    so ``12.0`` and ``12`` produce the same text.

    Args:
        row: Iterable of cell values (for example a pandas row Series).

    Returns:
        tuple: One normalized string per cell, in the original order.
    """

    def _cell_text(cell):
        if isinstance(cell, float) and cell.is_integer():
            # Go through int() instead of deleting ".0" from the text: large
            # whole floats print in exponent form ("1e+16"), which contains
            # no ".0" and would never match the equal integer value. This
            # also maps -0.0 to "0" rather than "-0".
            return str(int(cell))
        return str(cell).strip()

    return tuple(_cell_text(cell) for cell in row)
|
||||
|
||||
def find_unmatched_rows(sheet1_df, sheet2_df, comparison_columns=None):
    """Return the rows of each sheet whose key is absent from the other sheet.

    A row's key is the tuple of its normalized values (see ``normalize_row``)
    in ``comparison_columns``. Column labels are stripped of surrounding
    whitespace before the lookup. The input frames themselves are left
    untouched; the stripped labels appear only in the returned frame.

    Args:
        sheet1_df: First DataFrame to compare.
        sheet2_df: Second DataFrame to compare.
        comparison_columns: Column label, or iterable of labels, that make up
            the comparison key. Defaults to ``['Date', 'GSTIN/UIN']``.

    Returns:
        pandas.DataFrame: Unmatched rows of ``sheet1_df`` followed by the
        unmatched rows of ``sheet2_df``, re-indexed from zero, with a
        ``"Source"`` column set to ``"Sheet1"`` or ``"Sheet2"``. An existing
        ``"Source"`` column is overwritten.

    Raises:
        ValueError: If ``comparison_columns`` is empty, or if one of its
            labels is missing from either sheet.
    """
    if comparison_columns is None:
        comparison_columns = ['Date', 'GSTIN/UIN']
    elif isinstance(comparison_columns, str):
        # A bare string would otherwise be split into single characters.
        comparison_columns = [comparison_columns]
    else:
        comparison_columns = list(comparison_columns)

    if not comparison_columns:
        raise ValueError("comparison_columns must not be empty.")

    def _strip_label(label):
        # Only text labels can carry stray whitespace; leave numeric or other
        # non-string headers as they are instead of failing on them.
        return label.strip() if isinstance(label, str) else label

    # rename() returns new frames, so the caller's DataFrames keep their
    # original column labels.
    sheet1_df = sheet1_df.rename(columns=_strip_label)
    sheet2_df = sheet2_df.rename(columns=_strip_label)

    # Ensure required columns exist
    for col in comparison_columns:
        if col not in sheet1_df.columns or col not in sheet2_df.columns:
            raise ValueError(f"Missing column '{col}' in one of the sheets.")

    # Create keys for comparison
    sheet1_keys = sheet1_df[comparison_columns].apply(normalize_row, axis=1)
    sheet2_keys = sheet2_df[comparison_columns].apply(normalize_row, axis=1)

    # Find unmatched rows; copy so the "Source" assignment below writes to an
    # independent frame rather than to a slice of the input.
    unmatched_in_sheet1 = sheet1_df[~sheet1_keys.isin(sheet2_keys)].copy()
    unmatched_in_sheet2 = sheet2_df[~sheet2_keys.isin(sheet1_keys)].copy()

    # Mark source
    unmatched_in_sheet1["Source"] = "Sheet1"
    unmatched_in_sheet2["Source"] = "Sheet2"

    # Combine
    unmatched_combined = pd.concat(
        [unmatched_in_sheet1, unmatched_in_sheet2], ignore_index=True
    )
    return unmatched_combined
|
||||
Reference in New Issue
Block a user