# Files
# Comparison_Project/app/services/file_service.py
#
# 596 lines
# 22 KiB
# Python

import os
import pandas as pd
from werkzeug.utils import secure_filename
from app.config import Config
from app import db
from app.models.trench_excavation_model import TrenchExcavation
from app.models.manhole_excavation_model import ManholeExcavation
from app.models.manhole_domestic_chamber_model import ManholeDomesticChamber
from app.models.tr_ex_client_model import TrenchExcavationClient
from app.models.mh_ex_client_model import ManholeExcavationClient
from app.models.mh_dc_client_model import ManholeDomesticChamberClient
from app.utils.file_utils import ensure_upload_folder
class FileService:
def allowed_file(self, filename):
    """Tell whether ``filename`` carries an allowed extension.

    The extension is the text after the last dot, compared in lower case
    against ``Config.ALLOWED_EXTENSIONS``. Names without a dot are rejected.
    """
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1].lower()
    return extension in Config.ALLOWED_EXTENSIONS
# def handle_file_upload(self, file, subcontractor_id, file_type):
def handle_file_upload(self, file, subcontractor_id):
    """Validate an uploaded workbook, store it on disk and import its sheets.

    The file is saved under ``Config.UPLOAD_FOLDER/sub_<subcontractor_id>/``.
    The "Tr.Ex." and "MH Ex." sheets are then read with ``header=12`` and
    passed to ``process_trench_excavation`` and
    ``process_manhole_excavation`` respectively.

    Args:
        file: Uploaded file object exposing ``filename`` and ``save(path)``.
        subcontractor_id: Identifier of the subcontractor the rows belong to.

    Returns:
        tuple[bool, str]: ``(True, message)`` when both sheets were imported,
        otherwise ``(False, reason)``.
    """
    # TODO: temporary fixed RA bill number used for every upload.
    RA_Bill_No = 1

    if not subcontractor_id:
        return False, "Please select subcontractor."

    # `not file.filename` also covers a filename of None, which would
    # otherwise raise inside allowed_file().
    if not file or not file.filename:
        return False, "No file selected."

    if not self.allowed_file(file.filename):
        return False, "Invalid file type! Allowed: CSV, XLSX, XLS"

    # secure_filename() may strip every character of the original name.
    filename = secure_filename(file.filename)
    if not filename:
        return False, "Invalid file name."

    ensure_upload_folder()
    # Sanitise the folder component as well, so an unexpected id value
    # (e.g. containing path separators) cannot escape UPLOAD_FOLDER.
    folder_name = secure_filename(f"sub_{subcontractor_id}")
    folder = os.path.join(Config.UPLOAD_FOLDER, folder_name)
    os.makedirs(folder, exist_ok=True)

    filepath = os.path.join(folder, filename)
    file.save(filepath)

    try:
        df_tr_ex = pd.read_excel(filepath, sheet_name="Tr.Ex.", header=12)
        df_mh_ex = pd.read_excel(filepath, sheet_name="MH Ex.", header=12)

        print("\n=== Uploaded File tr ex ===")
        print(df_tr_ex.head())
        print("=============================\n")
        print("=== Uploaded File mh ex ===")
        print(df_mh_ex.head())
        print("========================================")

        # Each processor returns (ok, message); report the first failure
        # instead of claiming success unconditionally.
        # NOTE: each processor commits on its own, so a failure in the
        # second sheet does not undo rows already saved from the first.
        ok, message = self.process_trench_excavation(
            df_tr_ex, subcontractor_id, RA_Bill_No
        )
        if not ok:
            return False, message

        ok, message = self.process_manhole_excavation(
            df_mh_ex, subcontractor_id, RA_Bill_No
        )
        if not ok:
            return False, message

        return True, "File uploaded successfully."
    except Exception as e:
        return False, f"Processing failed: {e}"
# new new
def process_trench_excavation(self, df, subcontractor_id, RA_Bill_No):
    """Insert the rows of a trench-excavation sheet as TrenchExcavation records.

    Column names are normalised (non-word characters become ``_``, repeated
    underscores are collapsed, leading/trailing underscores removed). Fully
    empty rows are dropped and ``Location`` is forward-filled. Rows without
    a ``Location`` or ``MH_NO`` value are skipped. Only columns whose
    cleaned name is an attribute of ``TrenchExcavation`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.
        RA_Bill_No: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` after a rollback.
    """
    print("RA_Bill_No of Tr Ex:", RA_Bill_No)
    print("=== RAW HEADERS ===")
    print(df.columns.tolist())
    print("===================")

    # Drop fully empty rows and work on a copy, so the caller's frame keeps
    # its original headers and the assignments below target a real frame.
    df = df.dropna(how="all").copy()

    # Clean column names
    df.columns = (
        df.columns.astype(str)
        .str.strip()
        .str.replace(r"[^\w]", "_", regex=True)
        .str.replace(r"__+", "_", regex=True)
        .str.strip("_")
    )

    # Forward fill merged Location
    if "Location" in df.columns:
        df["Location"] = df["Location"].ffill()

    # Passed explicitly to the model below; a sheet column with one of these
    # names would raise "got multiple values for keyword argument".
    reserved = {"subcontractor_id", "RA_Bill_No"}
    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan"}

    saved_count = 0
    skipped_count = 0

    try:
        # Resolve the mapped columns once instead of once per row.
        model_columns = [
            col for col in df.columns
            if col not in reserved and hasattr(TrenchExcavation, col)
        ]

        for _, row in df.iterrows():
            location = row.get("Location")
            mh_no = row.get("MH_NO")

            if (
                pd.isna(location) or str(location).strip() == ""
                or pd.isna(mh_no) or str(mh_no).strip() == ""
            ):
                skipped_count += 1
                continue

            record_data = {}
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            # If all mapped fields are None → skip
            if all(v is None for v in record_data.values()):
                skipped_count += 1
                continue

            record = TrenchExcavation(
                subcontractor_id=subcontractor_id,
                RA_Bill_No=RA_Bill_No,
                **record_data
            )
            print("Saving Row → Location:", record.Location, " MH_NO:", record.MH_NO)
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        return True, (
            f"Trench Excavation saved successfully. "
            f"Inserted: {saved_count}, Skipped: {skipped_count}"
        )
    except Exception as e:
        db.session.rollback()
        return False, f"Trench Excavation save failed: {e}"
# new new
def process_manhole_excavation(self, df, subcontractor_id, RA_Bill_No):
    """Insert the rows of a manhole-excavation sheet as ManholeExcavation records.

    Column names are normalised (non-word characters become ``_``, repeated
    underscores are collapsed, leading/trailing underscores removed). Fully
    empty rows are dropped and ``Location`` is forward-filled. Rows without
    a ``Location`` or ``MH_NO`` value are skipped. Only columns whose
    cleaned name is an attribute of ``ManholeExcavation`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.
        RA_Bill_No: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` after a rollback.
    """
    print("RA_Bill_No of MH EX:", RA_Bill_No)
    print("=== RAW HEADERS ===")
    print(df.columns.tolist())
    print("===================")

    # Drop fully empty rows and work on a copy, so the caller's frame keeps
    # its original headers and the assignments below target a real frame.
    df = df.dropna(how="all").copy()

    # Clean column names
    df.columns = (
        df.columns.astype(str)
        .str.strip()
        .str.replace(r"[^\w]", "_", regex=True)
        .str.replace(r"__+", "_", regex=True)
        .str.strip("_")
    )

    # Forward fill merged Location
    if "Location" in df.columns:
        df["Location"] = df["Location"].ffill()

    # Passed explicitly to the model below; a sheet column with one of these
    # names would raise "got multiple values for keyword argument".
    reserved = {"subcontractor_id", "RA_Bill_No"}
    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan"}

    saved_count = 0
    skipped_count = 0

    try:
        # Resolve the mapped columns once instead of once per row.
        model_columns = [
            col for col in df.columns
            if col not in reserved and hasattr(ManholeExcavation, col)
        ]

        for _, row in df.iterrows():
            location = row.get("Location")
            mh_no = row.get("MH_NO")

            if (
                pd.isna(location) or str(location).strip() == ""
                or pd.isna(mh_no) or str(mh_no).strip() == ""
            ):
                skipped_count += 1
                continue

            record_data = {}
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            # If all mapped fields are None → skip
            if all(v is None for v in record_data.values()):
                skipped_count += 1
                continue

            record = ManholeExcavation(
                subcontractor_id=subcontractor_id,
                RA_Bill_No=RA_Bill_No,
                **record_data
            )
            print("Saving Row → Location:", record.Location, " MH_NO:", record.MH_NO)
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        return True, (
            f"Manhole Excavation saved successfully. "
            f"Inserted: {saved_count}, Skipped: {skipped_count}"
        )
    except Exception as e:
        db.session.rollback()
        return False, f"Manhole Excavation save failed: {e}"
# Older versions of the upload/save methods (commented out; superseded by the definitions above)
# def handle_file_upload(self, file, subcontractor_id, file_type):
# # def handle_file_upload(self, file, subcontractor_id):
# if not subcontractor_id:
# return False, "Please select subcontractor."
# # if not file_type:
# # return False, "Please select file type."
# if not file or file.filename == "":
# return False, "No file selected."
# if not self.allowed_file(file.filename):
# return False, "Invalid file type! Allowed: CSV, XLSX, XLS"
# ensure_upload_folder()
# folder = os.path.join(Config.UPLOAD_FOLDER, f"sub_{subcontractor_id}")
# os.makedirs(folder, exist_ok=True)
# filename = secure_filename(file.filename)
# filepath = os.path.join(folder, filename)
# file.save(filepath)
# try:
# # df = pd.read_csv(filepath) if filename.endswith(".csv") else pd.read_excel(filepath)
# df = pd.read_excel(filepath, sheet_name ="Tr.Ex.", header=0)
# df1 = pd.read_excel(filepath, sheet_name="MH Ex.", header=0)
# print("\n=== Uploaded File Preview ===")
# print(" file name h:",df)
# print("=============================\n")
# print(" file name h1:",df1)
# # print(df.head())
# print("=============================\n")
# # Trench Excavation save (subcontractor)
# if file_type == "trench_excavation":
# return self.process_trench_excavation(df, subcontractor_id)
# # Manhole Excavation save (subcontractor)
# if file_type == "manhole_excavation":
# return self.process_manhole_excavation(df, subcontractor_id)
# # Manhole and Domestic Chamber Construction save (subcontractor)
# if file_type == "manhole_domestic_chamber":
# return self.process_manhole_domestic_chamber(df, subcontractor_id)
# # Tr Ex save (client)
# if file_type =="tr_ex_client":
# return self.client_trench_excavation(df, subcontractor_id)
# # Mh Ex save (client)
# if file_type =="mh_ex_client":
# return self.client_manhole_excavation(df, subcontractor_id)
# # Mh and Dc save (client)
# if file_type == "mh_dc_client":
# return self.client_manhole_domestic_chamber(df, subcontractor_id)
# return True, "File uploaded successfully."
# except Exception as e:
# return False, f"Processing failed: {e}"
# ---------------------- Sub contractor --------------------------
# Trench Excavation save method (TrenchExcavation model)
# def process_trench_excavation(self, df, subcontractor_id):
# df.columns = [str(c).strip() for c in df.columns]
# # If the sheet has merged cells -> forward fill Location
# if "Location" in df.columns:
# df["Location"] = df["Location"].ffill()
# df = df.dropna(how="all") # REMOVE empty rows
# # Identify missing location rows before insert
# missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
# if not missing_loc.empty:
# return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"
# saved_count = 0
# try:
# for index, row in df.iterrows():
# record_data = {}
# # Insert only fields that exist in model
# for col in df.columns:
# if hasattr(TrenchExcavation, col):
# value = row[col]
# # Normalize empty values
# if pd.isna(value) or str(value).strip() in ["", "-", "—", "nan", "NaN"]:
# value = None
# record_data[col] = value
# record = TrenchExcavation(
# subcontractor_id=subcontractor_id,
# **record_data
# )
# db.session.add(record)
# saved_count += 1
# db.session.commit()
# return True, f"Trench Excavation data saved successfully. Total rows: {saved_count}"
# except Exception as e:
# db.session.rollback()
# return False, f"Trench Excavation Save Failed: {e}"
# Manhole Excavation save method (ManholeExcavation model)
# def process_manhole_excavation(self, df, subcontractor_id):
# # Clean column names (strip whitespace)
# df.columns = [str(c).strip() for c in df.columns]
# # If the sheet has merged cells -> forward fill Location
# if "Location" in df.columns:
# df["Location"] = df["Location"].ffill()
# # REMOVE empty rows
# df = df.dropna(how="all")
# # Identify missing location rows before insert
# missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
# if not missing_loc.empty:
# return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"
# saved_count = 0
# try:
# for index, row in df.iterrows():
# record_data = {}
# # Insert only fields that exist in model
# for col in df.columns:
# if hasattr(ManholeExcavation, col):
# value = row[col]
# # Normalize empty values
# if pd.isna(value) or str(value).strip() in ["", "-", "—", "nan", "NaN"]:
# value = None
# record_data[col] = value
# record = ManholeExcavation(
# subcontractor_id=subcontractor_id,
# **record_data
# )
# db.session.add(record)
# saved_count += 1
# db.session.commit()
# return True, f"Manhole Excavation data saved successfully. Total rows: {saved_count}"
# except Exception as e:
# db.session.rollback()
# return False, f"Manhole Excavation Save Failed: {e}"
# Manhole and Domestic Chamber Construction save method (ManholeDomesticChamber model)
def process_manhole_domestic_chamber(self, df, subcontractor_id):
    """Insert sheet rows as ManholeDomesticChamber records.

    Header names are stripped, fully empty rows are dropped and ``Location``
    is forward-filled. The import is refused when the ``Location`` column is
    absent or any remaining row still has no Location. Only columns whose
    name is an attribute of ``ManholeDomesticChamber`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` on validation failure or after a rollback.
    """
    # Drop blank rows BEFORE forward-filling: filling first would give blank
    # rows a Location, so they would survive dropna(how="all") and be saved.
    # The copy keeps the caller's frame untouched.
    df = df.dropna(how="all").copy()

    # Clean column names (strip whitespace)
    df.columns = [str(c).strip() for c in df.columns]

    # Without this guard the Location lookups below raise KeyError.
    if "Location" not in df.columns:
        return False, "Error: 'Location' column not found."

    # If the sheet has merged cells -> forward fill Location
    df["Location"] = df["Location"].ffill()

    # Identify missing location rows before insert
    missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
    if not missing_loc.empty:
        return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"

    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan", "NaN"}
    saved_count = 0

    try:
        # subcontractor_id is passed explicitly below; a sheet column with
        # that name would raise "got multiple values for keyword argument".
        model_columns = [
            col for col in df.columns
            if col != "subcontractor_id" and hasattr(ManholeDomesticChamber, col)
        ]

        for _, row in df.iterrows():
            record_data = {}
            # Insert only fields that exist in model
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            record = ManholeDomesticChamber(
                subcontractor_id=subcontractor_id,
                **record_data
            )
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        return True, f"Manhole Domestic Chamber Construction data saved successfully. Total rows: {saved_count}"
    except Exception as e:
        db.session.rollback()
        return False, f"Manhole Domestic Chamber Construction Save Failed: {e}"
# ---------------------- client ----------------------------------
# Tr Ex save method (TrenchExcavationClient model)
def client_trench_excavation(self, df, subcontractor_id):
    """Insert sheet rows as TrenchExcavationClient records.

    Header names are stripped, fully empty rows are dropped and ``Location``
    is forward-filled. The import is refused when the ``Location`` column is
    absent or any remaining row still has no Location. Only columns whose
    name is an attribute of ``TrenchExcavationClient`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` on validation failure or after a rollback.
    """
    # Drop blank rows BEFORE forward-filling: filling first would give blank
    # rows a Location, so they would survive dropna(how="all") and be saved.
    # The copy keeps the caller's frame untouched.
    df = df.dropna(how="all").copy()

    df.columns = [str(c).strip() for c in df.columns]

    # Without this guard the Location lookups below raise KeyError.
    if "Location" not in df.columns:
        return False, "Error: 'Location' column not found."

    # If the sheet has merged cells -> forward fill Location
    df["Location"] = df["Location"].ffill()

    # Identify missing location rows before insert
    missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
    if not missing_loc.empty:
        return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"

    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan", "NaN"}
    saved_count = 0

    try:
        # subcontractor_id is passed explicitly below; a sheet column with
        # that name would raise "got multiple values for keyword argument".
        model_columns = [
            col for col in df.columns
            if col != "subcontractor_id" and hasattr(TrenchExcavationClient, col)
        ]

        for _, row in df.iterrows():
            record_data = {}
            # Insert only fields that exist in model
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            record = TrenchExcavationClient(
                subcontractor_id=subcontractor_id,
                **record_data
            )
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        # Message spelling corrected ("Clinnt" -> "Client").
        return True, f"Client Tr Ex data saved successfully. Total rows: {saved_count}"
    except Exception as e:
        db.session.rollback()
        return False, f"Client Tr Ex Save Failed: {e}"
# Mh Ex save method (ManholeExcavationClient model)
def client_manhole_excavation(self, df, subcontractor_id):
    """Insert sheet rows as ManholeExcavationClient records.

    Header names are stripped, fully empty rows are dropped and ``Location``
    is forward-filled. The import is refused when the ``Location`` column is
    absent or any remaining row still has no Location. Only columns whose
    name is an attribute of ``ManholeExcavationClient`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` on validation failure or after a rollback.
    """
    # Drop blank rows BEFORE forward-filling: filling first would give blank
    # rows a Location, so they would survive dropna(how="all") and be saved.
    # The copy keeps the caller's frame untouched.
    df = df.dropna(how="all").copy()

    # Clean column names (strip whitespace)
    df.columns = [str(c).strip() for c in df.columns]

    # Without this guard the Location lookups below raise KeyError.
    if "Location" not in df.columns:
        return False, "Error: 'Location' column not found."

    # If the sheet has merged cells -> forward fill Location
    df["Location"] = df["Location"].ffill()

    # Identify missing location rows before insert
    missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
    if not missing_loc.empty:
        return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"

    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan", "NaN"}
    saved_count = 0

    try:
        # subcontractor_id is passed explicitly below; a sheet column with
        # that name would raise "got multiple values for keyword argument".
        model_columns = [
            col for col in df.columns
            if col != "subcontractor_id" and hasattr(ManholeExcavationClient, col)
        ]

        for _, row in df.iterrows():
            record_data = {}
            # Insert only fields that exist in model
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            record = ManholeExcavationClient(
                subcontractor_id=subcontractor_id,
                **record_data
            )
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        return True, f" Client Mh Ex. data saved successfully. Total rows: {saved_count}"
    except Exception as e:
        db.session.rollback()
        return False, f"Client Mh Ex. Save Failed: {e}"
# Mh and Dc save method (ManholeDomesticChamberClient model)
def client_manhole_domestic_chamber(self, df, subcontractor_id):
    """Insert sheet rows as ManholeDomesticChamberClient records.

    Header names are stripped, fully empty rows are dropped and ``Location``
    is forward-filled. The import is refused when the ``Location`` column is
    absent or any remaining row still has no Location. Only columns whose
    name is an attribute of ``ManholeDomesticChamberClient`` are stored.

    Args:
        df: DataFrame read from the sheet; it is not modified.
        subcontractor_id: Stored on every created record.

    Returns:
        tuple[bool, str]: ``(True, summary)`` after a successful commit,
        ``(False, reason)`` on validation failure or after a rollback.
    """
    # Drop blank rows BEFORE forward-filling: filling first would give blank
    # rows a Location, so they would survive dropna(how="all") and be saved.
    # The copy keeps the caller's frame untouched.
    df = df.dropna(how="all").copy()

    # Clean column names (strip whitespace)
    df.columns = [str(c).strip() for c in df.columns]

    # Without this guard the Location lookups below raise KeyError.
    if "Location" not in df.columns:
        return False, "Error: 'Location' column not found."

    # If the sheet has merged cells -> forward fill Location
    df["Location"] = df["Location"].ffill()

    # Identify missing location rows before insert
    missing_loc = df[df["Location"].isna() | (df["Location"].astype(str).str.strip() == "")]
    if not missing_loc.empty:
        return False, f"Error: Some rows have empty Location. Rows: {missing_loc.index.tolist()}"

    # Cell texts treated as "no value" (hyphen and em dash placeholders).
    empty_markers = {"", "-", "—", "nan", "NaN"}
    saved_count = 0

    try:
        # subcontractor_id is passed explicitly below; a sheet column with
        # that name would raise "got multiple values for keyword argument".
        model_columns = [
            col for col in df.columns
            if col != "subcontractor_id" and hasattr(ManholeDomesticChamberClient, col)
        ]

        for _, row in df.iterrows():
            record_data = {}
            # Insert only fields that exist in model
            for col in model_columns:
                value = row[col]
                # Normalize empty values
                if pd.isna(value) or str(value).strip() in empty_markers:
                    value = None
                record_data[col] = value

            record = ManholeDomesticChamberClient(
                subcontractor_id=subcontractor_id,
                **record_data
            )
            db.session.add(record)
            saved_count += 1

        db.session.commit()
        return True, f"Mh and Dc data saved successfully. Total rows: {saved_count}"
    except Exception as e:
        db.session.rollback()
        return False, f"Mh and Dc data Save Failed: {e}"