#!/usr/bin/env python
"""Elimina firmas, encabezados y otros elementos para hacer menos pesado el html de Burbuja.info"""
import sys
from bs4 import BeautifulSoup
from datetime import datetime
# Values of the ``class`` attribute whose elements are dropped from the page.
# Each entry is handed to Beautiful Soup unchanged; per its documentation a
# multi-word entry only matches an element whose class attribute is exactly
# that string (same classes, same order).
CLASSES_TO_REMOVE = (
    'message-signature',
    'message-actionBar',
    'p-sectionLinks',
    'p-nav-inner',
    'p-footer',
    'message message--quickReply block-topRadiusContent block-bottomRadiusContent',
    'message-attribution-opposite message-attribution-opposite--list',
)

# strftime/strptime patterns used for the <time> attributes and label.
# NOTE(review): %b and %p are locale dependent; parsing fails (and the element
# is skipped) when the page's month names do not match the active locale.
DATE_FORMAT = '%d %b %Y'
TIME_FORMAT = '%I:%M %p'
PARSE_FORMAT = '%d %b %Y %I:%M %p'
ISO_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
LABEL_FORMAT = '%d %b %Y a la(s) %I:%M %p'


def _mentions_yesterday(text):
    """Return True when *text* contains 'ayer' (case-insensitive).

    Beautiful Soup may call a string filter with ``None`` for a tag that has
    no single string child, so ``None`` is treated as "no match" instead of
    raising ``AttributeError``.
    """
    return text is not None and 'ayer' in text.lower()


def _strip_bulky_elements(soup):
    """Remove from *soup* every element matching one of CLASSES_TO_REMOVE."""
    for class_name in CLASSES_TO_REMOVE:
        for element in soup.find_all(class_=class_name):
            element.decompose()


def _rewrite_relative_times(soup):
    """Replace the 'ayer ...' label of ``<time class="u-dt">`` elements.

    For each matching element the ``data-date-string`` and
    ``data-time-string`` attributes are parsed and the element text becomes
    that parsed date and time. An element with a missing attribute or an
    unparseable date prints "time error" and is skipped, so one bad element
    no longer stops the remaining ones from being processed.
    """
    # Timezone-aware local time: with a naive datetime the %z in ISO_FORMAT
    # expands to an empty string. Computed once because it is loop-invariant.
    current_datetime = datetime.now().astimezone()

    time_elements = soup.find_all('time', class_='u-dt', string=_mentions_yesterday)
    for time_element in time_elements:
        try:
            date_string = time_element['data-date-string']
            time_string = time_element['data-time-string']
            parsed_datetime = datetime.strptime(
                f'{date_string} {time_string}', PARSE_FORMAT
            )
        except (KeyError, ValueError):
            print("time error")
            continue

        # NOTE(review): the attributes receive the *current* time while the
        # visible text receives the parsed post time; kept as in the original
        # script -- confirm that overwriting the attributes is intended.
        time_element['data-date-string'] = current_datetime.strftime(DATE_FORMAT)
        time_element['data-time-string'] = current_datetime.strftime(TIME_FORMAT)
        time_element['datetime'] = current_datetime.strftime(ISO_FORMAT)
        time_element['title'] = current_datetime.strftime(LABEL_FORMAT)
        time_element.string = parsed_datetime.strftime(LABEL_FORMAT)


def main(argv=None):
    """Clean the HTML file named on the command line.

    Reads ``argv[1]``, removes the elements listed in CLASSES_TO_REMOVE,
    rewrites the 'ayer' time labels and writes the prettified result to
    ``argv[1] + 'comp.html'``.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when the argument count is wrong.

    Raises:
        OSError: if the input cannot be read or the output cannot be written.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("Usage: python script.py <html_file>")
        return 1

    html_file_path = argv[1]
    html_file_output = html_file_path + 'comp.html'

    with open(html_file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()

    soup = BeautifulSoup(html_content, 'html.parser')
    _strip_bulky_elements(soup)
    _rewrite_relative_times(soup)

    with open(html_file_output, 'w', encoding='utf-8') as file:
        file.write(soup.prettify())
    return 0


if __name__ == "__main__":
    sys.exit(main())