import pandas as pd
import datetime
df_norte = pd.read_csv("./ventas_norte_v2.csv", sep=";", dtype={"ID_Pedido": str, "Direccion_Envio": str, "Producto": str, "Unidades": int, "Precio_Unitario": int})
df_norte["Total_Factura"] = df_norte["Total_Factura"].apply(lambda x: x.split("$")[1]).astype(float)
df_norte = df_norte.rename(columns={"ID_Pedido": "id_transaccion", "Fecha_Transaccion": "fecha", "Producto": "producto", "Unidades": "cantidad", "Total_Factura": "total_venta"})
df_norte = df_norte.drop(columns=["Precio_Unitario", "Cliente_Nombre", "Direccion_Envio", "Precio_Unitario"])
df_norte["region"] = "Norte"
df_norte["ciudad"] = "Madrid"
df_norte = df_norte.set_index("id_transaccion")
df_norte
| fecha | producto | cantidad | total_venta | region | ciudad | |
|---|---|---|---|---|---|---|
| id_transaccion | ||||||
| N-1000 | 2023-05-10 03:00:00 | Laptop Gamer | 1 | 970.0 | Norte | Madrid |
| N-1001 | 2023-06-18 09:00:00 | Mouse Ergonómico | 4 | 3708.0 | Norte | Madrid |
| N-1002 | 2023-03-10 05:00:00 | Monitor 4K | 2 | 2820.0 | Norte | Madrid |
| N-1003 | 2023-05-10 03:00:00 | Webcam HD | 2 | 2538.0 | Norte | Madrid |
| N-1004 | 2023-02-25 17:00:00 | Webcam HD | 2 | 908.0 | Norte | Madrid |
| ... | ... | ... | ... | ... | ... | ... |
| N-1145 | 2023-04-17 10:00:00 | Teclado Mecánico | 4 | 4204.0 | Norte | Madrid |
| N-1146 | 2023-04-08 00:00:00 | Monitor 4K | 2 | 2458.0 | Norte | Madrid |
| N-1147 | 2023-02-01 22:00:00 | Monitor 4K | 1 | 1040.0 | Norte | Madrid |
| N-1148 | 2023-06-13 03:00:00 | Laptop Gamer | 1 | 297.0 | Norte | Madrid |
| N-1149 | 2023-02-24 01:00:00 | Monitor 4K | 1 | 909.0 | Norte | Madrid |
150 rows × 6 columns
df_sur_both_pages = pd.read_excel("ventas_sur_v2.xlsx", sheet_name=None)
df_sur = pd.concat([df_sur_both_pages["Q1_2023"], df_sur_both_pages["Q2_2023"]])
df_sur["Cantidad"] = df_sur["Cantidad"].astype(int)
df_sur["Precio_Base"] = df_sur["Precio_Base"].astype(float)
df_sur["Descuento_Aplicado"] = df_sur["Descuento_Aplicado"].astype(float)
df_sur["Es_Cliente_Corporativo"] = df_sur["Es_Cliente_Corporativo"].astype(bool)
df_sur["Total"] = df_sur["Precio_Base"] * df_sur["Cantidad"] * (1 - df_sur["Descuento_Aplicado"])
df_sur
| Ref_Venta | Fecha_Alta | Articulo | Cantidad | Precio_Base | Descuento_Aplicado | Es_Cliente_Corporativo | Estado_Envio | Total | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | S-5000 | 2023-02-11 00:00:00 | Webcam HD | 9 | 425.12 | 0.20 | False | Enviado | 3060.864 |
| 1 | S-5001 | 2023-02-03 12:00:00 | Teclado Mecánico | 8 | 599.60 | 0.00 | True | Devuelto | 4796.800 |
| 2 | S-5002 | 2023-04-06 07:00:00 | Mouse Ergonómico | 6 | 971.36 | 0.00 | False | Completado | 5828.160 |
| 3 | S-5003 | 2023-04-02 05:00:00 | Monitor 4K | 1 | 799.66 | 0.10 | True | Pendiente | 719.694 |
| 4 | S-5004 | 2023-06-17 12:00:00 | Mouse Ergonómico | 2 | 619.99 | 0.05 | True | Devuelto | 1177.981 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75 | S-5075 | 2023-05-17 22:00:00 | Monitor 4K | 2 | 1150.01 | 0.10 | False | Enviado | 2070.018 |
| 76 | S-5076 | 2023-01-29 13:00:00 | Webcam HD | 1 | 874.12 | 0.10 | True | Devuelto | 786.708 |
| 77 | S-5077 | 2023-06-09 11:00:00 | Mouse Ergonómico | 1 | 145.92 | 0.00 | True | Devuelto | 145.920 |
| 78 | S-5078 | 2023-02-07 13:00:00 | Monitor 4K | 2 | 1152.24 | 0.00 | False | Enviado | 2304.480 |
| 79 | S-5079 | 2023-05-01 11:00:00 | Docking Station | 7 | 261.88 | 0.10 | True | Enviado | 1649.844 |
160 rows × 9 columns
# Normalizacion para concatenacion posterior
df_sur = df_sur.drop(columns=["Precio_Base", "Descuento_Aplicado", "Es_Cliente_Corporativo", "Estado_Envio"])
df_sur = df_sur.rename(columns={"Ref_Venta": "id_transaccion", "Fecha_Alta": "fecha", "Articulo": "producto", "Cantidad": "cantidad", "Total": "total_venta"})
df_sur["region"] = "Sur"
df_sur["ciudad"] = "Madrid"
df_sur = df_sur.set_index("id_transaccion")
df_sur
| fecha | producto | cantidad | total_venta | region | ciudad | |
|---|---|---|---|---|---|---|
| id_transaccion | ||||||
| S-5000 | 2023-02-11 00:00:00 | Webcam HD | 9 | 3060.864 | Sur | Madrid |
| S-5001 | 2023-02-03 12:00:00 | Teclado Mecánico | 8 | 4796.800 | Sur | Madrid |
| S-5002 | 2023-04-06 07:00:00 | Mouse Ergonómico | 6 | 5828.160 | Sur | Madrid |
| S-5003 | 2023-04-02 05:00:00 | Monitor 4K | 1 | 719.694 | Sur | Madrid |
| S-5004 | 2023-06-17 12:00:00 | Mouse Ergonómico | 2 | 1177.981 | Sur | Madrid |
| ... | ... | ... | ... | ... | ... | ... |
| S-5075 | 2023-05-17 22:00:00 | Monitor 4K | 2 | 2070.018 | Sur | Madrid |
| S-5076 | 2023-01-29 13:00:00 | Webcam HD | 1 | 786.708 | Sur | Madrid |
| S-5077 | 2023-06-09 11:00:00 | Mouse Ergonómico | 1 | 145.920 | Sur | Madrid |
| S-5078 | 2023-02-07 13:00:00 | Monitor 4K | 2 | 2304.480 | Sur | Madrid |
| S-5079 | 2023-05-01 11:00:00 | Docking Station | 7 | 1649.844 | Sur | Madrid |
160 rows × 6 columns
import json
with open("ventas_este_v2.json") as f:
data = json.load(f)
df_este = pd.json_normalize(data)
df_este = df_este[["data.id_registro", "data.payload.fecha_evento", "data.payload.comprador.ubicacion.ciudad", "data.payload.transaccion.detalles_producto.nombre_comercial", "data.payload.transaccion.cantidad_comprada", "data.payload.transaccion.detalles_producto.precio_lista", "data.payload.transaccion.detalles_producto.impuestos.monto_iva"]]
df_este = df_este.rename(columns={"data.id_registro": "id_transaccion", "data.payload.fecha_evento": "fecha", "data.payload.comprador.ubicacion.ciudad": "ciudad", "data.payload.transaccion.detalles_producto.nombre_comercial": "producto", "data.payload.transaccion.cantidad_comprada": "cantidad", "data.payload.transaccion.detalles_producto.precio_lista": "precio", "data.payload.transaccion.detalles_producto.impuestos.monto_iva": "iva"})
df_este["total_venta"] = (df_este["precio"] + df_este["iva"]) * df_este["cantidad"]
df_este = df_este.drop(columns=["precio", "iva"])
df_este["region"] = "Este"
df_este = df_este.set_index("id_transaccion")
df_este
| fecha | ciudad | producto | cantidad | total_venta | region | |
|---|---|---|---|---|---|---|
| id_transaccion | ||||||
| E-8000 | 2023-06-29 00:00:00 | Sevilla | Monitor 4K | 1 | 608.63 | Este |
| E-8001 | 2023-04-12 00:00:00 | Sevilla | Webcam HD | 1 | 1357.62 | Este |
| E-8002 | 2023-04-20 00:00:00 | Sevilla | Webcam HD | 2 | 2405.48 | Este |
| E-8003 | 2023-04-14 00:00:00 | Madrid | Webcam HD | 2 | 3037.10 | Este |
| E-8004 | 2023-01-03 00:00:00 | Sevilla | Monitor 4K | 2 | 3528.36 | Este |
| ... | ... | ... | ... | ... | ... | ... |
| E-8115 | 2023-04-05 00:00:00 | Valencia | Monitor 4K | 1 | 1003.09 | Este |
| E-8116 | 2023-04-23 00:00:00 | Valencia | Laptop Gamer | 2 | 2076.36 | Este |
| E-8117 | 2023-06-03 00:00:00 | Sevilla | Webcam HD | 2 | 1541.54 | Este |
| E-8118 | 2023-02-24 00:00:00 | Valencia | Monitor 4K | 2 | 1214.84 | Este |
| E-8119 | 2023-01-08 00:00:00 | Madrid | Mouse Ergonómico | 1 | 370.26 | Este |
120 rows × 6 columns
df = pd.concat([df_norte, df_sur, df_este])
df.to_csv(sep=",", encoding="utf-8", path_or_buf="ventas_consolidadas.csv")
df
| fecha | producto | cantidad | total_venta | region | ciudad | |
|---|---|---|---|---|---|---|
| id_transaccion | ||||||
| N-1000 | 2023-05-10 03:00:00 | Laptop Gamer | 1 | 970.00 | Norte | Madrid |
| N-1001 | 2023-06-18 09:00:00 | Mouse Ergonómico | 4 | 3708.00 | Norte | Madrid |
| N-1002 | 2023-03-10 05:00:00 | Monitor 4K | 2 | 2820.00 | Norte | Madrid |
| N-1003 | 2023-05-10 03:00:00 | Webcam HD | 2 | 2538.00 | Norte | Madrid |
| N-1004 | 2023-02-25 17:00:00 | Webcam HD | 2 | 908.00 | Norte | Madrid |
| ... | ... | ... | ... | ... | ... | ... |
| E-8115 | 2023-04-05 00:00:00 | Monitor 4K | 1 | 1003.09 | Este | Valencia |
| E-8116 | 2023-04-23 00:00:00 | Laptop Gamer | 2 | 2076.36 | Este | Valencia |
| E-8117 | 2023-06-03 00:00:00 | Webcam HD | 2 | 1541.54 | Este | Sevilla |
| E-8118 | 2023-02-24 00:00:00 | Monitor 4K | 2 | 1214.84 | Este | Valencia |
| E-8119 | 2023-01-08 00:00:00 | Mouse Ergonómico | 1 | 370.26 | Este | Madrid |
430 rows × 6 columns