-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
151,766 additions
and
0 deletions.
There are no files selected for viewing
File renamed without changes.
File renamed without changes.
282 changes: 282 additions & 0 deletions
282
12. Transformaciones basicas/Transformaciones Básicas.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,282 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "d7f80722-5c0c-470d-9def-c8e8584efdae", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"DataFrame Original (Leído desde el CSV):\n", | ||
" ID Nombre Edad Genero Salario Departamento\n", | ||
"0 1 Ana 25 F 50000.0 Ventas\n", | ||
"1 2 Luis 30 M NaN Marketing\n", | ||
"2 3 Carlos Treinta y cinco M 60000.0 Ventas\n", | ||
"3 4 Sofia 40 F 55000.0 RRHH\n", | ||
"4 5 NaN 45 M 70000.0 TI\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"# --------------------------------------------\n", | ||
"# 1. Leer el archivo CSV generado\n", | ||
"# --------------------------------------------\n", | ||
"# Leer el archivo CSV\n", | ||
"df = pd.read_csv('C:/Users/layla/Escritorio/Fundamentos de ETL con Python/Codigos y Notebooks/12. Transformaciones basicas/datos_ejemplo.csv')\n", | ||
"\n", | ||
"# Mostrar el DataFrame original\n", | ||
"print(\"DataFrame Original (Leído desde el CSV):\")\n", | ||
"print(df)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "924fc2d4-7644-4c8d-b6cd-b6efca2e9830", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Selección de una columna: 'Nombre'\n", | ||
"0 Ana\n", | ||
"1 Luis\n", | ||
"2 Carlos\n", | ||
"3 Sofia\n", | ||
"4 NaN\n", | ||
"Name: Nombre, dtype: object\n", | ||
"\n", | ||
"Selección de múltiples columnas: 'Nombre' y 'Salario'\n", | ||
" Nombre Salario\n", | ||
"0 Ana 50000.0\n", | ||
"1 Luis NaN\n", | ||
"2 Carlos 60000.0\n", | ||
"3 Sofia 55000.0\n", | ||
"4 NaN 70000.0\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 2. Selección de columnas\n", | ||
"# --------------------------------------------\n", | ||
"print(\"\\nSelección de una columna: 'Nombre'\")\n", | ||
"\n", | ||
"# Selección de una columna (una serie)\n", | ||
"print(df['Nombre'])\n", | ||
"\n", | ||
"print(\"\\nSelección de múltiples columnas: 'Nombre' y 'Salario'\")\n", | ||
"# Selección de varias columnas (un DataFrame)\n", | ||
"print(df[['Nombre', 'Salario']])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "301de0a2-485d-4630-8daa-efb71d93c9dd", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Filtrado de filas donde 'Edad' es mayor a 30 años:\n", | ||
" ID Nombre Edad Genero Salario Departamento\n", | ||
"3 4 Sofia 40.0 F 55000.0 RRHH\n", | ||
"4 5 NaN 45.0 M 70000.0 TI\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 3. Filtrado de filas por condición\n", | ||
"# --------------------------------------------\n", | ||
"\n", | ||
"# Primero, convertimos la columna 'Edad' a valores numéricos, para que podamos filtrar correctamente\n", | ||
"df['Edad'] = pd.to_numeric(df['Edad'], errors='coerce') # Convertir 'Edad' a valores numéricos\n", | ||
"\n", | ||
"print(\"\\nFiltrado de filas donde 'Edad' es mayor a 30 años:\")\n", | ||
"# Filtrar las filas donde la edad sea mayor a 30\n", | ||
"filtro_edad = df[df['Edad'] > 30]\n", | ||
"print(filtro_edad)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "e5c47454-8ec8-4da0-a990-a097af071d0d", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Filtrado de filas donde 'Salario' sea mayor a 55000 y 'Genero' sea 'M':\n", | ||
" ID Nombre Edad Genero Salario Departamento\n", | ||
"2 3 Carlos NaN M 60000.0 Ventas\n", | ||
"4 5 NaN 45.0 M 70000.0 TI\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 4. Filtrado con múltiples condiciones\n", | ||
"# --------------------------------------------\n", | ||
"print(\"\\nFiltrado de filas donde 'Salario' sea mayor a 55000 y 'Genero' sea 'M':\")\n", | ||
"\n", | ||
"# Filtrar filas usando múltiples condiciones\n", | ||
"filtro_multiple = df[(df['Salario'] > 55000) & (df['Genero'] == 'M')]\n", | ||
"print(filtro_multiple)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "738ca6bd-e964-4ff0-8d08-747f3d667837", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Uso de .loc[] para seleccionar filas y columnas por etiquetas:\n", | ||
" Nombre Salario\n", | ||
"3 Sofia 55000.0\n", | ||
"4 NaN 70000.0\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 5. Uso de .loc[] para selección por etiquetas\n", | ||
"# --------------------------------------------\n", | ||
"print(\"\\nUso de .loc[] para seleccionar filas y columnas por etiquetas:\")\n", | ||
"\n", | ||
"# Usando .loc[] para seleccionar por etiquetas de fila y columna\n", | ||
"loc_result = df.loc[df['Edad'] > 30, ['Nombre', 'Salario']]\n", | ||
"print(loc_result)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "2c7c57c7-40f4-4530-b79d-4489f62a003e", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Uso de .iloc[] para seleccionar filas y columnas por posiciones:\n", | ||
" ID Nombre Salario\n", | ||
"1 2 Luis NaN\n", | ||
"2 3 Carlos 60000.0\n", | ||
"3 4 Sofia 55000.0\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 6. Uso de .iloc[] para selección por posiciones\n", | ||
"# --------------------------------------------\n", | ||
"print(\"\\nUso de .iloc[] para seleccionar filas y columnas por posiciones:\")\n", | ||
"\n", | ||
"# Usando .iloc[] para seleccionar filas y columnas por posición\n", | ||
"iloc_result = df.iloc[1:4, [0, 1, 4]] # Filas 1 a 3 (index 1, 2, 3) y columnas 0, 1 y 4\n", | ||
"print(iloc_result)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "cc268ad1-7928-4b70-a9d6-a6e4165b8c27", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"Uso de .query() para filtrar filas donde 'Salario' > 55000 y 'Genero' == 'M':\n", | ||
" ID Nombre Edad Genero Salario Departamento\n", | ||
"2 3 Carlos NaN M 60000.0 Ventas\n", | ||
"4 5 NaN 45.0 M 70000.0 TI\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# --------------------------------------------\n", | ||
"# 7. Uso de .query() para selección (Alternativa a condicionales)\n", | ||
"# --------------------------------------------\n", | ||
"print(\"\\nUso de .query() para filtrar filas donde 'Salario' > 55000 y 'Genero' == 'M':\")\n", | ||
"\n", | ||
"# Usando query() para filtrar con condiciones en formato de cadena\n", | ||
"query_result = df.query(\"Salario > 55000 and Genero == 'M'\")\n", | ||
"print(query_result)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "d69e5272-19a6-494c-ad1b-0719e2f0c05d", | ||
"metadata": {}, | ||
"source": [ | ||
"# Ejercicio - Challenge" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "def84705-c939-4a11-8d30-500b4ed7e448", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
" \n", | ||
"# Crear el DataFrame de ejemplo\n", | ||
"data = {\n", | ||
" 'id_venta': [1, 2, 3, 4, 5, 6],\n", | ||
" 'producto': ['Camisa', 'Pantalón', 'Zapatos', 'Camisa', 'Pantalón', 'Zapatos'],\n", | ||
" 'cantidad': [2, 3, 1, 2, 0, 5],\n", | ||
" 'precio_unitario': [20.00, 40.00, 50.00, 20.00, 40.00, 50.00],\n", | ||
" 'fecha': ['2024-05-01', '2024-05-03', '2024-05-02', '2024-05-01', '2024-05-02', '2024-05-05']\n", | ||
"}\n", | ||
" \n", | ||
"df = pd.DataFrame(data)\n", | ||
"print(df)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
ID,Nombre,Edad,Genero,Salario,Departamento | ||
1,Ana,25,F,50000.0,Ventas | ||
2,Luis,30,M,,Marketing | ||
3,Carlos,Treinta y cinco,M,60000.0,Ventas | ||
4,Sofia,40,F,55000.0,RRHH | ||
5,,45,M,70000.0,TI |
Oops, something went wrong.