Skip to content

Commit

Permalink
final
Browse files Browse the repository at this point in the history
  • Loading branch information
AgusBuu committed Jan 8, 2025
1 parent 9034085 commit 5afde73
Show file tree
Hide file tree
Showing 36 changed files with 151,766 additions and 0 deletions.
File renamed without changes.
File renamed without changes.
282 changes: 282 additions & 0 deletions 12. Transformaciones basicas/Transformaciones Básicas.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d7f80722-5c0c-470d-9def-c8e8584efdae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DataFrame Original (Leído desde el CSV):\n",
" ID Nombre Edad Genero Salario Departamento\n",
"0 1 Ana 25 F 50000.0 Ventas\n",
"1 2 Luis 30 M NaN Marketing\n",
"2 3 Carlos Treinta y cinco M 60000.0 Ventas\n",
"3 4 Sofia 40 F 55000.0 RRHH\n",
"4 5 NaN 45 M 70000.0 TI\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# --------------------------------------------\n",
"# 1. Leer el archivo CSV generado\n",
"# --------------------------------------------\n",
"# Leer el archivo CSV\n",
"df = pd.read_csv('C:/Users/layla/Escritorio/Fundamentos de ETL con Python/Codigos y Notebooks/12. Transformaciones basicas/datos_ejemplo.csv')\n",
"\n",
"# Mostrar el DataFrame original\n",
"print(\"DataFrame Original (Leído desde el CSV):\")\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "924fc2d4-7644-4c8d-b6cd-b6efca2e9830",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Selección de una columna: 'Nombre'\n",
"0 Ana\n",
"1 Luis\n",
"2 Carlos\n",
"3 Sofia\n",
"4 NaN\n",
"Name: Nombre, dtype: object\n",
"\n",
"Selección de múltiples columnas: 'Nombre' y 'Salario'\n",
" Nombre Salario\n",
"0 Ana 50000.0\n",
"1 Luis NaN\n",
"2 Carlos 60000.0\n",
"3 Sofia 55000.0\n",
"4 NaN 70000.0\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 2. Selección de columnas\n",
"# --------------------------------------------\n",
"print(\"\\nSelección de una columna: 'Nombre'\")\n",
"\n",
"# Selección de una columna (una serie)\n",
"print(df['Nombre'])\n",
"\n",
"print(\"\\nSelección de múltiples columnas: 'Nombre' y 'Salario'\")\n",
"# Selección de varias columnas (un DataFrame)\n",
"print(df[['Nombre', 'Salario']])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "301de0a2-485d-4630-8daa-efb71d93c9dd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Filtrado de filas donde 'Edad' es mayor a 30 años:\n",
" ID Nombre Edad Genero Salario Departamento\n",
"3 4 Sofia 40.0 F 55000.0 RRHH\n",
"4 5 NaN 45.0 M 70000.0 TI\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 3. Filtrado de filas por condición\n",
"# --------------------------------------------\n",
"\n",
"# Primero, convertimos la columna 'Edad' a valores numéricos, para que podamos filtrar correctamente\n",
"df['Edad'] = pd.to_numeric(df['Edad'], errors='coerce') # Convertir 'Edad' a valores numéricos\n",
"\n",
"print(\"\\nFiltrado de filas donde 'Edad' es mayor a 30 años:\")\n",
"# Filtrar las filas donde la edad sea mayor a 30\n",
"filtro_edad = df[df['Edad'] > 30]\n",
"print(filtro_edad)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e5c47454-8ec8-4da0-a990-a097af071d0d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Filtrado de filas donde 'Salario' sea mayor a 55000 y 'Genero' sea 'M':\n",
" ID Nombre Edad Genero Salario Departamento\n",
"2 3 Carlos NaN M 60000.0 Ventas\n",
"4 5 NaN 45.0 M 70000.0 TI\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 4. Filtrado con múltiples condiciones\n",
"# --------------------------------------------\n",
"print(\"\\nFiltrado de filas donde 'Salario' sea mayor a 55000 y 'Genero' sea 'M':\")\n",
"\n",
"# Filtrar filas usando múltiples condiciones\n",
"filtro_multiple = df[(df['Salario'] > 55000) & (df['Genero'] == 'M')]\n",
"print(filtro_multiple)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "738ca6bd-e964-4ff0-8d08-747f3d667837",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Uso de .loc[] para seleccionar filas y columnas por etiquetas:\n",
" Nombre Salario\n",
"3 Sofia 55000.0\n",
"4 NaN 70000.0\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 5. Uso de .loc[] para selección por etiquetas\n",
"# --------------------------------------------\n",
"print(\"\\nUso de .loc[] para seleccionar filas y columnas por etiquetas:\")\n",
"\n",
"# Usando .loc[] para seleccionar por etiquetas de fila y columna\n",
"loc_result = df.loc[df['Edad'] > 30, ['Nombre', 'Salario']]\n",
"print(loc_result)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2c7c57c7-40f4-4530-b79d-4489f62a003e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Uso de .iloc[] para seleccionar filas y columnas por posiciones:\n",
" ID Nombre Salario\n",
"1 2 Luis NaN\n",
"2 3 Carlos 60000.0\n",
"3 4 Sofia 55000.0\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 6. Uso de .iloc[] para selección por posiciones\n",
"# --------------------------------------------\n",
"print(\"\\nUso de .iloc[] para seleccionar filas y columnas por posiciones:\")\n",
"\n",
"# Usando .iloc[] para seleccionar filas y columnas por posición\n",
"iloc_result = df.iloc[1:4, [0, 1, 4]] # Filas 1 a 3 (index 1, 2, 3) y columnas 0, 1 y 4\n",
"print(iloc_result)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cc268ad1-7928-4b70-a9d6-a6e4165b8c27",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Uso de .query() para filtrar filas donde 'Salario' > 55000 y 'Genero' == 'M':\n",
" ID Nombre Edad Genero Salario Departamento\n",
"2 3 Carlos NaN M 60000.0 Ventas\n",
"4 5 NaN 45.0 M 70000.0 TI\n"
]
}
],
"source": [
"# --------------------------------------------\n",
"# 7. Uso de .query() para selección (Alternativa a condicionales)\n",
"# --------------------------------------------\n",
"print(\"\\nUso de .query() para filtrar filas donde 'Salario' > 55000 y 'Genero' == 'M':\")\n",
"\n",
"# Usando query() para filtrar con condiciones en formato de cadena\n",
"query_result = df.query(\"Salario > 55000 and Genero == 'M'\")\n",
"print(query_result)"
]
},
{
"cell_type": "markdown",
"id": "d69e5272-19a6-494c-ad1b-0719e2f0c05d",
"metadata": {},
"source": [
"# Ejercicio - Challenge"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "def84705-c939-4a11-8d30-500b4ed7e448",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
" \n",
"# Crear el DataFrame de ejemplo\n",
"data = {\n",
" 'id_venta': [1, 2, 3, 4, 5, 6],\n",
" 'producto': ['Camisa', 'Pantalón', 'Zapatos', 'Camisa', 'Pantalón', 'Zapatos'],\n",
" 'cantidad': [2, 3, 1, 2, 0, 5],\n",
" 'precio_unitario': [20.00, 40.00, 50.00, 20.00, 40.00, 50.00],\n",
" 'fecha': ['2024-05-01', '2024-05-03', '2024-05-02', '2024-05-01', '2024-05-02', '2024-05-05']\n",
"}\n",
" \n",
"df = pd.DataFrame(data)\n",
"print(df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
6 changes: 6 additions & 0 deletions 12. Transformaciones basicas/datos_ejemplo.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ID,Nombre,Edad,Genero,Salario,Departamento
1,Ana,25,F,50000.0,Ventas
2,Luis,30,M,,Marketing
3,Carlos,Treinta y cinco,M,60000.0,Ventas
4,Sofia,40,F,55000.0,RRHH
5,,45,M,70000.0,TI
Loading

0 comments on commit 5afde73

Please sign in to comment.