{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DataFrames Merge"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" dname | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Ivan | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Asen | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Maria | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Stoyan | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Aleks | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" Svetlin | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did dname\n",
"0 1 Ivan\n",
"1 2 Asen\n",
"2 3 Maria\n",
"3 4 Stoyan\n",
"4 5 Aleks\n",
"5 6 Svetlin"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"devs = pd.read_csv(\"datasets/developers/developers.csv\", sep=\";\")\n",
"devs"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" language | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" \"C++\" | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" \"Python\" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" \"R\" | \n",
"
\n",
" \n",
" 3 | \n",
" 6 | \n",
" \"Java\" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did language\n",
"0 2 \"C++\"\n",
"1 3 \"Python\"\n",
"2 3 \"R\"\n",
"3 6 \"Java\""
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"langs = pd.read_csv(\"datasets/developers/languages.csv\", sep=\";\")\n",
"langs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inner Join"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" dname | \n",
" language | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" Asen | \n",
" \"C++\" | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" Maria | \n",
" \"Python\" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Maria | \n",
" \"R\" | \n",
"
\n",
" \n",
" 3 | \n",
" 6 | \n",
" Svetlin | \n",
" \"Java\" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did dname language\n",
"0 2 Asen \"C++\"\n",
"1 3 Maria \"Python\"\n",
"2 3 Maria \"R\"\n",
"3 6 Svetlin \"Java\""
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev_langs_inner = pd.merge(devs,langs,on=\"did\",how='inner')\n",
"dev_langs_inner"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Outer join"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" dname | \n",
" language | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Ivan | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Asen | \n",
" \"C++\" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Maria | \n",
" \"Python\" | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Maria | \n",
" \"R\" | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Stoyan | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" Aleks | \n",
" NaN | \n",
"
\n",
" \n",
" 6 | \n",
" 6 | \n",
" Svetlin | \n",
" \"Java\" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did dname language\n",
"0 1 Ivan NaN\n",
"1 2 Asen \"C++\"\n",
"2 3 Maria \"Python\"\n",
"3 3 Maria \"R\"\n",
"4 4 Stoyan NaN\n",
"5 5 Aleks NaN\n",
"6 6 Svetlin \"Java\""
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev_langs_outer = pd.merge(devs,langs,on=\"did\",how='outer')\n",
"dev_langs_outer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Left outer join"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" dname | \n",
" language | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Ivan | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Asen | \n",
" \"C++\" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Maria | \n",
" \"Python\" | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Maria | \n",
" \"R\" | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Stoyan | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" Aleks | \n",
" NaN | \n",
"
\n",
" \n",
" 6 | \n",
" 6 | \n",
" Svetlin | \n",
" \"Java\" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did dname language\n",
"0 1 Ivan NaN\n",
"1 2 Asen \"C++\"\n",
"2 3 Maria \"Python\"\n",
"3 3 Maria \"R\"\n",
"4 4 Stoyan NaN\n",
"5 5 Aleks NaN\n",
"6 6 Svetlin \"Java\""
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev_langs_left_outer = pd.merge(devs,langs,on=\"did\",how='left')\n",
"dev_langs_left_outer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Right outer join"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" did | \n",
" dname | \n",
" language | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" Asen | \n",
" \"C++\" | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" Maria | \n",
" \"Python\" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Maria | \n",
" \"R\" | \n",
"
\n",
" \n",
" 3 | \n",
" 6 | \n",
" Svetlin | \n",
" \"Java\" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" did dname language\n",
"0 2 Asen \"C++\"\n",
"1 3 Maria \"Python\"\n",
"2 3 Maria \"R\"\n",
"3 6 Svetlin \"Java\""
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev_langs_right_outer = pd.merge(devs,langs,on=\"did\",how='right')\n",
"dev_langs_right_outer"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}