{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# DataFrames Merge" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import pandas as pd " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data " ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
diddname
01Ivan
12Asen
23Maria
34Stoyan
45Aleks
56Svetlin
\n", "
" ], "text/plain": [ " did dname\n", "0 1 Ivan\n", "1 2 Asen\n", "2 3 Maria\n", "3 4 Stoyan\n", "4 5 Aleks\n", "5 6 Svetlin" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "devs = pd.read_csv(\"datasets/developers/developers.csv\", sep=\";\")\n", "devs" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
didlanguage
02\"C++\"
13\"Python\"
23\"R\"
36\"Java\"
\n", "
" ], "text/plain": [ " did language\n", "0 2 \"C++\"\n", "1 3 \"Python\"\n", "2 3 \"R\"\n", "3 6 \"Java\"" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "langs = pd.read_csv(\"datasets/developers/languages.csv\", sep=\";\")\n", "langs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inner Join" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
diddnamelanguage
02Asen\"C++\"
13Maria\"Python\"
23Maria\"R\"
36Svetlin\"Java\"
\n", "
" ], "text/plain": [ " did dname language\n", "0 2 Asen \"C++\"\n", "1 3 Maria \"Python\"\n", "2 3 Maria \"R\"\n", "3 6 Svetlin \"Java\"" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev_langs_inner = pd.merge(devs,langs,on=\"did\",how='inner')\n", "dev_langs_inner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Outer join" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
diddnamelanguage
01IvanNaN
12Asen\"C++\"
23Maria\"Python\"
33Maria\"R\"
44StoyanNaN
55AleksNaN
66Svetlin\"Java\"
\n", "
" ], "text/plain": [ " did dname language\n", "0 1 Ivan NaN\n", "1 2 Asen \"C++\"\n", "2 3 Maria \"Python\"\n", "3 3 Maria \"R\"\n", "4 4 Stoyan NaN\n", "5 5 Aleks NaN\n", "6 6 Svetlin \"Java\"" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev_langs_outer = pd.merge(devs,langs,on=\"did\",how='outer')\n", "dev_langs_outer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Left outer join" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
diddnamelanguage
01IvanNaN
12Asen\"C++\"
23Maria\"Python\"
33Maria\"R\"
44StoyanNaN
55AleksNaN
66Svetlin\"Java\"
\n", "
" ], "text/plain": [ " did dname language\n", "0 1 Ivan NaN\n", "1 2 Asen \"C++\"\n", "2 3 Maria \"Python\"\n", "3 3 Maria \"R\"\n", "4 4 Stoyan NaN\n", "5 5 Aleks NaN\n", "6 6 Svetlin \"Java\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev_langs_left_outer = pd.merge(devs,langs,on=\"did\",how='left')\n", "dev_langs_left_outer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Right outer join" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
diddnamelanguage
02Asen\"C++\"
13Maria\"Python\"
23Maria\"R\"
36Svetlin\"Java\"
\n", "
" ], "text/plain": [ " did dname language\n", "0 2 Asen \"C++\"\n", "1 3 Maria \"Python\"\n", "2 3 Maria \"R\"\n", "3 6 Svetlin \"Java\"" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev_langs_right_outer = pd.merge(devs,langs,on=\"did\",how='right')\n", "dev_langs_right_outer" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }