[notebooks] Update notebooks
author: Vincent Michel <vincent.michel@logilab.fr>
Tue, 01 Jul 2014 14:43:49 +0200
changeset 456 d93286fdd149
parent 454 0907b7cfbd51
child 458 9527d4b3d381
[notebooks] Update notebooks
notebooks/Named Entities Matching with Nazca.ipynb
notebooks/Record linkage with Nazca - Example Dbpedia - INSEE.ipynb
notebooks/Record linkage with Nazca - part 1 - Introduction.ipynb
notebooks/Record linkage with Nazca - part 2 - Normalization and blockings.ipynb
notebooks/Record linkage with Nazca - part 3 - Putting it all together.ipynb
--- a/notebooks/Named Entities Matching with Nazca.ipynb	Mon Jun 30 14:12:39 2014 +0000
+++ b/notebooks/Named Entities Matching with Nazca.ipynb	Tue Jul 01 14:43:49 2014 +0200
@@ -33,13 +33,13 @@
         "<iframe src=http://en.mobile.wikipedia.org/wiki/Named-entity_recognition?useformat=mobile width=700 height=350></iframe>"
        ],
        "output_type": "pyout",
-       "prompt_number": 2,
+       "prompt_number": 1,
        "text": [
-        "<IPython.core.display.HTML at 0x7fd0000698d0>"
+        "<IPython.core.display.HTML at 0x7f706c036950>"
        ]
       }
      ],
-     "prompt_number": 2
+     "prompt_number": 1
     },
     {
      "cell_type": "markdown",
@@ -88,7 +88,7 @@
        ]
       }
      ],
-     "prompt_number": 16
+     "prompt_number": 2
     },
     {
      "cell_type": "markdown",
@@ -123,7 +123,7 @@
        ]
       }
      ],
-     "prompt_number": 17
+     "prompt_number": 3
     },
     {
      "cell_type": "markdown",
@@ -173,7 +173,7 @@
        ]
       }
      ],
-     "prompt_number": 18
+     "prompt_number": 4
     },
     {
      "cell_type": "code",
@@ -222,7 +222,7 @@
        ]
       }
      ],
-     "prompt_number": 19
+     "prompt_number": 5
     },
     {
      "cell_type": "markdown",
@@ -278,7 +278,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 20
+     "prompt_number": 6
     },
     {
      "cell_type": "code",
@@ -297,7 +297,7 @@
        ]
       }
      ],
-     "prompt_number": 21
+     "prompt_number": 7
     },
     {
      "cell_type": "markdown",
@@ -319,7 +319,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 22
+     "prompt_number": 8
     },
     {
      "cell_type": "code",
@@ -338,7 +338,7 @@
        ]
       }
      ],
-     "prompt_number": 23
+     "prompt_number": 9
     },
     {
      "cell_type": "markdown",
@@ -366,7 +366,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 24
+     "prompt_number": 10
     },
     {
      "cell_type": "code",
@@ -385,7 +385,7 @@
        ]
       }
      ],
-     "prompt_number": 25
+     "prompt_number": 11
     },
     {
      "cell_type": "markdown",
@@ -410,7 +410,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 26
+     "prompt_number": 12
     },
     {
      "cell_type": "code",
@@ -429,7 +429,7 @@
        ]
       }
      ],
-     "prompt_number": 27
+     "prompt_number": 13
     },
     {
      "cell_type": "markdown",
@@ -459,7 +459,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 28
+     "prompt_number": 14
     },
     {
      "cell_type": "markdown",
@@ -489,7 +489,7 @@
        ]
       }
      ],
-     "prompt_number": 29
+     "prompt_number": 15
     },
     {
      "cell_type": "code",
@@ -509,7 +509,7 @@
        ]
       }
      ],
-     "prompt_number": 30
+     "prompt_number": 16
     },
     {
      "cell_type": "code",
@@ -529,7 +529,7 @@
        ]
       }
      ],
-     "prompt_number": 31
+     "prompt_number": 17
     },
     {
      "cell_type": "markdown",
@@ -559,7 +559,7 @@
        ]
       }
      ],
-     "prompt_number": 32
+     "prompt_number": 18
     },
     {
      "cell_type": "code",
@@ -579,7 +579,7 @@
        ]
       }
      ],
-     "prompt_number": 33
+     "prompt_number": 19
     },
     {
      "cell_type": "markdown",
@@ -610,7 +610,7 @@
        ]
       }
      ],
-     "prompt_number": 34
+     "prompt_number": 20
     },
     {
      "cell_type": "code",
@@ -630,7 +630,7 @@
        ]
       }
      ],
-     "prompt_number": 35
+     "prompt_number": 21
     },
     {
      "cell_type": "code",
@@ -650,7 +650,7 @@
        ]
       }
      ],
-     "prompt_number": 36
+     "prompt_number": 22
     },
     {
      "cell_type": "markdown",
@@ -680,7 +680,7 @@
        ]
       }
      ],
-     "prompt_number": 37
+     "prompt_number": 23
     },
     {
      "cell_type": "code",
@@ -700,7 +700,7 @@
        ]
       }
      ],
-     "prompt_number": 38
+     "prompt_number": 24
     },
     {
      "cell_type": "code",
@@ -721,7 +721,7 @@
        ]
       }
      ],
-     "prompt_number": 39
+     "prompt_number": 25
     },
     {
      "cell_type": "markdown",
@@ -751,7 +751,7 @@
        ]
       }
      ],
-     "prompt_number": 40
+     "prompt_number": 26
     },
     {
      "cell_type": "code",
@@ -771,7 +771,7 @@
        ]
       }
      ],
-     "prompt_number": 41
+     "prompt_number": 27
     },
     {
      "cell_type": "markdown",
@@ -820,7 +820,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 42
+     "prompt_number": 28
     },
     {
      "cell_type": "code",
@@ -842,7 +842,7 @@
        ]
       }
      ],
-     "prompt_number": 43
+     "prompt_number": 29
     },
     {
      "cell_type": "markdown",
@@ -880,7 +880,7 @@
        ]
       }
      ],
-     "prompt_number": 44
+     "prompt_number": 30
     },
     {
      "cell_type": "markdown",
@@ -911,7 +911,7 @@
        ]
       }
      ],
-     "prompt_number": 45
+     "prompt_number": 31
     },
     {
      "cell_type": "code",
@@ -935,7 +935,7 @@
        ]
       }
      ],
-     "prompt_number": 46
+     "prompt_number": 32
     },
     {
      "cell_type": "markdown",
@@ -968,7 +968,7 @@
        ]
       }
      ],
-     "prompt_number": 47
+     "prompt_number": 33
     },
     {
      "cell_type": "markdown",
@@ -996,7 +996,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 48
+     "prompt_number": 34
     },
     {
      "cell_type": "markdown",
@@ -1036,7 +1036,7 @@
        ]
       }
      ],
-     "prompt_number": 49
+     "prompt_number": 35
     },
     {
      "cell_type": "code",
@@ -1060,7 +1060,7 @@
        ]
       }
      ],
-     "prompt_number": 50
+     "prompt_number": 36
     },
     {
      "cell_type": "markdown",
@@ -1096,7 +1096,7 @@
        ]
       }
      ],
-     "prompt_number": 51
+     "prompt_number": 37
     },
     {
      "cell_type": "code",
@@ -1118,7 +1118,7 @@
        ]
       }
      ],
-     "prompt_number": 52
+     "prompt_number": 38
     },
     {
      "cell_type": "markdown",
@@ -1155,7 +1155,7 @@
        ]
       }
      ],
-     "prompt_number": 53
+     "prompt_number": 39
     },
     {
      "cell_type": "code",
@@ -1177,7 +1177,7 @@
        ]
       }
      ],
-     "prompt_number": 54
+     "prompt_number": 40
     },
     {
      "cell_type": "markdown",
@@ -1219,7 +1219,7 @@
        ]
       }
      ],
-     "prompt_number": 56
+     "prompt_number": 41
     },
     {
      "cell_type": "code",
@@ -1241,7 +1241,7 @@
        ]
       }
      ],
-     "prompt_number": 57
+     "prompt_number": 42
     },
     {
      "cell_type": "markdown",
@@ -1270,7 +1270,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 1
+     "prompt_number": 43
     },
     {
      "cell_type": "markdown",
@@ -1297,7 +1297,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 2
+     "prompt_number": 44
     },
     {
      "cell_type": "markdown",
@@ -1321,7 +1321,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 3
+     "prompt_number": 45
     },
     {
      "cell_type": "markdown",
@@ -1351,16 +1351,16 @@
      "outputs": [
       {
        "html": [
-        "<a href=\"http://dbpedia.org/resource/Matteo_Renzi\">Matteo Renzi</a> had the best showing of any <a href=\"http://dbpedia.org/resource/European\">European</a> leader in parliamentary voting, reaching a level no party has seen in an <a href=\"http://dbpedia.org/resource/Italian_election\">Italian election</a> since <a href=\"http://dbpedia.org/resource/1958\">1958</a>.<br/><br/><a href=\"http://dbpedia.org/resource/Satellite\">Satellite</a> data from <a href=\"http://dbpedia.org/resource/Malaysia_Airlines\">Malaysia Airlines</a> <a href=\"http://dbpedia.org/resource/Flight\">Flight</a> <a href=\"http://dbpedia.org/resource/370\">370</a> was released after pressure from relatives of the mostly <a href=\"http://dbpedia.org/resource/Chinese\">Chinese</a> passengers and from the <a href=\"http://dbpedia.org/resource/Chinese_government\">Chinese government</a>.<br/><br/><a href=\"http://dbpedia.org/resource/China\">China</a> and <a href=\"http://dbpedia.org/resource/Vietnam\">Vietnam</a> traded accusations over the sinking of a <a href=\"http://dbpedia.org/resource/Vietnamese\">Vietnamese</a> fishing vessel near a <a href=\"http://dbpedia.org/resource/Chinese\">Chinese</a> oil rig in disputed waters off <a href=\"http://dbpedia.org/resource/Vietnam\">Vietnam</a>\u2019s coast.<br/><br/>As tensions grow between <a href=\"http://dbpedia.org/resource/China\">China</a> and <a href=\"http://dbpedia.org/resource/Vietnam\">Vietnam</a> over the ramming and sinking of a <a href=\"http://dbpedia.org/resource/Vietnamese\">Vietnamese</a> fishing boat by a <a href=\"http://dbpedia.org/resource/Chinese\">Chinese</a> vessel, reaction in <a href=\"http://dbpedia.org/resource/China\">China</a> is overwhelmingly supportive of the incident.<br/><br/>The popular microblog of a professor at <a href=\"http://dbpedia.org/resource/Peking_University\">Peking University</a> has been blocked, perhaps because he posted a comment about the military suppression of the <a 
href=\"http://dbpedia.org/resource/1989\">1989</a> <a href=\"http://dbpedia.org/resource/Tiananmen\">Tiananmen</a> protest movement.<br/><br/><a href=\"http://dbpedia.org/resource/Dalia_Grybauskaite\">Dalia Grybauskaite</a>, <a href=\"http://dbpedia.org/resource/58\">58</a>, was re-elected president of <a href=\"http://dbpedia.org/resource/Lithuania\">Lithuania</a> on <a href=\"http://dbpedia.org/resource/Sunday\">Sunday</a> after beating the <a href=\"http://dbpedia.org/resource/Social_Democrat\">Social Democrat</a> candidate in a runoff.<br/><br/>Ten fishermen were rescued after their boat was struck by a <a href=\"http://dbpedia.org/resource/Chinese\">Chinese</a> vessel in the disputed waters in the <a href=\"http://dbpedia.org/resource/South_China_Sea\">South China Sea</a>, state news media reported.<br/><br/>The general contractor that helped oversee the construction of the <a href=\"http://dbpedia.org/resource/Abu_Dhabi\">Abu Dhabi</a> campus is run by a trustee of <a href=\"http://dbpedia.org/resource/N_Y\">N.Y</a>.<a href=\"http://dbpedia.org/resource/U\">U</a>.\u2019s board.<br/><br/>The <a href=\"http://dbpedia.org/resource/Malawi\">Malawi</a> <a href=\"http://dbpedia.org/resource/Electoral_Commission\">Electoral Commission</a> must manually count the <a href=\"http://dbpedia.org/resource/20\">20</a> presidential election votes, a senior official said <a href=\"http://dbpedia.org/resource/Monday\">Monday</a>. 
It is likely to take two months before an official result is announced.<br/><br/>An express train plowed into a parked freight train in northern <a href=\"http://dbpedia.org/resource/India\">India</a> killing at least <a href=\"http://dbpedia.org/resource/40\">40</a> people and reducing cars to twisted metal, officials said.<br/><br/><a href=\"http://dbpedia.org/resource/United_Nations\">United Nations</a> officials demanded a halt to the expulsion of thousands of citizens of the <a href=\"http://dbpedia.org/resource/Democratic_Republic\">Democratic Republic</a> of <a href=\"http://dbpedia.org/resource/Congo\">Congo</a> from the <a href=\"http://dbpedia.org/resource/Congo_Republic\">Congo Republic</a>.<br/><br/><a href=\"http://dbpedia.org/resource/Flooding\">Flooding</a> in southern <a href=\"http://dbpedia.org/resource/China\">China</a> forced almost half a million people from their homes, the government said <a href=\"http://dbpedia.org/resource/Monday\">Monday</a>.<br/><br/><a href=\"http://dbpedia.org/resource/Residents\">Residents</a> of <a href=\"http://dbpedia.org/resource/Castrillo_Matajud%C3%ADos\">Castrillo Matajud\u00edos</a>, which loosely translates as Little <a href=\"http://dbpedia.org/resource/Fort\">Fort</a> of <a href=\"http://dbpedia.org/resource/Jew\">Jew</a> <a href=\"http://dbpedia.org/resource/Killers\">Killers</a>, have voted to change the name of their village.<br/><br/><a href=\"http://dbpedia.org/resource/Pfizer\">Pfizer</a> said it \u201cdoes not intend to make an offer for <a href=\"http://dbpedia.org/resource/AstraZeneca\">AstraZeneca</a>\u201d in the wake of <a href=\"http://dbpedia.org/resource/AstraZeneca\">AstraZeneca</a>\u2019s rejection of a $<a href=\"http://dbpedia.org/resource/119\">119</a> billion bid.<br/><br/><a href=\"http://dbpedia.org/resource/United_States\">United States</a> <a href=\"http://dbpedia.org/resource/Special_Operations\">Special Operations</a> troops are forming elite units in <a 
href=\"http://dbpedia.org/resource/Libya\">Libya</a>, <a href=\"http://dbpedia.org/resource/Niger\">Niger</a>, <a href=\"http://dbpedia.org/resource/Mauritania\">Mauritania</a> and <a href=\"http://dbpedia.org/resource/Mali\">Mali</a> in the war against <a href=\"http://dbpedia.org/resource/Al_Qaeda\">Al Qaeda</a>\u2019s affiliates in <a href=\"http://dbpedia.org/resource/Africa\">Africa</a>.<br/><br/><a href=\"http://dbpedia.org/resource/The_military\">The military</a> commander said he would not disclose further details out of concern for the girls\u2019 safety.<br/><br/>In <a href=\"http://dbpedia.org/resource/France\">France</a>, <a href=\"http://dbpedia.org/resource/Britain\">Britain</a> and elsewhere, voters turned against traditional parties to support anti-immigrant groups opposed to the <a href=\"http://dbpedia.org/resource/European_Union\">European Union</a>.<br/><br/>When the <a href=\"http://dbpedia.org/resource/World_Cup\">World Cup</a> begins next month, many of the $<a href=\"http://dbpedia.org/resource/1\">1</a>.<a href=\"http://dbpedia.org/resource/4\">4</a> billion-worth of projects in <a href=\"http://dbpedia.org/resource/Cuiab%C3%A1\">Cuiab\u00e1</a>, one of the <a href=\"http://dbpedia.org/resource/12\">12</a> <a href=\"http://dbpedia.org/resource/Brazilian\">Brazilian</a> host cities, will be far from completion.<br/><br/><a href=\"http://dbpedia.org/resource/Narendra_Modi\">Narendra Modi</a> was sworn in on <a href=\"http://dbpedia.org/resource/Monday\">Monday</a> as <a href=\"http://dbpedia.org/resource/India\">India</a>\u2019s prime minister. 
Among those attending the ceremony was <a href=\"http://dbpedia.org/resource/Prime_Minister\">Prime Minister</a> <a href=\"http://dbpedia.org/resource/Nawaz_Sharif\">Nawaz Sharif</a> of <a href=\"http://dbpedia.org/resource/Pakistan\">Pakistan</a>, hinting that the two countries may revive a moribund peace process.<br/><br/><a href=\"http://dbpedia.org/resource/Ukrainian\">Ukrainian</a> soldiers used fighter jets and fought a ground battle against the pro-Russian forces, who had seized the airport in <a href=\"http://dbpedia.org/resource/Donetsk\">Donetsk</a> after elections that seemed to marginalize them."
+        "The discovery of the teenagers\u2019 bodies in the <a href=\"http://dbpedia.org/resource/West_Bank\">West Bank</a> prompted vows of retaliation by <a href=\"http://dbpedia.org/resource/Israel\">Israel</a>, which blamed the <a href=\"http://dbpedia.org/resource/Palestinian\">Palestinian</a> group <a href=\"http://dbpedia.org/resource/Hamas\">Hamas</a> for the killings.<br/><br/><a href=\"http://dbpedia.org/resource/The_South\">The South</a> <a href=\"http://dbpedia.org/resource/African\">African</a> track star\u2019s agent and friend testified that the couple\u2019s relationship was strong and that he did not intend to kill her.<br/><br/>The <a href=\"http://dbpedia.org/resource/Japanese_prime_minister\">Japanese prime minister</a> announced that his government would reinterpret the antiwar <a href=\"http://dbpedia.org/resource/Constitution\">Constitution</a> to allow the armed forces to come to the aid of friendly nations.<br/><br/>The first clues that led to the grisly discovery of the bodies came only hours after their abduction in the <a href=\"http://dbpedia.org/resource/West_Bank\">West Bank</a> was reported.<br/><br/>The lawmakers were under pressure to name an inclusive government as insurgents mount a violent challenge north and west of <a href=\"http://dbpedia.org/resource/Baghdad\">Baghdad</a>.<br/><br/>The only viable political future for the country is federation. But <a href=\"http://dbpedia.org/resource/America\">America</a>\u2019s first priority is to see <a href=\"http://dbpedia.org/resource/ISIS\">ISIS</a> crushed.<br/><br/><a href=\"http://dbpedia.org/resource/President\">President</a> <a href=\"http://dbpedia.org/resource/Petro\">Petro</a> <a href=\"http://dbpedia.org/resource/O\">O</a>. 
<a href=\"http://dbpedia.org/resource/Poroshenko\">Poroshenko</a> said he would resume full-scale efforts to quash the pro-Russian uprising in eastern <a href=\"http://dbpedia.org/resource/Ukraine\">Ukraine</a>.<br/><br/><a href=\"http://dbpedia.org/resource/Nicolas_Sarkozy\">Nicolas Sarkozy</a>, the former <a href=\"http://dbpedia.org/resource/French_president\">French president</a>, has been under scrutiny for possible financial irregularities in his <a href=\"http://dbpedia.org/resource/2007\">2007</a> campaign and for other alleged offenses.<br/><br/>A huge throng of people, mostly young, took to <a href=\"http://dbpedia.org/resource/Hong_Kong\">Hong Kong</a>\u2019s streets <a href=\"http://dbpedia.org/resource/Tuesday\">Tuesday</a>, defying <a href=\"http://dbpedia.org/resource/Beijing\">Beijing</a>\u2019s dwindling tolerance for challenges to its control.<br/><br/><a href=\"http://dbpedia.org/resource/Myanmar\">Myanmar</a> is enjoying some new diplomatic clout, leading <a href=\"http://dbpedia.org/resource/China\">China</a> to court the country as <a href=\"http://dbpedia.org/resource/Beijing\">Beijing</a> presses its territorial claims in the <a href=\"http://dbpedia.org/resource/South_China_Sea\">South China Sea</a>.<br/><br/>The last remaining <a href=\"http://dbpedia.org/resource/African\">African</a> teams in the <a href=\"http://dbpedia.org/resource/World_Cup\">World Cup</a>, <a href=\"http://dbpedia.org/resource/Algeria\">Algeria</a> and <a href=\"http://dbpedia.org/resource/Nigeria\">Nigeria</a>, were eliminated on <a href=\"http://dbpedia.org/resource/Monday\">Monday</a>, ensuring that the continent would once again remember the <a href=\"http://dbpedia.org/resource/2014\">2014</a> event for off-the-field squabbles.<br/><br/>As <a href=\"http://dbpedia.org/resource/Hong_Kong\">Hong Kong</a> prepared for its annual pro-democracy march <a href=\"http://dbpedia.org/resource/Tuesday\">Tuesday</a>, a survey of residents found more discontent than ever 
with the <a href=\"http://dbpedia.org/resource/Chinese_government\">Chinese government</a>\u2019s policies toward the city, especially among the young.<br/><br/><a href=\"http://dbpedia.org/resource/President\">President</a> <a href=\"http://dbpedia.org/resource/Petro\">Petro</a> <a href=\"http://dbpedia.org/resource/O\">O</a>. <a href=\"http://dbpedia.org/resource/Poroshenko\">Poroshenko</a> ended a 10-day cease-fire, saying that rebels had not put down their weapons and had persisted in attacking government troops.<br/><br/>At least <a href=\"http://dbpedia.org/resource/22\">22</a> people were killed in the firefight \u2014 all of them assailants, the military said. One soldier was injured.<br/><br/>The giant <a href=\"http://dbpedia.org/resource/French\">French</a> bank admitted to transferring billions of dollars on behalf of <a href=\"http://dbpedia.org/resource/Sudan\">Sudan</a> and other countries the <a href=\"http://dbpedia.org/resource/United_States\">United States</a> has blacklisted.<br/><br/>The former chief justice of the <a href=\"http://dbpedia.org/resource/Constitutional_Court\">Constitutional Court</a> was sentenced to life in prison for corruption, the heaviest sentence ever for graft in one of the most corrupt countries in the world.<br/><br/>A former aide to former <a href=\"http://dbpedia.org/resource/Prime_Minister\">Prime Minister</a> <a href=\"http://dbpedia.org/resource/Petr_Necas\">Petr Necas</a> who later married him was found guilty of abuse of power on <a href=\"http://dbpedia.org/resource/Monday\">Monday</a> in a scandal that exposed their affair and toppled the government a year ago.<br/><br/>The court found that in <a href=\"http://dbpedia.org/resource/1973\">1973</a> an <a href=\"http://dbpedia.org/resource/American\">American</a> naval officer provided <a href=\"http://dbpedia.org/resource/Chilean\">Chilean</a> officials with information on two <a href=\"http://dbpedia.org/resource/Americans\">Americans</a>, which led to their 
executions as part of a coup that ousted <a href=\"http://dbpedia.org/resource/President\">President</a> <a href=\"http://dbpedia.org/resource/Salvador_Allende\">Salvador Allende</a>.<br/><br/><a href=\"http://dbpedia.org/resource/Mayor\">Mayor</a> <a href=\"http://dbpedia.org/resource/Rob_Ford\">Rob Ford</a> of <a href=\"http://dbpedia.org/resource/Toronto\">Toronto</a> returned to his job after undergoing drug and alcohol treatment, saying, \u201cMy top priority will be rebuilding trust.\u201d<br/><br/>The question is whether the new group, which now calls itself simply the <a href=\"http://dbpedia.org/resource/Islamic_State\">Islamic State</a>, will endure."
        ],
        "output_type": "pyout",
-       "prompt_number": 25,
+       "prompt_number": 46,
        "text": [
-        "<IPython.core.display.HTML at 0x445f450>"
+        "<IPython.core.display.HTML at 0x7f706cc11910>"
        ]
       }
      ],
-     "prompt_number": 25
+     "prompt_number": 46
     }
    ],
    "metadata": {}
--- a/notebooks/Record linkage with Nazca - Example Dbpedia - INSEE.ipynb	Mon Jun 30 14:12:39 2014 +0000
+++ b/notebooks/Record linkage with Nazca - Example Dbpedia - INSEE.ipynb	Tue Jul 01 14:43:49 2014 +0200
@@ -34,7 +34,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 19
+     "prompt_number": 1
     },
     {
      "cell_type": "markdown",
@@ -72,7 +72,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 4
+     "prompt_number": 2
     },
     {
      "cell_type": "code",
@@ -93,7 +93,7 @@
        ]
       }
      ],
-     "prompt_number": 5
+     "prompt_number": 3
     },
     {
      "cell_type": "markdown",
@@ -115,7 +115,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 6
+     "prompt_number": 4
     },
     {
      "cell_type": "code",
@@ -136,7 +136,7 @@
        ]
       }
      ],
-     "prompt_number": 7
+     "prompt_number": 5
     },
     {
      "cell_type": "markdown",
@@ -161,7 +161,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 8
+     "prompt_number": 6
     },
     {
      "cell_type": "code",
@@ -182,7 +182,7 @@
        ]
       }
      ],
-     "prompt_number": 9
+     "prompt_number": 7
     },
     {
      "cell_type": "markdown",
@@ -207,7 +207,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 11
+     "prompt_number": 8
     },
     {
      "cell_type": "markdown",
@@ -239,7 +239,7 @@
        ]
       }
      ],
-     "prompt_number": 12
+     "prompt_number": 9
     },
     {
      "cell_type": "code",
@@ -258,7 +258,7 @@
        ]
       }
      ],
-     "prompt_number": 13
+     "prompt_number": 10
     },
     {
      "cell_type": "markdown",
@@ -290,7 +290,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 16
+     "prompt_number": 11
     },
     {
      "cell_type": "markdown",
@@ -318,7 +318,7 @@
        ]
       }
      ],
-     "prompt_number": 17
+     "prompt_number": 12
     },
     {
      "cell_type": "markdown",
@@ -336,7 +336,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 18
+     "prompt_number": 13
     },
     {
      "cell_type": "markdown",
@@ -362,7 +362,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 20
+     "prompt_number": 14
     },
     {
      "cell_type": "markdown",
@@ -382,7 +382,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 21
+     "prompt_number": 15
     },
     {
      "cell_type": "markdown",
@@ -400,7 +400,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 22
+     "prompt_number": 16
     },
     {
      "cell_type": "code",
@@ -426,7 +426,7 @@
        ]
       }
      ],
-     "prompt_number": 24
+     "prompt_number": 17
     },
     {
      "cell_type": "markdown",
@@ -463,7 +463,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 25
+     "prompt_number": 18
     },
     {
      "cell_type": "code",
@@ -474,7 +474,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 26
+     "prompt_number": 19
     },
     {
      "cell_type": "code",
@@ -500,7 +500,7 @@
        ]
       }
      ],
-     "prompt_number": 27
+     "prompt_number": 20
     }
    ],
    "metadata": {}
--- a/notebooks/Record linkage with Nazca - part 1 - Introduction.ipynb	Mon Jun 30 14:12:39 2014 +0000
+++ b/notebooks/Record linkage with Nazca - part 1 - Introduction.ipynb	Tue Jul 01 14:43:49 2014 +0200
@@ -38,7 +38,7 @@
        "output_type": "pyout",
        "prompt_number": 1,
        "text": [
-        "<IPython.core.display.HTML at 0x3bef910>"
+        "<IPython.core.display.HTML at 0x7fd324037950>"
        ]
       }
      ],
@@ -71,7 +71,7 @@
      "outputs": [
       {
        "output_type": "pyout",
-       "prompt_number": 5,
+       "prompt_number": 2,
        "text": [
         "('http://data.bnf.fr/11907966/victor_hugo/',\n",
         " 'Victor Hugo',\n",
@@ -82,7 +82,7 @@
        ]
       }
      ],
-     "prompt_number": 5
+     "prompt_number": 2
     },
     {
      "cell_type": "markdown",
@@ -126,14 +126,6 @@
      ]
     },
     {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [],
-     "language": "python",
-     "metadata": {},
-     "outputs": []
-    },
-    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
@@ -158,11 +150,11 @@
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "0.0332899093628 (s)\n"
+        "0.033762216568 (s)\n"
        ]
       }
      ],
-     "prompt_number": 14
+     "prompt_number": 10
     },
     {
      "cell_type": "markdown",
@@ -175,7 +167,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "total = t*10000*10000\n",
+      "total = t*10000*10000/1000.\n",
       "print '%s (s) = %s (h) = %s (d)' % (total, total/3600., total/(3600.*24.))"
      ],
      "language": "python",
@@ -185,11 +177,11 @@
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "3328990.93628 (s) = 924.719704522 (h) = 38.5299876884 (d)\n"
+        "3376.2216568 (s) = 0.937839349111 (h) = 0.0390766395463 (d)\n"
        ]
       }
      ],
-     "prompt_number": 18
+     "prompt_number": 11
     },
     {
      "cell_type": "markdown",
@@ -224,7 +216,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 19
+     "prompt_number": 12
     },
     {
      "cell_type": "code",
@@ -250,7 +242,7 @@
        ]
       }
      ],
-     "prompt_number": 23
+     "prompt_number": 13
     },
     {
      "cell_type": "markdown",
@@ -282,7 +274,7 @@
        ]
       }
      ],
-     "prompt_number": 24
+     "prompt_number": 14
     },
     {
      "cell_type": "code",
@@ -302,7 +294,7 @@
        ]
       }
      ],
-     "prompt_number": 25
+     "prompt_number": 15
     },
     {
      "cell_type": "markdown",
@@ -321,7 +313,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 1
+     "prompt_number": 16
     },
     {
      "cell_type": "markdown",
@@ -353,7 +345,7 @@
        ]
       }
      ],
-     "prompt_number": 5
+     "prompt_number": 17
     },
     {
      "cell_type": "markdown",
@@ -390,7 +382,7 @@
        ]
       }
      ],
-     "prompt_number": 30
+     "prompt_number": 18
     },
     {
      "cell_type": "markdown",
@@ -422,7 +414,7 @@
        ]
       }
      ],
-     "prompt_number": 32
+     "prompt_number": 19
     },
     {
      "cell_type": "markdown",
@@ -461,7 +453,7 @@
        ]
       }
      ],
-     "prompt_number": 35
+     "prompt_number": 20
     },
     {
      "cell_type": "markdown",
@@ -484,18 +476,15 @@
      "metadata": {},
      "outputs": [
       {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "abcd abcd 0.0\n",
-        "abcd abce 0.25\n",
-        "abcd abc 0.142857142857\n",
-        "abc abcd 0.142857142857\n",
-        "abcd efgh 1.0\n"
+       "ename": "SyntaxError",
+       "evalue": "invalid syntax (<ipython-input-21-f01d54be2f60>, line 1)",
+       "output_type": "pyerr",
+       "traceback": [
+        "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-21-f01d54be2f60>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    for sa, sb in (('abcd', 'abcd'), ('abcd', 'abce'), ('abcd', 'abc'), ('abc', 'abcd'), ('abcd', 'efgh'),,\u001b[0m\n\u001b[0m                                                                                                          ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
        ]
       }
      ],
-     "prompt_number": 36
+     "prompt_number": 21
     },
     {
      "cell_type": "markdown",
@@ -529,7 +518,7 @@
        ]
       }
      ],
-     "prompt_number": 39
+     "prompt_number": 22
     },
     {
      "cell_type": "markdown",
@@ -555,7 +544,7 @@
        ]
       }
      ],
-     "prompt_number": 41
+     "prompt_number": 23
     },
     {
      "cell_type": "code",
@@ -574,7 +563,7 @@
        ]
       }
      ],
-     "prompt_number": 42
+     "prompt_number": 24
     },
     {
      "cell_type": "markdown",
@@ -601,7 +590,7 @@
        ]
       }
      ],
-     "prompt_number": 44
+     "prompt_number": 25
     },
     {
      "cell_type": "markdown",
@@ -632,7 +621,7 @@
        ]
       }
      ],
-     "prompt_number": 49
+     "prompt_number": 26
     },
     {
      "cell_type": "code",
@@ -651,7 +640,7 @@
        ]
       }
      ],
-     "prompt_number": 50
+     "prompt_number": 27
     },
     {
      "cell_type": "markdown",
@@ -733,7 +722,7 @@
        ]
       }
      ],
-     "prompt_number": 56
+     "prompt_number": 28
     }
    ],
    "metadata": {}
--- a/notebooks/Record linkage with Nazca - part 2 - Normalization and blockings.ipynb	Mon Jun 30 14:12:39 2014 +0000
+++ b/notebooks/Record linkage with Nazca - part 2 - Normalization and blockings.ipynb	Tue Jul 01 14:43:49 2014 +0200
@@ -31,7 +31,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 3
+     "prompt_number": 1
     },
     {
      "cell_type": "markdown",
@@ -65,7 +65,7 @@
        ]
       }
      ],
-     "prompt_number": 12
+     "prompt_number": 2
     },
     {
      "cell_type": "markdown",
@@ -91,7 +91,7 @@
        ]
       }
      ],
-     "prompt_number": 13
+     "prompt_number": 3
     },
     {
      "cell_type": "markdown",
@@ -117,7 +117,7 @@
        ]
       }
      ],
-     "prompt_number": 14
+     "prompt_number": 4
     },
     {
      "cell_type": "markdown",
@@ -153,7 +153,7 @@
        ]
       }
      ],
-     "prompt_number": 15
+     "prompt_number": 5
     },
     {
      "cell_type": "code",
@@ -176,7 +176,7 @@
        ]
       }
      ],
-     "prompt_number": 24
+     "prompt_number": 6
     },
     {
      "cell_type": "markdown",
@@ -213,7 +213,7 @@
        ]
       }
      ],
-     "prompt_number": 26
+     "prompt_number": 7
     },
     {
      "cell_type": "markdown",
@@ -241,7 +241,7 @@
        ]
       }
      ],
-     "prompt_number": 28
+     "prompt_number": 8
     },
     {
      "cell_type": "markdown",
@@ -271,7 +271,7 @@
        ]
       }
      ],
-     "prompt_number": 31
+     "prompt_number": 9
     },
     {
      "cell_type": "markdown",
@@ -322,7 +322,7 @@
        ]
       }
      ],
-     "prompt_number": 36
+     "prompt_number": 10
     },
     {
      "cell_type": "markdown",
@@ -349,7 +349,7 @@
        ]
       }
      ],
-     "prompt_number": 37
+     "prompt_number": 11
     },
     {
      "cell_type": "markdown",
@@ -386,7 +386,7 @@
        ]
       }
      ],
-     "prompt_number": 41
+     "prompt_number": 12
     },
     {
      "cell_type": "markdown",
@@ -421,7 +421,7 @@
        ]
       }
      ],
-     "prompt_number": 42
+     "prompt_number": 13
     },
     {
      "cell_type": "markdown",
@@ -456,7 +456,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 44
+     "prompt_number": 14
     },
     {
      "cell_type": "markdown",
@@ -474,7 +474,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 46
+     "prompt_number": 15
     },
     {
      "cell_type": "markdown",
@@ -492,7 +492,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 49
+     "prompt_number": 16
     },
     {
      "cell_type": "markdown",
@@ -520,7 +520,7 @@
        ]
       }
      ],
-     "prompt_number": 50
+     "prompt_number": 17
     },
     {
      "cell_type": "markdown",
@@ -573,7 +573,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 1
+     "prompt_number": 18
     },
     {
      "cell_type": "markdown",
@@ -617,7 +617,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 2
+     "prompt_number": 19
     },
     {
      "cell_type": "code",
@@ -662,7 +662,7 @@
        ]
       }
      ],
-     "prompt_number": 3
+     "prompt_number": 20
     },
     {
      "cell_type": "markdown",
@@ -717,7 +717,7 @@
        ]
       }
      ],
-     "prompt_number": 4
+     "prompt_number": 21
     },
     {
      "cell_type": "markdown",
@@ -749,7 +749,7 @@
        ]
       }
      ],
-     "prompt_number": 10
+     "prompt_number": 22
     },
     {
      "cell_type": "code",
@@ -770,7 +770,7 @@
        ]
       }
      ],
-     "prompt_number": 11
+     "prompt_number": 23
     },
     {
      "cell_type": "code",
@@ -791,7 +791,7 @@
        ]
       }
      ],
-     "prompt_number": 12
+     "prompt_number": 24
     },
     {
      "cell_type": "markdown",
@@ -822,7 +822,7 @@
        ]
       }
      ],
-     "prompt_number": 23
+     "prompt_number": 25
     },
     {
      "cell_type": "code",
@@ -890,7 +890,7 @@
        ]
       }
      ],
-     "prompt_number": 22
+     "prompt_number": 26
     },
     {
      "cell_type": "markdown",
@@ -927,7 +927,7 @@
        ]
       }
      ],
-     "prompt_number": 28
+     "prompt_number": 27
     },
     {
      "cell_type": "markdown",
@@ -957,7 +957,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 9
+     "prompt_number": 28
     },
     {
      "cell_type": "code",
@@ -979,23 +979,23 @@
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "[(2, 'R3'), (3, 'R4')]\n",
-        "['R3', 'ref3', (5.1, 48)]\n",
-        "['R4', 'ref4', (5.2, 48.1)]\n",
-        "[(1, 'T2')]\n",
-        "['T2', 'target2', (5.3, 48.2)]\n",
-        "**********\n",
         "[(0, 'R1'), (1, 'R2')]\n",
         "['R1', 'ref1', (6.14194444444, 48.67)]\n",
         "['R2', 'ref2', (6.2, 49)]\n",
         "[(0, 'T1'), (2, 'T3')]\n",
         "['T1', 'target1', (6.2, 48.9)]\n",
         "['T3', 'target3', (6.25, 48.91)]\n",
+        "**********\n",
+        "[(2, 'R3'), (3, 'R4')]\n",
+        "['R3', 'ref3', (5.1, 48)]\n",
+        "['R4', 'ref4', (5.2, 48.1)]\n",
+        "[(1, 'T2')]\n",
+        "['T2', 'target2', (5.3, 48.2)]\n",
         "**********\n"
        ]
       }
      ],
-     "prompt_number": 10
+     "prompt_number": 29
     },
     {
      "cell_type": "markdown",
@@ -1019,7 +1019,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 11
+     "prompt_number": 30
     },
     {
      "cell_type": "code",
@@ -1066,7 +1066,7 @@
        ]
       }
      ],
-     "prompt_number": 12
+     "prompt_number": 31
     },
     {
      "cell_type": "markdown",
@@ -1101,7 +1101,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 22
+     "prompt_number": 32
     },
     {
      "cell_type": "code",
@@ -1142,7 +1142,7 @@
        ]
       }
      ],
-     "prompt_number": 23
+     "prompt_number": 33
     },
     {
      "cell_type": "markdown",
@@ -1173,7 +1173,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 26
+     "prompt_number": 34
     },
     {
      "cell_type": "code",
@@ -1218,7 +1218,7 @@
        ]
       }
      ],
-     "prompt_number": 27
+     "prompt_number": 35
     },
     {
      "cell_type": "markdown",
@@ -1239,7 +1239,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 28
+     "prompt_number": 36
     },
     {
      "cell_type": "code",
@@ -1258,7 +1258,7 @@
        ]
       }
      ],
-     "prompt_number": 29
+     "prompt_number": 37
     }
    ],
    "metadata": {}
--- a/notebooks/Record linkage with Nazca - part 3 - Putting it all together.ipynb	Mon Jun 30 14:12:39 2014 +0000
+++ b/notebooks/Record linkage with Nazca - part 3 - Putting it all together.ipynb	Tue Jul 01 14:43:49 2014 +0200
@@ -84,7 +84,7 @@
        ]
       }
      ],
-     "prompt_number": 2
+     "prompt_number": 1
     },
     {
      "cell_type": "markdown",
@@ -114,7 +114,7 @@
        ]
       }
      ],
-     "prompt_number": 5
+     "prompt_number": 2
     },
     {
      "cell_type": "markdown",
@@ -142,7 +142,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 8
+     "prompt_number": 3
     },
     {
      "cell_type": "code",
@@ -158,13 +158,13 @@
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "(('R1', 0), ('T1', 0), 0)\n",
-        "(('R2', 1), ('T3', 2), 0)\n",
-        "(('R4', 3), ('T2', 1), 0)\n"
+        "(('R1', 0), ('T1', 0), 4.5532517433166504)\n",
+        "(('R2', 1), ('T3', 2), 11.396689414978027)\n",
+        "(('R4', 3), ('T2', 1), 15.692409515380859)\n"
        ]
       }
      ],
-     "prompt_number": 9
+     "prompt_number": 4
     },
     {
      "cell_type": "markdown",
@@ -187,14 +187,14 @@
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "(('R1', 0), ('T3', 2), 0)\n",
-        "(('R1', 0), ('T1', 0), 0)\n",
-        "(('R2', 1), ('T3', 2), 0)\n",
-        "(('R4', 3), ('T2', 1), 0)\n"
+        "(('R1', 0), ('T3', 2), 29.124841690063477)\n",
+        "(('R1', 0), ('T1', 0), 4.5532517433166504)\n",
+        "(('R2', 1), ('T3', 2), 11.396689414978027)\n",
+        "(('R4', 3), ('T2', 1), 15.692409515380859)\n"
        ]
       }
      ],
-     "prompt_number": 14
+     "prompt_number": 5
     },
     {
      "cell_type": "markdown",
@@ -222,7 +222,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 11
+     "prompt_number": 6
     },
     {
      "cell_type": "code",
@@ -244,7 +244,7 @@
        ]
       }
      ],
-     "prompt_number": 13
+     "prompt_number": 7
     }
    ],
    "metadata": {}