{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Introduction to Eland Webinar\n", "\n", "- [Webinar Recording on Youtube](https://www.youtube.com/watch?v=U8fnkzp_sfo)\n", "- [Eland Documentation](https://eland.readthedocs.io)\n", "- [Source Code on GitHub](https://github.com/elastic/eland)\n", "- [Elastic Cloud](https://cloud.elastic.co)\n", "- [NYC Open Data dataset](https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j)\n", "\n", "This Jupyter Notebook goes along with the webinar 'Introduction to Eland' which is available\n", "on Youtube. To follow along either create an Elasticsearch deployment on Elastic Cloud (free trial available)\n", "or start your own Elasticsearch cluster locally.\n", "\n", "You'll need to install the following libraries:\n", "\n", "```bash\n", "$ python -m pip install eland numpy pandas\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame Demo" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Standard imports\n", "import eland as ed\n", "import pandas as pd\n", "import numpy as np\n", "from elasticsearch import Elasticsearch\n", "\n", "\n", "def json(x):\n", "    \"\"\"Pretty-print ``x`` as indented JSON with sorted keys.\n", "\n", "    Args:\n", "        x: A JSON-serializable object, or a response object that exposes\n", "            its decoded payload through a ``body`` attribute (as the\n", "            elasticsearch-py 8.x client responses do).\n", "    \"\"\"\n", "    # Imported under an alias because this helper shadows the module name.\n", "    import json as _json\n", "\n", "    # json.dumps() cannot serialize client response wrappers directly,\n", "    # so unwrap them to the plain dict/list payload first.\n", "    payload = getattr(x, \"body\", x)\n", "    print(_json.dumps(payload, indent=2, sort_keys=True))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"cluster_name\": \"167e473c7bba4bae85004385d4e0ce46\",\n", " \"cluster_uuid\": \"4Y2FwBhRSsWq9uGedb1DmQ\",\n", " \"name\": \"instance-0000000000\",\n", " \"tagline\": \"You Know, for Search\",\n", " \"version\": {\n", " \"build_date\": \"2020-06-14T19:35:50.234439Z\",\n", " \"build_flavor\": \"default\",\n", " \"build_hash\": \"757314695644ea9a1dc2fecd26d1a43856725e65\",\n", " \"build_snapshot\": false,\n", " \"build_type\": \"docker\",\n", " \"lucene_version\": \"8.5.1\",\n", " 
\"minimum_index_compatibility_version\": \"6.0.0-beta1\",\n", " \"minimum_wire_compatibility_version\": \"6.8.0\",\n", " \"number\": \"7.8.0\"\n", " }\n", "}\n" ] } ], "source": [ "# Connect to an Elastic Cloud deployment. Credentials are read from the\n", "# ELASTIC_CLOUD_ID / ELASTIC_CLOUD_PASSWORD environment variables so they\n", "# never need to be saved in the notebook; with neither set the values fall\n", "# back to empty strings that can be filled in by hand, as before.\n", "# To use another Elasticsearch cluster, construct the client differently here.\n", "import os\n", "\n", "ELASTIC_CLOUD_ID = os.environ.get(\"ELASTIC_CLOUD_ID\", \"\")\n", "ELASTIC_CLOUD_PASSWORD = os.environ.get(\"ELASTIC_CLOUD_PASSWORD\", \"\")\n", "\n", "es = Elasticsearch(\n", "    cloud_id=ELASTIC_CLOUD_ID,\n", "    basic_auth=(\"elastic\", ELASTIC_CLOUD_PASSWORD),\n", ")\n", "json(es.info())" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 193197 entries, 0 to 400255\n", "Data columns (total 26 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 CAMIS 193197 non-null int64 \n", " 1 DBA 193197 non-null object \n", " 2 BORO 193197 non-null object \n", " 3 BUILDING 193197 non-null object \n", " 4 STREET 193197 non-null object \n", " 5 ZIPCODE 193197 non-null float64\n", " 6 PHONE 193197 non-null object \n", " 7 CUISINE DESCRIPTION 193197 non-null object \n", " 8 INSPECTION DATE 193197 non-null object \n", " 9 ACTION 193197 non-null object \n", " 10 VIOLATION CODE 193197 non-null object \n", " 11 VIOLATION DESCRIPTION 193197 non-null object \n", " 12 CRITICAL FLAG 193197 non-null object \n", " 13 SCORE 193197 non-null float64\n", " 14 GRADE 193197 non-null object \n", " 15 GRADE DATE 193197 non-null object \n", " 16 RECORD DATE 193197 non-null object \n", " 17 INSPECTION TYPE 193197 non-null object \n", " 18 Latitude 193197 non-null float64\n", " 19 Longitude 193197 non-null float64\n", " 20 Community Board 193197 non-null float64\n", " 21 Council District 193197 non-null float64\n", " 22 Census Tract 193197 non-null float64\n", " 23 BIN 193197 non-null float64\n", " 24 BBL 193197 non-null float64\n", " 25 NTA 193197 non-null object \n", "dtypes: float64(9), int64(1), object(16)\n", "memory usage: 39.8+ MB\n" ] } ], "source": [ "# Load the 
dataset from NYC Open Data and take a look\n", "pd_df = pd.read_csv(\"nyc-restaurants.csv\").dropna()\n", "pd_df.info()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 193197 entries, 0 to 400255\n", "Data columns (total 25 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 camis 193197 non-null int64 \n", " 1 dba 193197 non-null object \n", " 2 boro 193197 non-null object \n", " 3 building 193197 non-null object \n", " 4 street 193197 non-null object \n", " 5 zipcode 193197 non-null float64\n", " 6 phone 193197 non-null object \n", " 7 cuisine_description 193197 non-null object \n", " 8 inspection_date 193197 non-null object \n", " 9 action 193197 non-null object \n", " 10 violation_code 193197 non-null object \n", " 11 violation_description 193197 non-null object \n", " 12 critical_flag 193197 non-null object \n", " 13 score 193197 non-null float64\n", " 14 grade 193197 non-null object \n", " 15 grade_date 193197 non-null object \n", " 16 record_date 193197 non-null object \n", " 17 inspection_type 193197 non-null object \n", " 18 community_board 193197 non-null float64\n", " 19 council_district 193197 non-null float64\n", " 20 census_tract 193197 non-null float64\n", " 21 bin 193197 non-null float64\n", " 22 bbl 193197 non-null float64\n", " 23 nta 193197 non-null object \n", " 24 location 193197 non-null object \n", "dtypes: float64(7), int64(1), object(17)\n", "memory usage: 38.3+ MB\n" ] } ], "source": [
"# Build the cleaned frame as a new variable instead of mutating 'pd_df'\n", "# in place, so this cell can be re-run without a KeyError on the\n", "# already-dropped 'latitude' / 'longitude' columns.\n", "pd_df_clean = (\n", "    pd_df\n", "    # Rename the columns to be snake_case\n", "    .rename(columns=lambda name: name.lower().replace(\" \", \"_\"))\n", "    # Combine the 'latitude' and 'longitude' columns into one\n", "    # \"latitude,longitude\" string column 'location' for 'geo_point'\n", "    .assign(\n", "        location=lambda frame: frame[[\"latitude\", \"longitude\"]].apply(\n", "            lambda row: \",\".join(str(item) for item in row), axis=1\n", "        )\n", "    )\n", "    # Drop the old columns in favor of 'location'\n", "    .drop(columns=[\"latitude\", \"longitude\"])\n", ")\n", "\n", "pd_df_clean.info()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 193197 entries, 10388 to 398749\n", "Data columns (total 25 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 camis 193197 non-null int64 \n", " 1 dba 193197 non-null object \n", " 2 boro 193197 non-null object \n", " 3 building 193197 non-null object \n", " 4 street 193197 non-null object \n", " 5 zipcode 193197 non-null int64 \n", " 6 phone 193197 non-null object \n", " 7 cuisine_description 193197 non-null object \n", " 8 inspection_date 193197 non-null object \n", " 9 action 193197 non-null object \n", " 10 violation_code 193197 non-null object \n", " 11 violation_description 193197 non-null object \n", " 12 critical_flag 193197 non-null object \n", " 13 score 193197 non-null float64\n", " 14 grade 193197 non-null object \n", " 15 grade_date 193197 non-null object \n", " 16 record_date 193197 non-null object \n", " 17 inspection_type 193197 non-null object \n", " 18 community_board 193197 non-null float64\n", " 19 council_district 193197 non-null float64\n", " 20 census_tract 193197 non-null float64\n", " 21 bin 193197 non-null float64\n", " 22 bbl 193197 non-null float64\n", " 23 nta 193197 non-null object \n", " 24 location 193197 non-null object \n", "dtypes: float64(6), int64(2), object(17)\n", "memory usage: 80.0 bytes\n" ] } ], "source": [ "df = ed.pandas_to_eland(\n", "    pd_df=pd_df_clean,\n", "    es_client=es,\n", "\n", "    # Where the data will live in Elasticsearch\n", "    es_dest_index=\"nyc-restaurants\",\n", "\n", "    # Type overrides for certain columns: 'location' would be detected\n", "    # automatically as 'keyword', but we want it interpreted as 'geo_point'.\n", "    es_type_overrides={\n", "        \"location\": \"geo_point\",\n", "        \"dba\": \"text\",\n", "        \"zipcode\": \"short\",\n",
"    },\n", "\n", "    # If the index already exists what should we do?\n", "    es_if_exists=\"replace\",\n", "\n", "    # Wait for data to be indexed before returning\n", "    es_refresh=True,\n", ")\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"nyc-restaurants\": {\n", " \"mappings\": {\n", " \"properties\": {\n", " \"action\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"bbl\": {\n", " \"type\": \"double\"\n", " },\n", " \"bin\": {\n", " \"type\": \"double\"\n", " },\n", " \"boro\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"building\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"camis\": {\n", " \"type\": \"long\"\n", " },\n", " \"census_tract\": {\n", " \"type\": \"double\"\n", " },\n", " \"community_board\": {\n", " \"type\": \"double\"\n", " },\n", " \"council_district\": {\n", " \"type\": \"double\"\n", " },\n", " \"critical_flag\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"cuisine_description\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"dba\": {\n", " \"type\": \"text\"\n", " },\n", " \"grade\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"grade_date\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"inspection_date\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"inspection_type\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"location\": {\n", " \"type\": \"geo_point\"\n", " },\n", " \"nta\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"phone\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"record_date\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"score\": {\n", " \"type\": \"double\"\n", " },\n", " \"street\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"violation_code\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"violation_description\": {\n", " \"type\": \"keyword\"\n", " },\n", " \"zipcode\": {\n", " \"type\": \"short\"\n", " }\n", " }\n", " }\n", " }\n", "}\n" ] } ], "source": [ 
"# Show the mapping of the new index, including the 'geo_point', 'text' and\n", "# 'short' type overrides requested when the data was uploaded\n", "json(es.indices.get_mapping(index=\"nyc-restaurants\"))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(193197, 25)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Shape is determined by using count API\n", "df.shape" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "es_index_pattern: nyc-restaurants\n", "Index:\n", " es_index_field: _id\n", " is_source_field: False\n", "Mappings:\n", " capabilities:\n", " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n", "camis camis True long None int64 True True False camis\n", "dba dba True text None object True False False None\n", "boro boro True keyword None object True True False boro\n", "building building True keyword None object True True False building\n", "street street True keyword None object True True False street\n", "zipcode zipcode True short None int64 True True False zipcode\n", "phone phone True keyword None object True True False phone\n", "cuisine_description cuisine_description True keyword None object True True False cuisine_description\n", "inspection_date inspection_date True keyword None object True True False inspection_date\n", "action action True keyword None object True True False action\n", "violation_code violation_code True keyword None object True True False violation_code\n", "violation_description violation_description True keyword None object True True False violation_description\n", "critical_flag critical_flag True keyword None object True True False critical_flag\n", "score score True double None float64 True True False score\n", "grade grade True keyword None object True True False grade\n", "grade_date grade_date True keyword None object True True False grade_date\n", "record_date record_date True keyword None 
object True True False record_date\n", "inspection_type inspection_type True keyword None object True True False inspection_type\n", "community_board community_board True double None float64 True True False community_board\n", "council_district council_district True double None float64 True True False council_district\n", "census_tract census_tract True double None float64 True True False census_tract\n", "bin bin True double None float64 True True False bin\n", "bbl bbl True double None float64 True True False bbl\n", "nta nta True keyword None object True True False nta\n", "location location True geo_point None object True True False location\n", "Operations:\n", " tasks: [('tail': ('sort_field': '_doc', 'count': 10))]\n", " size: 10\n", " sort_params: _doc:desc\n", " _source: ['camis', 'dba', 'boro', 'building', 'street', 'zipcode', 'phone', 'cuisine_description', 'inspection_date', 'action', 'violation_code', 'violation_description', 'critical_flag', 'score', 'grade', 'grade_date', 'record_date', 'inspection_type', 'community_board', 'council_district', 'census_tract', 'bin', 'bbl', 'nta', 'location']\n", " body: {}\n", " post_processing: [('sort_index')]\n", "\n" ] } ], "source": [ "# DataFrame has many APIs compatible with Pandas.\n", "# Uncomment any one of the alternatives below to try it out:\n", "\n", "# df.head(10)\n", "# df.columns\n", "# df.dba\n", "# df[\"grade\"]\n", "# df[df.grade.isin([\"A\", \"B\"])]\n", "# print(df[df.grade.isin([\"A\", \"B\"])].es_info())\n", "\n", "# es_info() reports the index mappings and the pending Elasticsearch\n", "# operations behind a DataFrame; this is the statement that produced the\n", "# output stored with this cell.\n", "print(df.tail(10).es_info())" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
camiszipcodescorecommunity_boardcouncil_districtcensus_tractbinbbl
count1.931970e+05193197.000000193197.000000193197.000000193197.000000193197.0000001.931970e+051.931970e+05
mean4.605010e+0710677.21254012.947680248.60260320.02071528796.0482982.513373e+062.450622e+09
std4.415232e+06595.1422468.180244130.69701415.80966430672.6834691.351134e+061.313578e+09
min3.011234e+0710000.000000-1.000000101.0000001.000000100.0000001.000000e+061.000000e+09
25%4.138051e+0710022.0000009.000000105.0000004.0000007895.6056911.042708e+061.011024e+09
50%5.000527e+0710468.00611412.000000301.00000019.74752916022.9171063.007191e+063.002924e+09
75%5.005661e+0711228.62453513.000000401.00000034.00000040246.0003374.002294e+064.003343e+09
max5.010416e+0712345.00000099.000000503.00000051.000000162100.0000005.799501e+065.270001e+09
\n", "
" ], "text/plain": [ " camis zipcode score community_board \\\n", "count 1.931970e+05 193197.000000 193197.000000 193197.000000 \n", "mean 4.605010e+07 10677.212540 12.947680 248.602603 \n", "std 4.415232e+06 595.142246 8.180244 130.697014 \n", "min 3.011234e+07 10000.000000 -1.000000 101.000000 \n", "25% 4.138051e+07 10022.000000 9.000000 105.000000 \n", "50% 5.000527e+07 10468.006114 12.000000 301.000000 \n", "75% 5.005661e+07 11228.624535 13.000000 401.000000 \n", "max 5.010416e+07 12345.000000 99.000000 503.000000 \n", "\n", " council_district census_tract bin bbl \n", "count 193197.000000 193197.000000 1.931970e+05 1.931970e+05 \n", "mean 20.020715 28796.048298 2.513373e+06 2.450622e+09 \n", "std 15.809664 30672.683469 1.351134e+06 1.313578e+09 \n", "min 1.000000 100.000000 1.000000e+06 1.000000e+09 \n", "25% 4.000000 7895.605691 1.042708e+06 1.011024e+09 \n", "50% 19.747529 16022.917106 3.007191e+06 3.002924e+09 \n", "75% 34.000000 40246.000337 4.002294e+06 4.003343e+09 \n", "max 51.000000 162100.000000 5.799501e+06 5.270001e+09 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Aggregating values\n", "# NOTE(review): the percentiles in the stored output are fractional even for\n", "# integer-valued columns such as 'zipcode' (50% = 10468.006114), so they look\n", "# like approximations rather than exact order statistics -- confirm in the Eland docs.\n", "df.describe()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmkAAAJOCAYAAADs2JBcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3df7DldX3f8de7rBA1VVDTrVlIltStKZGkMVshMZO5Ixld1AanowZLA1gSpg0mJkPHrGmntDHM6DTV6FSdYYSIjhUpsZUREkLR27SdgQAhVQEJG0RZAqLyQ1ejZs2nf5zvJifLXdB7L/e+793HY+bOPedzvud8P3c/nN0n53u+99QYIwAA9PJ31nsCAAA8mkgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBrAKasbfqcCq8RcKcFipql+rqnur6itVdUdVnVJVR1TVr1fVn03jN1fVcdP2P1FVN1bVI9P3n5h7rMWqurCq/m+SryX5gar6waq6tqoenB7/1ev1swIbW/lYKOBwUVXPTfI/k5w0xvjzqtqe5Igk/yzJmUlemeRPk/xwkr1JRpI/S/LLST6Y5FVJ3pXkOWOML1XVYpIfSHJqkjuSPDXJp5L8+yTvT3JikmuT/NQY47Y1+SGBTcMracDh5FtJjkpyQlU9aYxx9xjjz5L8fJJ/N8a4Y8z8vzHGl5K8LMmdY4z3jzH2jzE+mOTTSf7p3GO+d4xx6xhjf5JdSe4eY/zOtP0tSX43s7gD+I6INOCwMcbYk+RXkvyHJA9U1WVV9b1JjsvsFbODfW+Szx409tkk2+au3zN3+fuTnFRVDx/4SnJGkr+/Sj8CcBgRacBhZYzxX8cYP5lZUI0kb8kstP7BEpv/+bTdvO9Lcu/8Q85dvifJ/xpjHD339d1jjH+9ej8BcLgQacBho6qeW1Uvqqqjknw9yV8k+ask70nypqraMZ2l+cNV9cwkVyf5h1X1z6tqS1X9bJITknz0ELv46LT9z1XVk6avf1JV/2gNfjxgkxFpwOHkqCRvTvLFJPcn+XtJ3pjkrUkuT/IHSb6c5OIkT57el/byJOcn+VKSNyR5+Rjji0s9+BjjK0lenOT0zF6Fuz+zV+qOeuJ+JGCzcnYnAEBDXkkDAGhIpAEANCTSAAAaEmkAAA1tWe8JrLZnPetZY/v27Wu+369+9at56lOfuub75bFZl36sSU/WpR9r0tNqr8vNN9/8xTHG9yx126aLtO3bt+emm25a8/0uLi5mYWFhzffLY7Mu/ViTnqxLP9akp9Vel6o6+FNN/prDnQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABrast4TYH1t333Vek9h1dz95pet9xQAYNV4JQ0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANPS4kVZVl1TVA1X1qbmx/1RVn66qT1TVf6+qo+due2NV7amqO6rqJXPju6axPVW1e278+Kq6YRr/UFUdOY0fNV3fM92+fbV+aACA7r6dV9Lem2TXQWPXJnneGOOHk/xpkjcmSVWdkOT0JD803eddVXVEVR2R5J1JTk1yQpLXTNsmyVuSvG2M8ZwkDyU5Zxo/J8lD0/jbpu0AAA4LjxtpY4w/TPLgQWN/MMbYP129Psmx0+XTklw2xvjGGOMzSfYkecH0tWeMcdcY45tJLktyWlVVkhcluWK6/6VJXjH3WJdOl69
Icsq0PQDAprdlFR7jXyb50HR5W2bRdsDeaSxJ7jlo/KQkz0zy8FzwzW+/7cB9xhj7q+qRafsvHjyBqjo3yblJsnXr1iwuLq7sJ1qGffv2rct+V+r8E/c//kYbxFJ//ht1XTYza9KTdenHmvS0luuyokirqn+bZH+SD6zOdJZnjHFRkouSZOfOnWNhYWHN57C4uJj12O9Knb37qvWewqq5+4yFR41t1HXZzKxJT9alH2vS01quy7IjrarOTvLyJKeMMcY0fG+S4+Y2O3YayyHGv5Tk6KraMr2aNr/9gcfaW1Vbkjx92h4AYNNb1q/gqKpdSd6Q5GfGGF+bu+nKJKdPZ2Yen2RHkj9KcmOSHdOZnEdmdnLBlVPcfTzJK6f7n5XkI3OPddZ0+ZVJPjYXgwAAm9rjvpJWVR9MspDkWVW1N8kFmZ3NeVSSa6f38l8/xvhXY4xbq+ryJLdldhj0vDHGt6bHeV2Sa5IckeSSMcat0y5+LcllVfWbSW5JcvE0fnGS91fVnsxOXDh9FX5eAIAN4XEjbYzxmiWGL15i7MD2Fya5cInxq5NcvcT4XZmd/Xnw+NeTvOrx5gcAsBn5xAEAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANiTQAgIZEGgBAQyINAKAhkQYA0JBIAwBoSKQBADQk0gAAGhJpAAANPW6kVdUlVfVAVX1qbuwZVXVtVd05fT9mGq+qekdV7amqT1TV8+fuc9a0/Z1Vddbc+I9V1Sen+7yjquqx9gEAcDj4dl5Je2+SXQeN7U5y3RhjR5LrputJcmqSHdPXuUnencyCK8kFSU5K8oIkF8xF17uT/MLc/XY9zj4AADa9x420McYfJnnwoOHTklw6Xb40ySvmxt83Zq5PcnRVPTvJS5JcO8Z4cIzxUJJrk+yabnvaGOP6McZI8r6DHmupfQAAbHpblnm/rWOM+6bL9yfZOl3eluSeue32TmOPNb53ifHH2sejVNW5mb1yl61bt2ZxcfE7/HFWbt++feuy35U6/8T96z2FVbPUn/9GXZfNzJr0ZF36sSY9reW6LDfS/toYY1TVWI3JLHcfY4yLklyUJDt37hwLCwtP5HSWtLi4mPXY70qdvfuq9Z7Cqrn7jIVHjW3UddnMrElP1qUfa9LTWq7Lcs/u/Px0qDLT9wem8XuTHDe33bHT2GONH7vE+GPtAwBg01tupF2Z5MAZmmcl+cjc+JnTWZ4nJ3lkOmR5TZIXV9Ux0wkDL05yzXTbl6vq5OmszjMPeqyl9gEAsOk97uHOqvpgkoUkz6qqvZmdpfnmJJdX1TlJPpvk1dPmVyd5aZI9Sb6W5LVJMsZ4sKrelOTGabvfGGMcOBnhFzM7g/TJSX5v+spj7AMAYNN73EgbY7zmEDedssS2I8l5h3icS5JcssT4TUmet8T4l5baBwDA4cAnDgAANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEB
DIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhoRZFWVb9aVbdW1aeq6oNV9V1VdXxV3VBVe6rqQ1V15LTtUdP1PdPt2+ce543T+B1V9ZK58V3T2J6q2r2SuQIAbCTLjrSq2pbkl5PsHGM8L8kRSU5P8pYkbxtjPCfJQ0nOme5yTpKHpvG3Tdulqk6Y7vdDSXYleVdVHVFVRyR5Z5JTk5yQ5DXTtgAAm95KD3duSfLkqtqS5ClJ7kvyoiRXTLdfmuQV0+XTpuuZbj+lqmoav2yM8Y0xxmeS7EnygulrzxjjrjHGN5NcNm0LALDpbVnuHccY91bVbyX5XJK/SPIHSW5O8vAYY/+02d4k26bL25LcM913f1U9kuSZ0/j1cw89f597Dho/aam5VNW5Sc5Nkq1bt2ZxcXG5P9ay7du3b132u1Lnn7j/8TfaIJb689+o67KZWZOerEs/1qSntVyXZUdaVR2T2Stbxyd5OMl/y+xw5ZobY1yU5KIk2blz51hYWFjzOSwuLmY99rtSZ+++ar2nsGruPmPhUWMbdV02M2vSk3Xpx5r0tJbrspLDnT+d5DNjjC+MMf4yyYeTvDDJ0dPhzyQ5Nsm90+V7kxyXJNPtT0/ypfnxg+5zqHEAgE1vJZH2uSQnV9VTpveWnZLktiQfT/LKaZuzknxkunzldD3T7R8bY4xp/PTp7M/jk+xI8kdJbkyyYzpb9MjMTi64cgXzBQDYMFbynrQbquqKJH+cZH+SWzI75HhVksuq6jensYunu1yc5P1VtSfJg5lFV8YYt1bV5ZkF3v4k540xvpUkVfW6JNdkduboJWOMW5c7XwCAjWTZkZYkY4wLklxw0PBdmZ2ZefC2X0/yqkM8zoVJLlxi/OokV69kjgAAG5FPHAAAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANDQiiKtqo6uqiuq6tNVdXtV/XhVPaOqrq2qO6fvx0zbVlW9o6r2VNUnqur5c49z1rT9nVV11tz4j1XVJ6f7vKOqaiXzBQDYKFb6Strbk/z+GOMHk/xIktuT7E5y3RhjR5LrputJcmqSHdPXuUnenSRV9YwkFyQ5KckLklxwIOymbX5h7n67VjhfAIANYdmRVlVPT/JTSS5OkjHGN8cYDyc5Lcml02aXJnnFdPm0JO8bM9cnObqqnp3kJUmuHWM8OMZ4KMm1SXZNtz1tjHH9GGMked/cYwEAbGpbVnDf45N8IcnvVNWPJLk5yeuTbB1j3Ddtc3+SrdPlbUnumbv/3mnsscb3LjH+KFV1bmavzmXr1q1ZXFxc9g+1XPv27VuX/a7U+SfuX+8prJq
l/vw36rpsZtakJ+vSjzXpaS3XZSWRtiXJ85P80hjjhqp6e/7m0GaSZIwxqmqsZILfjjHGRUkuSpKdO3eOhYWFJ3qXj7K4uJj12O9Knb37qvWewqq5+4yFR41t1HXZzKxJT9alH2vS01quy0rek7Y3yd4xxg3T9Ssyi7bPT4cqM31/YLr93iTHzd3/2GnsscaPXWIcAGDTW3akjTHuT3JPVT13GjolyW1Jrkxy4AzNs5J8ZLp8ZZIzp7M8T07yyHRY9JokL66qY6YTBl6c5Jrpti9X1cnTWZ1nzj0WAMCmtpLDnUnyS0k+UFVHJrkryWszC7/Lq+qcJJ9N8upp26uTvDTJniRfm7bNGOPBqnpTkhun7X5jjPHgdPkXk7w3yZOT/N70BQCw6a0o0sYYf5Jk5xI3nbLEtiPJeYd4nEuSXLLE+E1JnreSOQIAbEQ+cQAAoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDK460qjqiqm6pqo9O14+vqhuqak9VfaiqjpzGj5qu75lu3z73GG+cxu+oqpfMje+axvZU1e6VzhUAYKNYjVfSXp/k9rnrb0nytjHGc5I8lOScafycJA9N42+btktVnZDk9CQ/lGRXkndN4XdEkncmOTXJCUleM20LALDprSjSqurYJC9L8p7peiV5UZIrpk0uTfKK6fJp0/VMt58ybX9aksvGGN8YY3wmyZ4kL5i+9owx7hpjfDPJZdO2AACb3pYV3v+3k7whyd+drj8zycNjjP3T9b1Jtk2XtyW5J0nGGPur6pFp+21Jrp97zPn73HPQ+ElLTaKqzk1ybpJs3bo1i4uLy/+Jlmnfvn3rst+VOv/E/Y+/0Qax1J//Rl2Xzcya9GRd+rEmPa3luiw70qrq5UkeGGPcXFULqzel79wY46IkFyXJzp07x8LC2k9ncXEx67HflTp791XrPYVVc/cZC48a26jrsplZk56sSz/WpKe1XJeVvJL2wiQ/U1UvTfJdSZ6W5O1Jjq6qLdOraccmuXfa/t4kxyXZW1Vbkjw9yZfmxg+Yv8+hxgEANrVlvydtjPHGMcaxY4ztmb3x/2NjjDOSfDzJK6fNzkrykenyldP1TLd/bIwxpvHTp7M/j0+yI8kfJbkxyY7pbNEjp31cudz5AgBsJCt9T9pSfi3JZVX1m0luSXLxNH5xkvdX1Z4kD2YWXRlj3FpVlye5Lcn+JOeNMb6VJFX1uiTXJDkiySVjjFufgPkCALSzKpE2xlhMsjhdviuzMzMP3ubrSV51iPtfmOTCJcavTnL1aswRAGAj8YkDAAANiTQAgIZEGgBAQ0/EiQOb3vYlfrfY+Sfu31S/cwwAWF9eSQMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNI
AABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaEikAQA0JNIAABoSaQAADS070qrquKr6eFXdVlW3VtXrp/FnVNW1VXXn9P2Yabyq6h1VtaeqPlFVz597rLOm7e+sqrPmxn+sqj453ecdVVUr+WEBADaKlbyStj/J+WOME5KcnOS8qjohye4k140xdiS5brqeJKcm2TF9nZvk3cks6pJckOSkJC9IcsGBsJu2+YW5++1awXwBADaMZUfaGOO+McYfT5e/kuT2JNuSnJbk0mmzS5O8Yrp8WpL3jZnrkxxdVc9O8pIk144xHhxjPJTk2iS7ptueNsa4fowxkrxv7rEAADa1LavxIFW1PcmPJrkhydYxxn3TTfcn2Tpd3pbknrm77Z3GHmt87xLjS+3/3MxencvWrVuzuLi47J/l23H+ifsfNbb1yUuPs3aWWvd9+/Y94f898J2xJj1Zl36sSU9ruS4rjrSq+u4kv5vkV8YYX55/29gYY1TVWOk+Hs8Y46IkFyXJzp07x8LCwhO6v7N3X/WosfNP3J///MlVaV6W6e4zFh41tri4mCf6vwe+M9akJ+vSjzXpaS3XZUVnd1bVkzILtA+MMT48DX9+OlSZ6fsD0/i9SY6bu/ux09hjjR+7xDgAwKa3krM7K8nFSW4fY7x17qYrkxw4Q/OsJB+ZGz9zOsvz5CSPTIdFr0ny4qo6Zjph4MVJrplu+3JVnTzt68y5xwIA2NRWcnzuhUl+Lsknq+pPprFfT/LmJJdX1TlJPpvk1dNtVyd5aZI9Sb6W5LVJMsZ4sKrelOTGabvfGGM8OF3+xSTvTfLkJL83fcGSth/iMPRSh6c7u/vNL1vvKQDQwLIjbYzxf5Ic6veWnbLE9iPJeYd4rEuSXLLE+E1JnrfcOQIAbFQ+cQAAoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCGRBgDQkEgDAGhIpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDW9Z7AsDftn33Ves9hVVz95tftt5TANiwvJIGANCQSAMAaEikAQA0JNIAABoSaQAADYk0AICGRBoAQEN+TxrwhFnqd76df+L+nL0Bfxec3/kGrDWvpAEANCTSAAAaEmkAAA2JNACAhkQaAEBDIg0AoCG/ggPg27DUrxPZqPw6EdgY2r+SVlW7quqOqtpTVbvXez4AAGuhdaRV1RFJ3pnk1CQnJHlNVZ2wvrMCAHjidT/c+YIke8YYdyVJVV2W5LQkt63rrAA2sM30SRCbxWY6BO2tAaunxhjrOoHHUlWvTLJrjPHz0/WfS3LSGON1B213bpJzp6vPTXLHmk505llJvrgO++WxWZd+rElP1qUfa9LTaq/L948xvmepG7q/kvZtGWNclOSi9ZxDVd00xti5nnPg0axLP9akJ+vSjzXpaS3XpfV70pLcm+S4uevHTmMAAJta90i7McmOqjq+qo5McnqSK9d5TgAAT7jWhzvHGPur6nVJrklyRJJLxhi3rvO0DmVdD7dySNalH2vSk3Xpx5r0tGbr0vrEAQCAw1X3w50AAIclkQYA0JB
IWwU+umr9VdVxVfXxqrqtqm6tqtdP48+oqmur6s7p+zHrPdfDTVUdUVW3VNVHp+vHV9UN0/PlQ9NJQayhqjq6qq6oqk9X1e1V9eOeK+uvqn51+vvrU1X1war6Ls+XtVVVl1TVA1X1qbmxJZ8bNfOOaW0+UVXPX+35iLQV8tFVbexPcv4Y44QkJyc5b1qH3UmuG2PsSHLddJ219fokt89df0uSt40xnpPkoSTnrMusDm9vT/L7Y4wfTPIjma2P58o6qqptSX45yc4xxvMyO1nu9Hi+rLX3Jtl10NihnhunJtkxfZ2b5N2rPRmRtnJ//dFVY4xvJjnw0VWsoTHGfWOMP54ufyWzf3S2ZbYWl06bXZrkFeszw8NTVR2b5GVJ3jNdryQvSnLFtIk1WWNV9fQkP5Xk4iQZY3xzjPFwPFc62JLkyVW1JclTktwXz5c1Ncb4wyQPHjR8qOfGaUneN2auT3J0VT17Necj0lZuW5J75q7vncZYJ1W1PcmPJrkhydYxxn3TTfcn2bpO0zpc/XaSNyT5q+n6M5M8PMbYP133fFl7xyf5QpLfmQ5Dv6eqnhrPlXU1xrg3yW8l+VxmcfZIkpvj+dLBoZ4bT/i//yKNTaWqvjvJ7yb5lTHGl+dvG7PfN+N3zqyRqnp5kgfGGDev91z4W7YkeX6Sd48xfjTJV3PQoU3PlbU3vc/ptMwi+nuTPDWPPuzGOlvr54ZIWzkfXdVEVT0ps0D7wBjjw9Pw5w+8/Dx9f2C95ncYemGSn6mquzN7G8CLMnsv1NHT4ZzE82U97E2yd4xxw3T9isyizXNlff10ks+MMb4wxvjLJB/O7Dnk+bL+DvXceML//RdpK+ejqxqY3ut0cZLbxxhvnbvpyiRnTZfPSvKRtZ7b4WqM8cYxxrFjjO2ZPS8+NsY4I8nHk7xy2syarLExxv1J7qmq505DpyS5LZ4r6+1zSU6uqqdMf58dWBfPl/V3qOfGlUnOnM7yPDnJI3OHRVeFTxxYBVX10szee3Pgo6suXOcpHXaq6ieT/O8kn8zfvP/p1zN7X9rlSb4vyWeTvHqMcfCbQnmCVdVCkn8zxnh5Vf1AZq+sPSPJLUn+xRjjG+s5v8NNVf3jzAPm+1AAAAB2SURBVE7mODLJXUlem9n/tHuurKOq+o9Jfjazs9VvSfLzmb3HyfNljVTVB5MsJHlWks8nuSDJ/8gSz40ppv9LZoelv5bktWOMm1Z1PiINAKAfhzsBABoSaQAADYk0AICGRBoAQEMiDQCgIZEGANCQSAMAaOj/A8jJNNoTQHk2AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Draw a histogram of the 'score' column with matplotlib\n", "import matplotlib.pyplot as plt\n", "\n", "df[[\"score\"]].hist(\n", "    figsize=[10, 10],\n", ")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
camisdbaborobuildingstreetzipcodephonecuisine_descriptioninspection_dateaction...grade_daterecord_dateinspection_typecommunity_boardcouncil_districtcensus_tractbinbblntalocation
5312741144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers12/26/2018Violations were cited in the following area(s)....12/26/201807/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
6126841144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers07/20/2017Violations were cited in the following area(s)....07/20/201707/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
2071741144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers03/04/2020Violations were cited in the following area(s)....03/04/202007/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
464841271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza05/25/2019Violations were cited in the following area(s)....05/25/201907/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
22441271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza05/25/2019Violations were cited in the following area(s)....05/25/201907/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
946541144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers03/04/2020Violations were cited in the following area(s)....03/04/202007/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
10451240396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/19/2018Violations were cited in the following area(s)....12/19/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
10672841271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza01/25/2018Violations were cited in the following area(s)....01/25/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
6274850004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken05/28/2019Violations were cited in the following area(s)....05/28/201907/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
7921141271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza11/05/2016Violations were cited in the following area(s)....11/05/201607/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
21854550004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken01/10/2018Violations were cited in the following area(s)....01/10/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
23866341271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza11/05/2016Violations were cited in the following area(s)....11/05/201607/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
24520540396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/19/2018Violations were cited in the following area(s)....12/19/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
24523341271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza01/25/2018Violations were cited in the following area(s)....01/25/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
24741750004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken05/05/2017Violations were cited in the following area(s)....05/05/201707/07/2020Cycle Inspection / Re-inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
18687450099704MASTER'S PIZZERIABrooklyn52015 AVENUE11220.07184396012Pizza11/18/2019Violations were cited in the following area(s)....11/18/201907/07/2020Pre-permit (Operational) / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
19810440396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/28/2017Violations were cited in the following area(s)....12/28/201707/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
21342540396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/19/2018Violations were cited in the following area(s)....12/19/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
20236350004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken05/28/2019Violations were cited in the following area(s)....05/28/201907/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
15805940396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/19/2018Violations were cited in the following area(s)....12/19/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
16367241144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers08/13/2018Violations were cited in the following area(s)....08/13/201807/07/2020Cycle Inspection / Re-inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
13850840396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza01/29/2020Violations were cited in the following area(s)....01/29/202007/07/2020Cycle Inspection / Re-inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
14094041144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers07/20/2017Violations were cited in the following area(s)....07/20/201707/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
14315750004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken01/10/2018Violations were cited in the following area(s)....01/10/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
14954841144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers07/20/2017Violations were cited in the following area(s)....07/20/201707/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
14974250004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken05/31/2018Violations were cited in the following area(s)....05/31/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
24999441271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza01/25/2018Violations were cited in the following area(s)....01/25/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
25760341144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers08/13/2018Violations were cited in the following area(s)....08/13/201807/07/2020Cycle Inspection / Re-inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
26882350004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken01/10/2018Violations were cited in the following area(s)....01/10/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
26952141144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers12/17/2019Violations were cited in the following area(s)....12/17/201907/07/2020Cycle Inspection / Re-inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
27750050099704MASTER'S PIZZERIABrooklyn52015 AVENUE11220.07184396012Pizza11/18/2019Violations were cited in the following area(s)....11/18/201907/07/2020Pre-permit (Operational) / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
27950340396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza01/29/2020Violations were cited in the following area(s)....01/29/202007/07/2020Cycle Inspection / Re-inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
29986341144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers12/26/2018Violations were cited in the following area(s)....12/26/201807/07/2020Cycle Inspection / Initial Inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
31978741271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza05/25/2019Violations were cited in the following area(s)....05/25/201907/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
33657050004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken01/10/2018Violations were cited in the following area(s)....01/10/201807/07/2020Cycle Inspection / Re-inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
34055150004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken04/10/2017Establishment re-opened by DOHMH...04/10/201707/07/2020Cycle Inspection / Reopening Inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
39550841144258BURGER KINGBrooklyn52125 AVENUE11220.07187650844Hamburgers12/17/2019Violations were cited in the following area(s)....12/17/201907/07/2020Cycle Inspection / Re-inspection307.038.07600.03329902.03.008070e+09BK3240.643852716573,-74.011628212186
30936640396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza12/28/2017Violations were cited in the following area(s)....12/28/201707/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
34085740396492ROYAL KING'S PIZZABrooklyn52115 AVENUE11220.07184923846Pizza01/29/2020Violations were cited in the following area(s)....01/29/202007/07/2020Cycle Inspection / Re-inspection307.038.010000.03013939.03.008080e+09BK3440.643849974348996,-74.01160298782
35866050004330KFCBrooklyn52235 AVENUE11220.07184922813Chicken05/31/2018Violations were cited in the following area(s)....05/31/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013937.03.008080e+09BK3440.643800563168,-74.01165342693001
39345141271801PINO'SBrooklyn52015 AVENUE11220.07184396012Pizza06/05/2018Violations were cited in the following area(s)....06/05/201807/07/2020Cycle Inspection / Initial Inspection307.038.010000.03013942.03.008080e+09BK3440.643888405293005,-74.011563356969
\n", "
\n", "

41 rows × 25 columns

" ], "text/plain": [ " camis dba boro building street zipcode \\\n", "53127 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "61268 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "20717 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "4648 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "224 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "9465 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "104512 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "106728 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "62748 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "79211 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "218545 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "238663 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "245205 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "245233 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "247417 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "186874 50099704 MASTER'S PIZZERIA Brooklyn 5201 5 AVENUE 11220.0 \n", "198104 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "213425 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "202363 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "158059 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "163672 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "138508 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "140940 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "143157 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "149548 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "149742 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "249994 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "257603 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "268823 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "269521 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "277500 50099704 MASTER'S PIZZERIA 
Brooklyn 5201 5 AVENUE 11220.0 \n", "279503 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "299863 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "319787 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "336570 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "340551 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "395508 41144258 BURGER KING Brooklyn 5212 5 AVENUE 11220.0 \n", "309366 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "340857 40396492 ROYAL KING'S PIZZA Brooklyn 5211 5 AVENUE 11220.0 \n", "358660 50004330 KFC Brooklyn 5223 5 AVENUE 11220.0 \n", "393451 41271801 PINO'S Brooklyn 5201 5 AVENUE 11220.0 \n", "\n", " phone cuisine_description inspection_date \\\n", "53127 7187650844 Hamburgers 12/26/2018 \n", "61268 7187650844 Hamburgers 07/20/2017 \n", "20717 7187650844 Hamburgers 03/04/2020 \n", "4648 7184396012 Pizza 05/25/2019 \n", "224 7184396012 Pizza 05/25/2019 \n", "9465 7187650844 Hamburgers 03/04/2020 \n", "104512 7184923846 Pizza 12/19/2018 \n", "106728 7184396012 Pizza 01/25/2018 \n", "62748 7184922813 Chicken 05/28/2019 \n", "79211 7184396012 Pizza 11/05/2016 \n", "218545 7184922813 Chicken 01/10/2018 \n", "238663 7184396012 Pizza 11/05/2016 \n", "245205 7184923846 Pizza 12/19/2018 \n", "245233 7184396012 Pizza 01/25/2018 \n", "247417 7184922813 Chicken 05/05/2017 \n", "186874 7184396012 Pizza 11/18/2019 \n", "198104 7184923846 Pizza 12/28/2017 \n", "213425 7184923846 Pizza 12/19/2018 \n", "202363 7184922813 Chicken 05/28/2019 \n", "158059 7184923846 Pizza 12/19/2018 \n", "163672 7187650844 Hamburgers 08/13/2018 \n", "138508 7184923846 Pizza 01/29/2020 \n", "140940 7187650844 Hamburgers 07/20/2017 \n", "143157 7184922813 Chicken 01/10/2018 \n", "149548 7187650844 Hamburgers 07/20/2017 \n", "149742 7184922813 Chicken 05/31/2018 \n", "249994 7184396012 Pizza 01/25/2018 \n", "257603 7187650844 Hamburgers 08/13/2018 \n", "268823 7184922813 Chicken 01/10/2018 \n", "269521 7187650844 Hamburgers 
12/17/2019 \n", "277500 7184396012 Pizza 11/18/2019 \n", "279503 7184923846 Pizza 01/29/2020 \n", "299863 7187650844 Hamburgers 12/26/2018 \n", "319787 7184396012 Pizza 05/25/2019 \n", "336570 7184922813 Chicken 01/10/2018 \n", "340551 7184922813 Chicken 04/10/2017 \n", "395508 7187650844 Hamburgers 12/17/2019 \n", "309366 7184923846 Pizza 12/28/2017 \n", "340857 7184923846 Pizza 01/29/2020 \n", "358660 7184922813 Chicken 05/31/2018 \n", "393451 7184396012 Pizza 06/05/2018 \n", "\n", " action ... grade_date \\\n", "53127 Violations were cited in the following area(s). ... 12/26/2018 \n", "61268 Violations were cited in the following area(s). ... 07/20/2017 \n", "20717 Violations were cited in the following area(s). ... 03/04/2020 \n", "4648 Violations were cited in the following area(s). ... 05/25/2019 \n", "224 Violations were cited in the following area(s). ... 05/25/2019 \n", "9465 Violations were cited in the following area(s). ... 03/04/2020 \n", "104512 Violations were cited in the following area(s). ... 12/19/2018 \n", "106728 Violations were cited in the following area(s). ... 01/25/2018 \n", "62748 Violations were cited in the following area(s). ... 05/28/2019 \n", "79211 Violations were cited in the following area(s). ... 11/05/2016 \n", "218545 Violations were cited in the following area(s). ... 01/10/2018 \n", "238663 Violations were cited in the following area(s). ... 11/05/2016 \n", "245205 Violations were cited in the following area(s). ... 12/19/2018 \n", "245233 Violations were cited in the following area(s). ... 01/25/2018 \n", "247417 Violations were cited in the following area(s). ... 05/05/2017 \n", "186874 Violations were cited in the following area(s). ... 11/18/2019 \n", "198104 Violations were cited in the following area(s). ... 12/28/2017 \n", "213425 Violations were cited in the following area(s). ... 12/19/2018 \n", "202363 Violations were cited in the following area(s). ... 
05/28/2019 \n", "158059 Violations were cited in the following area(s). ... 12/19/2018 \n", "163672 Violations were cited in the following area(s). ... 08/13/2018 \n", "138508 Violations were cited in the following area(s). ... 01/29/2020 \n", "140940 Violations were cited in the following area(s). ... 07/20/2017 \n", "143157 Violations were cited in the following area(s). ... 01/10/2018 \n", "149548 Violations were cited in the following area(s). ... 07/20/2017 \n", "149742 Violations were cited in the following area(s). ... 05/31/2018 \n", "249994 Violations were cited in the following area(s). ... 01/25/2018 \n", "257603 Violations were cited in the following area(s). ... 08/13/2018 \n", "268823 Violations were cited in the following area(s). ... 01/10/2018 \n", "269521 Violations were cited in the following area(s). ... 12/17/2019 \n", "277500 Violations were cited in the following area(s). ... 11/18/2019 \n", "279503 Violations were cited in the following area(s). ... 01/29/2020 \n", "299863 Violations were cited in the following area(s). ... 12/26/2018 \n", "319787 Violations were cited in the following area(s). ... 05/25/2019 \n", "336570 Violations were cited in the following area(s). ... 01/10/2018 \n", "340551 Establishment re-opened by DOHMH ... 04/10/2017 \n", "395508 Violations were cited in the following area(s). ... 12/17/2019 \n", "309366 Violations were cited in the following area(s). ... 12/28/2017 \n", "340857 Violations were cited in the following area(s). ... 01/29/2020 \n", "358660 Violations were cited in the following area(s). ... 05/31/2018 \n", "393451 Violations were cited in the following area(s). ... 
06/05/2018 \n", "\n", " record_date inspection_type \\\n", "53127 07/07/2020 Cycle Inspection / Initial Inspection \n", "61268 07/07/2020 Cycle Inspection / Initial Inspection \n", "20717 07/07/2020 Cycle Inspection / Initial Inspection \n", "4648 07/07/2020 Cycle Inspection / Initial Inspection \n", "224 07/07/2020 Cycle Inspection / Initial Inspection \n", "9465 07/07/2020 Cycle Inspection / Initial Inspection \n", "104512 07/07/2020 Cycle Inspection / Initial Inspection \n", "106728 07/07/2020 Cycle Inspection / Re-inspection \n", "62748 07/07/2020 Cycle Inspection / Initial Inspection \n", "79211 07/07/2020 Cycle Inspection / Initial Inspection \n", "218545 07/07/2020 Cycle Inspection / Re-inspection \n", "238663 07/07/2020 Cycle Inspection / Initial Inspection \n", "245205 07/07/2020 Cycle Inspection / Initial Inspection \n", "245233 07/07/2020 Cycle Inspection / Re-inspection \n", "247417 07/07/2020 Cycle Inspection / Re-inspection \n", "186874 07/07/2020 Pre-permit (Operational) / Initial Inspection \n", "198104 07/07/2020 Cycle Inspection / Initial Inspection \n", "213425 07/07/2020 Cycle Inspection / Initial Inspection \n", "202363 07/07/2020 Cycle Inspection / Initial Inspection \n", "158059 07/07/2020 Cycle Inspection / Initial Inspection \n", "163672 07/07/2020 Cycle Inspection / Re-inspection \n", "138508 07/07/2020 Cycle Inspection / Re-inspection \n", "140940 07/07/2020 Cycle Inspection / Initial Inspection \n", "143157 07/07/2020 Cycle Inspection / Re-inspection \n", "149548 07/07/2020 Cycle Inspection / Initial Inspection \n", "149742 07/07/2020 Cycle Inspection / Initial Inspection \n", "249994 07/07/2020 Cycle Inspection / Re-inspection \n", "257603 07/07/2020 Cycle Inspection / Re-inspection \n", "268823 07/07/2020 Cycle Inspection / Re-inspection \n", "269521 07/07/2020 Cycle Inspection / Re-inspection \n", "277500 07/07/2020 Pre-permit (Operational) / Initial Inspection \n", "279503 07/07/2020 Cycle Inspection / Re-inspection \n", "299863 
07/07/2020 Cycle Inspection / Initial Inspection \n", "319787 07/07/2020 Cycle Inspection / Initial Inspection \n", "336570 07/07/2020 Cycle Inspection / Re-inspection \n", "340551 07/07/2020 Cycle Inspection / Reopening Inspection \n", "395508 07/07/2020 Cycle Inspection / Re-inspection \n", "309366 07/07/2020 Cycle Inspection / Initial Inspection \n", "340857 07/07/2020 Cycle Inspection / Re-inspection \n", "358660 07/07/2020 Cycle Inspection / Initial Inspection \n", "393451 07/07/2020 Cycle Inspection / Initial Inspection \n", "\n", " community_board council_district census_tract bin \\\n", "53127 307.0 38.0 7600.0 3329902.0 \n", "61268 307.0 38.0 7600.0 3329902.0 \n", "20717 307.0 38.0 7600.0 3329902.0 \n", "4648 307.0 38.0 10000.0 3013942.0 \n", "224 307.0 38.0 10000.0 3013942.0 \n", "9465 307.0 38.0 7600.0 3329902.0 \n", "104512 307.0 38.0 10000.0 3013939.0 \n", "106728 307.0 38.0 10000.0 3013942.0 \n", "62748 307.0 38.0 10000.0 3013937.0 \n", "79211 307.0 38.0 10000.0 3013942.0 \n", "218545 307.0 38.0 10000.0 3013937.0 \n", "238663 307.0 38.0 10000.0 3013942.0 \n", "245205 307.0 38.0 10000.0 3013939.0 \n", "245233 307.0 38.0 10000.0 3013942.0 \n", "247417 307.0 38.0 10000.0 3013937.0 \n", "186874 307.0 38.0 10000.0 3013942.0 \n", "198104 307.0 38.0 10000.0 3013939.0 \n", "213425 307.0 38.0 10000.0 3013939.0 \n", "202363 307.0 38.0 10000.0 3013937.0 \n", "158059 307.0 38.0 10000.0 3013939.0 \n", "163672 307.0 38.0 7600.0 3329902.0 \n", "138508 307.0 38.0 10000.0 3013939.0 \n", "140940 307.0 38.0 7600.0 3329902.0 \n", "143157 307.0 38.0 10000.0 3013937.0 \n", "149548 307.0 38.0 7600.0 3329902.0 \n", "149742 307.0 38.0 10000.0 3013937.0 \n", "249994 307.0 38.0 10000.0 3013942.0 \n", "257603 307.0 38.0 7600.0 3329902.0 \n", "268823 307.0 38.0 10000.0 3013937.0 \n", "269521 307.0 38.0 7600.0 3329902.0 \n", "277500 307.0 38.0 10000.0 3013942.0 \n", "279503 307.0 38.0 10000.0 3013939.0 \n", "299863 307.0 38.0 7600.0 3329902.0 \n", "319787 307.0 38.0 10000.0 
3013942.0 \n", "336570 307.0 38.0 10000.0 3013937.0 \n", "340551 307.0 38.0 10000.0 3013937.0 \n", "395508 307.0 38.0 7600.0 3329902.0 \n", "309366 307.0 38.0 10000.0 3013939.0 \n", "340857 307.0 38.0 10000.0 3013939.0 \n", "358660 307.0 38.0 10000.0 3013937.0 \n", "393451 307.0 38.0 10000.0 3013942.0 \n", "\n", " bbl nta location \n", "53127 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "61268 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "20717 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "4648 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "224 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "9465 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "104512 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "106728 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "62748 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "79211 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "218545 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "238663 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "245205 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "245233 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "247417 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "186874 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "198104 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "213425 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "202363 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "158059 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "163672 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "138508 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "140940 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "143157 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "149548 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "149742 3.008080e+09 BK34 
40.643800563168,-74.01165342693001 \n", "249994 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "257603 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "268823 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "269521 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "277500 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "279503 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "299863 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "319787 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "336570 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "340551 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "395508 3.008070e+09 BK32 40.643852716573,-74.011628212186 \n", "309366 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "340857 3.008080e+09 BK34 40.643849974348996,-74.01160298782 \n", "358660 3.008080e+09 BK34 40.643800563168,-74.01165342693001 \n", "393451 3.008080e+09 BK34 40.643888405293005,-74.011563356969 \n", "\n", "[41 rows x 25 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# es_query() allows for the full Elasticsearch querying capabilities\n", "df.es_query({\n", " \"geo_distance\": {\n", " \"distance\": \"50m\",\n", " \"location\": {\n", " \"lat\": 40.643852716573,\n", " \"lon\": -74.011628212186\n", " }\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
camisdbaborobuildingstreetzipcodephonecuisine_descriptioninspection_dateaction...grade_daterecord_dateinspection_typecommunity_boardcouncil_districtcensus_tractbinbblntalocation
576550033781RED HOOK LOBSTER POUNDBrooklyn284VAN BRUNT STREET11231.07188587650Seafood04/19/2018Violations were cited in the following area(s)....04/19/201807/07/2020Cycle Inspection / Initial Inspection306.038.05900.03008365.03.005290e+09BK3340.67974632809,-74.010098611838
1237950058053RED HOT IIBrooklyn3497 AVENUE11215.07183692577Chinese05/17/2018Violations were cited in the following area(s)....05/17/201807/07/2020Cycle Inspection / Re-inspection306.039.015100.03026127.03.010940e+09BK3740.666194419994,-73.98214269199799
1297850059700RED POKEManhattan6009 AVENUE10036.02129748100Hawaiian03/21/2017Violations were cited in the following area(s)....03/21/201707/07/2020Pre-permit (Operational) / Re-inspection104.03.012100.01088997.01.010330e+09MN1540.758993434643,-73.992203122611
1675940365239DORRIAN'S RED HAND RESTAURANTManhattan16162 AVENUE10028.02127726660Irish11/08/2018Violations were cited in the following area(s)....11/08/201807/07/2020Cycle Inspection / Initial Inspection108.05.013800.01049947.01.015460e+09MN3240.776404966262,-73.952802065662
1862450095340RED PEONY CHINESE CUISINEManhattan24WEST 56 STREET10019.02123808883Chinese11/21/2019Violations were cited in the following area(s)....11/21/201907/07/2020Pre-permit (Operational) / Re-inspection105.04.010400.01034840.01.012710e+09MN1740.762699245064,-73.975463733228
..................................................................
39122950061162CODE REDBronx1320EAST GUN HILL ROAD10469.07188811808Caribbean05/14/2018Violations were cited in the following area(s)....05/14/201807/07/2020Cycle Inspection / Initial Inspection211.012.035000.02056100.02.045890e+09BX3140.871378316318996,-73.848028279305
39353150014078RED LOBSTERManhattan5TIMES SQ10036.02127306706Seafood11/08/2017Violations were cited in the following area(s)....11/08/201707/07/2020Cycle Inspection / Re-inspection105.03.011300.01024656.01.010130e+09MN1740.755702020307005,-73.987207980138
39617140368313RED FLAME DINERManhattan67WEST 44 STREET10036.02128693965American02/16/2018Violations were cited in the following area(s)....02/16/201807/07/2020Cycle Inspection / Initial Inspection105.04.09600.01034217.01.012600e+09MN1740.755627203336,-73.981938150269
39650150068499RED GINGERStaten Island1650RICHMOND AVENUE10314.07189828808Other09/19/2017Violations were cited in the following area(s)....09/19/201707/07/2020Pre-permit (Operational) / Initial Inspection502.050.029103.05037014.05.022360e+09SI0540.608078102502,-74.162260908042
39895050059700RED POKEManhattan6009 AVENUE10036.02129748100Hawaiian12/08/2017Violations were cited in the following area(s)....12/08/201707/07/2020Cycle Inspection / Re-inspection104.03.012100.01088997.01.010330e+09MN1540.758993434643,-73.992203122611
\n", "
\n", "

573 rows × 25 columns

" ], "text/plain": [ " camis dba boro building \\\n", "5765 50033781 RED HOOK LOBSTER POUND Brooklyn 284 \n", "12379 50058053 RED HOT II Brooklyn 349 \n", "12978 50059700 RED POKE Manhattan 600 \n", "16759 40365239 DORRIAN'S RED HAND RESTAURANT Manhattan 1616 \n", "18624 50095340 RED PEONY CHINESE CUISINE Manhattan 24 \n", "... ... ... ... ... \n", "391229 50061162 CODE RED Bronx 1320 \n", "393531 50014078 RED LOBSTER Manhattan 5 \n", "396171 40368313 RED FLAME DINER Manhattan 67 \n", "396501 50068499 RED GINGER Staten Island 1650 \n", "398950 50059700 RED POKE Manhattan 600 \n", "\n", " street zipcode phone cuisine_description \\\n", "5765 VAN BRUNT STREET 11231.0 7188587650 Seafood \n", "12379 7 AVENUE 11215.0 7183692577 Chinese \n", "12978 9 AVENUE 10036.0 2129748100 Hawaiian \n", "16759 2 AVENUE 10028.0 2127726660 Irish \n", "18624 WEST 56 STREET 10019.0 2123808883 Chinese \n", "... ... ... ... ... \n", "391229 EAST GUN HILL ROAD 10469.0 7188811808 Caribbean \n", "393531 TIMES SQ 10036.0 2127306706 Seafood \n", "396171 WEST 44 STREET 10036.0 2128693965 American \n", "396501 RICHMOND AVENUE 10314.0 7189828808 Other \n", "398950 9 AVENUE 10036.0 2129748100 Hawaiian \n", "\n", " inspection_date action ... \\\n", "5765 04/19/2018 Violations were cited in the following area(s). ... \n", "12379 05/17/2018 Violations were cited in the following area(s). ... \n", "12978 03/21/2017 Violations were cited in the following area(s). ... \n", "16759 11/08/2018 Violations were cited in the following area(s). ... \n", "18624 11/21/2019 Violations were cited in the following area(s). ... \n", "... ... ... ... \n", "391229 05/14/2018 Violations were cited in the following area(s). ... \n", "393531 11/08/2017 Violations were cited in the following area(s). ... \n", "396171 02/16/2018 Violations were cited in the following area(s). ... \n", "396501 09/19/2017 Violations were cited in the following area(s). ... \n", "398950 12/08/2017 Violations were cited in the following area(s). 
... \n", "\n", " grade_date record_date inspection_type \\\n", "5765 04/19/2018 07/07/2020 Cycle Inspection / Initial Inspection \n", "12379 05/17/2018 07/07/2020 Cycle Inspection / Re-inspection \n", "12978 03/21/2017 07/07/2020 Pre-permit (Operational) / Re-inspection \n", "16759 11/08/2018 07/07/2020 Cycle Inspection / Initial Inspection \n", "18624 11/21/2019 07/07/2020 Pre-permit (Operational) / Re-inspection \n", "... ... ... ... \n", "391229 05/14/2018 07/07/2020 Cycle Inspection / Initial Inspection \n", "393531 11/08/2017 07/07/2020 Cycle Inspection / Re-inspection \n", "396171 02/16/2018 07/07/2020 Cycle Inspection / Initial Inspection \n", "396501 09/19/2017 07/07/2020 Pre-permit (Operational) / Initial Inspection \n", "398950 12/08/2017 07/07/2020 Cycle Inspection / Re-inspection \n", "\n", " community_board council_district census_tract bin \\\n", "5765 306.0 38.0 5900.0 3008365.0 \n", "12379 306.0 39.0 15100.0 3026127.0 \n", "12978 104.0 3.0 12100.0 1088997.0 \n", "16759 108.0 5.0 13800.0 1049947.0 \n", "18624 105.0 4.0 10400.0 1034840.0 \n", "... ... ... ... ... \n", "391229 211.0 12.0 35000.0 2056100.0 \n", "393531 105.0 3.0 11300.0 1024656.0 \n", "396171 105.0 4.0 9600.0 1034217.0 \n", "396501 502.0 50.0 29103.0 5037014.0 \n", "398950 104.0 3.0 12100.0 1088997.0 \n", "\n", " bbl nta location \n", "5765 3.005290e+09 BK33 40.67974632809,-74.010098611838 \n", "12379 3.010940e+09 BK37 40.666194419994,-73.98214269199799 \n", "12978 1.010330e+09 MN15 40.758993434643,-73.992203122611 \n", "16759 1.015460e+09 MN32 40.776404966262,-73.952802065662 \n", "18624 1.012710e+09 MN17 40.762699245064,-73.975463733228 \n", "... ... ... ... 
\n", "391229 2.045890e+09 BX31 40.871378316318996,-73.848028279305 \n", "393531 1.010130e+09 MN17 40.755702020307005,-73.987207980138 \n", "396171 1.012600e+09 MN17 40.755627203336,-73.981938150269 \n", "396501 5.022360e+09 SI05 40.608078102502,-74.162260908042 \n", "398950 1.010330e+09 MN15 40.758993434643,-73.992203122611 \n", "\n", "[573 rows x 25 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Full-text search example\n", "df.es_query({\"match\": {\"dba\": \"red\"}})" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 100 entries, 107677 to 96813\n", "Data columns (total 25 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 camis 100 non-null int64 \n", " 1 dba 100 non-null object \n", " 2 boro 100 non-null object \n", " 3 building 100 non-null object \n", " 4 street 100 non-null object \n", " 5 zipcode 100 non-null float64\n", " 6 phone 100 non-null object \n", " 7 cuisine_description 100 non-null object \n", " 8 inspection_date 100 non-null object \n", " 9 action 100 non-null object \n", " 10 violation_code 100 non-null object \n", " 11 violation_description 100 non-null object \n", " 12 critical_flag 100 non-null object \n", " 13 score 100 non-null float64\n", " 14 grade 100 non-null object \n", " 15 grade_date 100 non-null object \n", " 16 record_date 100 non-null object \n", " 17 inspection_type 100 non-null object \n", " 18 community_board 100 non-null float64\n", " 19 council_district 100 non-null float64\n", " 20 census_tract 100 non-null float64\n", " 21 bin 100 non-null float64\n", " 22 bbl 100 non-null float64\n", " 23 nta 100 non-null object \n", " 24 location 100 non-null object \n", "dtypes: float64(7), int64(1), object(17)\n", "memory usage: 20.3+ KB\n", "\n" ] } ], "source": [ "# Pull a subset of your data for building graphs / operations locally.\n", 
"sample_df = df[df.grade == \"B\"].sample(100).to_pandas()\n", "sample_df.info()\n", "print(type(sample_df))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Machine Learning Demo" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature Names: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']\n", "Data example: [1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00\n", " 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]\n", "[0 1 2]\n", "[0 1 2]\n" ] } ], "source": [ "# Import scikit-learn and train a model locally\n", "from sklearn import datasets\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", "# Load the wine dataset (the variable is named `digits`, but it holds the wine data)\n", "digits = datasets.load_wine()\n", "print(\"Feature Names:\", digits.feature_names)\n", "print(\"Data example:\", digits.data[0])\n", "\n", "# Hold out samples 10, 80, and 140 for testing our model\n", "data = [x for i, x in enumerate(digits.data) if i not in (10, 80, 140)]\n", "target = [x for i, x in enumerate(digits.target) if i not in (10, 80, 140)]\n", "\n", "sk_classifier = DecisionTreeClassifier()\n", "sk_classifier.fit(data, target)\n", "\n", "# Compare the model's predictions for the three held-out samples with their true labels\n", "print(sk_classifier.predict(digits.data[[10, 80, 140]]))\n", "print(digits.target[[10, 80, 140]])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "POST https://167e473c7bba4bae85004385d4e0ce46.us-central1.gcp.cloud.es.io/_ingest/pipeline/_simulate [status:200 request:0.053s]\n", "> 
{\"pipeline\":{\"processors\":[{\"inference\":{\"model_id\":\"wine-classifier\",\"inference_config\":{\"classification\":{}},\"field_map\":{}}}]},\"docs\":[{\"_source\":{\"alcohol\":14.1,\"malic_acid\":2.16,\"ash\":2.3,\"alcalinity_of_ash\":18.0,\"magnesium\":105.0,\"total_phenols\":2.95,\"flavanoids\":3.32,\"nonflavanoid_phenols\":0.22,\"proanthocyanins\":2.38,\"color_intensity\":5.75,\"hue\":1.25,\"od280/od315_of_diluted_wines\":3.17,\"proline\":1510.0}},{\"_source\":{\"alcohol\":12.0,\"malic_acid\":0.92,\"ash\":2.0,\"alcalinity_of_ash\":19.0,\"magnesium\":86.0,\"total_phenols\":2.42,\"flavanoids\":2.26,\"nonflavanoid_phenols\":0.3,\"proanthocyanins\":1.43,\"color_intensity\":2.5,\"hue\":1.38,\"od280/od315_of_diluted_wines\":3.12,\"proline\":278.0}},{\"_source\":{\"alcohol\":12.93,\"malic_acid\":2.81,\"ash\":2.7,\"alcalinity_of_ash\":21.0,\"magnesium\":96.0,\"total_phenols\":1.54,\"flavanoids\":0.5,\"nonflavanoid_phenols\":0.53,\"proanthocyanins\":0.75,\"color_intensity\":4.6,\"hue\":0.77,\"od280/od315_of_diluted_wines\":2.31,\"proline\":600.0}}]}\n", "< 
{\"docs\":[{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":14.1,\"alcalinity_of_ash\":18.0,\"proanthocyanins\":2.38,\"od280/od315_of_diluted_wines\":3.17,\"total_phenols\":2.95,\"magnesium\":105.0,\"flavanoids\":3.32,\"proline\":1510.0,\"malic_acid\":2.16,\"ash\":2.3,\"nonflavanoid_phenols\":0.22,\"hue\":1.25,\"color_intensity\":5.75,\"ml\":{\"inference\":{\"predicted_value\":\"0\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.98965Z\"}}},{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":12.0,\"alcalinity_of_ash\":19.0,\"proanthocyanins\":1.43,\"od280/od315_of_diluted_wines\":3.12,\"total_phenols\":2.42,\"magnesium\":86.0,\"flavanoids\":2.26,\"proline\":278.0,\"malic_acid\":0.92,\"ash\":2.0,\"nonflavanoid_phenols\":0.3,\"hue\":1.38,\"color_intensity\":2.5,\"ml\":{\"inference\":{\"predicted_value\":\"1\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.98966Z\"}}},{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":12.93,\"alcalinity_of_ash\":21.0,\"proanthocyanins\":0.75,\"od280/od315_of_diluted_wines\":2.31,\"total_phenols\":1.54,\"magnesium\":96.0,\"flavanoids\":0.5,\"proline\":600.0,\"malic_acid\":2.81,\"ash\":2.7,\"nonflavanoid_phenols\":0.53,\"hue\":0.77,\"color_intensity\":4.6,\"ml\":{\"inference\":{\"predicted_value\":\"2\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.989672Z\"}}}]}\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[0 1 2]\n", "[0 1 2]\n" ] } ], "source": [ "from eland.ml import MLModel\n", "\n", "# Serialize the scikit-learn model into Elasticsearch\n", "ed_classifier = MLModel.import_model(\n", " es_client=es,\n", " model_id=\"wine-classifier\",\n", " model=sk_classifier,\n", " feature_names=digits.feature_names,\n", " overwrite=True\n", ")\n", "\n", "# Capture the Elasticsearch API call 
w/ logging\n", "import logging\n", "logger = logging.getLogger(\"elasticsearch\")\n", "logger.setLevel(logging.DEBUG)\n", "logger.addHandler(logging.StreamHandler())\n", "\n", "# Use the same data as before, but now with the model in Elasticsearch\n", "print(ed_classifier.predict(digits.data[[10, 80, 140]].tolist()))\n", "print(digits.target[[10, 80, 140]])\n", "\n", "logger.handlers = []" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"docs\": [\n", " {\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 18.0,\n", " \"alcohol\": 14.1,\n", " \"ash\": 2.3,\n", " \"color_intensity\": 5.75,\n", " \"flavanoids\": 3.32,\n", " \"hue\": 1.25,\n", " \"magnesium\": 105.0,\n", " \"malic_acid\": 2.16,\n", " \"nonflavanoid_phenols\": 0.22,\n", " \"od280/od315_of_diluted_wines\": 3.17,\n", " \"proanthocyanins\": 2.38,\n", " \"proline\": 1510.0,\n", " \"total_phenols\": 2.95\n", " }\n", " },\n", " {\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 19.0,\n", " \"alcohol\": 12.0,\n", " \"ash\": 2.0,\n", " \"color_intensity\": 2.5,\n", " \"flavanoids\": 2.26,\n", " \"hue\": 1.38,\n", " \"magnesium\": 86.0,\n", " \"malic_acid\": 0.92,\n", " \"nonflavanoid_phenols\": 0.3,\n", " \"od280/od315_of_diluted_wines\": 3.12,\n", " \"proanthocyanins\": 1.43,\n", " \"proline\": 278.0,\n", " \"total_phenols\": 2.42\n", " }\n", " },\n", " {\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 21.0,\n", " \"alcohol\": 12.93,\n", " \"ash\": 2.7,\n", " \"color_intensity\": 4.6,\n", " \"flavanoids\": 0.5,\n", " \"hue\": 0.77,\n", " \"magnesium\": 96.0,\n", " \"malic_acid\": 2.81,\n", " \"nonflavanoid_phenols\": 0.53,\n", " \"od280/od315_of_diluted_wines\": 2.31,\n", " \"proanthocyanins\": 0.75,\n", " \"proline\": 600.0,\n", " \"total_phenols\": 1.54\n", " }\n", " }\n", " ],\n", " \"pipeline\": {\n", " \"processors\": [\n", " {\n", " \"inference\": {\n", " \"field_map\": {},\n", " \"inference_config\": {\n", 
" \"classification\": {}\n", " },\n", " \"model_id\": \"wine-classifier\"\n", " }\n", " }\n", " ]\n", " }\n", "}\n" ] } ], "source": [ "json({\"pipeline\":{\"processors\":[{\"inference\":{\"model_id\":\"wine-classifier\",\"inference_config\":{\"classification\":{}},\"field_map\":{}}}]},\"docs\":[{\"_source\":{\"alcohol\":14.1,\"malic_acid\":2.16,\"ash\":2.3,\"alcalinity_of_ash\":18.0,\"magnesium\":105.0,\"total_phenols\":2.95,\"flavanoids\":3.32,\"nonflavanoid_phenols\":0.22,\"proanthocyanins\":2.38,\"color_intensity\":5.75,\"hue\":1.25,\"od280/od315_of_diluted_wines\":3.17,\"proline\":1510.0}},{\"_source\":{\"alcohol\":12.0,\"malic_acid\":0.92,\"ash\":2.0,\"alcalinity_of_ash\":19.0,\"magnesium\":86.0,\"total_phenols\":2.42,\"flavanoids\":2.26,\"nonflavanoid_phenols\":0.3,\"proanthocyanins\":1.43,\"color_intensity\":2.5,\"hue\":1.38,\"od280/od315_of_diluted_wines\":3.12,\"proline\":278.0}},{\"_source\":{\"alcohol\":12.93,\"malic_acid\":2.81,\"ash\":2.7,\"alcalinity_of_ash\":21.0,\"magnesium\":96.0,\"total_phenols\":1.54,\"flavanoids\":0.5,\"nonflavanoid_phenols\":0.53,\"proanthocyanins\":0.75,\"color_intensity\":4.6,\"hue\":0.77,\"od280/od315_of_diluted_wines\":2.31,\"proline\":600.0}}]})" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"docs\": [\n", " {\n", " \"doc\": {\n", " \"_id\": \"_id\",\n", " \"_index\": \"_index\",\n", " \"_ingest\": {\n", " \"timestamp\": \"2020-07-08T15:35:49.98965Z\"\n", " },\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 18.0,\n", " \"alcohol\": 14.1,\n", " \"ash\": 2.3,\n", " \"color_intensity\": 5.75,\n", " \"flavanoids\": 3.32,\n", " \"hue\": 1.25,\n", " \"magnesium\": 105.0,\n", " \"malic_acid\": 2.16,\n", " \"ml\": {\n", " \"inference\": {\n", " \"model_id\": \"wine-classifier\",\n", " \"predicted_value\": \"0\"\n", " }\n", " },\n", " \"nonflavanoid_phenols\": 0.22,\n", " \"od280/od315_of_diluted_wines\": 3.17,\n", " 
\"proanthocyanins\": 2.38,\n", " \"proline\": 1510.0,\n", " \"total_phenols\": 2.95\n", " },\n", " \"_type\": \"_doc\"\n", " }\n", " },\n", " {\n", " \"doc\": {\n", " \"_id\": \"_id\",\n", " \"_index\": \"_index\",\n", " \"_ingest\": {\n", " \"timestamp\": \"2020-07-08T15:35:49.98966Z\"\n", " },\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 19.0,\n", " \"alcohol\": 12.0,\n", " \"ash\": 2.0,\n", " \"color_intensity\": 2.5,\n", " \"flavanoids\": 2.26,\n", " \"hue\": 1.38,\n", " \"magnesium\": 86.0,\n", " \"malic_acid\": 0.92,\n", " \"ml\": {\n", " \"inference\": {\n", " \"model_id\": \"wine-classifier\",\n", " \"predicted_value\": \"1\"\n", " }\n", " },\n", " \"nonflavanoid_phenols\": 0.3,\n", " \"od280/od315_of_diluted_wines\": 3.12,\n", " \"proanthocyanins\": 1.43,\n", " \"proline\": 278.0,\n", " \"total_phenols\": 2.42\n", " },\n", " \"_type\": \"_doc\"\n", " }\n", " },\n", " {\n", " \"doc\": {\n", " \"_id\": \"_id\",\n", " \"_index\": \"_index\",\n", " \"_ingest\": {\n", " \"timestamp\": \"2020-07-08T15:35:49.989672Z\"\n", " },\n", " \"_source\": {\n", " \"alcalinity_of_ash\": 21.0,\n", " \"alcohol\": 12.93,\n", " \"ash\": 2.7,\n", " \"color_intensity\": 4.6,\n", " \"flavanoids\": 0.5,\n", " \"hue\": 0.77,\n", " \"magnesium\": 96.0,\n", " \"malic_acid\": 2.81,\n", " \"ml\": {\n", " \"inference\": {\n", " \"model_id\": \"wine-classifier\",\n", " \"predicted_value\": \"2\"\n", " }\n", " },\n", " \"nonflavanoid_phenols\": 0.53,\n", " \"od280/od315_of_diluted_wines\": 2.31,\n", " \"proanthocyanins\": 0.75,\n", " \"proline\": 600.0,\n", " \"total_phenols\": 1.54\n", " },\n", " \"_type\": \"_doc\"\n", " }\n", " }\n", " ]\n", "}\n" ] } ], "source": [ 
"json({\"docs\":[{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":14.1,\"alcalinity_of_ash\":18.0,\"proanthocyanins\":2.38,\"od280/od315_of_diluted_wines\":3.17,\"total_phenols\":2.95,\"magnesium\":105.0,\"flavanoids\":3.32,\"proline\":1510.0,\"malic_acid\":2.16,\"ash\":2.3,\"nonflavanoid_phenols\":0.22,\"hue\":1.25,\"color_intensity\":5.75,\"ml\":{\"inference\":{\"predicted_value\":\"0\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.98965Z\"}}},{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":12.0,\"alcalinity_of_ash\":19.0,\"proanthocyanins\":1.43,\"od280/od315_of_diluted_wines\":3.12,\"total_phenols\":2.42,\"magnesium\":86.0,\"flavanoids\":2.26,\"proline\":278.0,\"malic_acid\":0.92,\"ash\":2.0,\"nonflavanoid_phenols\":0.3,\"hue\":1.38,\"color_intensity\":2.5,\"ml\":{\"inference\":{\"predicted_value\":\"1\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.98966Z\"}}},{\"doc\":{\"_index\":\"_index\",\"_type\":\"_doc\",\"_id\":\"_id\",\"_source\":{\"alcohol\":12.93,\"alcalinity_of_ash\":21.0,\"proanthocyanins\":0.75,\"od280/od315_of_diluted_wines\":2.31,\"total_phenols\":1.54,\"magnesium\":96.0,\"flavanoids\":0.5,\"proline\":600.0,\"malic_acid\":2.81,\"ash\":2.7,\"nonflavanoid_phenols\":0.53,\"hue\":0.77,\"color_intensity\":4.6,\"ml\":{\"inference\":{\"predicted_value\":\"2\",\"model_id\":\"wine-classifier\"}}},\"_ingest\":{\"timestamp\":\"2020-07-08T15:35:49.989672Z\"}}}]})" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "es_index_pattern: nyc-restaurants\n", "Index:\n", " es_index_field: _id\n", " is_source_field: False\n", "Mappings:\n", " capabilities:\n", " es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name\n", "camis camis True long None 
int64 True True False camis\n", "dba dba True text None object True False False None\n", "boro boro True keyword None object True True False boro\n", "building building True keyword None object True True False building\n", "street street True keyword None object True True False street\n", "zipcode zipcode True short None int64 True True False zipcode\n", "phone phone True keyword None object True True False phone\n", "cuisine_description cuisine_description True keyword None object True True False cuisine_description\n", "inspection_date inspection_date True keyword None object True True False inspection_date\n", "action action True keyword None object True True False action\n", "violation_code violation_code True keyword None object True True False violation_code\n", "violation_description violation_description True keyword None object True True False violation_description\n", "critical_flag critical_flag True keyword None object True True False critical_flag\n", "score score True double None float64 True True False score\n", "grade grade True keyword None object True True False grade\n", "grade_date grade_date True keyword None object True True False grade_date\n", "record_date record_date True keyword None object True True False record_date\n", "inspection_type inspection_type True keyword None object True True False inspection_type\n", "community_board community_board True double None float64 True True False community_board\n", "council_district council_district True double None float64 True True False council_district\n", "census_tract census_tract True double None float64 True True False census_tract\n", "bin bin True double None float64 True True False bin\n", "bbl bbl True double None float64 True True False bbl\n", "nta nta True keyword None object True True False nta\n", "location location True geo_point None object True True False location\n", "Operations:\n", " tasks: [('boolean_filter': ('boolean_filter': {'script': {'script': {'source': 
\"doc['zipcode'].value > doc['score'].value\", 'lang': 'painless'}}}))]\n", " size: None\n", " sort_params: None\n", " _source: ['camis', 'dba', 'boro', 'building', 'street', 'zipcode', 'phone', 'cuisine_description', 'inspection_date', 'action', 'violation_code', 'violation_description', 'critical_flag', 'score', 'grade', 'grade_date', 'record_date', 'inspection_type', 'community_board', 'council_district', 'census_tract', 'bin', 'bbl', 'nta', 'location']\n", " body: {'query': {'script': {'script': {'source': \"doc['zipcode'].value > doc['score'].value\", 'lang': 'painless'}}}}\n", " post_processing: []\n", "\n" ] } ], "source": [ "print(df[df[\"zipcode\"] > df[\"score\"]].es_info())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }