From 360db10cde6b27e3a41b62ab84cc4164084c0a42 Mon Sep 17 00:00:00 2001 From: Ryan Holbrook Date: Tue, 3 Jan 2023 21:34:21 +0000 Subject: [PATCH 01/55] Change imgur images links to Google hosted --- notebooks/computer_vision/raw/ex1.ipynb | 2 +- notebooks/computer_vision/raw/ex4.ipynb | 2 +- notebooks/computer_vision/raw/ex5.ipynb | 2 +- notebooks/computer_vision/raw/ex6.ipynb | 4 +-- notebooks/computer_vision/raw/ex_tpus.ipynb | 12 ++++---- notebooks/computer_vision/raw/tut1.ipynb | 6 ++-- notebooks/computer_vision/raw/tut2.ipynb | 12 ++++---- notebooks/computer_vision/raw/tut3.ipynb | 8 +++--- notebooks/computer_vision/raw/tut4.ipynb | 6 ++-- notebooks/computer_vision/raw/tut5.ipynb | 8 +++--- notebooks/computer_vision/raw/tut6.ipynb | 4 +-- notebooks/data_viz_to_coder/raw/ex1.ipynb | 2 +- notebooks/data_viz_to_coder/raw/ex2.ipynb | 4 +-- notebooks/data_viz_to_coder/raw/ex3.ipynb | 2 +- notebooks/data_viz_to_coder/raw/ex5.ipynb | 2 +- notebooks/data_viz_to_coder/raw/ex7.ipynb | 8 +++--- notebooks/data_viz_to_coder/raw/tut1.ipynb | 8 +++--- notebooks/data_viz_to_coder/raw/tut2.ipynb | 2 +- notebooks/data_viz_to_coder/raw/tut3.ipynb | 2 +- notebooks/data_viz_to_coder/raw/tut4.ipynb | 2 +- notebooks/data_viz_to_coder/raw/tut5.ipynb | 2 +- notebooks/data_viz_to_coder/raw/tut6.ipynb | 2 +- notebooks/data_viz_to_coder/raw/tut7.ipynb | 6 ++-- notebooks/data_viz_to_coder/raw/tut8.ipynb | 4 +-- .../deep_learning/raw/ex1_convolutions.ipynb | 2 +- .../deep_learning/raw/ex7_from_scratch.ipynb | 2 +- notebooks/deep_learning/raw/ex_tpus.ipynb | 12 ++++---- notebooks/deep_learning_intro/raw/ex6.ipynb | 2 +- notebooks/deep_learning_intro/raw/tut1.ipynb | 6 ++-- notebooks/deep_learning_intro/raw/tut2.ipynb | 10 +++---- notebooks/deep_learning_intro/raw/tut3.ipynb | 4 +-- notebooks/deep_learning_intro/raw/tut4.ipynb | 6 ++-- notebooks/deep_learning_intro/raw/tut5.ipynb | 2 +- notebooks/deep_learning_intro/raw/tut6.ipynb | 4 +-- notebooks/embeddings/raw/1-embeddings.ipynb | 2 +- 
.../embeddings/raw/2-factorization.ipynb | 4 +-- notebooks/ethics/raw/tut3.ipynb | 2 +- notebooks/ethics/raw/tut4.ipynb | 8 +++--- notebooks/feature_engineering/raw/ex1.ipynb | 2 +- .../feature_engineering_new/raw/tut1.ipynb | 4 +-- .../feature_engineering_new/raw/tut2.ipynb | 4 +-- .../feature_engineering_new/raw/tut4.ipynb | 8 +++--- .../feature_engineering_new/raw/tut5.ipynb | 6 ++-- .../feature_engineering_new/raw/tut6.ipynb | 2 +- notebooks/game_ai/raw/ex2.ipynb | 6 ++-- notebooks/game_ai/raw/ex3.ipynb | 10 +++---- notebooks/game_ai/raw/ex4.ipynb | 6 ++-- notebooks/game_ai/raw/tut1.ipynb | 6 ++-- notebooks/game_ai/raw/tut2.ipynb | 6 ++-- notebooks/game_ai/raw/tut3.ipynb | 10 +++---- notebooks/game_ai/raw/tut4.ipynb | 2 +- notebooks/game_ai/raw/tut_halite.ipynb | 14 +++++----- notebooks/geospatial/raw/ex1.ipynb | 2 +- notebooks/geospatial/raw/ex2.ipynb | 2 +- notebooks/geospatial/raw/ex3.ipynb | 2 +- notebooks/geospatial/raw/ex4.ipynb | 2 +- notebooks/geospatial/raw/ex5.ipynb | 2 +- notebooks/geospatial/raw/tut1.ipynb | 4 +-- notebooks/geospatial/raw/tut2.ipynb | 2 +- notebooks/geospatial/raw/tut4.ipynb | 2 +- .../deprecated/ex4_old.ipynb | 12 ++++---- notebooks/intro_to_programming/raw/ex4.ipynb | 12 ++++---- notebooks/intro_to_programming/raw/tut2.ipynb | 4 +-- notebooks/machine_learning/raw/ex7.ipynb | 2 +- .../machine_learning/raw/ex_automl.ipynb | 8 +++--- notebooks/machine_learning/raw/tut1.ipynb | 6 ++-- notebooks/machine_learning/raw/tut5.ipynb | 4 +-- notebooks/machine_learning/raw/tut7.ipynb | 2 +- notebooks/machine_learning/raw/tut8.ipynb | 2 +- .../machine_learning/raw/tut_automl.ipynb | 4 +-- .../machine_learning/raw/tut_titanic.ipynb | 28 +++++++++---------- notebooks/microchallenges/raw/tut2.ipynb | 2 +- .../raw/ex5_shap_advanced.ipynb | 2 +- .../raw/tut2_perm_importance.ipynb | 4 +-- .../raw/tut4_shap_basic.ipynb | 4 +-- .../raw/tut5_shap_advanced.ipynb | 12 ++++---- notebooks/ml_intermediate/raw/ex1.ipynb | 2 +- 
notebooks/ml_intermediate/raw/ex2.ipynb | 4 +-- notebooks/ml_intermediate/raw/ex3.ipynb | 2 +- notebooks/ml_intermediate/raw/ex4.ipynb | 2 +- notebooks/ml_intermediate/raw/ex5.ipynb | 2 +- notebooks/ml_intermediate/raw/ex6.ipynb | 2 +- notebooks/ml_intermediate/raw/tut2.ipynb | 6 ++-- notebooks/ml_intermediate/raw/tut3.ipynb | 4 +-- notebooks/ml_intermediate/raw/tut5.ipynb | 2 +- notebooks/ml_intermediate/raw/tut6.ipynb | 2 +- notebooks/ml_intermediate/raw/tut7.ipynb | 2 +- notebooks/nlp/raw/ex1.ipynb | 2 +- notebooks/pandas/raw/ex_0.ipynb | 6 ++-- notebooks/pandas/raw/ex_1.ipynb | 4 +-- notebooks/sql/raw/ex6.ipynb | 2 +- notebooks/sql/raw/tut1.ipynb | 2 +- notebooks/sql/raw/tut2.ipynb | 6 ++-- notebooks/sql/raw/tut3.ipynb | 8 +++--- notebooks/sql/raw/tut4.ipynb | 14 +++++----- notebooks/sql/raw/tut5.ipynb | 10 +++---- notebooks/sql/raw/tut6.ipynb | 8 +++--- notebooks/sql_advanced/raw/ex1.ipynb | 4 +-- notebooks/sql_advanced/raw/ex2.ipynb | 2 +- notebooks/sql_advanced/raw/ex3.ipynb | 4 +-- notebooks/sql_advanced/raw/ex4.ipynb | 2 +- notebooks/sql_advanced/raw/tut1.ipynb | 10 +++---- notebooks/sql_advanced/raw/tut2.ipynb | 4 +-- notebooks/sql_advanced/raw/tut3.ipynb | 20 ++++++------- notebooks/sql_advanced/raw/tut4.ipynb | 6 ++-- notebooks/time_series/raw/tut2.ipynb | 6 ++-- notebooks/time_series/raw/tut3.ipynb | 14 +++++----- notebooks/time_series/raw/tut4.ipynb | 10 +++---- notebooks/time_series/raw/tut5.ipynb | 8 +++--- notebooks/time_series/raw/tut6.ipynb | 10 +++---- 110 files changed, 291 insertions(+), 291 deletions(-) diff --git a/notebooks/computer_vision/raw/ex1.ipynb b/notebooks/computer_vision/raw/ex1.ipynb index 8c526b3a2..0d9be6e8f 100644 --- a/notebooks/computer_vision/raw/ex1.ipynb +++ b/notebooks/computer_vision/raw/ex1.ipynb @@ -199,7 +199,7 @@ "Now that the base is defined to do the feature extraction, create a head of `Dense` layers to perform the classification, following this diagram:\n", "\n", "
\n", - "\"Diagram\n", + "\"Diagram\n", "
\n" ] }, diff --git a/notebooks/computer_vision/raw/ex4.ipynb b/notebooks/computer_vision/raw/ex4.ipynb index 72488540e..2479444ca 100644 --- a/notebooks/computer_vision/raw/ex4.ipynb +++ b/notebooks/computer_vision/raw/ex4.ipynb @@ -128,7 +128,7 @@ "What happens if you add another convolutional layer with $3 \\times 3$ kernels? Consider this next illustration:\n", "\n", "
\n", - "\"Illustration\n", + "\"Illustration\n", "
\n", "\n", "Now trace back the connections from the neuron at top and you can see that it's connected to a $5 \\times 5$ patch of pixels in the input (the bottom layer): each neuron in the $3 \\times 3$ patch in the middle layer is connected to a $3 \\times 3$ input patch, but they overlap in a $5 \\times 5$ patch. So that neuron at top has a $5 \\times 5$ receptive field." diff --git a/notebooks/computer_vision/raw/ex5.ipynb b/notebooks/computer_vision/raw/ex5.ipynb index 99a054aa4..b38b28821 100644 --- a/notebooks/computer_vision/raw/ex5.ipynb +++ b/notebooks/computer_vision/raw/ex5.ipynb @@ -99,7 +99,7 @@ "\n", "
\n", "\n", - "\"Diagram\n", + "\"Diagram\n", "
\n", "\n", "# 1) Define Model #\n", diff --git a/notebooks/computer_vision/raw/ex6.ipynb b/notebooks/computer_vision/raw/ex6.ipynb index b92c6d1ba..55b7756f9 100644 --- a/notebooks/computer_vision/raw/ex6.ipynb +++ b/notebooks/computer_vision/raw/ex6.ipynb @@ -151,7 +151,7 @@ "The [EuroSAT](https://www.kaggle.com/ryanholbrook/eurosat) dataset consists of satellite images of the Earth classified by geographic feature. Below are a number of images from this dataset.\n", "\n", "
\n", - "\"Sixteen\n", + "\"Sixteen\n", "
" ] }, @@ -194,7 +194,7 @@ "The [TensorFlow Flowers](https://www.kaggle.com/ryanholbrook/tensorflow-flowers) dataset consists of photographs of flowers of several species. Below is a sample.\n", "\n", "
\n", - "\"Sixteen\n", + "\"Sixteen\n", "
" ] }, diff --git a/notebooks/computer_vision/raw/ex_tpus.ipynb b/notebooks/computer_vision/raw/ex_tpus.ipynb index 16f368e56..55f242743 100644 --- a/notebooks/computer_vision/raw/ex_tpus.ipynb +++ b/notebooks/computer_vision/raw/ex_tpus.ipynb @@ -43,25 +43,25 @@ "First, click on the **Save Version** button in the upper right.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "Choose **Advanced Settings**.\n", "\n", "
\n", - "\"Advanced\n", + "\"Advanced\n", "
\n", "\n", "Select **Run with TPU for this session** from the dropdown menu and click the blue **Save** button.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "Select **Save & Run All (Commit)** and click the blue **Save** button.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "The commit may take a while to finish (about 10-15 min), but there's no harm in doing something else while it's running and coming back later.\n", @@ -73,13 +73,13 @@ "Now you're ready to make a submission! Click on the **Output** heading in the menu to the right of the notebook.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "And finally you'll submit the predictions! Just look for the blue **Submit** button. After clicking it, you should shortly be on the leaderboard!\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n" ] diff --git a/notebooks/computer_vision/raw/tut1.ipynb b/notebooks/computer_vision/raw/tut1.ipynb index 95a5d46a2..7d1534031 100644 --- a/notebooks/computer_vision/raw/tut1.ipynb +++ b/notebooks/computer_vision/raw/tut1.ipynb @@ -45,7 +45,7 @@ "\n", "
\n", "\n", - "\"The\n", + "\"The\n", "
\n", "\n", "The base is used to **extract the features** from an image. It is formed primarily of layers performing the convolution operation, but often includes other kinds of layers as well. (You'll learn about these in the next lesson.)\n", @@ -58,7 +58,7 @@ "\n", "
\n", "\n", - "\"The\n", + "\"The\n", "
\n", "\n", "The features actually extracted look a bit different, but it gives the idea." @@ -78,7 +78,7 @@ "\n", "
\n", "\n", - "\"Attaching\n", + "\"Attaching\n", "
\n", "\n", "Because the head usually consists of only a few dense layers, very accurate classifiers can be created from relatively little data. \n", diff --git a/notebooks/computer_vision/raw/tut2.ipynb b/notebooks/computer_vision/raw/tut2.ipynb index 9b846e687..4557e949f 100644 --- a/notebooks/computer_vision/raw/tut2.ipynb +++ b/notebooks/computer_vision/raw/tut2.ipynb @@ -69,7 +69,7 @@ "\n", "
\n", "\n", - "\"An\n", + "\"An\n", "
The three steps of feature extraction.
\n", "
\n", "\n", @@ -107,14 +107,14 @@ "\n", "
\n", "\n", - "\"A\n", + "\"A\n", "
\n", "\n", "A kernel operates by scanning over an image and producing a *weighted sum* of pixel values. In this way, a kernel will act sort of like a polarized lens, emphasizing or deemphasizing certain patterns of information.\n", "\n", "
\n", "\n", - "\"A\n", + "\"A\n", "
A kernel acts as a kind of lens.
\n", "
\n", "\n", @@ -129,7 +129,7 @@ "The **activations** in the network we call **feature maps**. They are what result when we apply a filter to an image; they contain the visual features the kernel extracts. Here are a few kernels pictured with feature maps they produced.\n", "\n", "
\n", - "\"Three
Kernels and features.
\n", + "\"Three
Kernels and features.
\n", "
\n", "\n", "From the pattern of numbers in the kernel, you can tell the kinds of feature maps it creates. Generally, what a convolution accentuates in its inputs will match the shape of the *positive* numbers in the kernel. The left and middle kernels above will both filter for horizontal shapes.\n", @@ -142,7 +142,7 @@ "\n", "
\n", "\n", - "\"Graph\n", + "\"Graph\n", "
The graph of the rectifier function looks like a line with the negative part \"rectified\" to 0.
\n", "
\n", "\n", @@ -173,7 +173,7 @@ "\n", "
\n", "\n", - "\"ReLU\n", + "\"ReLU\n", "
\n", "\n", "Like other activation functions, the ReLU function is **nonlinear**. Essentially this means that the total effect of all the layers in the network becomes different than what you would get by just adding the effects together -- which would be the same as what you could achieve with only a single layer. The nonlinearity ensures features will combine in interesting ways as they move deeper into the network. (We'll explore this \"feature compounding\" more in Lesson 5.)\n", diff --git a/notebooks/computer_vision/raw/tut3.ipynb b/notebooks/computer_vision/raw/tut3.ipynb index 7d27bc509..b3e572a14 100644 --- a/notebooks/computer_vision/raw/tut3.ipynb +++ b/notebooks/computer_vision/raw/tut3.ipynb @@ -42,7 +42,7 @@ "\n", "
\n", "\n", - "\"An\n", + "\"An\n", "
\n", "\n", "Notice that after applying the ReLU function (**Detect**) the feature map ends up with a lot of \"dead space,\" that is, large areas containing only 0's (the black areas in the image). Having to carry these 0 activations through the entire network would increase the size of the model without adding much useful information. Instead, we would like to *condense* the feature map to retain only the most useful part -- the feature itself.\n", @@ -51,7 +51,7 @@ "\n", "
\n", "\n", - "\"Maximum\n", + "\"Maximum\n", "
\n", "\n", "When applied after the ReLU activation, it has the effect of \"intensifying\" features. The pooling step increases the proportion of active pixels to zero pixels.\n", @@ -168,7 +168,7 @@ "\n", "
\n", "\n", - "\"Pooling\n", + "\"Pooling\n", "
\n", "\n", "The two dots in the original image became indistinguishable after repeated pooling. In other words, pooling destroyed some of their positional information. Since the network can no longer distinguish between them in the feature maps, it can't distinguish them in the original image either: it has become *invariant* to that difference in position.\n", @@ -177,7 +177,7 @@ "\n", "
\n", "\n", - "\"But\n", + "\"But\n", "
\n", "\n", "This invariance to small differences in the positions of features is a nice property for an image classifier to have. Just because of differences in perspective or framing, the same kind of feature might be positioned in various parts of the original image, but we would still like for the classifier to recognize that they are the same. Because this invariance is *built into* the network, we can get away with using much less data for training: we no longer have to teach it to ignore that difference. This gives convolutional networks a big efficiency advantage over a network with only dense layers. (You'll see another way to get invariance for free in **Lesson 6** with **Data Augmentation**!)\n", diff --git a/notebooks/computer_vision/raw/tut4.ipynb b/notebooks/computer_vision/raw/tut4.ipynb index bfd1f432d..4741d453f 100644 --- a/notebooks/computer_vision/raw/tut4.ipynb +++ b/notebooks/computer_vision/raw/tut4.ipynb @@ -124,7 +124,7 @@ "The convolution and pooling operations share a common feature: they are both performed over a **sliding window**. With convolution, this \"window\" is given by the dimensions of the kernel, the parameter `kernel_size`. With pooling, it is the pooling window, given by `pool_size`.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
\n", "\n", "There are two additional parameters affecting both convolution and pooling layers -- these are the `strides` of the window and whether to use `padding` at the image edges. The `strides` parameter says how far the window should move at each step, and the `padding` parameter describes how we handle the pixels at the edges of the input.\n", @@ -163,7 +163,7 @@ "The distance the window moves at each step is called the **stride**. We need to specify the stride in both dimensions of the image: one for moving left to right and one for moving top to bottom. This animation shows `strides=(2, 2)`, a movement of 2 pixels each step.\n", "\n", "
\n", - "\"Sliding\n", + "\"Sliding\n", "
\n", "\n", "What effect does the stride have? Whenever the stride in either direction is greater than 1, the sliding window will skip over some of the pixels in the input at each step.\n", @@ -183,7 +183,7 @@ "The alternative is to use `padding='same'`. The trick here is to **pad** the input with 0's around its borders, using just enough 0's to make the size of the output the *same* as the size of the input. This can have the effect however of diluting the influence of pixels at the borders. The animation below shows a sliding window with `'same'` padding.\n", "\n", "
\n", - "\"Illustration\n", + "\"Illustration\n", "
\n", "\n", "The VGG model we've been looking at uses `same` padding for all of its convolutional layers. Most modern convnets will use some combination of the two. (Another parameter to tune!)\n", diff --git a/notebooks/computer_vision/raw/tut5.ipynb b/notebooks/computer_vision/raw/tut5.ipynb index a67843ed3..e616195b2 100644 --- a/notebooks/computer_vision/raw/tut5.ipynb +++ b/notebooks/computer_vision/raw/tut5.ipynb @@ -14,7 +14,7 @@ "In the last three lessons, we saw how convolutional networks perform **feature extraction** through three operations: **filter**, **detect**, and **condense**. A single round of feature extraction can only extract relatively simple features from an image, things like simple lines or contrasts. These are too simple to solve most classification problems. Instead, convnets will repeat this extraction over and over, so that the features become more complex and refined as they travel deeper into the network.\n", "\n", "
\n", - "\"Features\n", + "\"Features\n", "
\n", "\n", "# Convolutional Blocks #\n", @@ -22,14 +22,14 @@ "It does this by passing them through long chains of **convolutional blocks** which perform this extraction.\n", "\n", "
\n", - "\"Extraction\n", + "\"Extraction\n", "
\n", "\n", "These convolutional blocks are stacks of `Conv2D` and `MaxPool2D` layers, whose role in feature extraction we learned about in the last few lessons.\n", "\n", "
\n", "\n", - "\"A\n", + "\"A\n", "
\n", "\n", "Each block represents a round of extraction, and by composing these blocks the convnet can combine and recombine the features produced, growing them and shaping them to better fit the problem at hand. The deep structure of modern convnets is what allows this sophisticated feature engineering and has been largely responsible for their superior performance.\n", @@ -125,7 +125,7 @@ "\n", "
\n", "\n", - "\"Diagram\n", + "\"Diagram\n", "
\n", "\n", "Now we'll define the model. See how our model consists of three blocks of `Conv2D` and `MaxPool2D` layers (the base) followed by a head of `Dense` layers. We can translate this diagram more or less directly into a Keras `Sequential` model just by filling in the appropriate parameters." diff --git a/notebooks/computer_vision/raw/tut6.ipynb b/notebooks/computer_vision/raw/tut6.ipynb index 0f8defc30..00064404a 100644 --- a/notebooks/computer_vision/raw/tut6.ipynb +++ b/notebooks/computer_vision/raw/tut6.ipynb @@ -30,13 +30,13 @@ "Typically, many kinds of transformation are used when augmenting a dataset. These might include rotating the image, adjusting the color or contrast, warping the image, or many other things, usually applied in combination. Here is a sample of the different ways a single image might be transformed.\n", "\n", "
\n", - "\"Sixteen\n", + "\"Sixteen\n", "
\n", "\n", "Data augmentation is usually done *online*, meaning, as the images are being fed into the network for training. Recall that training is usually done on mini-batches of data. This is what a batch of 16 images might look like when data augmentation is used.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
\n", "\n", "Each time an image is used during training, a new random transformation is applied. This way, the model is always seeing something a little different than what it's seen before. This extra variance in the training data is what helps the model on new data.\n", diff --git a/notebooks/data_viz_to_coder/raw/ex1.ipynb b/notebooks/data_viz_to_coder/raw/ex1.ipynb index eea25fc8e..5505e3315 100644 --- a/notebooks/data_viz_to_coder/raw/ex1.ipynb +++ b/notebooks/data_viz_to_coder/raw/ex1.ipynb @@ -13,7 +13,7 @@ "- Click on the blue triangle (in the shape of a \"Play button\") that appears to the left of the code cell.\n", "- If your code was run sucessfully, you will see `Setup Complete` as output below the cell.\n", "\n", - "![ex0_run_code](https://i.imgur.com/4NzqJ7G.png)" + "![ex0_run_code](https://storage.googleapis.com/kaggle-media/learn/images/4NzqJ7G.png)" ] }, { diff --git a/notebooks/data_viz_to_coder/raw/ex2.ipynb b/notebooks/data_viz_to_coder/raw/ex2.ipynb index d47328d41..71f056f5f 100644 --- a/notebooks/data_viz_to_coder/raw/ex2.ipynb +++ b/notebooks/data_viz_to_coder/raw/ex2.ipynb @@ -10,11 +10,11 @@ "\n", "You have recently been hired to manage the museums in the City of Los Angeles. Your first project focuses on the four museums pictured in the images below.\n", "\n", - "![ex1_museums](https://i.imgur.com/pFYL8J1.png)\n", + "![ex1_museums](https://storage.googleapis.com/kaggle-media/learn/images/pFYL8J1.png)\n", "\n", "You will leverage data from the Los Angeles [Data Portal](https://data.lacity.org/) that tracks monthly visitors to each museum. 
\n", "\n", - "![ex1_xlsx](https://i.imgur.com/mGWYlym.png)\n", + "![ex1_xlsx](https://storage.googleapis.com/kaggle-media/learn/images/mGWYlym.png)\n", "\n", "## Setup\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/ex3.ipynb b/notebooks/data_viz_to_coder/raw/ex3.ipynb index 5e235661d..f4003b264 100644 --- a/notebooks/data_viz_to_coder/raw/ex3.ipynb +++ b/notebooks/data_viz_to_coder/raw/ex3.ipynb @@ -10,7 +10,7 @@ "\n", "You've recently decided to create your very own video game! As an avid reader of [IGN Game Reviews](https://www.ign.com/reviews/games), you hear about all of the most recent game releases, along with the ranking they've received from experts, ranging from 0 (_Disaster_) to 10 (_Masterpiece_).\n", "\n", - "![ex2_ign](https://i.imgur.com/Oh06Fu1.png)\n", + "![ex2_ign](https://storage.googleapis.com/kaggle-media/learn/images/Oh06Fu1.png)\n", "\n", "You're interested in using [IGN reviews](https://www.ign.com/reviews/games) to guide the design of your upcoming game. Thankfully, someone has summarized the rankings in a really useful CSV file that you can use to guide your analysis.\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/ex5.ipynb b/notebooks/data_viz_to_coder/raw/ex5.ipynb index d1bee350b..23fbb19f2 100644 --- a/notebooks/data_viz_to_coder/raw/ex5.ipynb +++ b/notebooks/data_viz_to_coder/raw/ex5.ipynb @@ -10,7 +10,7 @@ "\n", "You'll work with a real-world dataset containing information collected from microscopic images of breast cancer tumors, similar to the image below.\n", "\n", - "![ex4_cancer_image](https://i.imgur.com/qUESsJe.png)\n", + "![ex4_cancer_image](https://storage.googleapis.com/kaggle-media/learn/images/qUESsJe.png)\n", "\n", "Each tumor has been labeled as either [**benign**](https://en.wikipedia.org/wiki/Benign_tumor) (_noncancerous_) or **malignant** (_cancerous_).\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/ex7.ipynb b/notebooks/data_viz_to_coder/raw/ex7.ipynb index 0596935a6..cf3ad491a 100644 --- 
a/notebooks/data_viz_to_coder/raw/ex7.ipynb +++ b/notebooks/data_viz_to_coder/raw/ex7.ipynb @@ -57,18 +57,18 @@ "\n", "Once you have selected a dataset, click on the **[+ Add data]** option in the top right corner. This will generate a pop-up window that you can use to search for your chosen dataset. \n", "\n", - "![ex6_search_dataset](https://i.imgur.com/cIIWPUS.png)\n", + "![ex6_search_dataset](https://storage.googleapis.com/kaggle-media/learn/images/cIIWPUS.png)\n", "\n", "Once you have found the dataset, click on the **[Add]** button to attach it to the notebook. You can check that it was successful by looking at the **Data** dropdown menu to the right of the notebook -- look for an **input** folder containing a subfolder that matches the name of the dataset.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "You can click on the carat to the left of the name of the dataset to double-check that it contains a CSV file. For instance, the image below shows that the example dataset contains two CSV files: (1) **dc-wikia-data.csv**, and (2) **marvel-wikia-data.csv**.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Once you've uploaded a dataset with a CSV file, run the code cell below **without changes** to receive credit for your work!" @@ -92,7 +92,7 @@ "\n", "Now that the dataset is attached to the notebook, you can find its filepath. To do this, begin by clicking on the CSV file you'd like to use. This will open the CSV file in a tab below the notebook. You can find the filepath towards the top of this new tab. \n", "\n", - "![ex6_filepath](https://i.imgur.com/fgXQV47.png)\n", + "![ex6_filepath](https://storage.googleapis.com/kaggle-media/learn/images/fgXQV47.png)\n", "\n", "After you find the filepath corresponding to your dataset, fill it in as the value for `my_filepath` in the code cell below, and run the code cell to check that you've provided a valid filepath. For instance, in the case of this example dataset, we would set\n", "```\n", diff --git a/notebooks/data_viz_to_coder/raw/tut1.ipynb b/notebooks/data_viz_to_coder/raw/tut1.ipynb index 42f478d54..9380b4a1d 100644 --- a/notebooks/data_viz_to_coder/raw/tut1.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut1.ipynb @@ -12,7 +12,7 @@ "\n", "So, if you've never written a line of code, and you want to learn the **_bare minimum_** to start making faster, more attractive plots today, you're in the right place! To take a peek at some of the charts you'll make, check out the figures below.\n", "\n", - "![tut1_plots_you_make](https://i.imgur.com/54BoIBW.png)\n", + "![tut1_plots_you_make](https://storage.googleapis.com/kaggle-media/learn/images/54BoIBW.png)\n", "\n", "# Your coding environment\n", "\n", @@ -23,7 +23,7 @@ "\n", "We refer to these pages as **Jupyter notebooks** (or, often just **notebooks**), and we'll work with them throughout the mini-course. Another example of a notebook can be found in the image below. 
\n", "\n", - "![tut0_notebook](https://i.imgur.com/ccJNqYc.png)\n", + "![tut0_notebook](https://storage.googleapis.com/kaggle-media/learn/images/ccJNqYc.png)\n", "\n", "In the notebook you're reading now, we've already run all of the code for you. Soon, you will work with a notebook that allows you to write and run your own code! \n", "\n", @@ -54,7 +54,7 @@ "\n", "In this notebook, we'll work with a dataset of historical FIFA rankings for six countries: Argentina (ARG), Brazil (BRA), Spain (ESP), France (FRA), Germany (GER), and Italy (ITA). The dataset is stored as a CSV file (short for [comma-separated values file](https://bit.ly/2Iu5D4x). Opening the CSV file in Excel shows a row for each date, along with a column for each country. \n", "\n", - "![tut0_fifa_head](https://i.imgur.com/W0E7GjV.png)\n", + "![tut0_fifa_head](https://storage.googleapis.com/kaggle-media/learn/images/W0E7GjV.png)\n", "\n", "To load the data into the notebook, we'll use two distinct steps, implemented in the code cell below as follows:\n", "- begin by specifying the location (or [filepath](https://bit.ly/1lWCX7s)) where the dataset can be accessed, and then\n", @@ -78,7 +78,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "![tut0_read_csv](https://i.imgur.com/I6UEDSK.png)\n", + "![tut0_read_csv](https://storage.googleapis.com/kaggle-media/learn/images/I6UEDSK.png)\n", "\n", "Note that the code cell above has **four** different lines.\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/tut2.ipynb b/notebooks/data_viz_to_coder/raw/tut2.ipynb index 67571f99d..b69c8fbee 100644 --- a/notebooks/data_viz_to_coder/raw/tut2.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut2.ipynb @@ -41,7 +41,7 @@ "4. \"HUMBLE.\", by Kendrick Lamar [(link)](https://bit.ly/2YlhPw4)\n", "5. 
\"Unforgettable\", by French Montana [(link)](https://bit.ly/2oL7w8b)\n", "\n", - "![tut1_spotify_head](https://i.imgur.com/GAGf6Td.png)\n", + "![tut1_spotify_head](https://storage.googleapis.com/kaggle-media/learn/images/GAGf6Td.png)\n", "\n", "Notice that the first date that appears is January 6, 2017, corresponding to the release date of \"The Shape of You\", by Ed Sheeran. And, using the table, you can see that \"The Shape of You\" was streamed 12,287,078 times globally on the day of its release. Notice that the other songs have missing values in the first row, because they weren't released until later!\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/tut3.ipynb b/notebooks/data_viz_to_coder/raw/tut3.ipynb index 8ab5a0b04..5a06641a0 100644 --- a/notebooks/data_viz_to_coder/raw/tut3.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut3.ipynb @@ -40,7 +40,7 @@ "\n", "Opening this CSV file in Excel shows a row for each month (where `1` = January, `2` = February, etc) and a column for each airline code.\n", "\n", - "![tut2_flight_delay_head](https://i.imgur.com/5nYs9se.png)\n", + "![tut2_flight_delay_head](https://storage.googleapis.com/kaggle-media/learn/images/5nYs9se.png)\n", "\n", "Each entry shows the average arrival delay (in minutes) for a different airline and month (all in year 2015). Negative entries denote flights that (_on average_) tended to arrive early. 
For instance, the average American Airlines flight (_airline code: **AA**_) in January arrived roughly 7 minutes late, and the average Alaska Airlines flight (_airline code: **AS**_) in April arrived roughly 3 minutes early.\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/tut4.ipynb b/notebooks/data_viz_to_coder/raw/tut4.ipynb index 1b07f9d75..09e85217f 100644 --- a/notebooks/data_viz_to_coder/raw/tut4.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut4.ipynb @@ -34,7 +34,7 @@ "\n", "We'll work with a (_synthetic_) dataset of insurance charges, to see if we can understand why some customers pay more than others. \n", "\n", - "![tut3_insurance](https://i.imgur.com/1nmy2YO.png)\n", + "![tut3_insurance](https://storage.googleapis.com/kaggle-media/learn/images/1nmy2YO.png)\n", "\n", "If you like, you can read more about the dataset [here](https://www.kaggle.com/mirichoi0218/insurance/home)." ] diff --git a/notebooks/data_viz_to_coder/raw/tut5.ipynb b/notebooks/data_viz_to_coder/raw/tut5.ipynb index 511c7ec6c..5721f16f3 100644 --- a/notebooks/data_viz_to_coder/raw/tut5.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut5.ipynb @@ -34,7 +34,7 @@ "\n", "We'll work with a dataset of 150 different flowers, or 50 each from three different species of iris (*Iris setosa*, *Iris versicolor*, and *Iris virginica*).\n", "\n", - "![tut4_iris](https://i.imgur.com/RcxYYBA.png)\n", + "![tut4_iris](https://storage.googleapis.com/kaggle-media/learn/images/RcxYYBA.png)\n", "\n", "# Load and examine the data\n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/tut6.ipynb b/notebooks/data_viz_to_coder/raw/tut6.ipynb index ba55982df..79449ccef 100644 --- a/notebooks/data_viz_to_coder/raw/tut6.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut6.ipynb @@ -13,7 +13,7 @@ "source": [ "# What have you learned?\n", "\n", - "\n", + "\n", "\n", " \n", " \n", diff --git a/notebooks/data_viz_to_coder/raw/tut7.ipynb b/notebooks/data_viz_to_coder/raw/tut7.ipynb index 6d3c72a8b..cd1f6a5e8 100644 --- 
a/notebooks/data_viz_to_coder/raw/tut7.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut7.ipynb @@ -16,7 +16,7 @@ "The link will bring you to a webpage with a collection of datasets that you can use in your own projects. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Note that the datasets that you see on the page will likely look different from what's shown in the screenshot above, since many new datasets are uploaded every day! There are many different file types on Kaggle Datasets, including CSV files, but also more exotic file types such as JSON, SQLite, and BigQuery. We'll be careful to select a dataset with at least one CSV file, since that is the file type we have been working with in this course.\n", @@ -24,14 +24,14 @@ "To search for a specific dataset, use the search bar at the top of the screen. Say, for instance, you'd like to work with a dataset about comic book characters. Begin by typing **\"comic\"** in the search window. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Then, find the [**FiveThirtyEight Comic Characters Dataset**](https://www.kaggle.com/fivethirtyeight/fivethirtyeight-comic-characters-dataset). Note that the dataset contains 3 files, including a CSV file that we can use.\n", "\n", "Then, click on the dataset to select it. This will bring you to a webpage that describes the dataset.\n", "\n", - "![tut6_sample_dataset](https://i.imgur.com/4W1Sa7H.png)\n", + "![tut6_sample_dataset](https://storage.googleapis.com/kaggle-media/learn/images/4W1Sa7H.png)\n", "\n", "Scroll down to see the list of files in the dataset under **Data Explorer**, on the left of the window. The dataset contains three files: (1) **README.md**, (2) **dc-wikia-data.csv**, and (3) **marvel-wikia-data.csv**. The first file is selected as default. Click on one of the CSV files instead to see a quick preview of the file. \n", "\n", diff --git a/notebooks/data_viz_to_coder/raw/tut8.ipynb b/notebooks/data_viz_to_coder/raw/tut8.ipynb index 9ffd0fd7f..f7f74ce6b 100644 --- a/notebooks/data_viz_to_coder/raw/tut8.ipynb +++ b/notebooks/data_viz_to_coder/raw/tut8.ipynb @@ -20,11 +20,11 @@ "\n", "Then, in the top left corner, click on **[+ New Notebook]**.\n", "\n", - "![tut7_new_kernel](https://i.imgur.com/kw9cct2.png)\n", + "![tut7_new_kernel](https://storage.googleapis.com/kaggle-media/learn/images/kw9cct2.png)\n", "\n", "This opens a notebook. As a first step, check the language of the notebook by selecting **File > Language**. If it's not Python, change the language to Python now.\n", "\n", - "![tut7_default_lang](https://i.imgur.com/FcQhCjF.png)\n", + "![tut7_default_lang](https://storage.googleapis.com/kaggle-media/learn/images/FcQhCjF.png)\n", "\n", "The notebook should hvae some default code. 
**_Please erase this code, and replace it with the code in the cell below._** (_This is the same code that you used in all of the exercises to set up your Python environment._)" ] diff --git a/notebooks/deep_learning/raw/ex1_convolutions.ipynb b/notebooks/deep_learning/raw/ex1_convolutions.ipynb index 8a3ef0763..a76755d79 100644 --- a/notebooks/deep_learning/raw/ex1_convolutions.ipynb +++ b/notebooks/deep_learning/raw/ex1_convolutions.ipynb @@ -8,7 +8,7 @@ "\n", "You don't directly choose the numbers to go into your convolutions for deep learning... instead the deep learning technique determines what convolutions will be useful from the data (as part of model-training). We'll come back to how the model does that soon.\n", "\n", - "![Imgur](https://i.imgur.com/op9Maqr.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/op9Maqr.png)\n", "\n", "But looking closely at convolutions and how they are applied to your image will improve your intuition for these models, how they work, and how to debug them when they don't work.\n", "\n", diff --git a/notebooks/deep_learning/raw/ex7_from_scratch.ipynb b/notebooks/deep_learning/raw/ex7_from_scratch.ipynb index 914d75c67..380be9e98 100644 --- a/notebooks/deep_learning/raw/ex7_from_scratch.ipynb +++ b/notebooks/deep_learning/raw/ex7_from_scratch.ipynb @@ -10,7 +10,7 @@ "\n", "As an example, your model will take an images like this and identify it as a shoe:\n", "\n", - "![Imgur](https://i.imgur.com/GyXOnSB.png)" + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/GyXOnSB.png)" ] }, { diff --git a/notebooks/deep_learning/raw/ex_tpus.ipynb b/notebooks/deep_learning/raw/ex_tpus.ipynb index c65212fcb..5415d5c4b 100644 --- a/notebooks/deep_learning/raw/ex_tpus.ipynb +++ b/notebooks/deep_learning/raw/ex_tpus.ipynb @@ -26,25 +26,25 @@ "First, click on the **Save Version** button in the upper right.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "Choose **Advanced Settings**.\n", "\n", "
\n", - "\"Advanced\n", + "\"Advanced\n", "
\n", "\n", "Select **Run with TPU for this session** from the dropdown menu and click the blue **Save** button.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "Select **Save & Run All (Commit)** and click the blue **Save** button.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "The commit may take a while to finish (about 10-15 min), but there's no harm in doing something else while it's running and coming back later.\n", @@ -56,13 +56,13 @@ "Now you're ready to make a submission! Click on the **Output** heading in the menu to the right of the notebook.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n", "And finally you'll submit the predictions! Just look for the blue **Submit** button. After clicking it, you should shortly be on the leaderboard!\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
\n", "\n" ] diff --git a/notebooks/deep_learning_intro/raw/ex6.ipynb b/notebooks/deep_learning_intro/raw/ex6.ipynb index f5e9f4bc8..a3feebda8 100644 --- a/notebooks/deep_learning_intro/raw/ex6.ipynb +++ b/notebooks/deep_learning_intro/raw/ex6.ipynb @@ -117,7 +117,7 @@ "Define a model with an architecture given by this diagram:\n", "\n", "
\n", - "\"Diagram\n", + "\"Diagram\n", "
Diagram of a binary classifier.
\n", "
\n" ] diff --git a/notebooks/deep_learning_intro/raw/tut1.ipynb b/notebooks/deep_learning_intro/raw/tut1.ipynb index 46053bde8..bc9f5236e 100644 --- a/notebooks/deep_learning_intro/raw/tut1.ipynb +++ b/notebooks/deep_learning_intro/raw/tut1.ipynb @@ -29,7 +29,7 @@ "So let's begin with the fundamental component of a neural network: the individual neuron. As a diagram, a **neuron** (or **unit**) with one input looks like:\n", "\n", "
\n", - "\"Diagram\n", + "\"Diagram\n", "
The Linear Unit: $y = w x + b$\n", "
\n", "
\n", @@ -57,7 +57,7 @@ "Let's think about how this might work on a dataset like [80 Cereals](https://www.kaggle.com/crawford/80-cereals). Training a model with `'sugars'` (grams of sugars per serving) as input and `'calories'` (calories per serving) as output, we might find the bias is `b=90` and the weight is `w=2.5`. We could estimate the calorie content of a cereal with 5 grams of sugar per serving like this:\n", "\n", "
\n", - "\"Computing\n", + "\"Computing\n", "
Computing with the linear unit.\n", "
\n", "
\n", @@ -69,7 +69,7 @@ "The *80 Cereals* dataset has many more features than just `'sugars'`. What if we wanted to expand our model to include things like fiber or protein content? That's easy enough. We can just add more input connections to the neuron, one for each additional feature. To find the output, we would multiply each input to its connection weight and then add them all together.\n", "\n", "
\n", - "\"Three\n", + "\"Three\n", "
A linear unit with three inputs.\n", "
\n", "
\n", diff --git a/notebooks/deep_learning_intro/raw/tut2.ipynb b/notebooks/deep_learning_intro/raw/tut2.ipynb index 3a8608af6..048187b30 100644 --- a/notebooks/deep_learning_intro/raw/tut2.ipynb +++ b/notebooks/deep_learning_intro/raw/tut2.ipynb @@ -15,7 +15,7 @@ "Neural networks typically organize their neurons into **layers**. When we collect together linear units having a common set of inputs we get a **dense** layer.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
A dense layer of two linear units receiving two inputs and a bias.\n", "
\n", "
\n", @@ -32,7 +32,7 @@ "It turns out, however, that two dense layers with nothing in between are no better than a single dense layer by itself. Dense layers by themselves can never move us out of the world of lines and planes. What we need is something *nonlinear*. What we need are activation functions.\n", "\n", "
\n", - "\"\n", + "\"\n", "
Without activation functions, neural networks can only learn linear relationships. In order to fit curves, we'll need to use activation functions. \n", "
\n", "
\n", @@ -40,7 +40,7 @@ "An **activation function** is simply some function we apply to each of a layer's outputs (its *activations*). The most common is the *rectifier* function $max(0, x)$.\n", "\n", "
\n", - "\"A0 and y=0 when x<0, making a 'hinge' shape like '_/'.\">\n", + "\"A0 and y=0 when x<0, making a 'hinge' shape like '_/'.\">\n", "
\n", "
\n", "
\n", @@ -50,7 +50,7 @@ "When we attach the rectifier to a linear unit, we get a **rectified linear unit** or **ReLU**. (For this reason, it's common to call the rectifier function the \"ReLU function\".) Applying a ReLU activation to a linear unit means the output becomes `max(0, w * x + b)`, which we might draw in a diagram like:\n", "\n", "
\n", - "\"Diagram\n", + "\"Diagram\n", "
A rectified linear unit.\n", "
\n", "
" @@ -65,7 +65,7 @@ "Now that we have some nonlinearity, let's see how we can stack layers to get complex data transformations.\n", "\n", "
\n", - "\n", + "\n", "
A stack of dense layers makes a \"fully-connected\" network.\n", "
\n", "
\n", diff --git a/notebooks/deep_learning_intro/raw/tut3.ipynb b/notebooks/deep_learning_intro/raw/tut3.ipynb index e38eac556..dd7ee03c8 100644 --- a/notebooks/deep_learning_intro/raw/tut3.ipynb +++ b/notebooks/deep_learning_intro/raw/tut3.ipynb @@ -27,7 +27,7 @@ "The total MAE loss on a dataset is the mean of all these absolute differences.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
The mean absolute error is the average length between the fitted curve and the data points.\n", "
\n", "
\n", @@ -48,7 +48,7 @@ "Then just do this over and over until the loss is as small as you like (or until it won't decrease any further.)\n", "\n", "
\n", - "\"Fitting\n", + "\"Fitting\n", "
Training a neural network with Stochastic Gradient Descent.\n", "
\n", "
\n", diff --git a/notebooks/deep_learning_intro/raw/tut4.ipynb b/notebooks/deep_learning_intro/raw/tut4.ipynb index e75f45430..2717b9ba5 100644 --- a/notebooks/deep_learning_intro/raw/tut4.ipynb +++ b/notebooks/deep_learning_intro/raw/tut4.ipynb @@ -17,7 +17,7 @@ "When we train a model we've been plotting the loss on the training set epoch by epoch. To this we'll add a plot the validation data too. These plots we call the **learning curves**. To train deep learning models effectively, we need to be able to interpret them.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
The validation loss gives an estimate of the expected error on unseen data.\n", "
\n", "
\n", @@ -27,7 +27,7 @@ "Ideally, we would create models that learn all of the signal and none of the noise. This will practically never happen. Instead we make a trade. We can get the model to learn more signal at the cost of learning more noise. So long as the trade is in our favor, the validation loss will continue to decrease. After a certain point, however, the trade can turn against us, the cost exceeds the benefit, and the validation loss begins to rise.\n", "\n", "
\n", - "\"Two\n", + "\"Two\n", "
Underfitting and overfitting.\n", "
\n", "
\n", @@ -67,7 +67,7 @@ "We mentioned that when a model is too eagerly learning noise, the validation loss may start to increase during training. To prevent this, we can simply stop the training whenever it seems the validation loss isn't decreasing anymore. Interrupting the training this way is called **early stopping**.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
We keep the model where the validation loss is at a minimum.\n", "
\n", "
\n", diff --git a/notebooks/deep_learning_intro/raw/tut5.ipynb b/notebooks/deep_learning_intro/raw/tut5.ipynb index c65702e17..c34266790 100644 --- a/notebooks/deep_learning_intro/raw/tut5.ipynb +++ b/notebooks/deep_learning_intro/raw/tut5.ipynb @@ -19,7 +19,7 @@ "This is the idea behind **dropout**. To break up these conspiracies, we randomly *drop out* some fraction of a layer's input units every step of training, making it much harder for the network to learn those spurious patterns in the training data. Instead, it has to search for broad, general patterns, whose weight patterns tend to be more robust.\n", "\n", "
\n", - "\"An\n", + "\"An\n", "
Here, 50% dropout has been added between the two hidden layers.
\n", "
\n", "\n", diff --git a/notebooks/deep_learning_intro/raw/tut6.ipynb b/notebooks/deep_learning_intro/raw/tut6.ipynb index 503920bdd..909d6dfac 100644 --- a/notebooks/deep_learning_intro/raw/tut6.ipynb +++ b/notebooks/deep_learning_intro/raw/tut6.ipynb @@ -25,7 +25,7 @@ "For classification, what we want instead is a distance between *probabilities*, and this is what cross-entropy provides. **Cross-entropy** is a sort of measure for the distance from one probability distribution to another.\n", "\n", "
\n", - "\"Graphs\n", + "\"Graphs\n", "
Cross-entropy penalizes incorrect probability predictions.
\n", "
\n", "\n", @@ -38,7 +38,7 @@ "The cross-entropy and accuracy functions both require probabilities as inputs, meaning, numbers from 0 to 1. To covert the real-valued outputs produced by a dense layer into probabilities, we attach a new kind of activation function, the **sigmoid activation**.\n", "\n", "
\n", - "\"The\n", + "\"The\n", "
The sigmoid function maps real numbers into the interval $[0, 1]$.
\n", "
\n", "\n", diff --git a/notebooks/embeddings/raw/1-embeddings.ipynb b/notebooks/embeddings/raw/1-embeddings.ipynb index 35aaaf087..a6398e0eb 100644 --- a/notebooks/embeddings/raw/1-embeddings.ipynb +++ b/notebooks/embeddings/raw/1-embeddings.ipynb @@ -185,7 +185,7 @@ "\n", "I want my model to look something like this:\n", "\n", - "![Imgur](https://i.imgur.com/Z1eVQu9.png)" + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/Z1eVQu9.png)" ] }, { diff --git a/notebooks/embeddings/raw/2-factorization.ipynb b/notebooks/embeddings/raw/2-factorization.ipynb index 6cde71b52..f632b82d8 100644 --- a/notebooks/embeddings/raw/2-factorization.ipynb +++ b/notebooks/embeddings/raw/2-factorization.ipynb @@ -8,7 +8,7 @@ "\n", "In the previous lesson, we trained a model to predict the ratings assigned to movies by users in the [MovieLens dataset](https://www.kaggle.com/grouplens/movielens-20m-dataset/home). As a reminder the model looked something like this:\n", "\n", - "![Imgur](https://i.imgur.com/Z1eVQu9.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/Z1eVQu9.png)\n", "\n", "We look up an embedding vector for the movie and user, concatenate them together. Then we add some hidden layers. 
Finally these come together at a single output node to predict a rating.\n", "\n", @@ -16,7 +16,7 @@ "\n", "Here's what our matrix factorization model will look like:\n", "\n", - "![Imgur](https://i.imgur.com/lUzvCHj.png)" + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/lUzvCHj.png)" ] }, { diff --git a/notebooks/ethics/raw/tut3.ipynb b/notebooks/ethics/raw/tut3.ipynb index 8ca4956da..6ae9372c6 100644 --- a/notebooks/ethics/raw/tut3.ipynb +++ b/notebooks/ethics/raw/tut3.ipynb @@ -76,7 +76,7 @@ "source": [ "We can visually represent these different types of bias, which occur at different stages in the ML workflow:\n", "\n", - "[![visual representation of types of bias](https://i.imgur.com/bvArGRY.png)](https://arxiv.org/pdf/1901.10002.pdf)\n", + "[![visual representation of types of bias](https://storage.googleapis.com/kaggle-media/learn/images/bvArGRY.png)](https://arxiv.org/pdf/1901.10002.pdf)\n", "\n", "Note that these are *not mutually exclusive*: that is, an ML application can easily suffer from more than one type of bias. For example, as Rachel Thomas describes in a [recent research talk](https://www.youtube.com/watch?v=1Uyc9SPeYkA&list=PLe6zdIMe5B7IR0oDOobXBDBlYY1eqLYPx&index=10&t=41s), ML applications in wearable fitness devices can suffer from:\n", "- **Representation bias** (if the dataset used to train the models exclude darker skin tones), \n", diff --git a/notebooks/ethics/raw/tut4.ipynb b/notebooks/ethics/raw/tut4.ipynb index d3d301f5d..900ce67b0 100644 --- a/notebooks/ethics/raw/tut4.ipynb +++ b/notebooks/ethics/raw/tut4.ipynb @@ -82,21 +82,21 @@ "\n", "We'll work with a small example to illustrate the differences between the four different types of fairness. We'll use a **confusion matrix**, which is a common tool used to understand the performance of a ML model. 
This tool is depicted in the example below, which depicts a model with 80% accuracy (since 8/10 people were correctly classified) and has an 83% true positive rate (since 5/6 \"positives\" were correctly classified).\n", "\n", - "![](https://i.imgur.com/xFZG5fF.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/xFZG5fF.png)\n", "\n", "To understand how a model's performance varies across groups, we can construct a different confusion matrix for each group. In this small example, we'll assume that we have data from only 20 people, equally split between two groups (10 from Group A, and 10 from Group B). \n", "\n", "The next image shows what the confusion matrices could look like, if the model satisfies **demographic parity** fairness. 10 people from each group (50% from Group A, and 50% from Group B) were considered by the model. 14 people, also equally split across groups (50% from Group A, and 50% from Group B) were approved by the model. \n", "\n", - "![](https://i.imgur.com/e32gcDh.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/e32gcDh.png)\n", "\n", "For ****equal opportunity**** fairness, the TPR for each group should be the same; in the example below, it is 66% in each case.\n", "\n", - "![](https://i.imgur.com/aInWboA.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/aInWboA.png)\n", "\n", "Next, we can see how the confusion matrices might look for ****equal accuracy**** fairness. 
For each group, the model was 80% accurate.\n", "\n", - "![](https://i.imgur.com/fIOJovc.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/fIOJovc.png)\n", "\n", "Note that ****group unaware**** fairness cannot be detected from the confusion matrix, and is more concerned with removing group membership information from the dataset.\n", "\n", diff --git a/notebooks/feature_engineering/raw/ex1.ipynb b/notebooks/feature_engineering/raw/ex1.ipynb index 2416046e5..dd7120fa6 100644 --- a/notebooks/feature_engineering/raw/ex1.ipynb +++ b/notebooks/feature_engineering/raw/ex1.ipynb @@ -8,7 +8,7 @@ "\n", "In the exercise, you will work with data from the TalkingData AdTracking competition. The goal of the competition is to predict if a user will download an app after clicking through an ad. \n", "\n", - "
\n", + "
\n", "\n", "For this course you will use a small sample of the data, dropping 99% of negative records (where the app wasn't downloaded) to make the target more balanced.\n", "\n", diff --git a/notebooks/feature_engineering_new/raw/tut1.ipynb b/notebooks/feature_engineering_new/raw/tut1.ipynb index 8e0efbca0..92aca0aeb 100644 --- a/notebooks/feature_engineering_new/raw/tut1.ipynb +++ b/notebooks/feature_engineering_new/raw/tut1.ipynb @@ -40,7 +40,7 @@ "The key idea here is that a transformation you apply to a feature becomes in essence a part of the model itself. Say you were trying to predict the `Price` of square plots of land from the `Length` of one side. Fitting a linear model directly to `Length` gives poor results: the relationship is not linear.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
A linear model fits poorly with only Length as feature.\n", "
\n", "
\n", @@ -48,7 +48,7 @@ "If we square the `Length` feature to get `'Area'`, however, we create a linear relationship. Adding `Area` to the feature set means this linear model can now fit a parabola. Squaring a feature, in other words, gave the linear model the ability to fit squared features.\n", "\n", "
\n", - "\"Left:\n", + "\"Left:\n", "
Left: The fit to Area is much better. Right: Which makes the fit to Length better as well.\n", "
\n", "
\n", diff --git a/notebooks/feature_engineering_new/raw/tut2.ipynb b/notebooks/feature_engineering_new/raw/tut2.ipynb index 28660bb54..cf2c80529 100644 --- a/notebooks/feature_engineering_new/raw/tut2.ipynb +++ b/notebooks/feature_engineering_new/raw/tut2.ipynb @@ -26,7 +26,7 @@ "Here's an example from the *Ames Housing* data. The figure shows the relationship between the exterior quality of a house and the price it sold for. Each point represents a house.\n", "\n", "
\n", - "\"Four\n", + "\"Four\n", "
Knowing the exterior quality of a house reduces uncertainty about its sale price.\n", "
\n", "
\n", @@ -42,7 +42,7 @@ "The next figure will give you an idea of how MI values correspond to the kind and degree of association a feature has with the target.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Left: Mutual information increases as the dependence between feature and target becomes tighter. Right: Mutual information can capture any kind of association (not just linear, like correlation.)\n", "
\n", "
\n", diff --git a/notebooks/feature_engineering_new/raw/tut4.ipynb b/notebooks/feature_engineering_new/raw/tut4.ipynb index 17371b5ff..e9de1b4c1 100644 --- a/notebooks/feature_engineering_new/raw/tut4.ipynb +++ b/notebooks/feature_engineering_new/raw/tut4.ipynb @@ -17,7 +17,7 @@ "Applied to a single real-valued feature, clustering acts like a traditional \"binning\" or [\"discretization\"](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_discretization_classification.html) transform. On multiple features, it's like \"multi-dimensional binning\" (sometimes called *vector quantization*).\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Left: Clustering a single feature. Right: Clustering across two features.\n", "
\n", "
\n", @@ -41,7 +41,7 @@ "The motivating idea for adding cluster labels is that the clusters will break up complicated relationships across features into simpler chunks. Our model can then just learn the simpler chunks one-by-one instead having to learn the complicated whole all at once. It's a \"divide and conquer\" strategy.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Clustering the YearBuilt feature helps this linear model learn its relationship to SalePrice.\n", "
\n", "
\n", @@ -59,7 +59,7 @@ "The clustering on the [*Ames*](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data) dataset above is a k-means clustering. Here is the same figure with the tessallation and centroids shown.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
K-means clustering creates a Voronoi tessellation of the feature space.\n", "
\n", "
\n", @@ -77,7 +77,7 @@ "The animation below shows the algorithm in action. It illustrates the dependence of the result on the initial centroids and the importance of iterating until convergence.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
The K-means clustering algorithm on Airbnb rentals in NYC.\n", "
\n", "
\n", diff --git a/notebooks/feature_engineering_new/raw/tut5.ipynb b/notebooks/feature_engineering_new/raw/tut5.ipynb index 206f88da3..d3b1d0e19 100644 --- a/notebooks/feature_engineering_new/raw/tut5.ipynb +++ b/notebooks/feature_engineering_new/raw/tut5.ipynb @@ -17,7 +17,7 @@ "You could imagine that within this data are \"axes of variation\" that describe the ways the abalone tend to differ from one another. Pictorially, these axes appear as perpendicular lines running along the natural dimensions of the data, one axis for each original feature.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
" @@ -32,7 +32,7 @@ "Notice that instead of describing abalones by their `'Height'` and `'Diameter'`, we could just as well describe them by their `'Size'` and `'Shape'`. This, in fact, is the whole idea of PCA: instead of describing the data with the original features, we describe it with its axes of variation. The axes of variation become the new features.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
The principal components become the new features by a rotation of the dataset in the feature space.\n", "
\n", "
\n", @@ -63,7 +63,7 @@ "PCA also tells us the *amount* of variation in each component. We can see from the figures that there is more variation in the data along the `Size` component than along the `Shape` component. PCA makes this precise through each component's **percent of explained variance**.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Size accounts for about 96% and the Shape for about 4% of the variance between Height and Diameter.\n", "
\n", "
\n", diff --git a/notebooks/feature_engineering_new/raw/tut6.ipynb b/notebooks/feature_engineering_new/raw/tut6.ipynb index 0220d439e..5ec5199b0 100644 --- a/notebooks/feature_engineering_new/raw/tut6.ipynb +++ b/notebooks/feature_engineering_new/raw/tut6.ipynb @@ -72,7 +72,7 @@ "where `n` is the total number of times that category occurs in the data. The parameter `m` determines the \"smoothing factor\". Larger values of `m` put more weight on the overall estimate.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
\n", diff --git a/notebooks/game_ai/raw/ex2.ipynb b/notebooks/game_ai/raw/ex2.ipynb index fa68c14e3..f8a5f9b10 100644 --- a/notebooks/game_ai/raw/ex2.ipynb +++ b/notebooks/game_ai/raw/ex2.ipynb @@ -31,7 +31,7 @@ "The heuristic from the tutorial looks at all groups of four adjacent grid locations on the same row, column, or diagonal and assigns points for each occurrence of the following patterns:\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "In the image above, we assume that the agent is the red player, and the opponent plays yellow discs.\n", @@ -51,7 +51,7 @@ "In this tutorial, you'll change the heuristic to the following (where you decide the number of points to apply in each of `A`, `B`, `C`, `D`, and `E`). You will define these values in the code cell below.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", " \n", "\n", @@ -142,7 +142,7 @@ "Consider the game board below. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Say the agent uses red discs, and it's the agent's turn. \n", diff --git a/notebooks/game_ai/raw/ex3.ipynb b/notebooks/game_ai/raw/ex3.ipynb index 985725242..5b0cd8e6d 100644 --- a/notebooks/game_ai/raw/ex3.ipynb +++ b/notebooks/game_ai/raw/ex3.ipynb @@ -29,19 +29,19 @@ "The heuristic from the tutorial looks at all groups of four adjacent grid locations on the same row, column, or diagonal and assigns points for each occurrence of the following patterns:\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Is it really necessary to use so many numbers to define the heuristic? Consider simplifying it, as in the image below.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "How would each heuristic score the potential moves in the example below (where, in this case, the agent looks only one step ahead)? Which heuristic would lead to the agent selecting the better move?\n", "\n", "
\n", - "
\n", + "
\n", "
" ] }, @@ -74,7 +74,7 @@ "In the tutorial, we worked with a small game tree.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "The game tree above has 8 leaf nodes that appear at the bottom of the tree. By definition, \"leaf nodes\" in a game tree are nodes that don't have nodes below them.\n", @@ -134,7 +134,7 @@ "\n", "Consider the toy example below of a game tree that the agent will use to select its next move. \n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Which move will the agent select? Use your answer to set the value of the `selected_move` variable below. Your answer should be one of `1`, `2`, or `3`." diff --git a/notebooks/game_ai/raw/ex4.ipynb b/notebooks/game_ai/raw/ex4.ipynb index cbd832bca..0b0091b81 100644 --- a/notebooks/game_ai/raw/ex4.ipynb +++ b/notebooks/game_ai/raw/ex4.ipynb @@ -84,7 +84,7 @@ "To play this game in Google Search, click on the **[Play]** button at [this link](https://www.google.com/search?q=minesweeper). \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "With each move, one of the following is true:\n", @@ -119,7 +119,7 @@ "First, we'll need to make sure that your Kaggle Notebook is set up to run the code. Begin by looking at the \"Settings\" menu to the right of your notebook. Your menu will look like one of the following:\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "If your \"Internet\" setting appears as a \"Requires phone verification\" link, click on this link. This will bring you to a new window; then, follow the instructions to verify your account. After following this step, your \"Internet\" setting will appear \"Off\", as in the example to the right.\n", @@ -127,7 +127,7 @@ "Once your \"Internet\" setting appears as \"Off\", click to turn it on. You'll see a pop-up window that you'll need to \"Accept\" in order to complete the process and have the setting switched to \"On\". Once the Internet is turned \"On\", you're ready to proceed!\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Begin by running the code cell below. " diff --git a/notebooks/game_ai/raw/tut1.ipynb b/notebooks/game_ai/raw/tut1.ipynb index 063cd3fe0..44e858d98 100644 --- a/notebooks/game_ai/raw/tut1.ipynb +++ b/notebooks/game_ai/raw/tut1.ipynb @@ -9,7 +9,7 @@ "**[Connect Four](https://en.wikipedia.org/wiki/Connect_Four)** is a game where two players alternate turns dropping colored discs into a vertical grid. Each player uses a different color (usually red or yellow), and the objective of the game is to be the first player to get four discs in a row. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "In this course, you will build your own intelligent agents to play the game.\n", @@ -24,7 +24,7 @@ "To join the competition, open a new window with **[the competition page](https://www.kaggle.com/c/connectx/overview)**, and click on the **\"Join Competition\"** button. (_If you see a \"Submit Agent\" button instead of a \"Join Competition\" button, you have already joined the competition, and don't need to do so again._)\n", "\n", "
\n", - "
\n", + "
\n", "
\n", " \n", "This takes you to the rules acceptance page. You must accept the competition rules in order to participate. These rules govern how many submissions you can make per day, the maximum team size, and other competition-specific details. Then, click on **\"I Understand and Accept\"** to indicate that you will abide by the competition rules.\n", @@ -139,7 +139,7 @@ "`obs.board` is a Python list that shows the locations of the discs, where the first row appears first, followed by the second row, and so on. We use `1` to track player 1's discs, and `2` to track player 2's discs. For instance, for this game board:\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "`obs.board` would be `[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 1, 2, 0, 2, 0]`.\n", diff --git a/notebooks/game_ai/raw/tut2.ipynb b/notebooks/game_ai/raw/tut2.ipynb index 7cd9f5fd9..d2f8230f2 100644 --- a/notebooks/game_ai/raw/tut2.ipynb +++ b/notebooks/game_ai/raw/tut2.ipynb @@ -17,7 +17,7 @@ "We can formalize this idea and represent all possible outcomes in a **(complete) game tree**. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "The game tree represents each possible move (by agent and opponent), starting with an empty board. The first row shows all possible moves the agent (red player) can make. Next, we record each move the opponent (yellow player) can make in response, and so on, until each branch reaches the end of the game. (_The game tree for Connect Four is quite large, so we show only a small preview in the image above_.)\n", @@ -38,13 +38,13 @@ "This is also represented in the image below.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "And how exactly will the agent use the heuristic? Consider it's the agent's turn, and it's trying to plan a move for the game board shown at the top of the figure below. There are seven possible moves (one for each column). For each move, we record the resulting game board.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Then we use the heuristic to assign a score to each board. To do this, we search the grid and look for all occurrences of the pattern in the heuristic, similar to a [word search](https://en.wikipedia.org/wiki/Word_search) puzzle. Each occurrence modifies the score. For instance,\n", diff --git a/notebooks/game_ai/raw/tut3.ipynb b/notebooks/game_ai/raw/tut3.ipynb index 130cdc029..4557945f8 100644 --- a/notebooks/game_ai/raw/tut3.ipynb +++ b/notebooks/game_ai/raw/tut3.ipynb @@ -9,7 +9,7 @@ "In the previous tutorial, you learned how to build an agent with one-step lookahead. This agent performs reasonably well, but definitely still has room for improvement! For instance, consider the potential moves in the figure below. (_Note that we use zero-based numbering for the columns, so the leftmost column corresponds to `col=0`, the next column corresponds to `col=1`, and so on._)\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "With one-step lookahead, the red player picks one of column 5 or 6, each with 50% probability. But, column 5 is clearly a bad move, as it lets the opponent win the game in only one more turn. Unfortunately, the agent doesn't know this, because it can only look one move into the future. \n", @@ -26,7 +26,7 @@ "We'll work with a visual example. For simplicity, we assume that at each turn, both the agent and opponent have only two possible moves. Each of the blue rectangles in the figure below corresponds to a different game board.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "We have labeled each of the \"leaf nodes\" at the bottom of the tree with the score from the heuristic. (_We use made-up scores in the figure. In the code, we'll use the same heuristic from the previous tutorial._) As before, the current game board is at the top of the figure, and the agent's goal is to end up with a score that's as high as possible. \n", @@ -44,7 +44,7 @@ "So, in practice, how does the agent use this assumption to select a move? We illustrate the agent's thought process in the figure below.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "In the example, minimax assigns the move on the left a score of -1, and the move on the right is assigned a score of +10. So, the agent will select the move on the right. \n", @@ -114,7 +114,7 @@ "We'll also need to slightly modify the heuristic from the previous tutorial, since the opponent is now able to modify the game board.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "In particular, we need to check if the opponent has won the game by playing a disc. The new heuristic looks at each group of four adjacent locations in a (horizontal, vertical, or diagonal) line and assigns:\n", @@ -224,7 +224,7 @@ "We won't describe the minimax implementation in detail, but if you want to read more technical pseudocode, here's the description [from Wikipedia](https://en.wikipedia.org/wiki/Minimax#Pseudocode). (_Note that the pseudocode can be safely skipped!_)\n", "\n", "
\n", - "\n", + "\n", "
\n", "\n", "Finally, we implement the minimax agent in the competition format. The `N_STEPS` variable is used to set the depth of the tree." diff --git a/notebooks/game_ai/raw/tut4.ipynb b/notebooks/game_ai/raw/tut4.ipynb index dcac53c24..21fbf10b5 100644 --- a/notebooks/game_ai/raw/tut4.ipynb +++ b/notebooks/game_ai/raw/tut4.ipynb @@ -23,7 +23,7 @@ "The network accepts the current board as input. And, it outputs a probability for each possible move.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Then, the agent selects a move by sampling from these probabilities. For instance, for the game board in the image above, the agent selects column 4 with 50% probability.\n", diff --git a/notebooks/game_ai/raw/tut_halite.ipynb b/notebooks/game_ai/raw/tut_halite.ipynb index 31503ab55..fda9974cf 100644 --- a/notebooks/game_ai/raw/tut_halite.ipynb +++ b/notebooks/game_ai/raw/tut_halite.ipynb @@ -49,19 +49,19 @@ "Grid locations with **halite** are indicated by a light blue icon, where larger icons indicate more available halite.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Players use **ships** to navigate the world and collect halite. A ship can only collect halite from its current position. When a ship decides to collect halite, it collects 25% of the halite available in its cell. This collected halite is added to the ship's \"cargo\". \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Halite in ship cargo is not counted towards final scores. In order for halite to be counted, ships need to deposit their cargo into a **shipyard** of the same color. A ship can deposit all of its cargo in a single timestep simply by navigating to a cell containing a shipyard.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Players start the game with no shipyards. To get a shipyard, a player must convert a ship into a shipyard, which costs 500 halite. Also, shipyards can spawn (or create) new ships, which deducts 500 halite (per ship) from the player.\n", @@ -71,7 +71,7 @@ "- the other ship survives and instantly collects the destroyed ship's cargo.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "If you view the full game rules, you'll notice that there are more types of collisions that can occur in the game (for instance, ships can collide with enemy shipyards, which destroys the ship, the ship's cargo, and the enemy shipyard). \n", @@ -91,7 +91,7 @@ "\n", "Both are illustrated in the figure below. The \"cargo\" that is tracked in the player's scoreboard contains the total cargo, summed over all of the player's ships.\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "This raises some questions that you'll have to answer when commanding ships:\n", @@ -107,7 +107,7 @@ "These are illustrated in the image below.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "With more ships and shipyards, you can collect halite at a faster rate. But each additional ship and shipyard costs you halite: how will you decide when it might be beneficial to create more?" @@ -129,7 +129,7 @@ "\n", "Next, click on **\"Create\"**. (_Don't change the default settings: so, **\"Python\"** should appear under **\"Select language\"**, and you should have **\"Notebook\"** selected under **\"Select type\"**._)\n", "\n", - "![](https://i.imgur.com/qUVvr8k.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/qUVvr8k.png)\n", "\n", "You now have a notebook where you'll develop your first agent! If you're not sure how to use Kaggle Notebooks, we strongly recommend that you walk through **[this notebook](https://www.kaggle.com/alexisbcook/getting-started-with-titanic)** before proceeding. It teaches you how to run code in the notebook.\n", "\n", diff --git a/notebooks/geospatial/raw/ex1.ipynb b/notebooks/geospatial/raw/ex1.ipynb index 9740c44bd..255830e05 100644 --- a/notebooks/geospatial/raw/ex1.ipynb +++ b/notebooks/geospatial/raw/ex1.ipynb @@ -9,7 +9,7 @@ "[Kiva.org](https://www.kiva.org/) is an online crowdfunding platform extending financial services to poor people around the world. Kiva lenders have provided over $1 billion dollars in loans to over 2 million people.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Kiva reaches some of the most remote places in the world through their global network of \"Field Partners\". These partners are local organizations working in communities to vet borrowers, provide services, and administer loans.\n", diff --git a/notebooks/geospatial/raw/ex2.ipynb b/notebooks/geospatial/raw/ex2.ipynb index 6aeca4bef..0adf69c7c 100644 --- a/notebooks/geospatial/raw/ex2.ipynb +++ b/notebooks/geospatial/raw/ex2.ipynb @@ -9,7 +9,7 @@ "You are a bird conservation expert and want to understand migration patterns of purple martins. In your research, you discover that these birds typically spend the summer breeding season in the eastern United States, and then migrate to South America for the winter. But since this bird is under threat of endangerment, you'd like to take a closer look at the locations that these birds are more likely to visit.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "There are several [protected areas](https://www.iucn.org/theme/protected-areas/about) in South America, which operate under special regulations to ensure that species that migrate (or live) there have the best opportunity to thrive. You'd like to know if purple martins tend to visit these areas. To answer this question, you'll use some recently collected data that tracks the year-round location of eleven different birds.\n", diff --git a/notebooks/geospatial/raw/ex3.ipynb b/notebooks/geospatial/raw/ex3.ipynb index 63872bfbf..801b1d0c5 100644 --- a/notebooks/geospatial/raw/ex3.ipynb +++ b/notebooks/geospatial/raw/ex3.ipynb @@ -9,7 +9,7 @@ "You are an urban safety planner in Japan, and you are analyzing which areas of Japan need extra earthquake reinforcement. Which areas are both high in population density and prone to earthquakes?\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Before you get started, run the code cell below to set everything up." diff --git a/notebooks/geospatial/raw/ex4.ipynb b/notebooks/geospatial/raw/ex4.ipynb index b33c5bf24..a77d0d780 100644 --- a/notebooks/geospatial/raw/ex4.ipynb +++ b/notebooks/geospatial/raw/ex4.ipynb @@ -9,7 +9,7 @@ "You are a Starbucks big data analyst ([that’s a real job!](https://www.forbes.com/sites/bernardmarr/2018/05/28/starbucks-using-big-data-analytics-and-artificial-intelligence-to-boost-performance/#130c7d765cdc)) looking to find the next store into a [Starbucks Reserve Roastery](https://www.businessinsider.com/starbucks-reserve-roastery-compared-regular-starbucks-2018-12#also-on-the-first-floor-was-the-main-coffee-bar-five-hourglass-like-units-hold-the-freshly-roasted-coffee-beans-that-are-used-in-each-order-the-selection-rotates-seasonally-5). These roasteries are much larger than a typical Starbucks store and have several additional features, including various food and wine options, along with upscale lounge areas. You'll investigate the demographics of various counties in the state of California, to determine potentially suitable locations.\n", "\n", "
\n", - "

\n", + "

\n", "
\n", "\n", "Before you get started, run the code cell below to set everything up." diff --git a/notebooks/geospatial/raw/ex5.ipynb b/notebooks/geospatial/raw/ex5.ipynb index 30cd626bb..c535a0672 100644 --- a/notebooks/geospatial/raw/ex5.ipynb +++ b/notebooks/geospatial/raw/ex5.ipynb @@ -9,7 +9,7 @@ "You are part of a crisis response team, and you want to identify how hospitals have been responding to crash collisions in New York City.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Before you get started, run the code cell below to set everything up." diff --git a/notebooks/geospatial/raw/tut1.ipynb b/notebooks/geospatial/raw/tut1.ipynb index 6e2749939..276220e17 100644 --- a/notebooks/geospatial/raw/tut1.ipynb +++ b/notebooks/geospatial/raw/tut1.ipynb @@ -9,7 +9,7 @@ "In this micro-course, you'll learn about different methods to wrangle and visualize **geospatial data**, or data with a geographic location.\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Along the way, you'll offer solutions to several real-world problems like:\n", @@ -181,7 +181,7 @@ "source": [ "While this column can contain a variety of different datatypes, each entry will typically be a **Point**, **LineString**, or **Polygon**.\n", "\n", - "![](https://i.imgur.com/N1llefr.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/N1llefr.png)\n", "\n", "The \"geometry\" column in our dataset contains 2983 different Polygon objects, each corresponding to a different shape in the plot above.\n", "\n", diff --git a/notebooks/geospatial/raw/tut2.ipynb b/notebooks/geospatial/raw/tut2.ipynb index f8a4ae6f2..74fdddfea 100644 --- a/notebooks/geospatial/raw/tut2.ipynb +++ b/notebooks/geospatial/raw/tut2.ipynb @@ -13,7 +13,7 @@ "- the *equidistant* projections (like \"Azimuthal Equidistant projection\") preserve distance. This would be a good choice for calculating flight distance.\n", "\n", "
\n", - "\n", + "\n", "List of map projections (Source)

\n", "
\n", "\n", diff --git a/notebooks/geospatial/raw/tut4.ipynb b/notebooks/geospatial/raw/tut4.ipynb index 2cf4f09ee..bd4b064b7 100644 --- a/notebooks/geospatial/raw/tut4.ipynb +++ b/notebooks/geospatial/raw/tut4.ipynb @@ -33,7 +33,7 @@ "\n", "**Geocoding** is the process of converting the name of a place or an address to a location on a map. If you have ever looked up a geographic location based on a landmark description with [Google Maps](https://www.google.com/maps), [Bing Maps](https://www.bing.com/maps), or [Baidu Maps](https://map.baidu.com/), for instance, then you have used a geocoder!\n", "\n", - "![](https://i.imgur.com/1IrgZQq.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/1IrgZQq.png)\n", "\n", "We'll use geopy to do all of our geocoding." ] diff --git a/notebooks/intro_to_programming/deprecated/ex4_old.ipynb b/notebooks/intro_to_programming/deprecated/ex4_old.ipynb index 74073e678..fe6feff0b 100644 --- a/notebooks/intro_to_programming/deprecated/ex4_old.ipynb +++ b/notebooks/intro_to_programming/deprecated/ex4_old.ipynb @@ -39,7 +39,7 @@ "\n", "In Mexico, foods and beverages that are high in saturated fat, trans fat, sugar, sodium, and/or calories appear with warning labels that are designed to help consumers make healthy food choices.\n", "\n", - "
\"drawing\"
\n", + "
\"drawing\"
\n", "\n", "For instance, the [box of cookies](https://world.openfoodfacts.org/product/7501000673209/florentinas-gamesa) in the image above appears with two labels (in the upper right corner):\n", "- EXCESO CALORÍAS (in English, EXCESS CALORIES)\n", @@ -496,7 +496,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://imgur.com/Cfcx72e) with all of the nutritional information. Note that for this food,\n", + "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/Cfcx72e) with all of the nutritional information. Note that for this food,\n", "- `food_type = \"solid\"` (because bologna is a solid and not a liquid)\n", "- `serving_size = 32` (the serving size is 32 grams)\n", "- `calories_per_serving = 110` (there are 110 calories per serving)\n", @@ -532,7 +532,7 @@ "source": [ "This bologna has three labels, printed in the output above.\n", "\n", - "Now it's your turn to determine the labels from more foods. In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). Here is [an image](https://imgur.com/a/MUxzHVU) with all of the nutritional information.\n", + "Now it's your turn to determine the labels from more foods. In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). 
Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/MUxzHVU) with all of the nutritional information.\n", "\n", "**Note**: running the line of code below as-is will return an error. You have to fill in the nutritional values first." ] @@ -560,7 +560,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://imgur.com/rcdB7VH) with all of the nutritional information." + "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/rcdB7VH) with all of the nutritional information." ] }, { @@ -586,7 +586,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://imgur.com/2Bc271o) with all of the nutritional information." + "Next, try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/2Bc271o) with all of the nutritional information." ] }, { @@ -612,7 +612,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). Here is [an image](https://imgur.com/RsBYa8E) with all of the nutritional information." + "Finally, try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/RsBYa8E) with all of the nutritional information." 
] }, { diff --git a/notebooks/intro_to_programming/raw/ex4.ipynb b/notebooks/intro_to_programming/raw/ex4.ipynb index 086fe44eb..53bec97ff 100644 --- a/notebooks/intro_to_programming/raw/ex4.ipynb +++ b/notebooks/intro_to_programming/raw/ex4.ipynb @@ -484,7 +484,7 @@ "- EXCESO CALORÍAS (in English, EXCESS CALORIES)\n", "- EXCESO AZÚCARES (in English, EXCESS SUGAR)\n", "\n", - "
\"drawing\"
\n", + "
\"drawing\"
\n", "\n", "In this question, you'll work with a function `get_labels()` that takes the nutritional details about a food item and prints the needed warning labels. This function takes several inputs:\n", "- `food_type` = one of `\"solid\"` or `\"liquid\"`\n", @@ -533,7 +533,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://imgur.com/Cfcx72e) with all of the nutritional information. Note that for this food,\n", + "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/Cfcx72e) with all of the nutritional information. Note that for this food,\n", "- `food_type = \"solid\"` (because bologna is a solid and not a liquid)\n", "- `serving_size = 32` (the serving size is 32 grams)\n", "- `calories_per_serving = 110` (there are 110 calories per serving)\n", @@ -573,7 +573,7 @@ "\n", "In general, as you continue coding in Python, you will often be running code that other people have written. This is common practice for advanced programmers.\n", "\n", - "In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). Here is [an image](https://imgur.com/a/MUxzHVU) with all of the nutritional information.\n", + "In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). 
Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/MUxzHVU) with all of the nutritional information.\n", "\n", "**Note**: running the line of code below as-is will return an error. You have to fill in the nutritional values first." ] @@ -601,7 +601,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://imgur.com/rcdB7VH) with all of the nutritional information." + "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/rcdB7VH) with all of the nutritional information." ] }, { @@ -628,8 +628,8 @@ "metadata": {}, "source": [ "Feel free to skip to the end of the notebook now and run `q5.check()` to complete the exercise. If you want to try more foods, \n", - "- try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://imgur.com/2Bc271o) with all of the nutritional information.\n", - "- try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). Here is [an image](https://imgur.com/RsBYa8E) with all of the nutritional information.\n", + "- try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/2Bc271o) with all of the nutritional information.\n", + "- try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/RsBYa8E) with all of the nutritional information.\n", "\n", "Use the two code cells below for this." 
] diff --git a/notebooks/intro_to_programming/raw/tut2.ipynb b/notebooks/intro_to_programming/raw/tut2.ipynb index bae6628c9..6be970ec2 100644 --- a/notebooks/intro_to_programming/raw/tut2.ipynb +++ b/notebooks/intro_to_programming/raw/tut2.ipynb @@ -65,7 +65,7 @@ "source": [ "Every function is composed of two pieces: a **header** and **body**.\n", "\n", - "![](https://i.imgur.com/gu0AWhK.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/gu0AWhK.png)\n", "\n", "## Header\n", "The function **header** defines the name of the function and its argument(s). \n", @@ -133,7 +133,7 @@ "tags": [] }, "source": [ - "![](https://i.imgur.com/hlUbxQE.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/hlUbxQE.png)\n", "\n", "In more detail,\n", "- `add_three(10)` is the value that we get as output when we supply `10` as the value for `input_var` and call the `add_three()` function. When the function runs, it runs all of the code in its body, from top to bottom:\n", diff --git a/notebooks/machine_learning/raw/ex7.ipynb b/notebooks/machine_learning/raw/ex7.ipynb index de19923f7..a45a7d907 100644 --- a/notebooks/machine_learning/raw/ex7.ipynb +++ b/notebooks/machine_learning/raw/ex7.ipynb @@ -187,7 +187,7 @@ "\n", "To test your results, you'll need to join the competition (if you haven't already). So open a new window by clicking on **[this link](https://www.kaggle.com/c/home-data-for-ml-course)**. 
Then click on the **Join Competition** button.\n", "\n", - "![join competition image](https://i.imgur.com/axBzctl.png)\n", + "![join competition image](https://storage.googleapis.com/kaggle-media/learn/images/axBzctl.png)\n", "\n", "Next, follow the instructions below:\n", "#$SUBMIT_TO_COMP$\n", diff --git a/notebooks/machine_learning/raw/ex_automl.ipynb b/notebooks/machine_learning/raw/ex_automl.ipynb index f5367fd81..4df239a62 100644 --- a/notebooks/machine_learning/raw/ex_automl.ipynb +++ b/notebooks/machine_learning/raw/ex_automl.ipynb @@ -24,7 +24,7 @@ "To begin, we'll need to make sure that your notebook is set up to run the code. Begin by looking at the **\"Settings\"** menu to the right of your notebook. Your menu will look like one of the following:\n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "If your **\"Internet\"** setting appears as a **\"Requires phone verification\"** link, click on this link. This will bring you to a new window; then, follow the instructions to verify your account. After following this step, your **\"Internet\"** setting will appear **\"Off\"**, as in the example to the right.\n", @@ -32,7 +32,7 @@ "Once your **\"Internet\"** setting appears as **\"Off\"**, click to turn it on. You'll see a pop-up window that you'll need to **\"Accept\"** in order to complete the process and have the setting switched to **\"On\"**. \n", "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "Once you have followed the steps above, you're ready to proceed!" @@ -49,11 +49,11 @@ "\n", "Then, connect your Google Cloud account to this notebook by selecting **Add-ons > Google Cloud Services**.\n", "\n", - "![](https://i.imgur.com/UHB4P5o.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/UHB4P5o.png)\n", "\n", "In the pop-up window, select **Cloud Storage** and **AutoML (beta)**. Then click on **Link Account**.\n", "\n", - "![](https://i.imgur.com/IlbdbHD.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/IlbdbHD.png)\n", "\n", "You'll see another pop-up that tells you about Google AutoML pricing. Once you have reviewed this information, click on **ENABLE**. Then, sign in with the e-mail address that is linked to your Google Cloud account. \n", "\n", diff --git a/notebooks/machine_learning/raw/tut1.ipynb b/notebooks/machine_learning/raw/tut1.ipynb index 8943369e7..728643ac6 100644 --- a/notebooks/machine_learning/raw/tut1.ipynb +++ b/notebooks/machine_learning/raw/tut1.ipynb @@ -17,7 +17,7 @@ "\n", "For simplicity, we'll start with the simplest possible decision tree. \n", "\n", - "![First Decision Trees](http://i.imgur.com/7tsb5b1.png)\n", + "![First Decision Trees](https://storage.googleapis.com/kaggle-media/learn/images/7tsb5b1.png)\n", "\n", "It divides houses into only two categories. 
The predicted price for any house under consideration is the historical average price of houses in the same category.\n", "\n", @@ -29,13 +29,13 @@ "# Improving the Decision Tree\n", "Which of the following two decision trees is more likely to result from fitting the real estate training data?\n", "\n", - "![First Decision Trees](http://i.imgur.com/prAjgku.png)\n", + "![First Decision Trees](https://storage.googleapis.com/kaggle-media/learn/images/prAjgku.png)\n", "\n", "\n", "The decision tree on the left (Decision Tree 1) probably makes more sense, because it captures the reality that houses with more bedrooms tend to sell at higher prices than houses with fewer bedrooms. The biggest shortcoming of this model is that it doesn't capture most factors affecting home price, like number of bathrooms, lot size, location, etc. \n", "\n", "You can capture more factors using a tree that has more \"splits.\" These are called \"deeper\" trees. A decision tree that also considers the total size of each house's lot might look like this: \n", - "![Depth 2 Tree](http://i.imgur.com/R3ywQsR.png)\n", + "![Depth 2 Tree](https://storage.googleapis.com/kaggle-media/learn/images/R3ywQsR.png)\n", "\n", "You predict the price of any house by tracing through the decision tree, always picking the path corresponding to that house's characteristics. The predicted price for the house is at the bottom of the tree. The point at the bottom where we make a prediction is called a **leaf.** \n", "\n", diff --git a/notebooks/machine_learning/raw/tut5.ipynb b/notebooks/machine_learning/raw/tut5.ipynb index 308a633a0..202a8627b 100644 --- a/notebooks/machine_learning/raw/tut5.ipynb +++ b/notebooks/machine_learning/raw/tut5.ipynb @@ -12,7 +12,7 @@ "\n", "You can see in scikit-learn's [documentation](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html) that the decision tree model has many options (more than you'll want or need for a long time). 
The most important options determine the tree's depth. Recall from [the first lesson in this course](https://www.kaggle.com/dansbecker/how-models-work) that a tree's depth is a measure of how many splits it makes before coming to a prediction. This is a relatively shallow tree\n", "\n", - "![Depth 2 Tree](http://i.imgur.com/R3ywQsR.png)\n", + "![Depth 2 Tree](https://storage.googleapis.com/kaggle-media/learn/images/R3ywQsR.png)\n", "\n", "In practice, it's not uncommon for a tree to have 10 splits between the top level (all houses) and a leaf. As the tree gets deeper, the dataset gets sliced up into leaves with fewer houses. If a tree only had 1 split, it divides the data into 2 groups. If each group is split again, we would get 4 groups of houses. Splitting each of those again would create 8 groups. If we keep doubling the number of groups by adding more splits at each level, we'll have \\\\(2^{10}\\\\) groups of houses by the time we get to the 10th level. That's 1024 leaves. \n", "\n", @@ -24,7 +24,7 @@ "\n", "Since we care about accuracy on new data, which we estimate from our validation data, we want to find the sweet spot between underfitting and overfitting. Visually, we want the low point of the (red) validation curve in the figure below.\n", "\n", - "![underfitting_overfitting](http://i.imgur.com/AXSEOfI.png)\n", + "![underfitting_overfitting](https://storage.googleapis.com/kaggle-media/learn/images/AXSEOfI.png)\n", "\n", "# Example\n", "There are a few alternatives for controlling the tree depth, and many allow for some routes through the tree to have greater depth than other routes. But the *max_leaf_nodes* argument provides a very sensible way to control overfitting vs underfitting. 
The more leaves we allow the model to make, the more we move from the underfitting area in the above graph to the overfitting area.\n", diff --git a/notebooks/machine_learning/raw/tut7.ipynb b/notebooks/machine_learning/raw/tut7.ipynb index dd67b50f5..dc657704e 100644 --- a/notebooks/machine_learning/raw/tut7.ipynb +++ b/notebooks/machine_learning/raw/tut7.ipynb @@ -8,7 +8,7 @@ "\n", "In the next exercise, you will create and submit predictions for the [House Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course). \n", "\n", - "![join competition image](https://i.imgur.com/axBzctl.png)\n", + "![join competition image](https://storage.googleapis.com/kaggle-media/learn/images/axBzctl.png)\n", "\n", "# Your Turn \n", "Use what you've learned in the course to **[create a submission](#$NEXT_NOTEBOOK_URL$)** to a Kaggle competition!" diff --git a/notebooks/machine_learning/raw/tut8.ipynb b/notebooks/machine_learning/raw/tut8.ipynb index 88efb5155..6d5aa31bc 100644 --- a/notebooks/machine_learning/raw/tut8.ipynb +++ b/notebooks/machine_learning/raw/tut8.ipynb @@ -413,7 +413,7 @@ "source": [ "It's a little easier to understand as a nice little figure like so:\n", "\n", - "\"example\n", + "\"example\n", "\n", "The rows of the confusion matrix are the true class and the columns are the predicted class. The diagonal tells us how many of each class the model predicted correctly. The off-diagonals show where the model is making wrong predictions, where it is \"confused.\"" ] diff --git a/notebooks/machine_learning/raw/tut_automl.ipynb b/notebooks/machine_learning/raw/tut_automl.ipynb index b14da435b..ccd78433a 100644 --- a/notebooks/machine_learning/raw/tut_automl.ipynb +++ b/notebooks/machine_learning/raw/tut_automl.ipynb @@ -8,7 +8,7 @@ "\n", "When applying machine learning to real-world data, there are a lot of steps involved in the process -- starting with collecting the data and ending with generating predictions. 
(*We work with the seven steps of machine learning, as defined by Yufeng Guo **[here](https://towardsdatascience.com/the-7-steps-of-machine-learning-2877d7e5548e)**.*)\n", "\n", - "![](https://i.imgur.com/mqTCqBR.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/mqTCqBR.png)\n", "\n", "It all begins with **Step 1: Gather the data**. In industry, there are important considerations you need to take into account when building a dataset, such as **[target leakage](https://www.kaggle.com/alexisbcook/data-leakage)**. When participating in a Kaggle competition, this step is already completed for you.\n", "\n", @@ -32,7 +32,7 @@ "\n", "In this notebook, you'll learn how to use [**Google Cloud AutoML Tables**](https://cloud.google.com/automl-tables/docs/beginners-guide) to automate the machine learning process. While Kaggle has already taken care of the data collection, AutoML Tables will take care of all remaining steps.\n", "\n", - "![](https://i.imgur.com/5SekA3O.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/5SekA3O.png)" ] }, { diff --git a/notebooks/machine_learning/raw/tut_titanic.ipynb b/notebooks/machine_learning/raw/tut_titanic.ipynb index 7c403cf74..45af790a1 100644 --- a/notebooks/machine_learning/raw/tut_titanic.ipynb +++ b/notebooks/machine_learning/raw/tut_titanic.ipynb @@ -16,7 +16,7 @@ "\n", "The first thing to do is to join the competition! Open a new window with **[the competition page](https://www.kaggle.com/c/titanic)**, and click on the **\"Join Competition\"** button, if you haven't already. (_If you see a \"Submit Predictions\" button instead of a \"Join Competition\" button, you have already joined the competition, and don't need to do so again._)\n", "\n", - "![](https://i.imgur.com/rRFchA8.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/rRFchA8.png)\n", "\n", "This takes you to the rules acceptance page. You must accept the competition rules in order to participate. 
These rules govern how many submissions you can make per day, the maximum team size, and other competition-specific details. Then, click on **\"I Understand and Accept\"** to indicate that you will abide by the competition rules.\n", "\n", @@ -28,7 +28,7 @@ "\n", "To take a look at the competition data, click on the **Data tab** at the top of the competition page. Then, scroll down to find the list of files. \n", "\n", - "![](https://i.imgur.com/LiM3JA7.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/LiM3JA7.png)\n", "\n", "There are three files in the data: (1) **train.csv**, (2) **test.csv**, and (3) **gender_submission.csv**.\n", "\n", @@ -36,11 +36,11 @@ "\n", "**train.csv** contains the details of a subset of the passengers on board (891 passengers, to be exact -- where each passenger gets a different row in the table). To investigate this data, click on the name of the file under the **\"Data Sources\"** column (on the left of the screen). Once you've done this, all of the column names (along with a brief description of what they contain) are listed to the right of the screen, under the **\"Columns\"** heading. \n", "\n", - "![](https://i.imgur.com/w5HFxp8.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/w5HFxp8.png)\n", "\n", "You can view all of the data in the same window. \n", "\n", - "![](https://i.imgur.com/CEPZi6z.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/CEPZi6z.png)\n", "\n", "The values in the second column (**\"Survived\"**) can be used to determine whether each passenger survived or not: \n", "- if it's a \"1\", the passenger survived.\n", @@ -64,7 +64,7 @@ "\n", "As a benchmark, you'll download the **gender_submission.csv** file and submit it to the competition. Begin by clicking on the download link to the right of the name of the file. 
\n", "\n", - "![](https://i.imgur.com/Pl1DIA8.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/Pl1DIA8.png)\n", "\n", "This downloads the file to your computer. Then:\n", "- Click on the blue **\"Submit Predictions\"** button in the top right corner of the competition page. (_This button now appears where the **\"Join Competition\"** button was._)\n", @@ -87,25 +87,25 @@ "\n", "Begin by clicking on the **Notebooks tab** on the competition page. Then, click on **\"New Notebook\"**.\n", "\n", - "![](https://i.imgur.com/DHPyh7s.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/DHPyh7s.png)\n", "\n", "Next, click on **\"Create\"**. (_Don't change the default settings: so, **\"Python\"** should appear under \"Select language\", and you should have **\"Notebook\"** selected under \"Select type\"._)\n", "\n", - "![](https://i.imgur.com/qUVvr8k.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/qUVvr8k.png)\n", "\n", "Your notebook will take a few seconds to load. In the top left corner, you can see the name of your notebook -- something like **\"kernel2daed3cd79\"**.\n", "\n", - "![](https://i.imgur.com/64ZFT1L.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/64ZFT1L.png)\n", "\n", "You can edit the name by clicking on it. Change it to something more descriptive, like **\"Getting Started with Titanic\"**. \n", "\n", - "![](https://i.imgur.com/uwyvzXq.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/uwyvzXq.png)\n", "\n", "## Your first lines of code\n", "\n", "When you start a new notebook, it has two gray boxes for storing code. We refer to these gray boxes as \"code cells\".\n", "\n", - "![](https://i.imgur.com/q9mwkZM.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/q9mwkZM.png)\n", "\n", "The first code cell already has some code in it. To run this code, put your cursor in the code cell. 
(_If your cursor is in the right place, you'll notice a blue vertical line to the left of the gray box._) Then, either hit the play button (which appears to the left of the blue line), or hit **[Shift] + [Enter]** on your keyboard.\n", "\n", @@ -148,7 +148,7 @@ "\n", "The second code cell in your notebook now appears below the three lines of output with the file locations.\n", "\n", - "![](https://i.imgur.com/OQBax9n.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/OQBax9n.png)\n", "\n", "Type the two lines of code below into your second code cell. Then, once you're done, either click on the blue play button, or hit **[Shift] + [Enter]**. " ] @@ -174,7 +174,7 @@ "> If you're not already familiar with Python (and pandas), the code shouldn't make sense to you -- but don't worry! The point of this tutorial is to (quickly!) make your first submission to the competition. At the end of the tutorial, we suggest resources to continue your learning.\n", "\n", "At this point, you should have at least three code cells in your notebook. \n", - "![](https://i.imgur.com/ReLhYca.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/ReLhYca.png)\n", "\n", "Copy the code below into the third code cell of your notebook to load the contents of the **test.csv** file. Don't forget to click on the play button (or hit **[Shift] + [Enter]**)!" ] @@ -261,7 +261,7 @@ "\n", "We'll build a [**random forest model**](https://www.kaggle.com/dansbecker/random-forests). This model is constructed of several \"trees\" (there are three trees in the picture below, but we'll construct 100!) that will individually consider each passenger's data and vote on whether the individual survived. 
Then, the random forest model makes a democratic decision: the outcome with the most votes wins!\n", "\n", - "![](https://i.imgur.com/AC9Bq63.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/AC9Bq63.png)\n", "\n", "The code cell below looks for patterns in four different columns (**\"Pclass\"**, **\"Sex\"**, **\"SibSp\"**, and **\"Parch\"**) of the data. It constructs the trees in the random forest model based on patterns in the **train.csv** file, before generating predictions for the passengers in **test.csv**. The code also saves these new predictions in a CSV file **my_submission.csv**.\n", "\n", @@ -305,7 +305,7 @@ "- This generates a window in the bottom left corner of the notebook. After it has finished running, click on the number to the right of the **\"Save Version\"** button. This pulls up a list of versions on the right of the screen. Click on the ellipsis **(...)** to the right of the most recent version, and select **Open in Viewer**. \n", "- Click on the **Output** tab on the right of the screen. Then, click on the **\"Submit to Competition\"** button to submit your results.\n", "\n", - "![](https://i.imgur.com/kKKnHpx.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/kKKnHpx.png)\n", "\n", "Once your file is successfully submitted, you should receive a message saying that you've moved up the leaderboard. Great work!" ] diff --git a/notebooks/microchallenges/raw/tut2.ipynb b/notebooks/microchallenges/raw/tut2.ipynb index 155aa5179..a31641979 100644 --- a/notebooks/microchallenges/raw/tut2.ipynb +++ b/notebooks/microchallenges/raw/tut2.ipynb @@ -8,7 +8,7 @@ "\n", "Optimization gets less attention than it deserves. 
So this micro-challenge will test your optimization skills as you write a function to improve how airlines set prices.\n", "\n", - "![Imgur](https://i.imgur.com/AKrbLMR.jpg)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/AKrbLMR.jpg)\n", "\n", "# Your turn\n", "\n", diff --git a/notebooks/ml_explainability/raw/ex5_shap_advanced.ipynb b/notebooks/ml_explainability/raw/ex5_shap_advanced.ipynb index c17c1e5d8..a09923023 100644 --- a/notebooks/ml_explainability/raw/ex5_shap_advanced.ipynb +++ b/notebooks/ml_explainability/raw/ex5_shap_advanced.ipynb @@ -225,7 +225,7 @@ "\n", "The x-axis shows `feature_of_interest` and the points are colored based on `other_feature`.\n", "\n", - "![Imgur](https://i.imgur.com/zFdHneM.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/zFdHneM.png)\n", "\n", "Is there an interaction between `feature_of_interest` and `other_feature`? \n", "If so, does `feature_of_interest` have a more positive impact on predictions when `other_feature` is high or when `other_feature` is low?\n", diff --git a/notebooks/ml_explainability/raw/tut2_perm_importance.ipynb b/notebooks/ml_explainability/raw/tut2_perm_importance.ipynb index dccfdee1d..5a8a7139c 100644 --- a/notebooks/ml_explainability/raw/tut2_perm_importance.ipynb +++ b/notebooks/ml_explainability/raw/tut2_perm_importance.ipynb @@ -24,7 +24,7 @@ "\n", "Consider data with the following format:\n", "\n", - "![Data](https://i.imgur.com/wjMAysV.png)\n", + "![Data](https://storage.googleapis.com/kaggle-media/learn/images/wjMAysV.png)\n", "\n", "We want to predict a person's height when they become 20 years old, using data that is available at age 10.\n", "\n", @@ -34,7 +34,7 @@ "\n", "Instead we will ask the following question: If I randomly shuffle a single column of the validation data, leaving the target and all other columns in place, how would that affect the accuracy of predictions in that now-shuffled data?\n", "\n", - 
"![Shuffle](https://i.imgur.com/h17tMUU.png)\n", + "![Shuffle](https://storage.googleapis.com/kaggle-media/learn/images/h17tMUU.png)\n", "\n", "Randomly re-ordering a single column should cause less accurate predictions, since the resulting data no longer corresponds to anything observed in the real world. Model accuracy especially suffers if we shuffle a column that the model relied on heavily for predictions. In this case, shuffling `height at age 10` would cause terrible predictions. If we shuffled `socks owned` instead, the resulting predictions wouldn't suffer nearly as much.\n", "\n", diff --git a/notebooks/ml_explainability/raw/tut4_shap_basic.ipynb b/notebooks/ml_explainability/raw/tut4_shap_basic.ipynb index 85407a730..06b2ea122 100644 --- a/notebooks/ml_explainability/raw/tut4_shap_basic.ipynb +++ b/notebooks/ml_explainability/raw/tut4_shap_basic.ipynb @@ -37,9 +37,9 @@ "\n", "That is, the SHAP values of all features sum up to explain why my prediction was different from the baseline. This allows us to decompose a prediction in a graph like this:\n", "\n", - "![Imgur](https://i.imgur.com/JVD2U7k.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/JVD2U7k.png)\n", "\n", - "*If you want a larger view of this graph, [here is a link](https://i.imgur.com/JVD2U7k.png)*" + "*If you want a larger view of this graph, [here is a link](https://storage.googleapis.com/kaggle-media/learn/images/JVD2U7k.png)*" ] }, { diff --git a/notebooks/ml_explainability/raw/tut5_shap_advanced.ipynb b/notebooks/ml_explainability/raw/tut5_shap_advanced.ipynb index c580360a4..08683ec19 100644 --- a/notebooks/ml_explainability/raw/tut5_shap_advanced.ipynb +++ b/notebooks/ml_explainability/raw/tut5_shap_advanced.ipynb @@ -22,9 +22,9 @@ "\n", "These are harder to calculate with the sophisticated models we use in practice. 
But through some algorithmic cleverness, Shap values allow us to decompose any prediction into the sum of effects of each feature value, yielding a graph like this:\n", "\n", - "![Imgur](https://i.imgur.com/JVD2U7k.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/JVD2U7k.png)\n", "\n", - "[Link to larger view](https://i.imgur.com/JVD2U7k.png)*\n", + "[Link to larger view](https://storage.googleapis.com/kaggle-media/learn/images/JVD2U7k.png)*\n", "\n", "In addition to this nice breakdown for each prediction, the [Shap library](https://github.com/slundberg/shap) offers great visualizations of groups of Shap values. We will focus on two of these visualizations. These visualizations have conceptual similarities to permutation importance and partial dependence plots. So multiple threads from the previous exercises will come together here.\n", "\n", @@ -38,7 +38,7 @@ "\n", "SHAP summary plots give us a birds-eye view of feature importance and what is driving it. We'll walk through an example plot for the soccer data:\n", "\n", - "![Imgur](https://i.imgur.com/Ew9X3su.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/Ew9X3su.png)\n", "\n", "This plot is made of many dots. Each dot has three characteristics:\n", "- Vertical location shows what feature it is depicting\n", @@ -126,13 +126,13 @@ "\n", "But there's a lot they don't show. For instance, what is the distribution of effects? Is the effect of having a certain value pretty constant, or does it vary a lot depending on the values of other feaures. SHAP dependence contribution plots provide a similar insight to PDP's, but they add a lot more detail.\n", "\n", - "![Imgur](https://i.imgur.com/uQ2JmBm.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/uQ2JmBm.png)\n", "\n", "Start by focusing on the shape, and we'll come back to color in a minute. Each dot represents a row of the data. 
The horizontal location is the actual value from the dataset, and the vertical location shows what having that value did to the prediction. The fact this slopes upward says that the more you possess the ball, the higher the model's prediction is for winning the *Man of the Match* award.\n", "\n", "The spread suggests that other features must interact with Ball Possession %. For example, here we have highlighted two points with similar ball possession values. That value caused one prediction to increase, and it caused the other prediction to decrease.\n", "\n", - "![Imgur](https://i.imgur.com/tFzp6jc.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/tFzp6jc.png)\n", "\n", "For comparison, a simple linear regression would produce plots that are perfect lines, without this spread.\n", "\n", @@ -140,7 +140,7 @@ "\n", "Consider the following very narrow example for concreteness.\n", "\n", - "![Imgur](https://i.imgur.com/NVB3eNW.png)\n", + "![Imgur](https://storage.googleapis.com/kaggle-media/learn/images/NVB3eNW.png)\n", "\n", "These two points stand out spatially as being far away from the upward trend. They are both colored purple, indicating the team scored one goal. You can interpret this to say **In general, having the ball increases a team's chance of having their player win the award. But if they only score one goal, that trend reverses and the award judges may penalize them for having the ball so much if they score that little.**\n", "\n", diff --git a/notebooks/ml_intermediate/raw/ex1.ipynb b/notebooks/ml_intermediate/raw/ex1.ipynb index bf5451faf..7c25185ea 100644 --- a/notebooks/ml_intermediate/raw/ex1.ipynb +++ b/notebooks/ml_intermediate/raw/ex1.ipynb @@ -34,7 +34,7 @@ "source": [ "You will work with data from the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course) to predict home prices in Iowa using 79 explanatory variables describing (almost) every aspect of the homes. 
\n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and validation features in `X_train` and `X_valid`, along with the prediction targets in `y_train` and `y_valid`. The test features are loaded in `X_test`. (_If you need to review **features** and **prediction targets**, please check out [this short tutorial](https://www.kaggle.com/dansbecker/your-first-machine-learning-model). To read about model **validation**, look [here](https://www.kaggle.com/dansbecker/model-validation). Alternatively, if you'd prefer to look through a full course to review all of these topics, start [here](https://www.kaggle.com/learn/machine-learning).)_" ] diff --git a/notebooks/ml_intermediate/raw/ex2.ipynb b/notebooks/ml_intermediate/raw/ex2.ipynb index c5739e765..72a56f187 100644 --- a/notebooks/ml_intermediate/raw/ex2.ipynb +++ b/notebooks/ml_intermediate/raw/ex2.ipynb @@ -34,7 +34,7 @@ "source": [ "In this exercise, you will work with data from the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course). \n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and validation sets in `X_train`, `X_valid`, `y_train`, and `y_valid`. The test set is loaded in `X_test`." ] @@ -608,7 +608,7 @@ "\n", "First, you'll need to join the competition if you haven't already. So open a new window by clicking on [this link](https://www.kaggle.com/c/home-data-for-ml-course). 
Then click on the **Join Competition** button.\n", "\n", - "![join competition image](https://i.imgur.com/wLmFtH3.png)\n", + "![join competition image](https://storage.googleapis.com/kaggle-media/learn/images/wLmFtH3.png)\n", "\n", "Next, follow the instructions below:\n", "#$SUBMIT_TO_COMP$\n", diff --git a/notebooks/ml_intermediate/raw/ex3.ipynb b/notebooks/ml_intermediate/raw/ex3.ipynb index 5f2794f8c..ec1979b80 100644 --- a/notebooks/ml_intermediate/raw/ex3.ipynb +++ b/notebooks/ml_intermediate/raw/ex3.ipynb @@ -34,7 +34,7 @@ "source": [ "In this exercise, you will work with data from the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course). \n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and validation sets in `X_train`, `X_valid`, `y_train`, and `y_valid`. The test set is loaded in `X_test`." ] diff --git a/notebooks/ml_intermediate/raw/ex4.ipynb b/notebooks/ml_intermediate/raw/ex4.ipynb index 36306137c..6083901e6 100644 --- a/notebooks/ml_intermediate/raw/ex4.ipynb +++ b/notebooks/ml_intermediate/raw/ex4.ipynb @@ -34,7 +34,7 @@ "source": [ "You will work with data from the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course). \n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and validation sets in `X_train`, `X_valid`, `y_train`, and `y_valid`. The test set is loaded in `X_test`." 
] diff --git a/notebooks/ml_intermediate/raw/ex5.ipynb b/notebooks/ml_intermediate/raw/ex5.ipynb index e423597fd..95f4ad5f1 100644 --- a/notebooks/ml_intermediate/raw/ex5.ipynb +++ b/notebooks/ml_intermediate/raw/ex5.ipynb @@ -34,7 +34,7 @@ "source": [ "You will work with the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course) from the previous exercise. \n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and test data in `X` and `X_test`. For simplicity, we drop categorical variables." ] diff --git a/notebooks/ml_intermediate/raw/ex6.ipynb b/notebooks/ml_intermediate/raw/ex6.ipynb index 7c157a0fa..2d4cc0786 100644 --- a/notebooks/ml_intermediate/raw/ex6.ipynb +++ b/notebooks/ml_intermediate/raw/ex6.ipynb @@ -34,7 +34,7 @@ "source": [ "You will work with the [Housing Prices Competition for Kaggle Learn Users](https://www.kaggle.com/c/home-data-for-ml-course) dataset from the previous exercise. \n", "\n", - "![Ames Housing dataset image](https://i.imgur.com/lTJVG4e.png)\n", + "![Ames Housing dataset image](https://storage.googleapis.com/kaggle-media/learn/images/lTJVG4e.png)\n", "\n", "Run the next code cell without changes to load the training and validation sets in `X_train`, `X_valid`, `y_train`, and `y_valid`. The test set is loaded in `X_test`." ] diff --git a/notebooks/ml_intermediate/raw/tut2.ipynb b/notebooks/ml_intermediate/raw/tut2.ipynb index 5105ff150..a605fd2bd 100644 --- a/notebooks/ml_intermediate/raw/tut2.ipynb +++ b/notebooks/ml_intermediate/raw/tut2.ipynb @@ -26,7 +26,7 @@ "\n", "The simplest option is to drop columns with missing values. 
\n", "\n", - "![tut2_approach1](https://i.imgur.com/Sax80za.png)\n", + "![tut2_approach1](https://storage.googleapis.com/kaggle-media/learn/images/Sax80za.png)\n", "\n", "Unless most values in the dropped columns are missing, the model loses access to a lot of (potentially useful!) information with this approach. As an extreme example, consider a dataset with 10,000 rows, where one important column is missing a single entry. This approach would drop the column entirely!\n", "\n", @@ -34,7 +34,7 @@ "\n", "**Imputation** fills in the missing values with some number. For instance, we can fill in the mean value along each column. \n", "\n", - "![tut2_approach2](https://i.imgur.com/4BpnlPA.png)\n", + "![tut2_approach2](https://storage.googleapis.com/kaggle-media/learn/images/4BpnlPA.png)\n", "\n", "The imputed value won't be exactly right in most cases, but it usually leads to more accurate models than you would get from dropping the column entirely.\n", "\n", @@ -42,7 +42,7 @@ "\n", "Imputation is the standard approach, and it usually works well. However, imputed values may be systematically above or below their actual values (which weren't collected in the dataset). Or rows with missing values may be unique in some other way. In that case, your model would make better predictions by considering which values were originally missing. \n", "\n", - "![tut3_approach3](https://i.imgur.com/UWOyg4a.png)\n", + "![tut3_approach3](https://storage.googleapis.com/kaggle-media/learn/images/UWOyg4a.png)\n", "\n", "In this approach, we impute the missing values, as before. 
And, additionally, for each column with missing entries in the original dataset, we add a new column that shows the location of the imputed entries.\n", "\n", diff --git a/notebooks/ml_intermediate/raw/tut3.ipynb b/notebooks/ml_intermediate/raw/tut3.ipynb index e1b8e316d..9410846f3 100644 --- a/notebooks/ml_intermediate/raw/tut3.ipynb +++ b/notebooks/ml_intermediate/raw/tut3.ipynb @@ -26,7 +26,7 @@ "\n", "**Ordinal encoding** assigns each unique value to a different integer.\n", "\n", - "![tut3_ordinalencode](https://i.imgur.com/tEogUAr.png)\n", + "![tut3_ordinalencode](https://storage.googleapis.com/kaggle-media/learn/images/tEogUAr.png)\n", "\n", "This approach assumes an ordering of the categories: \"Never\" (0) < \"Rarely\" (1) < \"Most days\" (2) < \"Every day\" (3).\n", "\n", @@ -36,7 +36,7 @@ "\n", "**One-hot encoding** creates new columns indicating the presence (or absence) of each possible value in the original data. To understand this, we'll work through an example.\n", "\n", - "![tut3_onehot](https://i.imgur.com/TW5m0aJ.png)\n", + "![tut3_onehot](https://storage.googleapis.com/kaggle-media/learn/images/TW5m0aJ.png)\n", "\n", "In the original dataset, \"Color\" is a categorical variable with three categories: \"Red\", \"Yellow\", and \"Green\". The corresponding one-hot encoding contains one column for each possible value, and one row for each row in the original dataset. Wherever the original value was \"Red\", we put a 1 in the \"Red\" column; if the original value was \"Yellow\", we put a 1 in the \"Yellow\" column, and so on. \n", "\n", diff --git a/notebooks/ml_intermediate/raw/tut5.ipynb b/notebooks/ml_intermediate/raw/tut5.ipynb index 25a4968ed..7951995f4 100644 --- a/notebooks/ml_intermediate/raw/tut5.ipynb +++ b/notebooks/ml_intermediate/raw/tut5.ipynb @@ -29,7 +29,7 @@ "\n", "For example, we could begin by dividing the data into 5 pieces, each 20% of the full dataset. In this case, we say that we have broken the data into 5 \"**folds**\". 
\n", "\n", - "![tut5_crossval](https://i.imgur.com/9k60cVA.png)\n", + "![tut5_crossval](https://storage.googleapis.com/kaggle-media/learn/images/9k60cVA.png)\n", "\n", "Then, we run one experiment for each fold:\n", "- In **Experiment 1**, we use the first fold as a validation (or holdout) set and everything else as training data. This gives us a measure of model quality based on a 20% holdout set. \n", diff --git a/notebooks/ml_intermediate/raw/tut6.ipynb b/notebooks/ml_intermediate/raw/tut6.ipynb index 7e5ed77c9..d18b6806f 100644 --- a/notebooks/ml_intermediate/raw/tut6.ipynb +++ b/notebooks/ml_intermediate/raw/tut6.ipynb @@ -27,7 +27,7 @@ "- Finally, we add the new model to ensemble, and ...\n", "- ... repeat!\n", "\n", - "![tut6_boosting](https://i.imgur.com/MvCGENh.png)\n", + "![tut6_boosting](https://storage.googleapis.com/kaggle-media/learn/images/MvCGENh.png)\n", "\n", "\n", "# Example\n", diff --git a/notebooks/ml_intermediate/raw/tut7.ipynb b/notebooks/ml_intermediate/raw/tut7.ipynb index 22fa01ef5..5616004dc 100644 --- a/notebooks/ml_intermediate/raw/tut7.ipynb +++ b/notebooks/ml_intermediate/raw/tut7.ipynb @@ -35,7 +35,7 @@ "\n", "To prevent this type of data leakage, any variable updated (or created) after the target value is realized should be excluded. \n", "\n", - "![tut7_leakydata](https://i.imgur.com/y7hfTYe.png)" + "![tut7_leakydata](https://storage.googleapis.com/kaggle-media/learn/images/y7hfTYe.png)" ] }, { diff --git a/notebooks/nlp/raw/ex1.ipynb b/notebooks/nlp/raw/ex1.ipynb index 4c2c827d8..a069492ed 100644 --- a/notebooks/nlp/raw/ex1.ipynb +++ b/notebooks/nlp/raw/ex1.ipynb @@ -9,7 +9,7 @@ "You're a consultant for [DelFalco's Italian Restaurant](https://defalcosdeli.com/index.html).\n", "The owner asked you to identify whether there are any foods on their menu that diners find disappointing. \n", "\n", - "\"Meatball\n", + "\"Meatball\n", "\n", "Before getting started, run the following cell to set up code checking." 
] diff --git a/notebooks/pandas/raw/ex_0.ipynb b/notebooks/pandas/raw/ex_0.ipynb index 796cc2c09..120c492f5 100644 --- a/notebooks/pandas/raw/ex_0.ipynb +++ b/notebooks/pandas/raw/ex_0.ipynb @@ -39,7 +39,7 @@ "\n", "In the cell below, create a DataFrame `fruits` that looks like this:\n", "\n", - "![](https://i.imgur.com/Ax3pp2A.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/Ax3pp2A.png)" ] }, { @@ -126,7 +126,7 @@ "\n", "Create a dataframe `fruit_sales` that matches the diagram below:\n", "\n", - "![](https://i.imgur.com/CHPn7ZF.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/CHPn7ZF.png)" ] }, { @@ -273,7 +273,7 @@ "\n", "Read the following csv dataset of wine reviews into a DataFrame called `reviews`:\n", "\n", - "![](https://i.imgur.com/74RCZtU.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/74RCZtU.png)\n", "\n", "The filepath to the csv file is `../input/wine-reviews/winemag-data_first150k.csv`. The first few lines look like:\n", "\n", diff --git a/notebooks/pandas/raw/ex_1.ipynb b/notebooks/pandas/raw/ex_1.ipynb index 8891cdcf3..fd6953ebd 100644 --- a/notebooks/pandas/raw/ex_1.ipynb +++ b/notebooks/pandas/raw/ex_1.ipynb @@ -394,7 +394,7 @@ "\n", "In other words, generate the following DataFrame:\n", "\n", - "![](https://i.imgur.com/sHZvI1O.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/sHZvI1O.png)" ] }, { @@ -456,7 +456,7 @@ "\n", "Create a variable `df` containing the `country`, `province`, `region_1`, and `region_2` columns of the records with the index labels `0`, `1`, `10`, and `100`. 
In other words, generate the following DataFrame:\n", "\n", - "![](https://i.imgur.com/FUCGiKP.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/FUCGiKP.png)" ] }, { diff --git a/notebooks/sql/raw/ex6.ipynb b/notebooks/sql/raw/ex6.ipynb index 874a0931e..ad19658fe 100644 --- a/notebooks/sql/raw/ex6.ipynb +++ b/notebooks/sql/raw/ex6.ipynb @@ -182,7 +182,7 @@ "\n", "A **WHERE** clause can limit your results to rows with certain text using the **LIKE** feature. For example, to select just the third row of the `pets` table from the tutorial, we could use the query in the picture below.\n", "\n", - "![](https://i.imgur.com/RccsXBr.png) \n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/RccsXBr.png) \n", "\n", "You can also use `%` as a \"wildcard\" for any number of characters. So you can also get the third row with:\n", "\n", diff --git a/notebooks/sql/raw/tut1.ipynb b/notebooks/sql/raw/tut1.ipynb index 4e0dcc579..354d92af3 100644 --- a/notebooks/sql/raw/tut1.ipynb +++ b/notebooks/sql/raw/tut1.ipynb @@ -114,7 +114,7 @@ "source": [ "In the next section, you'll explore the contents of this table in more detail. For now, take the time to use the image below to consolidate what you've learned so far.\n", "\n", - "![first_commands](https://i.imgur.com/biYqbUB.png)" + "![first_commands](https://storage.googleapis.com/kaggle-media/learn/images/biYqbUB.png)" ] }, { diff --git a/notebooks/sql/raw/tut2.ipynb b/notebooks/sql/raw/tut2.ipynb index a60167d1d..d9863c59e 100644 --- a/notebooks/sql/raw/tut2.ipynb +++ b/notebooks/sql/raw/tut2.ipynb @@ -12,7 +12,7 @@ "\n", "For clarity, we'll work with a small imaginary dataset `pet_records` which contains just one table, called `pets`. \n", "\n", - "![](https://i.imgur.com/fI5Pvvp.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/fI5Pvvp.png)\n", "\n", "# SELECT ... 
FROM\n", "\n", @@ -22,7 +22,7 @@ "\n", "For instance, to select the `Name` column (from the `pets` table in the `pet_records` database in the `bigquery-public-data` project), our query would appear as follows: \n", "\n", - "![](https://i.imgur.com/c3GxYRt.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/c3GxYRt.png)\n", "\n", "Note that when writing an SQL query, the argument we pass to **FROM** is *not* in single or double quotation marks (' or \"). It is in backticks (\\`).\n", "\n", @@ -32,7 +32,7 @@ "\n", "The query below returns the entries from the `Name` column that are in rows where the `Animal` column has the text `'Cat'`. \n", "\n", - "![](https://i.imgur.com/HJOT8Kb.png)" + "![](https://storage.googleapis.com/kaggle-media/learn/images/HJOT8Kb.png)" ] }, { diff --git a/notebooks/sql/raw/tut3.ipynb b/notebooks/sql/raw/tut3.ipynb index 6733a9a42..a906d52e4 100644 --- a/notebooks/sql/raw/tut3.ipynb +++ b/notebooks/sql/raw/tut3.ipynb @@ -13,7 +13,7 @@ "\n", "To do this, you'll learn about three new techniques: **GROUP BY**, **HAVING** and **COUNT()**. Once again, we'll use this made-up table of information on pets. \n", "\n", - "![](https://i.imgur.com/fI5Pvvp.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/fI5Pvvp.png)\n", "\n", "# COUNT()\n", "\n", @@ -21,7 +21,7 @@ "\n", "For instance, if we **SELECT** the **COUNT()** of the `ID` column in the `pets` table, it will return 4, because there are 4 ID's in the table.\n", "\n", - "![](https://i.imgur.com/Eu5HkXq.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/Eu5HkXq.png)\n", "\n", "**COUNT()** is an example of an **aggregate function**, which takes many values and returns one. (Other examples of aggregate functions include **SUM()**, **AVG()**, **MIN()**, and **MAX()**.) As you'll notice in the picture above, aggregate functions introduce strange column names (like `f0__`). 
Later in this tutorial, you'll learn how to change the name to something more descriptive.\n", " \n", @@ -32,7 +32,7 @@ "\n", "For example, say we want to know how many of each type of animal we have in the `pets` table. We can use **GROUP BY** to group together rows that have the same value in the `Animal` column, while using **COUNT()** to find out how many ID's we have in each group. \n", "\n", - "![](https://i.imgur.com/tqE9Eh8.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/tqE9Eh8.png)\n", "\n", "It returns a table with three rows (one for each distinct animal). We can see that the `pets` table contains 1 rabbit, 1 dog, and 2 cats.\n", "\n", @@ -42,7 +42,7 @@ "\n", "So this query, for example, will only include groups that have more than one ID in them.\n", "\n", - "![](https://i.imgur.com/2ImXfHQ.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/2ImXfHQ.png)\n", "\n", "Since only one group meets the specified criterion, the query will return a table with only one row. \n", "\n", diff --git a/notebooks/sql/raw/tut4.ipynb b/notebooks/sql/raw/tut4.ipynb index 8a890c249..6f69cc501 100644 --- a/notebooks/sql/raw/tut4.ipynb +++ b/notebooks/sql/raw/tut4.ipynb @@ -10,7 +10,7 @@ "\n", "Now you'll learn how to change the order of your results using the **ORDER BY** clause, and you'll explore a popular use case by applying ordering to dates. To illustrate what you'll learn in this tutorial, we'll work with a slightly modified version of our familiar `pets` table.\n", "\n", - "![](https://i.imgur.com/b99zTLv.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/b99zTLv.png)\n", "\n", "# ORDER BY\n", "\n", @@ -18,15 +18,15 @@ "\n", "Notice that the rows are not ordered by the `ID` column. 
We can quickly remedy this with the query below.\n", "\n", - "![](https://i.imgur.com/6o9LuTA.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/6o9LuTA.png)\n", "\n", "The **ORDER BY** clause also works for columns containing text, where the results show up in alphabetical order.\n", "\n", - "![](https://i.imgur.com/ooxuzw3.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/ooxuzw3.png)\n", "\n", "You can reverse the order using the **DESC** argument (short for 'descending'). The next query sorts the table by the `Animal` column, where the values that are last in alphabetic order are returned first.\n", "\n", - "![](https://i.imgur.com/IElLJrR.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/IElLJrR.png)\n", " \n", "# Dates\n", "\n", @@ -50,15 +50,15 @@ "\n", "Often you'll want to look at part of a date, like the year or the day. You can do this with **EXTRACT**. We'll illustrate this with a slightly different table, called `pets_with_date`.\n", "\n", - "![](https://i.imgur.com/vhvHIh0.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/vhvHIh0.png)\n", "\n", "The query below returns two columns, where column `Day` contains the day corresponding to each entry the `Date` column from the `pets_with_date` table: \n", " \n", - "![](https://i.imgur.com/PhoWBO0.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/PhoWBO0.png)\n", "\n", "SQL is very smart about dates, and we can ask for information beyond just extracting part of the cell. 
For example, this query returns one column with just the week in the year (between 1 and 53) for each date in the `Date` column: \n", "\n", - "![](https://i.imgur.com/A5hqGxY.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/A5hqGxY.png)\n", "\n", "You can find all the functions you can use with dates in BigQuery in [this documentation](https://cloud.google.com/bigquery/docs/reference/legacy-sql#datetimefunctions) under \"Date and time functions\". " ] diff --git a/notebooks/sql/raw/tut5.ipynb b/notebooks/sql/raw/tut5.ipynb index 15c32ec9c..8fceced83 100644 --- a/notebooks/sql/raw/tut5.ipynb +++ b/notebooks/sql/raw/tut5.ipynb @@ -12,7 +12,7 @@ "\n", "Along the way, we'll use the familiar `pets` table, but now it includes the ages of the animals. \n", "\n", - "![](https://i.imgur.com/MXrsiAZ.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/MXrsiAZ.png)\n", "\n", "# AS\n", "\n", @@ -20,11 +20,11 @@ "\n", "To use **AS** in SQL, insert it right after the column you select. Here's an example of a query _without_ an **AS** clause:\n", "\n", - "![](https://i.imgur.com/VelX9tP.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/VelX9tP.png)\n", "\n", "And here's an example of the same query, but _with_ **AS**.\n", "\n", - "![](https://i.imgur.com/teF84tU.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/teF84tU.png)\n", "\n", "These queries return the same information, but in the second query the column returned by the **COUNT()** function will be called `Number`, rather than the default name of `f0__`.\n", "\n", @@ -36,13 +36,13 @@ "\n", "For instance, you might want to use the `pets` table to ask questions about older animals in particular. 
So you can start by creating a CTE which only contains information about animals more than five years old like this:\n", "\n", - "![](https://i.imgur.com/0Kz8q4x.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/0Kz8q4x.png)\n", " \n", "While this incomplete query above won't return anything, it creates a CTE that we can then refer to (as `Seniors`) while writing the rest of the query. \n", "\n", "We can finish the query by pulling the information that we want from the CTE. The complete query below first creates the CTE, and then returns all of the IDs from it. \n", "\n", - "![](https://i.imgur.com/3xQZM4p.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/3xQZM4p.png)\n", " \n", "You could do this without a CTE, but if this were the first part of a very long query, removing the CTE would make it much harder to follow.\n", "\n", diff --git a/notebooks/sql/raw/tut6.ipynb b/notebooks/sql/raw/tut6.ipynb index c357c3755..5d1b6221b 100644 --- a/notebooks/sql/raw/tut6.ipynb +++ b/notebooks/sql/raw/tut6.ipynb @@ -22,11 +22,11 @@ "- `Name` - name of the owner\n", "- `Pet_ID` - ID number for the pet that belongs to the owner (which matches the ID number for the pet in the `pets` table)\n", "\n", - "![](https://i.imgur.com/Rx6L4m1.png) \n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/Rx6L4m1.png) \n", "\n", "To get information that applies to a certain pet, we match the `ID` column in the `pets` table to the `Pet_ID` column in the `owners` table. \n", "\n", - "![](https://i.imgur.com/eXvIORm.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/eXvIORm.png)\n", "\n", "For example, \n", "- the `pets` table shows that Dr. Harris Bonkers is the pet with ID 1. \n", @@ -40,7 +40,7 @@ "\n", "Using **JOIN**, we can write a query to create a table with just two columns: the name of the pet and the name of the owner. 
\n", "\n", - "![](https://i.imgur.com/fLlng42.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/fLlng42.png)\n", "\n", "We combine information from both tables by matching rows where the `ID` column in the `pets` table matches the `Pet_ID` column in the `owners` table.\n", "\n", @@ -158,7 +158,7 @@ "source": [ "It's a big query, and so we'll investigate each piece separately.\n", "\n", - "![](https://i.imgur.com/QeufD01.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/QeufD01.png)\n", " \n", "We'll begin with the **JOIN** (highlighted in blue above). This specifies the sources of data and how to join them. We use **ON** to specify that we combine the tables by matching the values in the `repo_name` columns in the tables.\n", "\n", diff --git a/notebooks/sql_advanced/raw/ex1.ipynb b/notebooks/sql_advanced/raw/ex1.ipynb index bd80e2799..3986bb08c 100644 --- a/notebooks/sql_advanced/raw/ex1.ipynb +++ b/notebooks/sql_advanced/raw/ex1.ipynb @@ -279,11 +279,11 @@ "\n", "Now you'll address a more realistic (and complex!) scenario. To answer this question, you'll need to pull information from *three* different tables! This syntax very similar to the case when we have to join only two tables. For instance, consider the three tables below.\n", "\n", - "![three tables](https://i.imgur.com/OyhYtD1.png)\n", + "![three tables](https://storage.googleapis.com/kaggle-media/learn/images/OyhYtD1.png)\n", "\n", "We can use two different **JOINs** to link together information from all three tables, in a single query.\n", "\n", - "![double join](https://i.imgur.com/G6buS7P.png)\n", + "![double join](https://storage.googleapis.com/kaggle-media/learn/images/G6buS7P.png)\n", "\n", "With this in mind, say you're interested in understanding users who joined the site in January 2019. 
You want to track their activity on the site: when did they post their first questions and answers, if ever?\n", "\n", diff --git a/notebooks/sql_advanced/raw/ex2.ipynb b/notebooks/sql_advanced/raw/ex2.ipynb index e563eb777..42ccca73a 100644 --- a/notebooks/sql_advanced/raw/ex2.ipynb +++ b/notebooks/sql_advanced/raw/ex2.ipynb @@ -225,7 +225,7 @@ "\n", "Some sample results are shown below, where all rows correspond to the same driver (or `taxi_id`). Take the time now to make sure that the values in the `prev_break` column make sense to you!\n", "\n", - "![first_commands](https://i.imgur.com/qjvQzg8.png)\n", + "![first_commands](https://storage.googleapis.com/kaggle-media/learn/images/qjvQzg8.png)\n", "\n", "Note that the first trip of the day for each driver should have a value of **NaN** (not a number) in the `prev_break` column." ] diff --git a/notebooks/sql_advanced/raw/ex3.ipynb b/notebooks/sql_advanced/raw/ex3.ipynb index d1d77290b..f4db56d65 100644 --- a/notebooks/sql_advanced/raw/ex3.ipynb +++ b/notebooks/sql_advanced/raw/ex3.ipynb @@ -192,11 +192,11 @@ "source": [ "Assume for the moment that you have access to a table called `sample_languages` that contains only a very small subset of the rows from the `languages` table: in fact, it contains only three rows! This table is depicted in the image below.\n", "\n", - "![](https://i.imgur.com/qAb5lZ2.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/qAb5lZ2.png)\n", "\n", "How many rows are in the table returned by the query below?\n", "\n", - "![](https://i.imgur.com/Q5qYAtz.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/Q5qYAtz.png)\n", "\n", "Fill in your answer in the next code cell." 
] diff --git a/notebooks/sql_advanced/raw/ex4.ipynb b/notebooks/sql_advanced/raw/ex4.ipynb index a281e1f20..84600a945 100644 --- a/notebooks/sql_advanced/raw/ex4.ipynb +++ b/notebooks/sql_advanced/raw/ex4.ipynb @@ -91,7 +91,7 @@ "\n", "You have the following two tables:\n", "\n", - "![](https://i.imgur.com/E9jikOQ.png)\n", + "![](https://storage.googleapis.com/kaggle-media/learn/images/E9jikOQ.png)\n", "\n", "The `CostumeLocations` table shows timestamped GPS data for all of the pet costumes in the database, where `CostumeID` is a unique identifier for each costume. \n", "\n", diff --git a/notebooks/sql_advanced/raw/tut1.ipynb b/notebooks/sql_advanced/raw/tut1.ipynb index 4a7e5cf30..48b6ea143 100644 --- a/notebooks/sql_advanced/raw/tut1.ipynb +++ b/notebooks/sql_advanced/raw/tut1.ipynb @@ -10,7 +10,7 @@ "\n", "Along the way, we'll work with two imaginary tables, called `owners` and `pets`. \n", "\n", - "![two tables](https://i.imgur.com/dYVwS4T.png)\n", + "![two tables](https://storage.googleapis.com/kaggle-media/learn/images/dYVwS4T.png)\n", "\n", "Each row of the `owners` table identifies a different pet owner, where the `ID` column is a unique identifier. The `Pet_ID` column (in the `owners` table) contains the ID for the pet that belongs to the owner (this number matches the ID for the pet from the `pets` table).\n", "\n", @@ -24,13 +24,13 @@ "\n", "Recall that we can use an **INNER JOIN** to pull rows from both tables where the value in the `Pet_ID` column in the `owners` table has a match in the `ID` column of the `pets` table.\n", "\n", - "![...](https://i.imgur.com/C5wimKT.png)\n", + "![...](https://storage.googleapis.com/kaggle-media/learn/images/C5wimKT.png)\n", "\n", "In this case, Veronica Dunn and Maisie are not included in the results. But what if we instead want to create a table containing all pets, regardless of whether they have owners? Or, what if we want to combine all of the rows in both tables? 
In these cases, we need only use a different type of **JOIN**.\n", "\n", "For instance, to create a table containing all rows from the `owners` table, we use a **LEFT JOIN**. In this case, \"left\" refers to the table that appears before the **JOIN** in the query. (\"Right\" refers to the table that is after the **JOIN**.)\n", "\n", - "![...](https://i.imgur.com/tnOqw2S.png)\n", + "![...](https://storage.googleapis.com/kaggle-media/learn/images/tnOqw2S.png)\n", "\n", "Replacing **INNER JOIN** in the query above with **LEFT JOIN** returns all rows where the two tables have matching entries, along with all of the rows in the left table (whether there is a match or not). \n", "\n", @@ -38,14 +38,14 @@ "\n", "Finally, a **FULL JOIN** returns all rows from both tables. Note that in general, any row that does not have a match in both tables will have NULL entries for the missing values. You can see this in the image below.\n", "\n", - "![...](https://i.imgur.com/1Dvmg8S.png)\n", + "![...](https://storage.googleapis.com/kaggle-media/learn/images/1Dvmg8S.png)\n", "\n", "\n", "# UNIONs\n", "\n", "As you've seen, **JOINs** horizontally combine results from different tables. If you instead would like to vertically concatenate columns, you can do so with a **UNION**. The example query below combines the `Age` columns from both tables.\n", "\n", - "![...](https://i.imgur.com/oa6VDig.png)\n", + "![...](https://storage.googleapis.com/kaggle-media/learn/images/oa6VDig.png)\n", "\n", "Note that with a **UNION**, the data types of both columns must be the same, but the column names can be different. (So, for instance, we cannot take the **UNION** of the `Age` column from the `owners` table and the `Pet_Name` column from the `pets` table.) 
\n", "\n", diff --git a/notebooks/sql_advanced/raw/tut2.ipynb b/notebooks/sql_advanced/raw/tut2.ipynb index afc066c4e..e261ebdf4 100644 --- a/notebooks/sql_advanced/raw/tut2.ipynb +++ b/notebooks/sql_advanced/raw/tut2.ipynb @@ -14,14 +14,14 @@ "\n", "To understand how to write analytic functions, we'll work with a small table containing data from two different people who are training for a race. The `id` column identifies each runner, the `date` column holds the day of the training session, and `time` shows the time (in minutes) that the runner dedicated to training. Say we'd like to calculate a moving average of the training times for each runner, where we always take the average of the current and previous training sessions. We can do this with the following query:\n", "\n", - "![first_query](https://i.imgur.com/rehp8HM.png)\n", + "![first_query](https://storage.googleapis.com/kaggle-media/learn/images/rehp8HM.png)\n", "\n", "All analytic functions have an **OVER** clause, which defines the sets of rows used in each calculation. The **OVER** clause has three (optional) parts:\n", "- The **PARTITION BY** clause divides the rows of the table into different groups. In the query above, we divide by `id` so that the calculations are separated by runner.\n", "- The **ORDER BY** clause defines an ordering within each partition. In the sample query, ordering by the `date` column ensures that earlier training sessions appear first.\n", "- The final clause (`ROWS BETWEEN 1 PRECEDING AND CURRENT ROW`) is known as a **window frame** clause. It identifies the set of rows used in each calculation. We can refer to this group of rows as a **window**. 
(_Actually, analytic functions are sometimes referred to as **analytic window functions** or simply **window functions**!_) \n", "\n", - "![first_query](https://i.imgur.com/GjiKlA7.png)\n", + "![first_query](https://storage.googleapis.com/kaggle-media/learn/images/GjiKlA7.png)\n", "\n", "# (More on) window frame clauses\n", "\n", diff --git a/notebooks/sql_advanced/raw/tut3.ipynb b/notebooks/sql_advanced/raw/tut3.ipynb index 2410938cd..95a54721b 100644 --- a/notebooks/sql_advanced/raw/tut3.ipynb +++ b/notebooks/sql_advanced/raw/tut3.ipynb @@ -14,20 +14,20 @@ "\n", "Another option in BigQuery is to organize all of the information in a single table, similar to the `pets_and_toys` table below. \n", "\n", - "![nested data](https://i.imgur.com/wxuogYA.png)\n", + "![nested data](https://storage.googleapis.com/kaggle-media/learn/images/wxuogYA.png)\n", "\n", "In this case, all of the information from the `toys` table is collapsed into a single column (the \"Toy\" column in the `pets_and_toys` table). We refer to the \"Toy\" column in the `pets_and_toys` table as a **nested** column, and say that the \"Name\" and \"Type\" fields are nested inside of it. \n", "\n", "Nested columns have type **STRUCT** (or type **RECORD**). This is reflected in the table schema below.\n", "> Recall that we refer to the structure of a table as its **schema**. If you need to review how to interpret table schema, feel free to check out [this lesson](https://www.kaggle.com/dansbecker/getting-started-with-sql-and-bigquery) from the Intro to SQL micro-course.\n", "\n", - "![nested data](https://i.imgur.com/epXFXdb.png)\n", + "![nested data](https://storage.googleapis.com/kaggle-media/learn/images/epXFXdb.png)\n", "\n", "To query a column with nested data, we need to identify each field in the context of the column that contains it: \n", "- `Toy.Name` refers to the \"Name\" field in the \"Toy\" column, and\n", "- `Toy.Type` refers to the \"Type\" field in the \"Toy\" column. 
\n", "\n", - "![nested data](https://i.imgur.com/eE2Gt62.png)\n", + "![nested data](https://storage.googleapis.com/kaggle-media/learn/images/eE2Gt62.png)\n", "\n", "Otherwise, our usual rules remain the same - we need not change anything else about our queries.\n", "\n", @@ -35,35 +35,35 @@ "\n", "Now consider the (more realistic!) case where each pet can have multiple toys. In this case, to collapse this information into a single table, we need to leverage a different datatype.\n", "\n", - "![repeated data](https://i.imgur.com/S93FJTE.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/S93FJTE.png)\n", "\n", "We say that the \"Toys\" column contains **repeated data**, because it permits more than one value for each row. This is reflected in the table schema below, where the mode of the \"Toys\" column appears as **'REPEATED'**.\n", "\n", - "![repeated data](https://i.imgur.com/KlrjpDM.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/KlrjpDM.png)\n", "\n", "Each entry in a repeated field is an **ARRAY**, or an ordered list of (zero or more) values with the same datatype. For instance, the entry in the \"Toys\" column for Moon the Dog is **[Frisbee, Bone, Rope]**, which is an ARRAY with three values.\n", "\n", "When querying repeated data, we need to put the name of the column containing the repeated data inside an **UNNEST()** function. \n", "\n", - "![repeated data](https://i.imgur.com/p3fXPxY.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/p3fXPxY.png)\n", "\n", "This essentially flattens the repeated data (which is then appended to the right side of the table) so that we have one element on each row. 
For an illustration of this, check out the image below.\n", "\n", - "![repeated data](https://i.imgur.com/8j4XK8f.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/8j4XK8f.png)\n", "\n", "# Nested and repeated data\n", "\n", "Now, what if pets can have multiple toys, _and_ we'd like to keep track of both the name and type of each toy? In this case, we can make the \"Toys\" column both **nested** and **repeated**.\n", "\n", - "![repeated data](https://i.imgur.com/psKtza2.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/psKtza2.png)\n", "\n", "In the `more_pets_and_toys` table above, \"Name\" and \"Type\" are both fields contained within the \"Toys\" STRUCT, and each entry in both \"Toys.Name\" and \"Toys.Type\" is an ARRAY.\n", "\n", - "![repeated data](https://i.imgur.com/fO5OymI.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/fO5OymI.png)\n", "\n", "Let's look at a sample query.\n", "\n", - "![repeated data](https://i.imgur.com/DiMCZaO.png)\n", + "![repeated data](https://storage.googleapis.com/kaggle-media/learn/images/DiMCZaO.png)\n", "\n", "Since the \"Toys\" column is repeated, we flatten it with the **UNNEST()** function. And, since we give the flattened column an alias of `t`, we can refer to the \"Name\" and \"Type\" fields in the \"Toys\" column as `t.Name` and `t.Type`, respectively. \n", "\n", diff --git a/notebooks/sql_advanced/raw/tut4.ipynb b/notebooks/sql_advanced/raw/tut4.ipynb index ca6579f5c..c2b21308c 100644 --- a/notebooks/sql_advanced/raw/tut4.ipynb +++ b/notebooks/sql_advanced/raw/tut4.ipynb @@ -130,15 +130,15 @@ "\n", "Most of the JOINs that you have executed in this course have been **1:1 JOINs**. 
In this case, each row in each table has at most one match in the other table.\n", "\n", - "![JOIN](https://i.imgur.com/fp7oMLq.png)\n", + "![JOIN](https://storage.googleapis.com/kaggle-media/learn/images/fp7oMLq.png)\n", "\n", "Another type of JOIN is an **N:1 JOIN**. Here, each row in one table matches potentially many rows in the other table. \n", "\n", - "![JOIN](https://i.imgur.com/7PxE0Mr.png)\n", + "![JOIN](https://storage.googleapis.com/kaggle-media/learn/images/7PxE0Mr.png)\n", "\n", "Finally, an **N:N JOIN** is one where a group of rows in one table can match a group of rows in the other table. Note that in general, all other things equal, this type of JOIN produces a table with many more rows than either of the two (original) tables that are being JOINed.\n", "\n", - "![JOIN](https://i.imgur.com/UsNZZoz.png)\n", + "![JOIN](https://storage.googleapis.com/kaggle-media/learn/images/UsNZZoz.png)\n", "\n", "Now we'll work with an example from a real dataset. Both examples below count the number of distinct committers and the number of files in several GitHub repositories." ] diff --git a/notebooks/time_series/raw/tut2.ipynb b/notebooks/time_series/raw/tut2.ipynb index 2c18dd740..7f7c7247b 100644 --- a/notebooks/time_series/raw/tut2.ipynb +++ b/notebooks/time_series/raw/tut2.ipynb @@ -10,7 +10,7 @@ "The **trend** component of a time series represents a persistent, long-term change in the mean of the series. The trend is the slowest-moving part of a series, the part representing the largest time scale of importance. In a time series of product sales, an increasing trend might be the effect of a market expansion as more people become aware of the product year by year.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Trend patterns in four time series.
\n", "
\n", "\n", @@ -21,7 +21,7 @@ "To see what kind of trend a time series might have, we can use a **moving average plot**. To compute a moving average of a time series, we compute the average of the values within a sliding window of some defined width. Each point on the graph represents the average of all the values in the series that fall within the window on either side. The idea is to smooth out any short-term fluctuations in the series so that only long-term changes remain.\n", "\n", "
\n", - "\"An\n", + "\"An\n", "
A moving average plot illustrating a linear trend. Each point on the curve (blue) is the average of the points (red) within a window of size 12.\n", "
\n", "
\n", @@ -51,7 +51,7 @@ "The trend curves in the figure below were both fit using these kinds of features and scikit-learn's `LinearRegression`:\n", "\n", "
\n", - "\"Above,\n", + "\"Above,\n", "
Top: Series with a linear trend. Below: Series with a quadratic trend.\n", "
\n", "
\n", diff --git a/notebooks/time_series/raw/tut3.ipynb b/notebooks/time_series/raw/tut3.ipynb index 7d66aa8b7..adb005434 100644 --- a/notebooks/time_series/raw/tut3.ipynb +++ b/notebooks/time_series/raw/tut3.ipynb @@ -10,7 +10,7 @@ "We say that a time series exhibits **seasonality** whenever there is a regular, periodic change in the mean of the series. Seasonal changes generally follow the clock and calendar -- repetitions over a day, a week, or a year are common. Seasonality is often driven by the cycles of the natural world over days and years or by conventions of social behavior surrounding dates and times.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Seasonal patterns in four time series.\n", "
\n", "
\n", @@ -24,7 +24,7 @@ "A seasonal plot shows segments of the time series plotted against some common period, the period being the \"season\" you want to observe. The figure shows a seasonal plot of the daily views of Wikipedia's article on *Trigonometry*: the article's daily views plotted over a common *weekly* period.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
There is a clear weekly seasonal pattern in this series, higher on weekdays and falling towards the weekend.\n", "
\n", "
\n", @@ -56,7 +56,7 @@ "Adding seasonal indicators to the training data helps models distinguish means within a seasonal period:\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Ordinary linear regression learns the mean values at each time in the season.
\n", "
\n", "\n", @@ -69,7 +69,7 @@ "Let's take a look at a plot for the annual season in *Trigonometry*. Notice the repetitions of various frequencies: a long up-and-down movement three times a year, short weekly movements 52 times a year, and perhaps others.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Annual seasonality in the Wiki Trigonometry series.
\n", "
\n", "\n", @@ -78,14 +78,14 @@ "**Fourier features** are pairs of sine and cosine curves, one pair for each potential frequency in the season starting with the longest. Fourier pairs modeling annual seasonality would have frequencies: once per year, twice per year, three times per year, and so on.\n", "\n", "
\n", - "\"A\n", + "\"A\n", "
The first two Fourier pairs for annual seasonality. Top: Frequency of once per year. Bottom: Frequency of twice per year.
\n", "
\n", "\n", "If we add a set of these sine / cosine curves to our training data, the linear regression algorithm will figure out the weights that will fit the seasonal component in the target series. The figure illustrates how linear regression used four Fourier pairs to model the annual seasonality in the *Wiki Trigonometry* series.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Top: Curves for four Fourier pairs, a sum of sine and cosine with regression coefficients. Each curve models a different frequency. Bottom: The sum of these curves approximates the seasonal pattern.
\n", "
\n", "\n", @@ -96,7 +96,7 @@ "How many Fourier pairs should we actually include in our feature set? We can answer this question with the periodogram. The **periodogram** tells you the strength of the frequencies in a time series. Specifically, the value on the y-axis of the graph is `(a ** 2 + b ** 2) / 2`, where `a` and `b` are the coefficients of the sine and cosine at that frequency (as in the *Fourier Components* plot above).\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Periodogram for the Wiki Trigonometry series.
\n", "
\n", "\n", diff --git a/notebooks/time_series/raw/tut4.ipynb b/notebooks/time_series/raw/tut4.ipynb index 4ffb36fc6..50ae4032f 100644 --- a/notebooks/time_series/raw/tut4.ipynb +++ b/notebooks/time_series/raw/tut4.ipynb @@ -11,7 +11,7 @@ "In earlier lessons, we investigated properties of time series that were most easily modeled as *time dependent* properties, that is, with features we could derive directly from the time index. Some time series properties, however, can only be modeled as *serially dependent* properties, that is, using as features past values of the target series. The structure of these time series may not be apparent from a plot over time; plotted against past values, however, the structure becomes clear -- as we see in the figure below below.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
These two series have serial dependence, but not time dependence. Points on the right have coordinates (value at time t-1, value at time t).\n", "
\n", "
\n", @@ -23,7 +23,7 @@ "One especially common way for serial dependence to manifest is in **cycles**. Cycles are patterns of growth and decay in a time series associated with how the value in a series at one time depends on values at previous times, but not necessarily on the time step itself. Cyclic behavior is characteristic of systems that can affect themselves or whose reactions persist over time. Economies, epidemics, animal populations, volcano eruptions, and similar natural phenomena often display cyclic behavior.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Four time series with cyclic behavior.\n", "
\n", "
\n", @@ -76,7 +76,7 @@ "A **lag plot** of a time series shows its values plotted against its lags. Serial dependence in a time series will often become apparent by looking at a lag plot. We can see from this lag plot of *US Unemployment* that there is a strong and apparently linear relationship between the current unemployment rate and past rates.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Lag plot of US Unemployment with autocorrelations indicated.\n", "
\n", "
\n", @@ -90,7 +90,7 @@ "The **partial autocorrelation** tells you the correlation of a lag accounting for all of the previous lags -- the amount of \"new\" correlation the lag contributes, so to speak. Plotting the partial autocorrelation can help you choose which lag features to use. In the figure below, lag 1 through lag 6 fall outside the intervals of \"no correlation\" (in blue), so we might choose lags 1 through lag 6 as features for *US Unemployment*. (Lag 11 is likely a false positive.)\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Partial autocorrelations of US Unemployment through lag 12 with 95% confidence intervals of no correlation.\n", "
\n", "
\n", @@ -100,7 +100,7 @@ "Finally, we need to be mindful that autocorrelation and partial autocorrelation are measures of *linear* dependence. Because real-world time series often have substantial non-linear dependences, it's best to look at a lag plot (or use some more general measure of dependence, like [mutual information](https://www.kaggle.com/ryanholbrook/mutual-information)) when choosing lag features. The *Sunspots* series has lags with non-linear dependence which we might overlook with autocorrelation.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Lag plot of the Sunspots series.\n", "
\n", "
\n", diff --git a/notebooks/time_series/raw/tut5.ipynb b/notebooks/time_series/raw/tut5.ipynb index ae932ef2f..8ea0c3508 100644 --- a/notebooks/time_series/raw/tut5.ipynb +++ b/notebooks/time_series/raw/tut5.ipynb @@ -23,7 +23,7 @@ "The **residuals** of a model are the difference between the target the model was trained on and the predictions the model makes -- the difference between the actual curve and the fitted curve, in other words. Plot the residuals against a feature, and you get the \"left over\" part of the target, or what the model failed to learn about the target from that feature.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
The difference between the target series and the predictions (blue) gives the series of residuals.\n", "
\n", "
\n", @@ -33,7 +33,7 @@ "We could imagine learning the components of a time series as an iterative process: first learn the trend and subtract it out from the series, then learn the seasonality from the detrended residuals and subtract the seasons out, then learn the cycles and subtract the cycles out, and finally only the unpredictable error remains.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Learning the components of Mauna Loa CO2 step by step. Subtract the fitted curve (blue) from its series to get the series in the next step.\n", "
\n", "
\n", @@ -41,7 +41,7 @@ "Add together all the components we learned and we get the complete model. This is essentially what linear regression would do if you trained it on a complete set of features modeling trend, seasons, and cycles.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
Add the learned components to get a complete model.\n", "
\n", "
\n", @@ -79,7 +79,7 @@ "The important thing is this: feature transformers generally can **extrapolate** target values beyond the training set given appropriate features as inputs, but the predictions of target transformers will always be bound within the range of the training set. If the time dummy continues counting time steps, linear regression continues drawing the trend line. Given the same time dummy, a decision tree will predict the trend indicated by the last step of the training data into the future forever. *Decision trees cannot extrapolate trends.* Random forests and gradient boosted decision trees (like XGBoost) are ensembles of decision trees, so they also cannot extrapolate trends.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
A decision tree will fail to extrapolate a trend beyond the training set.\n", "
\n", "
\n", diff --git a/notebooks/time_series/raw/tut6.ipynb b/notebooks/time_series/raw/tut6.ipynb index 53eb60226..29db4dec1 100644 --- a/notebooks/time_series/raw/tut6.ipynb +++ b/notebooks/time_series/raw/tut6.ipynb @@ -24,7 +24,7 @@ "The **forecast horizon** is the time for which you are making a forecast. We often describe a forecast by the number of time steps in its horizon: a \"1-step\" forecast or \"5-step\" forecast, say. The forecast horizon describes the target.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
A three-step forecast horizon with a two-step lead time, using four lag features. The figure represents what would be a single row of training data -- data for a single prediction, in other words.\n", "
\n", "
\n", @@ -96,7 +96,7 @@ "Use a model that produces multiple outputs naturally. Linear regression and neural networks can both produce multiple outputs. This strategy is simple and efficient, but not possible for every algorithm you might want to use. XGBoost can't do this, for instance.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
\n", @@ -106,7 +106,7 @@ "Train a separate model for each step in the horizon: one model forecasts 1-step ahead, another 2-steps ahead, and so on. Forecasting 1-step ahead is a different problem than 2-steps ahead (and so on), so it can help to have a different model make forecasts for each step. The downside is that training lots of models can be computationally expensive.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
\n", @@ -116,7 +116,7 @@ "Train a single one-step model and use its forecasts to update the lag features for the next step. With the recursive method, we feed a model's 1-step forecast back in to that same model to use as a lag feature for the next forecasting step. We only need to train one model, but since errors will propagate from step to step, forecasts can be inaccurate for long horizons.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
\n", @@ -126,7 +126,7 @@ "A combination of the direct and recursive strategies: train a model for each step and use forecasts from previous steps as *new* lag features. Step by step, each model gets an additional lag input. Since each model always has an up-to-date set of lag features, the DirRec strategy can capture serial dependence better than Direct, but it can also suffer from error propagation like Recursive.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
\n", "
\n", "
\n", From 66a802aba3cd61fcdbb870bc3f0976d0523c5e69 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Tue, 11 Apr 2023 16:10:23 -0400 Subject: [PATCH 02/55] removing pyearth from ex2, ex5 --- notebooks/time_series/raw/ex1.ipynb | 30 ++++++++++ notebooks/time_series/raw/ex2.ipynb | 87 +++++++++++------------------ notebooks/time_series/raw/ex5.ipynb | 3 +- notebooks/time_series/raw/ex6.ipynb | 29 +++++++++- 4 files changed, 93 insertions(+), 56 deletions(-) diff --git a/notebooks/time_series/raw/ex1.ipynb b/notebooks/time_series/raw/ex1.ipynb index 39a812781..de1e095f9 100644 --- a/notebooks/time_series/raw/ex1.ipynb +++ b/notebooks/time_series/raw/ex1.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "d15fa19b", "metadata": {}, "source": [ "# Introduction #\n", @@ -12,6 +13,7 @@ { "cell_type": "code", "execution_count": null, + "id": "646100dd", "metadata": {}, "outputs": [], "source": [ @@ -64,6 +66,7 @@ }, { "cell_type": "markdown", + "id": "70c9c3dd", "metadata": {}, "source": [ "--------------------------------------------------------------------------------\n", @@ -76,6 +79,7 @@ { "cell_type": "code", "execution_count": null, + "id": "04d6403a", "metadata": {}, "outputs": [], "source": [ @@ -87,6 +91,7 @@ }, { "cell_type": "markdown", + "id": "ec197171", "metadata": {}, "source": [ "# 1) Interpret linear regression with the time dummy\n", @@ -97,6 +102,7 @@ { "cell_type": "code", "execution_count": null, + "id": "3157a040", "metadata": {}, "outputs": [], "source": [ @@ -107,6 +113,7 @@ { "cell_type": "code", "execution_count": null, + "id": "5d69b33d", "metadata": {}, "outputs": [], "source": [ @@ -117,6 +124,7 @@ }, { "cell_type": "markdown", + "id": "f3bc1b7b", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -131,6 +139,7 @@ { "cell_type": "code", "execution_count": null, + "id": "89dd6f64", "metadata": {}, "outputs": [], "source": [ @@ -143,6 +152,7 @@ }, { "cell_type": 
"markdown", + "id": "78d4f8c9", "metadata": {}, "source": [ "One of these series has the equation `target = 0.95 * lag_1 + error` and the other has the equation `target = -0.95 * lag_1 + error`, differing only by the sign on the lag feature. Can you tell which equation goes with each series?" @@ -151,6 +161,7 @@ { "cell_type": "code", "execution_count": null, + "id": "3db4075f", "metadata": {}, "outputs": [], "source": [ @@ -161,6 +172,7 @@ { "cell_type": "code", "execution_count": null, + "id": "7a2f529f", "metadata": {}, "outputs": [], "source": [ @@ -171,6 +183,7 @@ }, { "cell_type": "markdown", + "id": "0a1080ac", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -185,6 +198,7 @@ { "cell_type": "code", "execution_count": null, + "id": "2e2b9917", "metadata": {}, "outputs": [], "source": [ @@ -221,6 +235,7 @@ { "cell_type": "code", "execution_count": null, + "id": "4afb7bf9", "metadata": {}, "outputs": [], "source": [ @@ -234,6 +249,7 @@ { "cell_type": "code", "execution_count": null, + "id": "30fd1d14", "metadata": {}, "outputs": [], "source": [ @@ -260,6 +276,7 @@ { "cell_type": "code", "execution_count": null, + "id": "6ae3684c", "metadata": {}, "outputs": [], "source": [ @@ -286,6 +303,7 @@ { "cell_type": "code", "execution_count": null, + "id": "856f1f71", "metadata": {}, "outputs": [], "source": [ @@ -311,6 +329,7 @@ }, { "cell_type": "markdown", + "id": "0595c93c", "metadata": {}, "source": [ "Run this cell if you'd like to see a plot of the result." 
@@ -319,6 +338,7 @@ { "cell_type": "code", "execution_count": null, + "id": "08ba8a15", "metadata": {}, "outputs": [], "source": [ @@ -329,6 +349,7 @@ }, { "cell_type": "markdown", + "id": "666f8b5e", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -341,6 +362,7 @@ { "cell_type": "code", "execution_count": null, + "id": "ef3899df", "metadata": {}, "outputs": [], "source": [ @@ -374,6 +396,7 @@ { "cell_type": "code", "execution_count": null, + "id": "ea6f20e3", "metadata": {}, "outputs": [], "source": [ @@ -385,6 +408,7 @@ { "cell_type": "code", "execution_count": null, + "id": "0e7fbb0d", "metadata": {}, "outputs": [], "source": [ @@ -411,6 +435,7 @@ { "cell_type": "code", "execution_count": null, + "id": "b2a26acb", "metadata": {}, "outputs": [], "source": [ @@ -437,6 +462,7 @@ { "cell_type": "code", "execution_count": null, + "id": "e495f9c8", "metadata": {}, "outputs": [], "source": [ @@ -464,6 +490,7 @@ { "cell_type": "code", "execution_count": null, + "id": "b11e7132", "metadata": {}, "outputs": [], "source": [ @@ -489,6 +516,7 @@ }, { "cell_type": "markdown", + "id": "2563720b", "metadata": {}, "source": [ "Run the next cell if you'd like to see the result." @@ -497,6 +525,7 @@ { "cell_type": "code", "execution_count": null, + "id": "7fad2b57", "metadata": {}, "outputs": [], "source": [ @@ -508,6 +537,7 @@ }, { "cell_type": "markdown", + "id": "d3d56312", "metadata": {}, "source": [ "# Keep Going #\n", diff --git a/notebooks/time_series/raw/ex2.ipynb b/notebooks/time_series/raw/ex2.ipynb index 2037c9319..1c7f64f6b 100644 --- a/notebooks/time_series/raw/ex2.ipynb +++ b/notebooks/time_series/raw/ex2.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "51dedb79", "metadata": {}, "source": [ "\n", @@ -10,6 +11,7 @@ }, { "cell_type": "markdown", + "id": "5cd8962e", "metadata": {}, "source": [ "Run this cell to set everything up!" 
@@ -18,6 +20,7 @@ { "cell_type": "code", "execution_count": null, + "id": "b5dd3010", "metadata": { "lines_to_next_cell": 0 }, @@ -68,6 +71,7 @@ }, { "cell_type": "markdown", + "id": "41b26fd4", "metadata": {}, "source": [ "-------------------------------------------------------------------------------" @@ -75,6 +79,7 @@ }, { "cell_type": "markdown", + "id": "55e35c59", "metadata": {}, "source": [ "# 1) Determine trend with a moving average plot\n", @@ -85,6 +90,7 @@ { "cell_type": "code", "execution_count": null, + "id": "69fade2a", "metadata": {}, "outputs": [], "source": [ @@ -94,6 +100,7 @@ }, { "cell_type": "markdown", + "id": "32cc30bc", "metadata": {}, "source": [ "Now make a moving average plot to estimate the trend for this series." @@ -102,6 +109,7 @@ { "cell_type": "code", "execution_count": null, + "id": "7d32a154", "metadata": { "lines_to_next_cell": 0 }, @@ -122,6 +130,7 @@ { "cell_type": "code", "execution_count": null, + "id": "6001f3b3", "metadata": { "lines_to_next_cell": 0 }, @@ -137,6 +146,7 @@ { "cell_type": "code", "execution_count": null, + "id": "06dfe90d", "metadata": { "lines_to_next_cell": 0 }, @@ -151,6 +161,7 @@ { "cell_type": "code", "execution_count": null, + "id": "9e3212ac", "metadata": { "lines_to_next_cell": 0 }, @@ -169,6 +180,7 @@ { "cell_type": "code", "execution_count": null, + "id": "90f39c61", "metadata": { "lines_to_next_cell": 0 }, @@ -187,6 +199,7 @@ { "cell_type": "code", "execution_count": null, + "id": "85d6bc34", "metadata": { "lines_to_next_cell": 0 }, @@ -205,6 +218,7 @@ { "cell_type": "code", "execution_count": null, + "id": "c0f9ffd3", "metadata": { "lines_to_next_cell": 0 }, @@ -223,6 +237,7 @@ { "cell_type": "code", "execution_count": null, + "id": "e09d8b36", "metadata": {}, "outputs": [], "source": [ @@ -241,6 +256,7 @@ }, { "cell_type": "markdown", + "id": "41fdc36d", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -255,6 +271,7 @@ { 
"cell_type": "code", "execution_count": null, + "id": "e7bd76d7", "metadata": {}, "outputs": [], "source": [ @@ -264,6 +281,7 @@ }, { "cell_type": "markdown", + "id": "97d60d38", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -274,6 +292,7 @@ { "cell_type": "code", "execution_count": null, + "id": "6a785a07", "metadata": {}, "outputs": [], "source": [ @@ -289,6 +308,7 @@ }, { "cell_type": "markdown", + "id": "f5d4f219", "metadata": {}, "source": [ "# 3) Create a Trend Feature\n", @@ -299,6 +319,7 @@ { "cell_type": "code", "execution_count": null, + "id": "69c90391", "metadata": {}, "outputs": [], "source": [ @@ -353,6 +374,7 @@ { "cell_type": "code", "execution_count": null, + "id": "c1d73274", "metadata": {}, "outputs": [], "source": [ @@ -367,6 +389,7 @@ { "cell_type": "code", "execution_count": null, + "id": "6d0cc6b9", "metadata": {}, "outputs": [], "source": [ @@ -382,6 +405,7 @@ }, { "cell_type": "markdown", + "id": "6853e91d", "metadata": {}, "source": [ "You can see the a plot of the result by running the next cell." 
@@ -390,6 +414,7 @@ { "cell_type": "code", "execution_count": null, + "id": "62412e92", "metadata": {}, "outputs": [], "source": [ @@ -407,6 +432,7 @@ }, { "cell_type": "markdown", + "id": "bb9e9e2e", "metadata": {}, "source": [ "--------------------------------------------------------------------------------\n", @@ -417,6 +443,7 @@ { "cell_type": "code", "execution_count": null, + "id": "d1b5e6fa", "metadata": {}, "outputs": [], "source": [ @@ -437,6 +464,7 @@ }, { "cell_type": "markdown", + "id": "9bbb957a", "metadata": {}, "source": [ "# 4) Understand risks of forecasting with high-order polynomials\n", @@ -447,6 +475,7 @@ { "cell_type": "code", "execution_count": null, + "id": "b76bc932", "metadata": {}, "outputs": [], "source": [ @@ -457,6 +486,7 @@ { "cell_type": "code", "execution_count": null, + "id": "10459206", "metadata": {}, "outputs": [], "source": [ @@ -467,6 +497,7 @@ }, { "cell_type": "markdown", + "id": "25173a6c", "metadata": {}, "source": [ "Run this cell to see the same 90-day forecast using an order 11 polynomial. Does it confirm your intuition?" @@ -475,6 +506,7 @@ { "cell_type": "code", "execution_count": null, + "id": "4d0bc020", "metadata": {}, "outputs": [], "source": [ @@ -489,58 +521,7 @@ }, { "cell_type": "markdown", - "metadata": {}, - "source": [ - "--------------------------------------------------------------------------------\n", - "\n", - "# (Optional) Fit trend with splines\n", - "\n", - "*Splines* are a nice alternative to polynomials when you want to fit a trend. The *Multivariate Adaptive Regression Splines* (MARS) algorithm in the `pyearth` library is powerful and easy to use. There are a lot of hyperparameters you may want to investigate." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pyearth import Earth\n", - "\n", - "# Target and features are the same as before\n", - "y = average_sales.copy()\n", - "dp = DeterministicProcess(index=y.index, order=1)\n", - "X = dp.in_sample()\n", - "\n", - "# Fit a MARS model with `Earth`\n", - "model = Earth()\n", - "model.fit(X, y)\n", - "\n", - "y_pred = pd.Series(model.predict(X), index=X.index)\n", - "\n", - "ax = y.plot(**plot_params, title=\"Average Sales\", ylabel=\"items sold\")\n", - "ax = y_pred.plot(ax=ax, linewidth=3, label=\"Trend\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Forecasting complicated trends like this will typically be difficult (if not impossible). With historical data, however, you can use splines to isolate other patterns in a time series by *detrending*." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y_detrended = y - y_pred # remove the trend from store_sales\n", - "\n", - "y_detrended.plot(**plot_params, title=\"Detrended Average Sales\");" - ] - }, - { - "cell_type": "markdown", + "id": "65213409", "metadata": {}, "source": [ "# Keep Going #\n", @@ -554,7 +535,7 @@ "formats": "md,ipynb" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/notebooks/time_series/raw/ex5.ipynb b/notebooks/time_series/raw/ex5.ipynb index a4d69f683..e78bd90ea 100644 --- a/notebooks/time_series/raw/ex5.ipynb +++ b/notebooks/time_series/raw/ex5.ipynb @@ -536,7 +536,6 @@ "outputs": [], "source": [ "# Model 1 (trend)\n", - "from pyearth import Earth\n", "from sklearn.linear_model import ElasticNet, Lasso, Ridge\n", "\n", "# Model 2\n", @@ -632,7 +631,7 @@ "formats": "ipynb,md" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", 
"name": "python3" }, diff --git a/notebooks/time_series/raw/ex6.ipynb b/notebooks/time_series/raw/ex6.ipynb index 6cf759f62..31638b2b8 100644 --- a/notebooks/time_series/raw/ex6.ipynb +++ b/notebooks/time_series/raw/ex6.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "4069c6cf", "metadata": {}, "source": [ "# Introduction #\n", @@ -12,6 +13,7 @@ { "cell_type": "code", "execution_count": null, + "id": "230571c2", "metadata": {}, "outputs": [], "source": [ @@ -79,6 +81,7 @@ }, { "cell_type": "markdown", + "id": "4b106f58", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -95,6 +98,7 @@ { "cell_type": "code", "execution_count": null, + "id": "2863b28c", "metadata": {}, "outputs": [], "source": [ @@ -111,6 +115,7 @@ }, { "cell_type": "markdown", + "id": "a9367ba2", "metadata": {}, "source": [ "# 1) Match description to dataset\n", @@ -121,6 +126,7 @@ { "cell_type": "code", "execution_count": null, + "id": "8db73ebc", "metadata": {}, "outputs": [], "source": [ @@ -136,6 +142,7 @@ { "cell_type": "code", "execution_count": null, + "id": "25e7cf48", "metadata": {}, "outputs": [], "source": [ @@ -149,6 +156,7 @@ { "cell_type": "code", "execution_count": null, + "id": "8c9cad88", "metadata": {}, "outputs": [], "source": [ @@ -162,6 +170,7 @@ }, { "cell_type": "markdown", + "id": "8f8e3397", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -172,6 +181,7 @@ { "cell_type": "code", "execution_count": null, + "id": "dd39272c", "metadata": {}, "outputs": [], "source": [ @@ -182,6 +192,7 @@ }, { "cell_type": "markdown", + "id": "0fe0f909", "metadata": {}, "source": [ "# 2) Identify the forecasting task for *Store Sales* competition\n", @@ -194,6 +205,7 @@ { "cell_type": "code", "execution_count": null, + "id": "de47b4f0", "metadata": {}, "outputs": [], "source": [ @@ -203,6 +215,7 @@ }, { "cell_type": "markdown", + "id": 
"326717ff", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -217,6 +230,7 @@ { "cell_type": "code", "execution_count": null, + "id": "cedcfe41", "metadata": {}, "outputs": [], "source": [ @@ -239,6 +253,7 @@ { "cell_type": "code", "execution_count": null, + "id": "76fba79a", "metadata": {}, "outputs": [], "source": [ @@ -252,6 +267,7 @@ { "cell_type": "code", "execution_count": null, + "id": "58736792", "metadata": {}, "outputs": [], "source": [ @@ -266,6 +282,7 @@ }, { "cell_type": "markdown", + "id": "ef53a384", "metadata": {}, "source": [ "-------------------------------------------------------------------------------\n", @@ -278,6 +295,7 @@ { "cell_type": "code", "execution_count": null, + "id": "dc4c01d8", "metadata": {}, "outputs": [], "source": [ @@ -294,6 +312,7 @@ }, { "cell_type": "markdown", + "id": "63482d06", "metadata": {}, "source": [ "# 4) Forecast with the DirRec strategy\n", @@ -304,6 +323,7 @@ { "cell_type": "code", "execution_count": null, + "id": "4d296dd4", "metadata": {}, "outputs": [], "source": [ @@ -319,6 +339,7 @@ { "cell_type": "code", "execution_count": null, + "id": "e3d7aabb", "metadata": {}, "outputs": [], "source": [ @@ -332,6 +353,7 @@ { "cell_type": "code", "execution_count": null, + "id": "43c260bb", "metadata": {}, "outputs": [], "source": [ @@ -344,6 +366,7 @@ }, { "cell_type": "markdown", + "id": "b2644314", "metadata": {}, "source": [ "Run this cell if you'd like to train this model." @@ -352,6 +375,7 @@ { "cell_type": "code", "execution_count": null, + "id": "375b7d73", "metadata": {}, "outputs": [], "source": [ @@ -366,6 +390,7 @@ }, { "cell_type": "markdown", + "id": "3e8002d7", "metadata": {}, "source": [ "And use this code to see a sample of the 16-step predictions this model makes on the training data." 
@@ -374,6 +399,7 @@ { "cell_type": "code", "execution_count": null, + "id": "f654a839", "metadata": { "lines_to_next_cell": 2 }, @@ -394,6 +420,7 @@ }, { "cell_type": "markdown", + "id": "80022668", "metadata": {}, "source": [ "# Next Steps #\n", @@ -442,7 +469,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.12" + "version": "3.6.5" } }, "nbformat": 4, From b7efcb863044ba7978e075eb23eac4ece2348458 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Tue, 11 Apr 2023 18:27:25 -0400 Subject: [PATCH 03/55] fixing scikit learn --- notebooks/time_series/raw/tut4.ipynb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/notebooks/time_series/raw/tut4.ipynb b/notebooks/time_series/raw/tut4.ipynb index 4ffb36fc6..3f6831b8c 100644 --- a/notebooks/time_series/raw/tut4.ipynb +++ b/notebooks/time_series/raw/tut4.ipynb @@ -269,7 +269,8 @@ "\n", "\n", "X = make_lags(flu_trends.FluVisits, lags=4)\n", - "X = X.fillna(0.0)" + "X = X.fillna(0.0)\n", + "X.columns = X.columns.astype(str)" ] }, { @@ -445,7 +446,7 @@ "formats": "ipynb,md" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -459,7 +460,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, From d36eab870e97523546eb04362d5098c002322bda Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Wed, 12 Apr 2023 12:40:22 -0400 Subject: [PATCH 04/55] fixing multiindex --- notebooks/time_series/raw/tut4.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/time_series/raw/tut4.ipynb b/notebooks/time_series/raw/tut4.ipynb index 3f6831b8c..8e960c617 100644 --- a/notebooks/time_series/raw/tut4.ipynb +++ b/notebooks/time_series/raw/tut4.ipynb @@ -269,8 +269,7 @@ "\n", "\n", "X = make_lags(flu_trends.FluVisits, lags=4)\n", - "X = X.fillna(0.0)\n", - "X.columns = 
X.columns.astype(str)" + "X = X.fillna(0.0)" ] }, { @@ -383,6 +382,7 @@ "\n", "# Create three lags for each search term\n", "X0 = make_lags(flu_trends[search_terms], lags=3)\n", + "X0.columns = [' '.join(col).strip() for col in X0.columns.values]\n", "\n", "# Create four lags for the target, as before\n", "X1 = make_lags(flu_trends['FluVisits'], lags=4)\n", From d87fe762ca4883d27c05b88c95ca6e4d0d1597f1 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Wed, 12 Apr 2023 15:21:22 -0400 Subject: [PATCH 05/55] scikit learn error --- notebooks/feature_engineering_new/raw/tut1.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/feature_engineering_new/raw/tut1.ipynb b/notebooks/feature_engineering_new/raw/tut1.ipynb index 8e0efbca0..b54463829 100644 --- a/notebooks/feature_engineering_new/raw/tut1.ipynb +++ b/notebooks/feature_engineering_new/raw/tut1.ipynb @@ -102,7 +102,7 @@ "y = X.pop(\"CompressiveStrength\")\n", "\n", "# Train and score baseline model\n", - "baseline = RandomForestRegressor(criterion=\"mae\", random_state=0)\n", + "baseline = RandomForestRegressor(criterion=\"absolute_error\", random_state=0)\n", "baseline_score = cross_val_score(\n", " baseline, X, y, cv=5, scoring=\"neg_mean_absolute_error\"\n", ")\n", @@ -162,7 +162,7 @@ "formats": "ipynb" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -176,7 +176,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, From af9067834242e29c3844ee8c5803ed284f9e7d36 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Wed, 12 Apr 2023 15:57:49 -0400 Subject: [PATCH 06/55] feature engineering -- criterion --- notebooks/feature_engineering_new/raw/tut1.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/feature_engineering_new/raw/tut1.ipynb 
b/notebooks/feature_engineering_new/raw/tut1.ipynb index b54463829..7f00a1af3 100644 --- a/notebooks/feature_engineering_new/raw/tut1.ipynb +++ b/notebooks/feature_engineering_new/raw/tut1.ipynb @@ -135,7 +135,7 @@ "X[\"WtrCmtRatio\"] = X[\"Water\"] / X[\"Cement\"]\n", "\n", "# Train and score model on dataset with additional ratio features\n", - "model = RandomForestRegressor(criterion=\"mae\", random_state=0)\n", + "model = RandomForestRegressor(criterion=\"absolute_error\", random_state=0)\n", "score = cross_val_score(\n", " model, X, y, cv=5, scoring=\"neg_mean_absolute_error\"\n", ")\n", From 1590f916b512ddc0acd08899ea0d62016aeb9535 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 15:57:52 -0400 Subject: [PATCH 07/55] ronehot encoding no longer removes column names --- learntools/ml_intermediate/ex3.py | 5 ----- notebooks/ml_intermediate/raw/ex3.ipynb | 27 ++----------------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/learntools/ml_intermediate/ex3.py b/learntools/ml_intermediate/ex3.py index 6c1ed1aa6..bf00ebf7d 100644 --- a/learntools/ml_intermediate/ex3.py +++ b/learntools/ml_intermediate/ex3.py @@ -129,10 +129,6 @@ class OneHot(CodingProblem): OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols])) OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[low_cardinality_cols])) -# One-hot encoding removed index; put it back -OH_cols_train.index = X_train.index -OH_cols_valid.index = X_valid.index - # Remove categorical columns (will replace with one-hot encoding) num_X_train = X_train.drop(object_cols, axis=1) num_X_valid = X_valid.drop(object_cols, axis=1) @@ -140,7 +136,6 @@ class OneHot(CodingProblem): # Add one-hot encoded columns to numerical features OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1) OH_X_valid = pd.concat([num_X_valid, OH_cols_valid], axis=1) - """) def check(self, OH_X_train, OH_X_valid): diff --git a/notebooks/ml_intermediate/raw/ex3.ipynb 
b/notebooks/ml_intermediate/raw/ex3.ipynb index 5f2794f8c..95dff8713 100644 --- a/notebooks/ml_intermediate/raw/ex3.ipynb +++ b/notebooks/ml_intermediate/raw/ex3.ipynb @@ -530,29 +530,6 @@ "step_4.check()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#%%RM_IF(PROD)%%\n", - "# Apply one-hot encoder to each column with categorical data\n", - "OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)\n", - "OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols]))\n", - "OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[low_cardinality_cols]))\n", - "\n", - "# Remove categorical columns (will replace with one-hot encoding)\n", - "num_X_train = X_train.drop(object_cols, axis=1)\n", - "num_X_valid = X_valid.drop(object_cols, axis=1)\n", - "\n", - "# Add one-hot encoded columns to numerical features\n", - "OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1)\n", - "OH_X_valid = pd.concat([num_X_valid, OH_cols_valid], axis=1)\n", - "\n", - "step_4.assert_check_failed()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -647,7 +624,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -661,7 +638,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, From 4133418fde2b19d0273b04cb43dd9abe809d4f56 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 15:59:57 -0400 Subject: [PATCH 08/55] amending tutorial to be consistent with exercise --- notebooks/ml_intermediate/raw/tut3.ipynb | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/notebooks/ml_intermediate/raw/tut3.ipynb b/notebooks/ml_intermediate/raw/tut3.ipynb index e1b8e316d..4a3709029 100644 --- a/notebooks/ml_intermediate/raw/tut3.ipynb +++ 
b/notebooks/ml_intermediate/raw/tut3.ipynb @@ -242,10 +242,6 @@ "OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[object_cols]))\n", "OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[object_cols]))\n", "\n", - "# One-hot encoding removed index; put it back\n", - "OH_cols_train.index = X_train.index\n", - "OH_cols_valid.index = X_valid.index\n", - "\n", "# Remove categorical columns (will replace with one-hot encoding)\n", "num_X_train = X_train.drop(object_cols, axis=1)\n", "num_X_valid = X_valid.drop(object_cols, axis=1)\n", @@ -280,7 +276,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -294,7 +290,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, From 03bb3630d6fe459cdc2bb03638f6a204d30bc304 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 17:17:14 -0400 Subject: [PATCH 09/55] tut3, ex3 changes to ml_intermediate --- learntools/ml_intermediate/ex3.py | 4 ++++ notebooks/ml_intermediate/raw/tut3.ipynb | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/learntools/ml_intermediate/ex3.py b/learntools/ml_intermediate/ex3.py index bf00ebf7d..a3a63dc4b 100644 --- a/learntools/ml_intermediate/ex3.py +++ b/learntools/ml_intermediate/ex3.py @@ -129,6 +129,10 @@ class OneHot(CodingProblem): OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols])) OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[low_cardinality_cols])) +# One-hot encoding removed index; put it back +OH_cols_train.index = X_train.index +OH_cols_valid.index = X_valid.index + # Remove categorical columns (will replace with one-hot encoding) num_X_train = X_train.drop(object_cols, axis=1) num_X_valid = X_valid.drop(object_cols, axis=1) diff --git a/notebooks/ml_intermediate/raw/tut3.ipynb 
b/notebooks/ml_intermediate/raw/tut3.ipynb index 4a3709029..cab179e43 100644 --- a/notebooks/ml_intermediate/raw/tut3.ipynb +++ b/notebooks/ml_intermediate/raw/tut3.ipynb @@ -242,6 +242,10 @@ "OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[object_cols]))\n", "OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[object_cols]))\n", "\n", + "# One-hot encoding removed index; put it back\n", + "OH_cols_train.index = X_train.index\n", + "OH_cols_valid.index = X_valid.index\n", + "\n", "# Remove categorical columns (will replace with one-hot encoding)\n", "num_X_train = X_train.drop(object_cols, axis=1)\n", "num_X_valid = X_valid.drop(object_cols, axis=1)\n", @@ -250,6 +254,10 @@ "OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1)\n", "OH_X_valid = pd.concat([num_X_valid, OH_cols_valid], axis=1)\n", "\n", + "# Ensure all columns have string type\n", + "OH_X_train.columns = OH_X_train.columns.astype(str)\n", + "OH_X_valid.columns = OH_X_valid.columns.astype(str)\n", + "\n", "print(\"MAE from Approach 3 (One-Hot Encoding):\") \n", "print(score_dataset(OH_X_train, OH_X_valid, y_train, y_valid))" ] From 509f6ead3b937f5b34c29327439a21ce67ac2f90 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 18:13:00 -0400 Subject: [PATCH 10/55] changing to string columns --- learntools/ml_intermediate/ex3.py | 4 ++++ notebooks/ml_intermediate/raw/ex3.ipynb | 4 ++++ notebooks/test.sh | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/learntools/ml_intermediate/ex3.py b/learntools/ml_intermediate/ex3.py index a3a63dc4b..bf3f55bde 100644 --- a/learntools/ml_intermediate/ex3.py +++ b/learntools/ml_intermediate/ex3.py @@ -140,6 +140,10 @@ class OneHot(CodingProblem): # Add one-hot encoded columns to numerical features OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1) OH_X_valid = pd.concat([num_X_valid, OH_cols_valid], axis=1) + +# Ensure all columns have string type +OH_X_train.columns = OH_X_train.columns.astype(str) 
+OH_X_valid.columns = OH_X_valid.columns.astype(str) """) def check(self, OH_X_train, OH_X_valid): diff --git a/notebooks/ml_intermediate/raw/ex3.ipynb b/notebooks/ml_intermediate/raw/ex3.ipynb index 95dff8713..b108f091a 100644 --- a/notebooks/ml_intermediate/raw/ex3.ipynb +++ b/notebooks/ml_intermediate/raw/ex3.ipynb @@ -554,6 +554,10 @@ "OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1)\n", "OH_X_valid = pd.concat([num_X_valid, OH_cols_valid], axis=1)\n", "\n", + "# Ensure all columns have string type\n", + "OH_X_train.columns = OH_X_train.columns.astype(str)\n", + "OH_X_valid.columns = OH_X_valid.columns.astype(str)\n", + "\n", "step_4.assert_check_passed()" ] }, diff --git a/notebooks/test.sh b/notebooks/test.sh index 3a020601b..ad70b09f2 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -12,7 +12,7 @@ fi # Filter by tracks if first argument set. TRACKS="ml_explainability intro_to_programming time_series ethics feature_engineering_new computer_vision deep_learning_intro pandas python machine_learning sql data_viz_to_coder ml_intermediate sql_advanced feature_engineering geospatial nlp game_ai data_cleaning" -TESTABLE_NOTEBOOK_TRACKS="ml_explainability intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder ml_intermediate data_cleaning computer_vision deep_learning_intro python pandas machine_learning game_ai" +TESTABLE_NOTEBOOK_TRACKS="ml_intermediate ml_explainability intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder data_cleaning computer_vision deep_learning_intro python pandas machine_learning game_ai" if [[ -n $1 && $1 != "all" ]]; then TRACKS=$1 From 3f39748f4ed2d92d2e85d7ce9aefeb29cb3b3bf9 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 19:02:17 -0400 Subject: [PATCH 11/55] intermediate ml exercise 6 checking if model fit --- learntools/ml_intermediate/ex6.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/learntools/ml_intermediate/ex6.py b/learntools/ml_intermediate/ex6.py index d2f09154d..3c736b481 100644 --- a/learntools/ml_intermediate/ex6.py +++ b/learntools/ml_intermediate/ex6.py @@ -53,8 +53,10 @@ def check(self, my_model_1): ("Please instantiate the XGBoost model with default parameters, and set the random seed " "to 0 (e.g., `my_model_1 = XGBRegressor(random_state=0)`).") - assert my_model_1._Booster is not None, \ - "Please fit the model to the training data." + try: + my_model_1.get_booster() + except: + assert 0==1, "Please fit the model to the training data." class Model1B(CodingProblem): _var = 'predictions_1' From 1e3d604d824c9469bc9116d630282d0a59d1686a Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 14 Apr 2023 21:06:08 -0400 Subject: [PATCH 12/55] Update ex_0.ipynb --- notebooks/pandas/raw/ex_0.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/pandas/raw/ex_0.ipynb b/notebooks/pandas/raw/ex_0.ipynb index 796cc2c09..14897b7ec 100644 --- a/notebooks/pandas/raw/ex_0.ipynb +++ b/notebooks/pandas/raw/ex_0.ipynb @@ -18,7 +18,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "pd.set_option('max_rows', 5)\n", + "pd.set_option('display.max_rows', 5)\n", "from learntools.core import binder; binder.bind(globals())\n", "from learntools.pandas.creating_reading_and_writing import *\n", "print(\"Setup complete.\")" From 124fc28cb38a17a30d9ebcb6a53ff6fabece4954 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Sun, 16 Apr 2023 10:15:47 -0400 Subject: [PATCH 13/55] display.maxrows --- notebooks/pandas/raw/tut_1.ipynb | 6 +++--- notebooks/pandas/raw/tut_2.ipynb | 6 +++--- notebooks/pandas/raw/tut_4.ipynb | 6 +++--- notebooks/pandas/raw/tut_5.ipynb | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/notebooks/pandas/raw/tut_1.ipynb b/notebooks/pandas/raw/tut_1.ipynb index cf5ac5032..c2a8e806f 100644 --- a/notebooks/pandas/raw/tut_1.ipynb +++ b/notebooks/pandas/raw/tut_1.ipynb @@ -18,7 +18,7 @@ 
"#$HIDE_INPUT$\n", "import pandas as pd\n", "reviews = pd.read_csv(\"../input/wine-reviews/winemag-data-130k-v2.csv\", index_col=0)\n", - "pd.set_option('max_rows', 5)" + "pd.set_option('display.max_rows', 5)" ] }, { @@ -547,7 +547,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -561,7 +561,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/notebooks/pandas/raw/tut_2.ipynb b/notebooks/pandas/raw/tut_2.ipynb index 52e0dd3eb..b2ccc3baa 100644 --- a/notebooks/pandas/raw/tut_2.ipynb +++ b/notebooks/pandas/raw/tut_2.ipynb @@ -23,7 +23,7 @@ "source": [ "#$HIDE_INPUT$\n", "import pandas as pd\n", - "pd.set_option('max_rows', 5)\n", + "pd.set_option('display.max_rows', 5)\n", "import numpy as np\n", "reviews = pd.read_csv(\"../input/wine-reviews/winemag-data-130k-v2.csv\", index_col=0)" ] @@ -235,7 +235,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -249,7 +249,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/notebooks/pandas/raw/tut_4.ipynb b/notebooks/pandas/raw/tut_4.ipynb index 56ef112bf..df22adfac 100644 --- a/notebooks/pandas/raw/tut_4.ipynb +++ b/notebooks/pandas/raw/tut_4.ipynb @@ -26,7 +26,7 @@ "#$HIDE_INPUT$\n", "import pandas as pd\n", "reviews = pd.read_csv(\"../input/wine-reviews/winemag-data-130k-v2.csv\", index_col=0)\n", - "pd.set_option('max_rows', 5)" + "pd.set_option('display.max_rows', 5)" ] }, { @@ -186,7 +186,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -200,7 +200,7 @@ "name": "python", "nbconvert_exporter": 
"python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/notebooks/pandas/raw/tut_5.ipynb b/notebooks/pandas/raw/tut_5.ipynb index dc6ac7f65..616c636f0 100644 --- a/notebooks/pandas/raw/tut_5.ipynb +++ b/notebooks/pandas/raw/tut_5.ipynb @@ -25,7 +25,7 @@ "source": [ "#$HIDE_INPUT$\n", "import pandas as pd\n", - "pd.set_option('max_rows', 5)\n", + "pd.set_option('display.max_rows', 5)\n", "reviews = pd.read_csv(\"../input/wine-reviews/winemag-data-130k-v2.csv\", index_col=0)" ] }, @@ -140,7 +140,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -154,7 +154,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.12" } }, "nbformat": 4, From 7dc387ecf28447c1ea7f4013711dcbaaf3f1e92f Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Mon, 22 May 2023 13:58:11 -0400 Subject: [PATCH 14/55] [Game AI] install stable-baselines3 from repo --- notebooks/game_ai/raw/ex4.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/game_ai/raw/ex4.ipynb b/notebooks/game_ai/raw/ex4.ipynb index 36a00a9a8..83a3a2a35 100644 --- a/notebooks/game_ai/raw/ex4.ipynb +++ b/notebooks/game_ai/raw/ex4.ipynb @@ -190,7 +190,7 @@ "import torch as th\n", "import torch.nn as nn\n", "\n", - "!pip install \"stable-baselines3\"\n", + "!pip install git+https://github.com/DLR-RM/stable-baselines3 'shimmy>=0.2.1'\n", "from stable_baselines3 import PPO \n", "from stable_baselines3.common.torch_layers import BaseFeaturesExtractor\n", "\n", From 557d62f04b631c7bc32f25cc2ab817d05c16be24 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Mon, 22 May 2023 15:48:49 -0400 Subject: [PATCH 15/55] [Game AI] also need to update tutorial --- notebooks/game_ai/raw/tut4.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/game_ai/raw/tut4.ipynb 
b/notebooks/game_ai/raw/tut4.ipynb index 09a062f89..b268778ae 100644 --- a/notebooks/game_ai/raw/tut4.ipynb +++ b/notebooks/game_ai/raw/tut4.ipynb @@ -186,7 +186,7 @@ "import torch as th\n", "import torch.nn as nn\n", "\n", - "!pip install \"stable-baselines3\"\n", + "!pip install git+https://github.com/DLR-RM/stable-baselines3 'shimmy>=0.2.1'\n", "from stable_baselines3 import PPO \n", "from stable_baselines3.common.torch_layers import BaseFeaturesExtractor\n", "\n", From 74afd85cbaf3c8f8425d4eb6f767299a62ef2b53 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Wed, 21 Jun 2023 00:52:46 +0000 Subject: [PATCH 16/55] Update timeout to avoid build failures. http://b/287252878 --- notebooks/geospatial/raw/tut4.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/geospatial/raw/tut4.ipynb b/notebooks/geospatial/raw/tut4.ipynb index bd4b064b7..62a0bea4c 100644 --- a/notebooks/geospatial/raw/tut4.ipynb +++ b/notebooks/geospatial/raw/tut4.ipynb @@ -66,7 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "geolocator = Nominatim(user_agent=\"kaggle_learn\")\n", + "geolocator = Nominatim(user_agent=\"kaggle_learn\", timeout=5)\n", "location = geolocator.geocode(\"Pyramid of Khufu\")\n", "\n", "print(location.point)\n", From 1a67b73b70737743146b58c9ad3f40550f7f405b Mon Sep 17 00:00:00 2001 From: Bob Fraser Date: Fri, 30 Jun 2023 17:39:25 +0000 Subject: [PATCH 17/55] Fix image links Intro to programming Lesson 4 had some missing image files. This change fixes the URLs in the exercise so that they point to the pngs. 
Bug: 289537275 --- notebooks/intro_to_programming/raw/ex4.ipynb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/notebooks/intro_to_programming/raw/ex4.ipynb b/notebooks/intro_to_programming/raw/ex4.ipynb index 53bec97ff..14b7ba0d4 100644 --- a/notebooks/intro_to_programming/raw/ex4.ipynb +++ b/notebooks/intro_to_programming/raw/ex4.ipynb @@ -533,7 +533,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/Cfcx72e) with all of the nutritional information. Note that for this food,\n", + "The next code cell demonstrates how to use `get_labels()` to get the warning labels that the food item should contain. We begin with [bologna](https://world.openfoodfacts.org/product/4099100179378/bologna). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/Cfcx72e.png) with all of the nutritional information. Note that for this food,\n", "- `food_type = \"solid\"` (because bologna is a solid and not a liquid)\n", "- `serving_size = 32` (the serving size is 32 grams)\n", "- `calories_per_serving = 110` (there are 110 calories per serving)\n", @@ -573,7 +573,7 @@ "\n", "In general, as you continue coding in Python, you will often be running code that other people have written. This is common practice for advanced programmers.\n", "\n", - "In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/MUxzHVU) with all of the nutritional information.\n", + "In the next code cell, fill in the values for [this cereal](https://world.openfoodfacts.org/product/7501008023624/zucaritas-kellogg-s). 
Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/MUxzHVU.png) with all of the nutritional information.\n", "\n", "**Note**: running the line of code below as-is will return an error. You have to fill in the nutritional values first." ] @@ -601,7 +601,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/rcdB7VH) with all of the nutritional information." + "Next, try [these mozzarella sticks](https://world-es.openfoodfacts.org/producto/0062325540104/mozzarella-cheese-sticks). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/rcdB7VH.png) with all of the nutritional information." ] }, { @@ -624,12 +624,13 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Feel free to skip to the end of the notebook now and run `q5.check()` to complete the exercise. If you want to try more foods, \n", - "- try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/2Bc271o) with all of the nutritional information.\n", - "- try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/RsBYa8E) with all of the nutritional information.\n", + "- try [these cookies](https://world.openfoodfacts.org/product/0069700118545/biscuits-au-sucre-pretraches). Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/2Bc271o.png) with all of the nutritional information.\n", + "- try [this soda](https://world-es.openfoodfacts.org/producto/0078000113464/orange-soda-sunkist). 
Here is [an image](https://storage.googleapis.com/kaggle-media/learn/images/RsBYa8E.png) with all of the nutritional information.\n", "\n", "Use the two code cells below for this." ] From 93cf102cac95c2d8d7fb96ccc00ab5acc2c4bf05 Mon Sep 17 00:00:00 2001 From: Ryan Holbrook Date: Fri, 21 Jul 2023 11:58:56 -0500 Subject: [PATCH 18/55] Fix broken image link --- notebooks/feature_engineering_new/raw/tut4.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/feature_engineering_new/raw/tut4.ipynb b/notebooks/feature_engineering_new/raw/tut4.ipynb index e9de1b4c1..4b903a9f0 100644 --- a/notebooks/feature_engineering_new/raw/tut4.ipynb +++ b/notebooks/feature_engineering_new/raw/tut4.ipynb @@ -59,7 +59,7 @@ "The clustering on the [*Ames*](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data) dataset above is a k-means clustering. Here is the same figure with the tessallation and centroids shown.\n", "\n", "
\n", - "\"\"\n", + "\"\"\n", "
K-means clustering creates a Voronoi tessallation of the feature space.\n", "
\n", "
\n", From 12ca9009b8a3a305b5ce23202125b01ea9eca782 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 22 Aug 2023 16:30:51 +0000 Subject: [PATCH 19/55] Remove slack notifications http://b/297034826 --- Jenkinsfile | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index cf4e58809..ecf88e084 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,7 +8,7 @@ pipeline { GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() GIT_COMMIT_SUMMARY = "`` ${GIT_COMMIT_SUBJECT} - ${GIT_COMMIT_AUTHOR}" - SLACK_CHANNEL = "#learnops" + MATTERMOST_CHANNEL = "#learnops" KAGGLE_KEY = credentials('KAGGLE_API_KEY') KAGGLE_USERNAME = 'dansbecker' } @@ -35,16 +35,13 @@ pipeline { post { failure { - slackSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.SLACK_CHANNEL + mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL } success { - slackSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.SLACK_CHANNEL + mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL } aborted { - slackSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - mattermostSend color: 'warning', 
message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.SLACK_CHANNEL + mattermostSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL } } } From 8bfca82e72c4980ab976b488c69123dee9b771ca Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 29 Aug 2023 19:06:27 +0000 Subject: [PATCH 20/55] Fix utils#plot_periodogram Following pandas upgrade, support for ambiguous timedelta such as `1M` and `1Y` have been dropped. --- learntools/time_series/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/time_series/utils.py b/learntools/time_series/utils.py index 56809ba6e..8aa805169 100644 --- a/learntools/time_series/utils.py +++ b/learntools/time_series/utils.py @@ -40,7 +40,7 @@ def seasonal_plot(X, y, period, freq, ax=None): def plot_periodogram(ts, detrend='linear', ax=None): from scipy.signal import periodogram - fs = pd.Timedelta("1Y") / pd.Timedelta("1D") + fs = pd.Timedelta("365D") / pd.Timedelta("1D") freqencies, spectrum = periodogram( ts, fs=fs, From 7aeb510daa4f60d439eaf52a9ec9ffb5b28849e2 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 29 Aug 2023 19:52:49 +0000 Subject: [PATCH 21/55] more fixes --- learntools/time_series/ex3.py | 2 +- learntools/time_series/ex4.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/learntools/time_series/ex3.py b/learntools/time_series/ex3.py index 05022dae4..5e2e31c01 100644 --- a/learntools/time_series/ex3.py +++ b/learntools/time_series/ex3.py @@ -14,7 +14,7 @@ class Q2(CodingProblem): # Create seasonal features def check(self, dp, X): from statsmodels.tsa.deterministic import (CalendarFourier, DeterministicProcess) - y = load_average_sales()['2017'] + y = load_average_sales().loc['2017'] fourier = CalendarFourier(freq='M', order=4) dp = DeterministicProcess( index=y.index, diff --git 
a/learntools/time_series/ex4.py b/learntools/time_series/ex4.py index 24f28f508..c6fd86435 100644 --- a/learntools/time_series/ex4.py +++ b/learntools/time_series/ex4.py @@ -1,5 +1,5 @@ from learntools.core import * -from learntools.time_series.checking_utils import load_store_sales, load_family_sales +from learntools.time_series.checking_utils import load_family_sales from learntools.time_series.utils import make_lags, make_leads From 8e355c7eb8146af2bdd8d18961a3eaf9cc2b5627 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 29 Aug 2023 20:26:00 +0000 Subject: [PATCH 22/55] more fixes --- notebooks/time_series/raw/tut3.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/time_series/raw/tut3.ipynb b/notebooks/time_series/raw/tut3.ipynb index adb005434..bfc636510 100644 --- a/notebooks/time_series/raw/tut3.ipynb +++ b/notebooks/time_series/raw/tut3.ipynb @@ -219,7 +219,7 @@ "\n", "def plot_periodogram(ts, detrend='linear', ax=None):\n", " from scipy.signal import periodogram\n", - " fs = pd.Timedelta(\"1Y\") / pd.Timedelta(\"1D\")\n", + " fs = pd.Timedelta(\"365D\") / pd.Timedelta(\"1D\")\n", " freqencies, spectrum = periodogram(\n", " ts,\n", " fs=fs,\n", From 05f5702a03bb8dfb10da297471cd21d0f2db8d88 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 29 Aug 2023 22:22:13 +0000 Subject: [PATCH 23/55] Fix feature_engineering_new track --- notebooks/feature_engineering_new/raw/tut_bonus.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/feature_engineering_new/raw/tut_bonus.ipynb b/notebooks/feature_engineering_new/raw/tut_bonus.ipynb index 805ea432d..c9ddac641 100644 --- a/notebooks/feature_engineering_new/raw/tut_bonus.ipynb +++ b/notebooks/feature_engineering_new/raw/tut_bonus.ipynb @@ -217,7 +217,7 @@ " df[name] = df[name].astype(\"category\")\n", " # Add a None category for missing values\n", " if \"None\" not in df[name].cat.categories:\n", - " 
df[name].cat.add_categories(\"None\", inplace=True)\n", + " df[name] = df[name].cat.add_categories(\"None\")\n", " # Ordinal categories\n", " for name, levels in ordered_levels.items():\n", " df[name] = df[name].astype(CategoricalDtype(levels,\n", @@ -707,7 +707,7 @@ "source": [ "def corrplot(df, method=\"pearson\", annot=True, **kwargs):\n", " sns.clustermap(\n", - " df.corr(method),\n", + " df.corr(method, numeric_only=True),\n", " vmin=-1.0,\n", " vmax=1.0,\n", " cmap=\"icefire\",\n", From efd47d34c545a7599a78d97e1fdcfd3bee1287f9 Mon Sep 17 00:00:00 2001 From: Dustin Herbison Date: Wed, 27 Sep 2023 16:36:33 +0000 Subject: [PATCH 24/55] Migrate seaborn-whitegrid to seaborn-v0_8-whitegrid http://b/302323147 --- learntools/deep_learning_intro/dltools.py | 2 +- learntools/time_series/style.py | 2 +- notebooks/deep_learning_intro/raw/ex1.ipynb | 2 +- notebooks/deep_learning_intro/raw/ex2.ipynb | 2 +- notebooks/deep_learning_intro/raw/ex3.ipynb | 2 +- notebooks/deep_learning_intro/raw/ex4.ipynb | 2 +- notebooks/deep_learning_intro/raw/ex5.ipynb | 2 +- notebooks/deep_learning_intro/raw/ex6.ipynb | 2 +- notebooks/deep_learning_intro/raw/tut5.ipynb | 2 +- notebooks/deep_learning_intro/raw/tut_tpus.ipynb | 2 +- notebooks/feature_engineering_new/raw/ex2.ipynb | 2 +- notebooks/feature_engineering_new/raw/ex4.ipynb | 2 +- notebooks/feature_engineering_new/raw/ex5.ipynb | 2 +- notebooks/feature_engineering_new/raw/ex6.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut2.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut3.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut4.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut5.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut6.ipynb | 2 +- notebooks/feature_engineering_new/raw/tut_bonus.ipynb | 2 +- .../raw/what_is_feature_engineering_ex.ipynb | 2 +- notebooks/time_series/raw/tut1.ipynb | 4 ++-- notebooks/time_series/raw/tut2.ipynb | 2 +- notebooks/time_series/raw/tut3.ipynb | 2 +- 
notebooks/time_series/raw/tut4.ipynb | 2 +- notebooks/time_series/raw/tut5.ipynb | 2 +- notebooks/time_series/raw/tut6.ipynb | 2 +- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/learntools/deep_learning_intro/dltools.py b/learntools/deep_learning_intro/dltools.py index 75d2db2da..385b75b7e 100644 --- a/learntools/deep_learning_intro/dltools.py +++ b/learntools/deep_learning_intro/dltools.py @@ -7,7 +7,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib import animation -plt.style.use('seaborn-whitegrid') +plt.style.use('seaborn-v0_8-whitegrid') # NUM_EXAMPLES = 256 # BATCH_SIZE = 8 diff --git a/learntools/time_series/style.py b/learntools/time_series/style.py index b29c3bd08..d8adc3aa7 100644 --- a/learntools/time_series/style.py +++ b/learntools/time_series/style.py @@ -5,7 +5,7 @@ warnings.simplefilter("ignore") -plt.style.use("seaborn-whitegrid") +plt.style.use("seaborn-v0_8-whitegrid") plt.rc( "figure", autolayout=True, diff --git a/notebooks/deep_learning_intro/raw/ex1.ipynb b/notebooks/deep_learning_intro/raw/ex1.ipynb index 88c0a8e5c..a33790021 100644 --- a/notebooks/deep_learning_intro/raw/ex1.ipynb +++ b/notebooks/deep_learning_intro/raw/ex1.ipynb @@ -20,7 +20,7 @@ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", "\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/ex2.ipynb b/notebooks/deep_learning_intro/raw/ex2.ipynb index ca773ab11..53c289bdc 100644 --- a/notebooks/deep_learning_intro/raw/ex2.ipynb +++ b/notebooks/deep_learning_intro/raw/ex2.ipynb @@ -22,7 +22,7 @@ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", "\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", 
"plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/ex3.ipynb b/notebooks/deep_learning_intro/raw/ex3.ipynb index db452ed13..ae08d502b 100644 --- a/notebooks/deep_learning_intro/raw/ex3.ipynb +++ b/notebooks/deep_learning_intro/raw/ex3.ipynb @@ -20,7 +20,7 @@ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", "from learntools.deep_learning_intro.dltools import animate_sgd\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/ex4.ipynb b/notebooks/deep_learning_intro/raw/ex4.ipynb index 7e34cb9c6..e838a2e43 100644 --- a/notebooks/deep_learning_intro/raw/ex4.ipynb +++ b/notebooks/deep_learning_intro/raw/ex4.ipynb @@ -19,7 +19,7 @@ "source": [ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/ex5.ipynb b/notebooks/deep_learning_intro/raw/ex5.ipynb index 97aad09ff..33c8a28ce 100644 --- a/notebooks/deep_learning_intro/raw/ex5.ipynb +++ b/notebooks/deep_learning_intro/raw/ex5.ipynb @@ -19,7 +19,7 @@ "source": [ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/ex6.ipynb b/notebooks/deep_learning_intro/raw/ex6.ipynb index a3feebda8..deb2bd8e6 100644 --- a/notebooks/deep_learning_intro/raw/ex6.ipynb +++ b/notebooks/deep_learning_intro/raw/ex6.ipynb @@ -19,7 +19,7 
@@ "source": [ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/tut5.ipynb b/notebooks/deep_learning_intro/raw/tut5.ipynb index c34266790..de1188930 100644 --- a/notebooks/deep_learning_intro/raw/tut5.ipynb +++ b/notebooks/deep_learning_intro/raw/tut5.ipynb @@ -82,7 +82,7 @@ "# Setup plotting\n", "import matplotlib.pyplot as plt\n", "\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "# Set Matplotlib defaults\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", diff --git a/notebooks/deep_learning_intro/raw/tut_tpus.ipynb b/notebooks/deep_learning_intro/raw/tut_tpus.ipynb index e900f9dc3..3ebaaf136 100644 --- a/notebooks/deep_learning_intro/raw/tut_tpus.ipynb +++ b/notebooks/deep_learning_intro/raw/tut_tpus.ipynb @@ -85,7 +85,7 @@ "import matplotlib.pyplot as plt\n", "\n", "# Matplotlib defaults\n", - "plt.style.use('seaborn-whitegrid')\n", + "plt.style.use('seaborn-v0_8-whitegrid')\n", "plt.rc('figure', autolayout=True)\n", "plt.rc('axes', labelweight='bold', labelsize='large',\n", " titleweight='bold', titlesize=18, titlepad=10)\n", diff --git a/notebooks/feature_engineering_new/raw/ex2.ipynb b/notebooks/feature_engineering_new/raw/ex2.ipynb index d463f671f..871f1dd33 100644 --- a/notebooks/feature_engineering_new/raw/ex2.ipynb +++ b/notebooks/feature_engineering_new/raw/ex2.ipynb @@ -29,7 +29,7 @@ "from sklearn.feature_selection import mutual_info_regression\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git 
a/notebooks/feature_engineering_new/raw/ex4.ipynb b/notebooks/feature_engineering_new/raw/ex4.ipynb index 9e8b40952..a752b98e0 100644 --- a/notebooks/feature_engineering_new/raw/ex4.ipynb +++ b/notebooks/feature_engineering_new/raw/ex4.ipynb @@ -31,7 +31,7 @@ "from xgboost import XGBRegressor\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/ex5.ipynb b/notebooks/feature_engineering_new/raw/ex5.ipynb index 4a5cb1e3c..425e4b655 100644 --- a/notebooks/feature_engineering_new/raw/ex5.ipynb +++ b/notebooks/feature_engineering_new/raw/ex5.ipynb @@ -37,7 +37,7 @@ "from xgboost import XGBRegressor\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/ex6.ipynb b/notebooks/feature_engineering_new/raw/ex6.ipynb index 2e8d13787..d414fb937 100644 --- a/notebooks/feature_engineering_new/raw/ex6.ipynb +++ b/notebooks/feature_engineering_new/raw/ex6.ipynb @@ -32,7 +32,7 @@ "from xgboost import XGBRegressor\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/tut2.ipynb b/notebooks/feature_engineering_new/raw/tut2.ipynb index cf2c80529..7a8142825 100644 --- a/notebooks/feature_engineering_new/raw/tut2.ipynb +++ b/notebooks/feature_engineering_new/raw/tut2.ipynb @@ -71,7 +71,7 @@ "import pandas as pd\n", "import seaborn as sns\n", "\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "\n", "df = 
pd.read_csv(\"../input/fe-course-data/autos.csv\")\n", "df.head()" diff --git a/notebooks/feature_engineering_new/raw/tut3.ipynb b/notebooks/feature_engineering_new/raw/tut3.ipynb index c4640f563..6e0ab0f07 100644 --- a/notebooks/feature_engineering_new/raw/tut3.ipynb +++ b/notebooks/feature_engineering_new/raw/tut3.ipynb @@ -23,7 +23,7 @@ "import pandas as pd\n", "import seaborn as sns\n", "\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/tut4.ipynb b/notebooks/feature_engineering_new/raw/tut4.ipynb index 4b903a9f0..ad95d5bac 100644 --- a/notebooks/feature_engineering_new/raw/tut4.ipynb +++ b/notebooks/feature_engineering_new/raw/tut4.ipynb @@ -101,7 +101,7 @@ "import seaborn as sns\n", "from sklearn.cluster import KMeans\n", "\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/tut5.ipynb b/notebooks/feature_engineering_new/raw/tut5.ipynb index 222e3757e..f19e85cf8 100644 --- a/notebooks/feature_engineering_new/raw/tut5.ipynb +++ b/notebooks/feature_engineering_new/raw/tut5.ipynb @@ -116,7 +116,7 @@ "from sklearn.feature_selection import mutual_info_regression\n", "\n", "\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/tut6.ipynb b/notebooks/feature_engineering_new/raw/tut6.ipynb index 5ec5199b0..d84ef8d2a 100644 --- a/notebooks/feature_engineering_new/raw/tut6.ipynb +++ b/notebooks/feature_engineering_new/raw/tut6.ipynb @@ -111,7 +111,7 @@ "import seaborn as sns\n", "import warnings\n", "\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + 
"plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/tut_bonus.ipynb b/notebooks/feature_engineering_new/raw/tut_bonus.ipynb index c9ddac641..69d799da6 100644 --- a/notebooks/feature_engineering_new/raw/tut_bonus.ipynb +++ b/notebooks/feature_engineering_new/raw/tut_bonus.ipynb @@ -46,7 +46,7 @@ "\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/feature_engineering_new/raw/what_is_feature_engineering_ex.ipynb b/notebooks/feature_engineering_new/raw/what_is_feature_engineering_ex.ipynb index 399bddfc0..31a1f2d87 100644 --- a/notebooks/feature_engineering_new/raw/what_is_feature_engineering_ex.ipynb +++ b/notebooks/feature_engineering_new/raw/what_is_feature_engineering_ex.ipynb @@ -30,7 +30,7 @@ "from xgboost import XGBRegressor\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True)\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/time_series/raw/tut1.ipynb b/notebooks/time_series/raw/tut1.ipynb index e4549a0e2..6a30fc0dd 100644 --- a/notebooks/time_series/raw/tut1.ipynb +++ b/notebooks/time_series/raw/tut1.ipynb @@ -108,7 +108,7 @@ "#$HIDE_INPUT$\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\n", " \"figure\",\n", " autolayout=True,\n", @@ -224,7 +224,7 @@ "simplefilter(\"ignore\") # ignore warnings to clean up output cells\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True, figsize=(11, 4))\n", "plt.rc(\n", " 
\"axes\",\n", diff --git a/notebooks/time_series/raw/tut2.ipynb b/notebooks/time_series/raw/tut2.ipynb index 7f7c7247b..1afc9175b 100644 --- a/notebooks/time_series/raw/tut2.ipynb +++ b/notebooks/time_series/raw/tut2.ipynb @@ -81,7 +81,7 @@ "simplefilter(\"ignore\") # ignore warnings to clean up output cells\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True, figsize=(11, 5))\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/time_series/raw/tut3.ipynb b/notebooks/time_series/raw/tut3.ipynb index bfc636510..55e21b97d 100644 --- a/notebooks/time_series/raw/tut3.ipynb +++ b/notebooks/time_series/raw/tut3.ipynb @@ -166,7 +166,7 @@ "simplefilter(\"ignore\")\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True, figsize=(11, 5))\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/time_series/raw/tut4.ipynb b/notebooks/time_series/raw/tut4.ipynb index ad5beacfb..45f3b9901 100644 --- a/notebooks/time_series/raw/tut4.ipynb +++ b/notebooks/time_series/raw/tut4.ipynb @@ -137,7 +137,7 @@ "simplefilter(\"ignore\")\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True, figsize=(11, 4))\n", "plt.rc(\n", " \"axes\",\n", diff --git a/notebooks/time_series/raw/tut5.ipynb b/notebooks/time_series/raw/tut5.ipynb index 8ea0c3508..447f76fc7 100644 --- a/notebooks/time_series/raw/tut5.ipynb +++ b/notebooks/time_series/raw/tut5.ipynb @@ -124,7 +124,7 @@ "simplefilter(\"ignore\")\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\n", " \"figure\",\n", " autolayout=True,\n", diff --git a/notebooks/time_series/raw/tut6.ipynb 
b/notebooks/time_series/raw/tut6.ipynb index 29db4dec1..362bd45bb 100644 --- a/notebooks/time_series/raw/tut6.ipynb +++ b/notebooks/time_series/raw/tut6.ipynb @@ -163,7 +163,7 @@ "simplefilter(\"ignore\")\n", "\n", "# Set Matplotlib defaults\n", - "plt.style.use(\"seaborn-whitegrid\")\n", + "plt.style.use(\"seaborn-v0_8-whitegrid\")\n", "plt.rc(\"figure\", autolayout=True, figsize=(11, 4))\n", "plt.rc(\n", " \"axes\",\n", From 36c72e9d8af3366f1cc30d7d0ac6493292af9f95 Mon Sep 17 00:00:00 2001 From: mcbex Date: Wed, 10 Jan 2024 14:49:28 -0500 Subject: [PATCH 25/55] change table to match updated dataset --- learntools/sql/ex3.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/learntools/sql/ex3.py b/learntools/sql/ex3.py index f9b0f90a7..311652140 100644 --- a/learntools/sql/ex3.py +++ b/learntools/sql/ex3.py @@ -8,8 +8,8 @@ # (1) ProlificCommenters prolific_commenters_query = """ - SELECT author, COUNT(id) AS NumPosts - FROM `bigquery-public-data.hacker_news.comments` + SELECT `by` AS author, COUNT(id) AS NumPosts + FROM `bigquery-public-data.hacker_news.full` GROUP BY author HAVING COUNT(id) > 10000 """ @@ -19,7 +19,7 @@ # (2) NumDeletedPosts deleted_posts_query = """ SELECT COUNT(1) AS num_deleted_posts - FROM `bigquery-public-data.hacker_news.comments` + FROM `bigquery-public-data.hacker_news.full` WHERE deleted = True """ query_job = client.query(deleted_posts_query) @@ -46,8 +46,8 @@ def check(self, results): _solution = CS(\ """ prolific_commenters_query = \""" - SELECT author, COUNT(1) AS NumPosts - FROM `bigquery-public-data.hacker_news.comments` + SELECT `by` AS author, COUNT(1) AS NumPosts + FROM `bigquery-public-data.hacker_news.full` GROUP BY author HAVING COUNT(1) > 10000 \""" @@ -62,7 +62,7 @@ class NumDeletedPosts(EqualityCheckProblem): # Query to determine how many posts were deleted deleted_posts_query = \""" SELECT COUNT(1) AS num_deleted_posts - FROM `bigquery-public-data.hacker_news.comments` + FROM 
`bigquery-public-data.hacker_news.full` WHERE deleted = True \""" From 3332b19a7d8c34e96d40cccbf9e292cd36501f2f Mon Sep 17 00:00:00 2001 From: mcbex Date: Wed, 10 Jan 2024 14:55:16 -0500 Subject: [PATCH 26/55] update exercise --- notebooks/sql/raw/ex3.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/notebooks/sql/raw/ex3.ipynb b/notebooks/sql/raw/ex3.ipynb index a41f7609d..4e0a33697 100644 --- a/notebooks/sql/raw/ex3.ipynb +++ b/notebooks/sql/raw/ex3.ipynb @@ -28,7 +28,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The code cell below fetches the `comments` table from the `hacker_news` dataset. We also preview the first five rows of the table." + "The code cell below fetches the `full` table from the `hacker_news` dataset. We also preview the first five rows of the table." ] }, { @@ -48,13 +48,13 @@ "# API request - fetch the dataset\n", "dataset = client.get_dataset(dataset_ref)\n", "\n", - "# Construct a reference to the \"comments\" table\n", - "table_ref = dataset_ref.table(\"comments\")\n", + "# Construct a reference to the \"full\" table\n", + "table_ref = dataset_ref.table(\"full\")\n", "\n", "# API request - fetch the table\n", "table = client.get_table(table_ref)\n", "\n", - "# Preview the first five lines of the \"comments\" table\n", + "# Preview the first five lines of the table\n", "client.list_rows(table, max_results=5).to_dataframe()" ] }, @@ -72,7 +72,7 @@ "```\n", "query = \"\"\"\n", " SELECT parent, COUNT(1) AS NumPosts\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(1) > 10\n", " \"\"\"\n", @@ -86,7 +86,7 @@ "outputs": [], "source": [ "# Query to select prolific commenters and post counts\n", - "prolific_commenters_query = ____ # Your code goes here\n", + "prolific_commenters_query = \"\"\"____\"\"\" # Your code goes here\n", "\n", "# Set up the query (cancel the query if it would use too much 
of \n", "# your quota, with the limit set to 1 GB)\n", @@ -125,7 +125,7 @@ "source": [ "### 2) Deleted comments\n", "\n", - "How many comments have been deleted? (If a comment was deleted, the `deleted` column in the comments table will have the value `True`.)" + "How many comments have been deleted? (If a comment was deleted, the `deleted` column in the table will have the value `True`.)" ] }, { From 336be2b3c997a8b7a7807d0a7955412afb873bc2 Mon Sep 17 00:00:00 2001 From: mcbex Date: Wed, 10 Jan 2024 15:13:39 -0500 Subject: [PATCH 27/55] update tutorial and add comment about quoting reserved words --- notebooks/sql/raw/tut3.ipynb | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/notebooks/sql/raw/tut3.ipynb b/notebooks/sql/raw/tut3.ipynb index a906d52e4..dc624491d 100644 --- a/notebooks/sql/raw/tut3.ipynb +++ b/notebooks/sql/raw/tut3.ipynb @@ -50,7 +50,7 @@ "\n", "Ready to see an example on a real dataset? The Hacker News dataset contains information on stories and comments from the Hacker News social networking site. \n", "\n", - "We'll work with the `comments` table and begin by printing the first few rows. (_We have hidden the corresponding code. To take a peek, click on the \"Code\" button below._)" + "We'll work with the `full` table and begin by printing the first few rows. (_We have hidden the corresponding code. 
To take a peek, click on the \"Code\" button below._)" ] }, { @@ -73,13 +73,13 @@ "# API request - fetch the dataset\n", "dataset = client.get_dataset(dataset_ref)\n", "\n", - "# Construct a reference to the \"comments\" table\n", - "table_ref = dataset_ref.table(\"comments\")\n", + "# Construct a reference to the \"full\" table\n", + "table_ref = dataset_ref.table(\"full\")\n", "\n", "# API request - fetch the table\n", "table = client.get_table(table_ref)\n", "\n", - "# Preview the first five lines of the \"comments\" table\n", + "# Preview the first five lines of the table\n", "client.list_rows(table, max_results=5).to_dataframe()" ] }, @@ -105,7 +105,7 @@ "# Query to select comments that received more than 10 replies\n", "query_popular = \"\"\"\n", " SELECT parent, COUNT(id)\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(id) > 10\n", " \"\"\"" @@ -160,7 +160,7 @@ "# Improved version of earlier query, now with aliasing & improved readability\n", "query_improved = \"\"\"\n", " SELECT parent, COUNT(1) AS NumPosts\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(1) > 10\n", " \"\"\"\n", @@ -199,7 +199,7 @@ "source": [ "query_good = \"\"\"\n", " SELECT parent, COUNT(id)\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " \"\"\"" ] @@ -222,8 +222,8 @@ "outputs": [], "source": [ "query_bad = \"\"\"\n", - " SELECT author, parent, COUNT(id)\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " SELECT `by` AS author, parent, COUNT(id)\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " \"\"\"" ] @@ -234,6 +234,8 @@ "source": [ "If make this error, you'll get the error message `SELECT list expression references column (column's name) which is 
neither grouped nor aggregated at`.\n", "\n", + "You may notice the `` `by` `` column in this query is surrounded by backticks. This is because **BY** is a reserved keyword used in clauses including **GROUP BY**. In BigQuery reserved keywords used as identifiers must be quoted in backticks to avoid an error. We also make subsequent references to this column more readable by adding an alias to rename it to `author`.\n", + "\n", "# Your turn\n", "\n", "These aggregations let you write much more interesting queries. Try it yourself with **[these coding exercises](#$NEXT_NOTEBOOK_URL$)**." From 5fe9ba7413d5b73e08d3916a35088d43d4788de4 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Wed, 24 Jan 2024 16:38:31 +0000 Subject: [PATCH 28/55] Update push documentation. --- notebooks/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/notebooks/README.md b/notebooks/README.md index 517d9140a..6385addc4 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -43,10 +43,14 @@ If you create further config files (e.g. `testing.yaml`), they will generate the `prepare_push.py` translates notebooks in `raw/` to publishable notebooks in `rendered/`. The logic for this step mostly lives in `lesson_preprocessor.py`. Most of its work is in expanding macros which look like `#$HIDE_OUTPUT$`, or `#$EXERCISE_URL(2)$`. See MACROS.txt for a listing of available macros. +`prepare_push.py` requires a few libraries that may need to be installed before continuing. + The Kaggle Kernels API requires a `kernel-metadata.json` file for any kernel being pushed to the site. `prepare_push.py` also generates these in the `kernels_api_metadata` subdirectory. ## Step 2: Pushing +The push process uses the Kaggle Kernels API, so it requires a valid API key with write permissions to the learn notebooks to update. 
+ Use ./pushall.sh as ./pushall deep_learning/prod From 9e54093d6054e5d4ef8822e19b7535fc54e212ba Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Tue, 30 Jan 2024 19:16:45 +0000 Subject: [PATCH 29/55] Add more pushing documentation. --- notebooks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/README.md b/notebooks/README.md index 6385addc4..d57374e59 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -49,7 +49,7 @@ The Kaggle Kernels API requires a `kernel-metadata.json` file for any kernel bei ## Step 2: Pushing -The push process uses the Kaggle Kernels API, so it requires a valid API key with write permissions to the learn notebooks to update. +The push process uses the Kaggle Kernels API, so it requires a valid API key with write permissions to the learn notebooks to update. Additionally, if pushing an exercise, the pushing account should be included in the `ops-kernels-commits-allow-errors` flag, which enables the notebook to be published despite known errors where the Learn users will fill in the missing code. Use ./pushall.sh as ./pushall deep_learning/prod From c3b0c70c95e8b1c5d6ab03ade5fa3bb51233dce2 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Fri, 9 Feb 2024 20:47:13 +0000 Subject: [PATCH 30/55] Added exemption due to keras upgrade --- Jenkinsfile | 1 + notebooks/test.sh | 13 ++++++++++--- test.sh | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ecf88e084..a043dfea3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -20,6 +20,7 @@ pipeline { set -exo pipefail # Ensures the currently released Docker Python image is used. 
docker pull gcr.io/kaggle-images/python:staging + docker pull gcr.io/kaggle-images/python@sha256:287c4e0e224e592dc6113940a6cf3d099b814c7bff0c1e8da57f8e6bad123ac5 ''' } } diff --git a/notebooks/test.sh b/notebooks/test.sh index ad70b09f2..92d0da007 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -11,13 +11,20 @@ if [[ -r /etc/git_commit ]]; then fi # Filter by tracks if first argument set. -TRACKS="ml_explainability intro_to_programming time_series ethics feature_engineering_new computer_vision deep_learning_intro pandas python machine_learning sql data_viz_to_coder ml_intermediate sql_advanced feature_engineering geospatial nlp game_ai data_cleaning" -TESTABLE_NOTEBOOK_TRACKS="ml_intermediate ml_explainability intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder data_cleaning computer_vision deep_learning_intro python pandas machine_learning game_ai" +TRACKS="ml_explainability intro_to_programming time_series ethics feature_engineering_new pandas python machine_learning sql data_viz_to_coder ml_intermediate sql_advanced feature_engineering geospatial nlp game_ai data_cleaning" +TESTABLE_NOTEBOOK_TRACKS="ml_intermediate ml_explainability intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder data_cleaning python pandas machine_learning game_ai" +PARTIAL_TESTABLE_NOTEBOOK_TRACKS="computer_vision deep_learning_intro" -if [[ -n $1 && $1 != "all" ]]; then +if [[ -n $1 && $1 != "all" && $1 != "kerasExp" ]]; then TRACKS=$1 TESTABLE_NOTEBOOK_TRACKS=$1 fi + +if [[ -n $1 && $1 == "kerasExp" ]]; then + TRACKS=$PARTIAL_TESTABLE_NOTEBOOK_TRACKS + TESTABLE_NOTEBOOK_TRACKS=$PARTIAL_TESTABLE_NOTEBOOK_TRACKS +fi + readonly TRACKS readonly TESTABLE_NOTEBOOK_TRACKS diff --git a/test.sh b/test.sh index 483d9570e..303df0f58 100755 --- a/test.sh +++ b/test.sh @@ -2,6 +2,7 @@ set -e IMAGE='gcr.io/kaggle-images/python:staging' 
+PINNED_IMAGE='gcr.io/kaggle-images/python@sha256:287c4e0e224e592dc6113940a6cf3d099b814c7bff0c1e8da57f8e6bad123ac5' TRACK='all' NOTEBOOK='all' @@ -76,6 +77,25 @@ if [[ -z $KAGGLE_KEY && ! ( -r "$HOME/.kaggle/kaggle.json" ) ]]; then fi set -x + +if [[ $NOTEBOOK == "all" ]]; then + docker run --rm -t \ + -e KAGGLE_USERNAME -e KAGGLE_KEY \ + -v ~/.kaggle:/root/.kaggle:ro \ + -v $PWD:/input:ro \ + $PINNED_IMAGE \ + /bin/bash -c "/input/notebooks/test.sh kerasExp" +fi + +if [[ $NOTEBOOK == "computer_vision" || $NOTEBOOK == "deep_learning_intro" ]]; then + docker run --rm -t \ + -e KAGGLE_USERNAME -e KAGGLE_KEY \ + -v ~/.kaggle:/root/.kaggle:ro \ + -v $PWD:/input:ro \ + $PINNED_IMAGE \ + /bin/bash -c "/input/notebooks/test.sh $TRACK $NOTEBOOK" +fi + docker run --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ From 9a53ec1d9ab6c10a363ff35aea3f155fd0521433 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Fri, 9 Feb 2024 22:13:39 +0000 Subject: [PATCH 31/55] reviewer feedback pt1 --- Jenkinsfile | 4 ++-- test.sh | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a043dfea3..81c3580c1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -18,9 +18,9 @@ pipeline { steps { sh '''#!/bin/bash set -exo pipefail - # Ensures the currently released Docker Python image is used. + # Ensures both pinned and unpinned notebook are supported docker pull gcr.io/kaggle-images/python:staging - docker pull gcr.io/kaggle-images/python@sha256:287c4e0e224e592dc6113940a6cf3d099b814c7bff0c1e8da57f8e6bad123ac5 + docker pull gcr.io/kaggle-images/python:v143 ''' } } diff --git a/test.sh b/test.sh index 303df0f58..1face2433 100755 --- a/test.sh +++ b/test.sh @@ -78,22 +78,25 @@ fi set -x -if [[ $NOTEBOOK == "all" ]]; then +# Allows pinned notebooks to be tested independently. 
+if [[ $NOTEBOOK == "computer_vision" || $NOTEBOOK == "deep_learning_intro" ]]; then docker run --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ -v $PWD:/input:ro \ $PINNED_IMAGE \ - /bin/bash -c "/input/notebooks/test.sh kerasExp" + /bin/bash -c "/input/notebooks/test.sh $TRACK $NOTEBOOK" + exit fi -if [[ $NOTEBOOK == "computer_vision" || $NOTEBOOK == "deep_learning_intro" ]]; then + +if [[ $NOTEBOOK == "all" ]]; then docker run --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ -v $PWD:/input:ro \ $PINNED_IMAGE \ - /bin/bash -c "/input/notebooks/test.sh $TRACK $NOTEBOOK" + /bin/bash -c "/input/notebooks/test.sh kerasExp" fi docker run --rm -t \ From 9c70809b7438b1837d75f0535f6f8b690004c20b Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Wed, 21 Feb 2024 16:16:14 +0000 Subject: [PATCH 32/55] Enable kernels to be pinned to the original docker image Also applies original pinning type to the Computer Vision and Deep Learning tracks, which would be broken by the Keras 3 upgrade. These have already been pinned via direct DB update, but this change is needed to make sure that they don't revert back to latest during the next push. 
http://b/324889848 --- notebooks/computer_vision/track_meta.py | 26 ++-- notebooks/deep_learning/track_meta.py | 161 +++++++++++++----------- notebooks/nb_utils/track_metadata.py | 4 +- 3 files changed, 110 insertions(+), 81 deletions(-) diff --git a/notebooks/computer_vision/track_meta.py b/notebooks/computer_vision/track_meta.py index 44f244825..133290dc2 100644 --- a/notebooks/computer_vision/track_meta.py +++ b/notebooks/computer_vision/track_meta.py @@ -24,73 +24,85 @@ lesson_idx=0, type='tutorial', enable_gpu=True, + docker_image_pinning_type="original", ), dict( filename="ex1.ipynb", lesson_idx=0, type='exercise', enable_gpu=True, - scriptid=10781907 + scriptid=10781907, + docker_image_pinning_type="original", ), dict( filename="tut2.ipynb", lesson_idx=1, type='tutorial', + docker_image_pinning_type="original", ), dict( filename="ex2.ipynb", lesson_idx=1, type='exercise', - scriptid=11989557 + scriptid=11989557, + docker_image_pinning_type="original", ), dict( filename="tut3.ipynb", lesson_idx=2, type='tutorial', + docker_image_pinning_type="original", ), dict( filename="ex3.ipynb", lesson_idx=2, type='exercise', - scriptid=11989559 + scriptid=11989559, + docker_image_pinning_type="original", ), dict( filename="tut4.ipynb", lesson_idx=3, type='tutorial', + docker_image_pinning_type="original", ), dict( filename="ex4.ipynb", lesson_idx=3, type='exercise', - scriptid=12400209 + scriptid=12400209, + docker_image_pinning_type="original", ), dict( filename="tut5.ipynb", lesson_idx=4, type='tutorial', enable_gpu=True, + docker_image_pinning_type="original", ), dict( filename="ex5.ipynb", lesson_idx=4, type='exercise', enable_gpu=True, - scriptid=11989565 + scriptid=11989565, + docker_image_pinning_type="original", ), dict( filename="tut6.ipynb", lesson_idx=5, type='tutorial', enable_gpu=True, + docker_image_pinning_type="original", ), dict( filename="ex6.ipynb", lesson_idx=5, type='exercise', enable_gpu=True, - scriptid=11991328 - ), + scriptid=11991328, + 
docker_image_pinning_type="original", + ), ] for nb in notebooks: diff --git a/notebooks/deep_learning/track_meta.py b/notebooks/deep_learning/track_meta.py index 120cb330a..806bdf968 100644 --- a/notebooks/deep_learning/track_meta.py +++ b/notebooks/deep_learning/track_meta.py @@ -2,150 +2,165 @@ author_username='dansbecker', course_name='Deep Learning', course_url='https://www.kaggle.com/learn/deep-learning', - course_forum_url='https://www.kaggle.com/learn-forum/161321' + course_forum_url='https://www.kaggle.com/learn-forum/161321', ) -lessons = [ {'topic': topic_name} for topic_name in - [ - 'Intro to Deep Learning and Computer Vision', - 'Building Models from Convolutions', - 'TensorFlow programming', - 'Transfer Learning', - 'Data Augmentation', - 'A Deeper Understanding of Deep Learning', - 'Deep Learning from Scratch', - 'Dropout and Strides for Larger Models', - 'Create Your First Submission' - ] - ] +lessons = [ {'topic': topic_name} for topic_name in [ + 'Intro to Deep Learning and Computer Vision', + 'Building Models from Convolutions', + 'TensorFlow programming', + 'Transfer Learning', + 'Data Augmentation', + 'A Deeper Understanding of Deep Learning', + 'Deep Learning from Scratch', + 'Dropout and Strides for Larger Models', + 'Create Your First Submission' + ] +] notebooks = [ dict( filename='tut1_intro.ipynb', lesson_idx=0, type='tutorial', - ), + docker_image_pinning_type="original", + ), dict( - filename='ex1_convolutions.ipynb', - lesson_idx=0, - type='exercise', - scriptid=499266, - dataset_sources = ["keras/resnet50"], - competition_sources = ["dog-breed-identification"], - ), + filename='ex1_convolutions.ipynb', + lesson_idx=0, + type='exercise', + scriptid=499266, + dataset_sources=["keras/resnet50"], + competition_sources=["dog-breed-identification"], + docker_image_pinning_type="original", + ), dict( filename='tut2_building_models_from_convolutions.ipynb', lesson_idx=1, type='tutorial', - ), + docker_image_pinning_type="original", + ), 
dict( filename='tut3_programming_tf_and_keras.ipynb', lesson_idx=2, type='tutorial', - dataset_sources = ["keras/resnet50"], - competition_sources = ["dog-breed-identification"], + dataset_sources=["keras/resnet50"], + competition_sources=["dog-breed-identification"], + docker_image_pinning_type="original", ), dict( filename='ex3_programming_tf_and_keras.ipynb', lesson_idx=2, type='exercise', - enable_gpu=True, - scriptid=521452, - dataset_sources = [ - "alexisbcook/resnet50", - "alexisbcook/vgg16", - "dansbecker/hot-dog-not-hot-dog" - ], + enable_gpu=True, + scriptid=521452, + dataset_sources = [ + "alexisbcook/resnet50", + "alexisbcook/vgg16", + "dansbecker/hot-dog-not-hot-dog" + ], + docker_image_pinning_type="original", ), dict( filename='tut4_transfer_learning.ipynb', lesson_idx=3, type='tutorial', - dataset_sources = [ - "keras/resnet50", - "dansbecker/urban-and-rural-photos" - ], + dataset_sources = [ + "keras/resnet50", + "dansbecker/urban-and-rural-photos" + ], + docker_image_pinning_type="original", ), dict( filename='ex4_transfer_learning.ipynb', lesson_idx=3, type='exercise', - scriptid=532365, - dataset_sources = [ - "alexisbcook/resnet50", - "dansbecker/dogs-gone-sideways" - ], - enable_gpu=True, + scriptid=532365, + dataset_sources = [ + "alexisbcook/resnet50", + "dansbecker/dogs-gone-sideways" + ], + enable_gpu=True, + docker_image_pinning_type="original", ), dict( filename='tut5_data_augmentation.ipynb', lesson_idx=4, type='tutorial', - dataset_sources = [ - "keras/resnet50", - "dansbecker/urban-and-rural-photos", - ], + dataset_sources = [ + "keras/resnet50", + "dansbecker/urban-and-rural-photos", + ], + docker_image_pinning_type="original", ), dict( filename='ex5_data_augmentation.ipynb', lesson_idx=4, type='exercise', - enable_gpu=True, - scriptid=536195, + enable_gpu=True, + scriptid=536195, dataset_sources = [ - "alexisbcook/resnet50", - "dansbecker/dogs-gone-sideways" - ], - ), + "alexisbcook/resnet50", + "dansbecker/dogs-gone-sideways" + ], 
+ docker_image_pinning_type="original", + ), dict( filename='tut6_deep_understanding.ipynb', lesson_idx=5, type='tutorial', - ), + docker_image_pinning_type="original", + ), dict(filename='tut7_dl_from_scratch.ipynb', lesson_idx=6, type='tutorial', - dataset_sources = ['zalando-research/fashionmnist'], - competition_sources=['digit-recognizer'], - ), + dataset_sources=['zalando-research/fashionmnist'], + competition_sources=['digit-recognizer'], + docker_image_pinning_type="original", + ), dict( filename='ex7_from_scratch.ipynb', lesson_idx=6, - enable_gpu=True, + enable_gpu=True, type='exercise', scriptid=574269, - competition_sources=['digit-recognizer'], - dataset_sources = ['zalando-research/fashionmnist'], - ), + competition_sources=['digit-recognizer'], + dataset_sources=['zalando-research/fashionmnist'], + docker_image_pinning_type="original", + ), dict( filename='tut8_dropout_and_strides.ipynb', lesson_idx=7, type='tutorial', - competition_sources=['digit-recognizer'], - dataset_sources = ['zalando-research/fashionmnist'], - ), + competition_sources=['digit-recognizer'], + dataset_sources=['zalando-research/fashionmnist'], + docker_image_pinning_type="original", + ), dict( filename='ex8_dropout_strides.ipynb', lesson_idx=7, - enable_gpu=True, + enable_gpu=True, type='exercise', - scriptid=663261, - competition_sources=['digit-recognizer'], - dataset_sources = ['zalando-research/fashionmnist'], - ), + scriptid=663261, + competition_sources=['digit-recognizer'], + dataset_sources=['zalando-research/fashionmnist'], + docker_image_pinning_type="original", + ), dict( filename='tut_tpus.ipynb', lesson_idx=8, type='tutorial', competition_sources=['tpu-getting-started'], - enable_internet=True - ), + enable_internet=True, + docker_image_pinning_type="original", + ), dict( filename='ex_tpus.ipynb', lesson_idx=8, type='exercise', scriptid=10204702, competition_sources=['tpu-getting-started'], - enable_internet=True - ) - ] + enable_internet=True, + 
docker_image_pinning_type="original", + ) +] diff --git a/notebooks/nb_utils/track_metadata.py b/notebooks/nb_utils/track_metadata.py index f09dc9bbc..a32b21761 100644 --- a/notebooks/nb_utils/track_metadata.py +++ b/notebooks/nb_utils/track_metadata.py @@ -131,6 +131,7 @@ class Notebook(object): def __init__(self, cfg, filename, type, author=None, title=None, lesson=None, slug=None, scriptid=1, kernel_sources=(), dataset_sources=(), competition_sources=(), keywords=(), enable_gpu=False, enable_internet=None, + docker_image_pinning_type=None ): self.cfg = cfg self.filename = filename @@ -170,6 +171,7 @@ def __init__(self, cfg, filename, type, author=None, title=None, lesson=None, self.keywords = list(keywords) self.enable_gpu = bool(enable_gpu) self.enable_internet = enable_internet + self.docker_image_pinning_type = docker_image_pinning_type @staticmethod def _topic_to_title(topic): @@ -221,5 +223,5 @@ def kernel_metadata(self, cfg): competition_sources=sorted(self.competition_sources), kernel_sources=sorted(self.kernel_sources), keywords=sorted(self.keywords), - docker_image_pinning_type="latest", + docker_image_pinning_type="latest" if self.docker_image_pinning_type is None else self.docker_image_pinning_type, ) From 9ab6e7a7abcf00a59d67a6272ad6a7a6997ebac1 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Thu, 14 Mar 2024 01:18:16 +0000 Subject: [PATCH 33/55] fix test suite --- test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.sh b/test.sh index 1face2433..7fd4fd9e4 100755 --- a/test.sh +++ b/test.sh @@ -79,7 +79,7 @@ fi set -x # Allows pinned notebooks to be tested independently. 
-if [[ $NOTEBOOK == "computer_vision" || $NOTEBOOK == "deep_learning_intro" ]]; then +if [[ $TRACK == "computer_vision" || $TRACK == "deep_learning_intro" ]]; then docker run --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ @@ -90,7 +90,7 @@ if [[ $NOTEBOOK == "computer_vision" || $NOTEBOOK == "deep_learning_intro" ]]; fi -if [[ $NOTEBOOK == "all" ]]; then +if [[ $TRACK == "all" ]]; then docker run --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ From cad89a2cd85fbdb0935b9f13ddd2785baea7b99f Mon Sep 17 00:00:00 2001 From: Dustin H Date: Fri, 29 Nov 2024 08:27:00 -0500 Subject: [PATCH 34/55] Use regular docker runtime runc Jenkins currently defaults to the nvidia docker runtime, however learntools only uses CPU tests and the colab image conflicts with the nvidia docker runtime when in CPU-only mode. http://b/365782129 --- test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test.sh b/test.sh index 7fd4fd9e4..acfca8b6b 100755 --- a/test.sh +++ b/test.sh @@ -80,7 +80,7 @@ set -x # Allows pinned notebooks to be tested independently. 
if [[ $TRACK == "computer_vision" || $TRACK == "deep_learning_intro" ]]; then - docker run --rm -t \ + docker run --runtime runc --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ -v $PWD:/input:ro \ @@ -91,7 +91,7 @@ fi if [[ $TRACK == "all" ]]; then - docker run --rm -t \ + docker run --runtime runc --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ -v $PWD:/input:ro \ @@ -99,7 +99,7 @@ if [[ $TRACK == "all" ]]; then /bin/bash -c "/input/notebooks/test.sh kerasExp" fi -docker run --rm -t \ +docker run --runtime runc --rm -t \ -e KAGGLE_USERNAME -e KAGGLE_KEY \ -v ~/.kaggle:/root/.kaggle:ro \ -v $PWD:/input:ro \ From 4e63344df63320781f2b1e36d419d529972cddd4 Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 10:49:35 -0600 Subject: [PATCH 35/55] Update ex5.py q3 --- learntools/sql/ex5.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/learntools/sql/ex5.py b/learntools/sql/ex5.py index 6d3f6e562..b700f6458 100644 --- a/learntools/sql/ex5.py +++ b/learntools/sql/ex5.py @@ -96,11 +96,13 @@ def check(self, results): # check 2: length of dataframe assert (len(results) == len(rides_per_year_answer)), ("The results don't look right. Try again.") # check 3: one value in particular - year_to_check = list(rides_per_year_answer["year"])[-1] - correct_number = int(rides_per_year_answer.loc[rides_per_year_answer["year"]==year_to_check]["num_trips"].values) - submitted_number = int(results.loc[results["year"]==year_to_check]["num_trips"].values) - assert (correct_number == submitted_number), ("The results don't look right. 
Try again.") - + year_to_check = rides_per_year_answer["year"].iloc[-1] + correct_number = rides_per_year_answer.loc[rides_per_year_answer["year"] == year_to_check, + "num_trips"].iloc[0] + submitted_number = results.loc[results["year"] == year_to_check, + "num_trips"].iloc[0] + assert(correct_number == submitted_number) + _hint = "Start your query with `SELECT EXTRACT(YEAR FROM trip_start_timestamp) AS year, COUNT(1) AS num_trips`." _solution = CS( """ From 7cedc18f666148d2b74d0df6907e3a82766493c4 Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 11:02:57 -0600 Subject: [PATCH 36/55] Update ex6.py q5 --- learntools/sql/ex6.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/learntools/sql/ex6.py b/learntools/sql/ex6.py index a07a279b0..bbcf1a353 100644 --- a/learntools/sql/ex6.py +++ b/learntools/sql/ex6.py @@ -191,9 +191,9 @@ def check(self, query, results): assert ('user_id' in results.columns), ('You do not have a `user_id` column in your results.') assert ('number_of_answers' in results.columns), ('You do not have a `number_of_answers` column in your results.') # check 3: correct user IDs - correct_ids = set([int(i) for i in bigquery_experts_answer.user_id.values if not np.isnan(i)]) - submitted_ids = set([int(i) for i in results.user_id.values if not np.isnan(i)]) - assert (correct_ids == submitted_ids), ('You seem to have the wrong values in the `user_id` column.') + correct_ids = bigquery_experts_answer.loc[bigquery_experts_answer.user_id.notna(), "user_id"].unique() + submitted_ids = results.loc[results.user_id.notna(), "user_id"].unique() + assert(np.array_equal(correct_ids, submitted_ids)) # check 4: check one value from other column first_id = list(bigquery_experts_answer["user_id"])[0] correct_num = int(bigquery_experts_answer[bigquery_experts_answer["user_id"] == first_id]["number_of_answers"]) From 5ce0635d4e1a9fc3f40f2e7da859300781b536df Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 12:02:43 
-0600 Subject: [PATCH 37/55] Update ex1.py q2 --- learntools/sql_advanced/ex1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/learntools/sql_advanced/ex1.py b/learntools/sql_advanced/ex1.py index 0df6599a7..1d4da5a33 100644 --- a/learntools/sql_advanced/ex1.py +++ b/learntools/sql_advanced/ex1.py @@ -79,11 +79,11 @@ def check(self, query): "%d rows, but you have %d rows." % (len(correct_answer), len(results))) # check 2: calculated values # correct result - correct_list = [i for i in list(correct_answer["time_to_answer"]) if not math.isnan(i)] - correct_number = int(sum(correct_list)/len(correct_list)) + correct_list = correct_answwer.loc[correct_answer["time_to_answer"].notna(), "time_to_answer"] + correct_number = correct_list.sum()/len(correct_list)) # submitted value - submitted_list = [i for i in list(results["time_to_answer"]) if not math.isnan(i)] - submitted_number = int(sum(submitted_list)/len(submitted_list)) + submitted_list = results.loc[results["time_to_answer"].notna(), "time_to_answer"] + submitted_number = submitted_list.sum()/len(submitted_list)) assert (int(submitted_number)==int(correct_number)), ("The results don't look right. 
Please make sure that the part of the query " "that calculates the values in the `time_to_answer` column is unmodified.") From 62fda470b7241f64df23dc0ce42f835581cf95ae Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 12:05:40 -0600 Subject: [PATCH 38/55] fix bug in Update ex1.py --- learntools/sql_advanced/ex1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learntools/sql_advanced/ex1.py b/learntools/sql_advanced/ex1.py index 1d4da5a33..031196863 100644 --- a/learntools/sql_advanced/ex1.py +++ b/learntools/sql_advanced/ex1.py @@ -80,10 +80,10 @@ def check(self, query): # check 2: calculated values # correct result correct_list = correct_answwer.loc[correct_answer["time_to_answer"].notna(), "time_to_answer"] - correct_number = correct_list.sum()/len(correct_list)) + correct_number = correct_list.sum()/len(correct_list) # submitted value submitted_list = results.loc[results["time_to_answer"].notna(), "time_to_answer"] - submitted_number = submitted_list.sum()/len(submitted_list)) + submitted_number = submitted_list.sum()/len(submitted_list) assert (int(submitted_number)==int(correct_number)), ("The results don't look right. Please make sure that the part of the query " "that calculates the values in the `time_to_answer` column is unmodified.") From 31e1dc4578064d7ff8bd758f528b993304dcc5a7 Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 12:08:29 -0600 Subject: [PATCH 39/55] Update ex1.py bug fix q1 --- learntools/sql_advanced/ex1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql_advanced/ex1.py b/learntools/sql_advanced/ex1.py index 031196863..41c0af19a 100644 --- a/learntools/sql_advanced/ex1.py +++ b/learntools/sql_advanced/ex1.py @@ -79,7 +79,7 @@ def check(self, query): "%d rows, but you have %d rows." 
% (len(correct_answer), len(results))) # check 2: calculated values # correct result - correct_list = correct_answwer.loc[correct_answer["time_to_answer"].notna(), "time_to_answer"] + correct_list = correct_result.loc[correct_result["time_to_answer"].notna(), "time_to_answer"] correct_number = correct_list.sum()/len(correct_list) # submitted value submitted_list = results.loc[results["time_to_answer"].notna(), "time_to_answer"] From e902f9fa90294173faea73a4da9625aa6785250b Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 12:13:15 -0600 Subject: [PATCH 40/55] Update ex1.py --- learntools/sql_advanced/ex1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql_advanced/ex1.py b/learntools/sql_advanced/ex1.py index 41c0af19a..f5d0e3194 100644 --- a/learntools/sql_advanced/ex1.py +++ b/learntools/sql_advanced/ex1.py @@ -79,7 +79,7 @@ def check(self, query): "%d rows, but you have %d rows." % (len(correct_answer), len(results))) # check 2: calculated values # correct result - correct_list = correct_result.loc[correct_result["time_to_answer"].notna(), "time_to_answer"] + correct_list = correct_answer.loc[correct_answer["time_to_answer"].notna(), "time_to_answer"] correct_number = correct_list.sum()/len(correct_list) # submitted value submitted_list = results.loc[results["time_to_answer"].notna(), "time_to_answer"] From 16b693411b4de164f9d1a2e64063114a0600a624 Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 13:32:02 -0600 Subject: [PATCH 41/55] Update ex2.py q3 --- learntools/sql_advanced/ex2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/learntools/sql_advanced/ex2.py b/learntools/sql_advanced/ex2.py index c45e48e41..6864161f5 100644 --- a/learntools/sql_advanced/ex2.py +++ b/learntools/sql_advanced/ex2.py @@ -173,12 +173,12 @@ def check(self, query): # check 3: check values, length of dataframe assert (len(results)==len(break_time_answer)), ("Your answer does not have the 
correct number of rows.") # check 4: specific number - id_to_check = list(break_time_answer["taxi_id"])[0] - correct_ans = [int(i) for i in list(break_time_answer.loc[break_time_answer["taxi_id"] == id_to_check]["prev_break"]) if math.isnan(i)==False] - submitted_ans = [int(i) for i in list(results.loc[results["taxi_id"] == id_to_check]["prev_break"]) if math.isnan(i)==False] + id_to_check = break_time_answer["taxi_id"].iloc[0] + correct_ans = break_time_answer.loc[break_time_answer"taxi_id"].eq(id_to_check) & break_time_answer["prev_break"].notna(), "prev_break"] + submitted_ans = results.loc[results["taxi_id"].eq(id_to_check) & results["prev_break"].notna(), "prev_break"] if len(correct_ans) > 0: - assert (min(correct_ans)==min(submitted_ans)), ("The results don't look right. Try again.") - assert (max(correct_ans)==max(submitted_ans)), ("The results don't look right. Try again.") + assert (correct_ans.min() == submitted_ans.min()), ("The results don't look right. Try again.") + assert (correct_ans.max() == submitted_ans.max()), ("The results don't look right. 
Try again.") _solution = CS( \ """ From 87e2c0467f58a4f518a323e4148b6d0373eeba92 Mon Sep 17 00:00:00 2001 From: John Miller Date: Thu, 26 Dec 2024 13:37:20 -0600 Subject: [PATCH 42/55] Update ex2.py --- learntools/sql_advanced/ex2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql_advanced/ex2.py b/learntools/sql_advanced/ex2.py index 6864161f5..2f4114fc3 100644 --- a/learntools/sql_advanced/ex2.py +++ b/learntools/sql_advanced/ex2.py @@ -174,7 +174,7 @@ def check(self, query): assert (len(results)==len(break_time_answer)), ("Your answer does not have the correct number of rows.") # check 4: specific number id_to_check = break_time_answer["taxi_id"].iloc[0] - correct_ans = break_time_answer.loc[break_time_answer"taxi_id"].eq(id_to_check) & break_time_answer["prev_break"].notna(), "prev_break"] + correct_ans = break_time_answer.loc[break_time_answer["taxi_id"].eq(id_to_check) & break_time_answer["prev_break"].notna(), "prev_break"] submitted_ans = results.loc[results["taxi_id"].eq(id_to_check) & results["prev_break"].notna(), "prev_break"] if len(correct_ans) > 0: assert (correct_ans.min() == submitted_ans.min()), ("The results don't look right. Try again.") From 96e90499fc12d04b6e9327c90f7a74c411590f56 Mon Sep 17 00:00:00 2001 From: John Miller Date: Fri, 27 Dec 2024 19:58:53 -0600 Subject: [PATCH 43/55] Update ex5.py restore assert message --- learntools/sql/ex5.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/learntools/sql/ex5.py b/learntools/sql/ex5.py index b700f6458..5e444db41 100644 --- a/learntools/sql/ex5.py +++ b/learntools/sql/ex5.py @@ -97,11 +97,9 @@ def check(self, results): assert (len(results) == len(rides_per_year_answer)), ("The results don't look right. 
Try again.") # check 3: one value in particular year_to_check = rides_per_year_answer["year"].iloc[-1] - correct_number = rides_per_year_answer.loc[rides_per_year_answer["year"] == year_to_check, - "num_trips"].iloc[0] - submitted_number = results.loc[results["year"] == year_to_check, - "num_trips"].iloc[0] - assert(correct_number == submitted_number) + correct_number = rides_per_year_answer.loc[rides_per_year_answer["year"] == year_to_check, "num_trips"].iloc[0] + submitted_number = results.loc[results["year"] == year_to_check, "num_trips"].iloc[0] + assert(correct_number == submitted_number), ("The results don't look right. Try again.") _hint = "Start your query with `SELECT EXTRACT(YEAR FROM trip_start_timestamp) AS year, COUNT(1) AS num_trips`." _solution = CS( From 1d7f2173e84845e84b6972d79cb8ba4e33cabc7d Mon Sep 17 00:00:00 2001 From: John Miller Date: Fri, 27 Dec 2024 20:00:15 -0600 Subject: [PATCH 44/55] Update ex6.py restore assert message --- learntools/sql/ex6.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql/ex6.py b/learntools/sql/ex6.py index bbcf1a353..909e3e022 100644 --- a/learntools/sql/ex6.py +++ b/learntools/sql/ex6.py @@ -193,7 +193,7 @@ def check(self, query, results): # check 3: correct user IDs correct_ids = bigquery_experts_answer.loc[bigquery_experts_answer.user_id.notna(), "user_id"].unique() submitted_ids = results.loc[results.user_id.notna(), "user_id"].unique() - assert(np.array_equal(correct_ids, submitted_ids)) + assert(np.array_equal(correct_ids, submitted_ids)), ), ('You seem to have the wrong values in the `user_id` column.') # check 4: check one value from other column first_id = list(bigquery_experts_answer["user_id"])[0] correct_num = int(bigquery_experts_answer[bigquery_experts_answer["user_id"] == first_id]["number_of_answers"]) From 5fc37f8757ed1603746a3192c15b16d8cac13899 Mon Sep 17 00:00:00 2001 From: John Miller Date: Mon, 30 Dec 2024 12:20:02 -0600 Subject: [PATCH 45/55] Update ex5.py 
--- learntools/sql/ex5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql/ex5.py b/learntools/sql/ex5.py index 5e444db41..df68af1f4 100644 --- a/learntools/sql/ex5.py +++ b/learntools/sql/ex5.py @@ -99,7 +99,7 @@ def check(self, results): year_to_check = rides_per_year_answer["year"].iloc[-1] correct_number = rides_per_year_answer.loc[rides_per_year_answer["year"] == year_to_check, "num_trips"].iloc[0] submitted_number = results.loc[results["year"] == year_to_check, "num_trips"].iloc[0] - assert(correct_number == submitted_number), ("The results don't look right. Try again.") + assert correct_number == submitted_number, "The results don't look right. Try again." _hint = "Start your query with `SELECT EXTRACT(YEAR FROM trip_start_timestamp) AS year, COUNT(1) AS num_trips`." _solution = CS( From 167e81d4072638576d99145cc06ec1206a93d985 Mon Sep 17 00:00:00 2001 From: John Miller Date: Mon, 30 Dec 2024 12:26:11 -0600 Subject: [PATCH 46/55] Update ex6.py --- learntools/sql/ex6.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql/ex6.py b/learntools/sql/ex6.py index 909e3e022..ffd11814f 100644 --- a/learntools/sql/ex6.py +++ b/learntools/sql/ex6.py @@ -193,7 +193,7 @@ def check(self, query, results): # check 3: correct user IDs correct_ids = bigquery_experts_answer.loc[bigquery_experts_answer.user_id.notna(), "user_id"].unique() submitted_ids = results.loc[results.user_id.notna(), "user_id"].unique() - assert(np.array_equal(correct_ids, submitted_ids)), ), ('You seem to have the wrong values in the `user_id` column.') + assert np.array_equal(correct_ids, submitted_ids), 'You seem to have the wrong values in the `user_id` column.' 
# check 4: check one value from other column first_id = list(bigquery_experts_answer["user_id"])[0] correct_num = int(bigquery_experts_answer[bigquery_experts_answer["user_id"] == first_id]["number_of_answers"]) From 6d0b0001b3c0d64b3a6078c0f0667b6e23a6a3b0 Mon Sep 17 00:00:00 2001 From: John Miller Date: Mon, 30 Dec 2024 12:27:46 -0600 Subject: [PATCH 47/55] Update ex1.py --- learntools/sql_advanced/ex1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/sql_advanced/ex1.py b/learntools/sql_advanced/ex1.py index f5d0e3194..0fec48dd3 100644 --- a/learntools/sql_advanced/ex1.py +++ b/learntools/sql_advanced/ex1.py @@ -84,7 +84,7 @@ def check(self, query): # submitted value submitted_list = results.loc[results["time_to_answer"].notna(), "time_to_answer"] submitted_number = submitted_list.sum()/len(submitted_list) - assert (int(submitted_number)==int(correct_number)), ("The results don't look right. Please make sure that the part of the query " + assert int(submitted_number) == int(correct_number), ("The results don't look right. 
Please make sure that the part of the query " "that calculates the values in the `time_to_answer` column is unmodified.") _solution = CS(\ From 8132ca6d9caea0d40105d820bbc260d1abe0d112 Mon Sep 17 00:00:00 2001 From: John Miller Date: Mon, 30 Dec 2024 12:28:46 -0600 Subject: [PATCH 48/55] Update ex2.py --- learntools/sql_advanced/ex2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learntools/sql_advanced/ex2.py b/learntools/sql_advanced/ex2.py index 2f4114fc3..cff2de8a3 100644 --- a/learntools/sql_advanced/ex2.py +++ b/learntools/sql_advanced/ex2.py @@ -177,8 +177,8 @@ def check(self, query): correct_ans = break_time_answer.loc[break_time_answer["taxi_id"].eq(id_to_check) & break_time_answer["prev_break"].notna(), "prev_break"] submitted_ans = results.loc[results["taxi_id"].eq(id_to_check) & results["prev_break"].notna(), "prev_break"] if len(correct_ans) > 0: - assert (correct_ans.min() == submitted_ans.min()), ("The results don't look right. Try again.") - assert (correct_ans.max() == submitted_ans.max()), ("The results don't look right. Try again.") + assert correct_ans.min() == submitted_ans.min(), "The results don't look right. Try again." + assert correct_ans.max() == submitted_ans.max(), "The results don't look right. Try again." _solution = CS( \ """ From 7c0888ba90a8e44542db3e834790ad936e1a0984 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Thu, 27 Mar 2025 10:08:09 -0400 Subject: [PATCH 49/55] Rough implementation using kagglesdk This shouldn't impact any existing users, because it depends on an environment variable being set. This will enable us to rollout gradually when we're ready. For now, there are still a few things to be done, primarily improving kagglesdk authorization. 
http://b/379083750 --- learntools/__init__.py | 2 +- learntools/core/tracking.py | 76 ++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/learntools/__init__.py b/learntools/__init__.py index 710c86703..6c13d734c 100644 --- a/learntools/__init__.py +++ b/learntools/__init__.py @@ -2,4 +2,4 @@ machine_learning, ml_explainability, ml_insights, ml_intermediate, python, \ sql -__version__ = '0.3.4' +__version__ = '0.3.5' diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index cad62b6c7..e86a8e4d4 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -1,12 +1,18 @@ import enum from IPython.display import display, Javascript import json - import learntools +import os # If set to True, then echo logged events as output. DEBUG = False +USE_KAGGLESDK = os.environ.get('LEARN_USE_KAGGLE_SDK') == 'True' +if USE_KAGGLESDK: + from kagglesdk import KaggleClient + from kagglesdk.education.types.education_api_service import ApiTrackExerciseInteractionRequest + from kagglesdk.education.types.education_service import LearnExerciseInteractionType, LearnExerciseOutcomeType, LearnExerciseQuestionType + class InteractionType(enum.Enum): CHECK = 1 HINT = 2 @@ -32,7 +38,67 @@ class QuestionType(enum.Enum): trace = '', ) -def track(event): +def interaction_type_to_kagglesdk(event): + switch = { + InteractionType.CHECK: LearnExerciseInteractionType.CHECK, + InteractionType.HINT: LearnExerciseInteractionType.HINT, + InteractionType.SOLUTION: LearnExerciseInteractionType.SOLUTION, + } + value = event['interactionType'] + assert value in switch + return switch.get(value) + +def outcome_type_to_kagglesdk(interaction_type, event): + switch = { + OutcomeType.PASS: LearnExerciseOutcomeType.PASS, + OutcomeType.FAIL: LearnExerciseOutcomeType.FAIL, + OutcomeType.EXCEPTION: LearnExerciseOutcomeType.EXCEPTION, + OutcomeType.UNATTEMPTED: LearnExerciseOutcomeType.UNATTEMPTED, + } + + value = 
event.get('outcomeType', None) + if value: + assert value in switch + return switch.get(value) + else: + assert interaction_type != LearnExerciseInteractionType.CHECK, "Check events must have an OutcomeType set: {!r}".format(event) + return LearnExerciseOutcomeType.LEARN_EXERCISE_OUTCOME_TYPE_UNSPECIFIED + +def question_type_to_kagglesdk(event): + switch = { + QuestionType.EQUALITYCHECKPROBLEM: LearnExerciseQuestionType.EQUALITY_CHECK_PROBLEM, + QuestionType.CODINGPROBLEM: LearnExerciseQuestionType.CODING_PROBLEM, + QuestionType.FUNCTIONPROBLEM: LearnExerciseQuestionType.FUNCTION_PROBLEM, + QuestionType.THOUGHTEXPERIMENT: LearnExerciseQuestionType.THOUGHT_EXPERIMENT, + } + + question_type = event.get('questionType', None) + if question_type: + assert question_type in switch + return switch.get(question_type) + return None + +def track_using_kagglesdk(event): + request = ApiTrackExerciseInteractionRequest() + request.learn_tools_version = str(learntools.__version__) + request.value_towards_completion = event.get('valueTowardsCompletion', 0.0) + request.interaction_type = interaction_type_to_kagglesdk(event) + request.outcome_type = outcome_type_to_kagglesdk(request.interaction_type, event) + + question_type = question_type_to_kagglesdk(event) + if question_type: + request.question_type = question_type + + # TODO(b/379083750): the following items are still TBD + # - set request.fork_parent_kernel_session_id + # - automatically handle authorization in KaggleClient + # - post the nudge information back to the client + + client = KaggleClient() + result = client.education.education_api_client.track_exercise_interaction(request) + + +def track_using_iframe(event): # TODO: could be nice to put some validation logic here. 
for k, v in _EVENT_DEFAULTS.items(): event.setdefault(k, v) @@ -65,3 +131,9 @@ def track(event): display(Javascript(debug_js)) display(message) +def track(event): + if USE_KAGGLESDK: + track_using_kagglesdk(event) + else: + track_using_iframe(event) + \ No newline at end of file From b00a2aab939ec7c377e5c8066174dded8ea91ea1 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Tue, 27 May 2025 14:27:08 +0000 Subject: [PATCH 50/55] Sort the results before comparison The SQL statements don't contain an `ORDER BY` clause, so the ordering can't be an intentional check for this exercise. This exercise happens to work most of the time because BigQuery tends to return results in the same order, but sometimes it doesn't. Learn users shouldn't have to deal with this non-determinism in the check. http://b/415948668 --- learntools/sql/ex6.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/learntools/sql/ex6.py b/learntools/sql/ex6.py index ffd11814f..ea04630e1 100644 --- a/learntools/sql/ex6.py +++ b/learntools/sql/ex6.py @@ -186,14 +186,17 @@ def check(self, query, results): assert ('group by' in lower_query), ('Your query should have a **GROUP BY** clause.') assert ('count' in lower_query), ('Your query should have a **COUNT** in the **SELECT** statement.') assert ('%bigquery' in lower_query), ('Your **WHERE** clause is not filtering on the "bigquery" tag correctly.') + # check 2: column names results.columns = [c.lower() for c in results.columns] assert ('user_id' in results.columns), ('You do not have a `user_id` column in your results.') assert ('number_of_answers' in results.columns), ('You do not have a `number_of_answers` column in your results.') + # check 3: correct user IDs correct_ids = bigquery_experts_answer.loc[bigquery_experts_answer.user_id.notna(), "user_id"].unique() submitted_ids = results.loc[results.user_id.notna(), "user_id"].unique() - assert np.array_equal(correct_ids, submitted_ids), 'You seem to have the wrong values in the `user_id` 
column.' + assert np.array_equal(np.sort(correct_ids), np.sort(submitted_ids)), 'You seem to have the wrong values in the `user_id` column.' + # check 4: check one value from other column first_id = list(bigquery_experts_answer["user_id"])[0] correct_num = int(bigquery_experts_answer[bigquery_experts_answer["user_id"] == first_id]["number_of_answers"]) From a3752b6d7dadcf30ee3b133f4ce69fdf7083099a Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Tue, 1 Jul 2025 15:01:10 -0400 Subject: [PATCH 51/55] Use the `KAGGLE_LEARN_SESSION_ID` This environment variable was added in kaggleazure PR 36128. It is set on all Learn sessions. It refers to the version of the exercise the user forked from. Note that this is all still behind a feature flag, so this code is a no-op for now. http://b/379083750 --- learntools/core/tracking.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index e86a8e4d4..9e1b1a069 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -84,14 +84,13 @@ def track_using_kagglesdk(event): request.value_towards_completion = event.get('valueTowardsCompletion', 0.0) request.interaction_type = interaction_type_to_kagglesdk(event) request.outcome_type = outcome_type_to_kagglesdk(request.interaction_type, event) + request.fork_parent_kernel_session_id = os.environ.get('KAGGLE_LEARN_SESSION_ID') question_type = question_type_to_kagglesdk(event) if question_type: request.question_type = question_type # TODO(b/379083750): the following items are still TBD - # - set request.fork_parent_kernel_session_id - # - automatically handle authorization in KaggleClient # - post the nudge information back to the client client = KaggleClient() From a8e97723772af0c0c9d8793304158f78c7f82ff0 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Thu, 17 Jul 2025 16:24:34 -0400 Subject: [PATCH 52/55] Post response back to the outer iframe. 
Enables Kaggle Notebooks to continue to show nudges even if using kagglesdk under the hood. http://b/379083750 --- learntools/core/tracking.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index 9e1b1a069..52724845e 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -90,12 +90,18 @@ def track_using_kagglesdk(event): if question_type: request.question_type = question_type - # TODO(b/379083750): the following items are still TBD - # - post the nudge information back to the client - client = KaggleClient() result = client.education.education_api_client.track_exercise_interaction(request) + # Post the result back to the outer frame. When running in Kaggle + # Notebooks, the outer frame is listening for this message and may show a + # a nudge. + message = dict( + jupyterEvent='custom.exercise_interaction_result', + data=result.to_json()) + js = 'parent.postMessage({}, "*")'.format(json.dumps(message)) + display(Javascript(js)) + def track_using_iframe(event): # TODO: could be nice to put some validation logic here. From 3f4775a2695469d446b066f8f94a756514604beb Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Fri, 18 Jul 2025 10:51:21 -0400 Subject: [PATCH 53/55] Fix comment --- learntools/core/tracking.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index 52724845e..6934b1f5f 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -93,9 +93,8 @@ def track_using_kagglesdk(event): client = KaggleClient() result = client.education.education_api_client.track_exercise_interaction(request) - # Post the result back to the outer frame. When running in Kaggle - # Notebooks, the outer frame is listening for this message and may show a - # a nudge. + # Post the result back to the outer frame. 
When running in Kaggle Notebooks + # the outer frame is listening for this message and may show a nudge. message = dict( jupyterEvent='custom.exercise_interaction_result', data=result.to_json()) From 2bd908957befe14fede6092b8c4e96960494f764 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Fri, 18 Jul 2025 10:52:27 -0400 Subject: [PATCH 54/55] More comment formatting --- learntools/core/tracking.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index 6934b1f5f..6b35bd286 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -93,8 +93,9 @@ def track_using_kagglesdk(event): client = KaggleClient() result = client.education.education_api_client.track_exercise_interaction(request) - # Post the result back to the outer frame. When running in Kaggle Notebooks - # the outer frame is listening for this message and may show a nudge. + # Post the result back to the outer frame. When running in Kaggle + # Notebooks, the outer frame is listening for this message and may show a + # nudge. message = dict( jupyterEvent='custom.exercise_interaction_result', data=result.to_json()) From 14475cf1caca3244f460ff3759c8d81916f0c9f1 Mon Sep 17 00:00:00 2001 From: Jim Plotts Date: Thu, 24 Jul 2025 15:11:14 -0400 Subject: [PATCH 55/55] Cast KAGGLE_LEARN_SESSION_ID to int In testing, I was assuming this would already be an int, but it looks like KKB puts all environment variables as strings. 
http://b/379083750 --- learntools/core/tracking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/core/tracking.py b/learntools/core/tracking.py index 6b35bd286..69e34f18c 100644 --- a/learntools/core/tracking.py +++ b/learntools/core/tracking.py @@ -84,7 +84,7 @@ def track_using_kagglesdk(event): request.value_towards_completion = event.get('valueTowardsCompletion', 0.0) request.interaction_type = interaction_type_to_kagglesdk(event) request.outcome_type = outcome_type_to_kagglesdk(request.interaction_type, event) - request.fork_parent_kernel_session_id = os.environ.get('KAGGLE_LEARN_SESSION_ID') + request.fork_parent_kernel_session_id = int(os.environ.get('KAGGLE_LEARN_SESSION_ID')) question_type = question_type_to_kagglesdk(event) if question_type: