In [1]:
import pandas as pd
import pygwalker as pyg
In [2]:
# Load the titles dataset from the CSV file sitting next to this notebook.
df = pd.read_csv(
    filepath_or_buffer="./netflix_titles.csv",
)
In [3]:
# Clean the raw columns, derive year/month helpers, and keep titles added up to 2019.

# Strip surrounding whitespace before parsing: pandas >= 2.0 infers a single
# format from the first value and raises on entries that carry stray padding
# (e.g. " August 4, 2017"). Stripping is harmless when the data is already clean.
df["date_added"] = pd.to_datetime(df["date_added"].str.strip())

# Missing dates (NaT) become 0 so the helper columns can be plain integers.
# NOTE(review): those rows end up with year 0 and therefore pass the
# `<= 2019` filter below — confirm that keeping them is intended.
df["date_added_year"] = df["date_added"].dt.year.fillna(0).astype(int)
df["date_added_month"] = df["date_added"].dt.month.fillna(0).astype(int)

# Keep only the leading number of the duration text (presumably values such as
# "93 min" or "2 Seasons" — verify against the CSV).
df["duration"] = df["duration"].str.split(" ").str[0].astype(int)

# `.copy()` makes the filtered result an independent frame rather than a slice
# of the unfiltered one, so later cells can derive from it without
# SettingWithCopyWarning.
df = df[df["date_added_year"] <= 2019].copy()

df
Out[3]:
show_id type title director cast country date_added release_year rating duration listed_in description date_added_year date_added_month
1 s2 Movie 7:19 Jorge Michel Grau Demián Bichir, Héctor Bonilla, Oscar Serrano, ... Mexico 2016-12-23 2016 TV-MA 93 Dramas, International Movies After a devastating earthquake hits Mexico Cit... 2016 12
2 s3 Movie 23:59 Gilbert Chan Tedd Chan, Stella Chung, Henley Hii, Lawrence ... Singapore 2018-12-20 2011 R 78 Horror Movies, International Movies When an army recruit is found dead, his fellow... 2018 12
3 s4 Movie 9 Shane Acker Elijah Wood, John C. Reilly, Jennifer Connelly... United States 2017-11-16 2009 PG-13 80 Action & Adventure, Independent Movies, Sci-Fi... In a postapocalyptic world, rag-doll robots hi... 2017 11
5 s6 TV Show 46 Serdar Akar Erdal Beşikçioğlu, Yasemin Allen, Melis Birkan... Turkey 2017-07-01 2016 TV-MA 1 International TV Shows, TV Dramas, TV Mysteries A genetics professor experiments with a treatm... 2017 7
7 s8 Movie 187 Kevin Reynolds Samuel L. Jackson, John Heard, Kelly Rowan, Cl... United States 2019-11-01 1997 R 119 Dramas After one of his high school students attacks ... 2019 11
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7777 s7778 TV Show Zombie Dumb NaN NaN NaN 2019-07-01 2018 TV-Y7 2 Kids' TV, Korean TV Shows, TV Comedies While living alone in a spooky town, a young g... 2019 7
7778 s7779 Movie Zombieland Ruben Fleischer Jesse Eisenberg, Woody Harrelson, Emma Stone, ... United States 2019-11-01 2009 R 88 Comedies, Horror Movies Looking to survive in a world taken over by zo... 2019 11
7779 s7780 TV Show Zona Rosa NaN Manu NNa, Ana Julia Yeyé, Ray Contreras, Pablo... Mexico 2019-11-26 2019 TV-MA 1 International TV Shows, Spanish-Language TV Sh... An assortment of talent takes the stage for a ... 2019 11
7780 s7781 Movie Zoo Shlok Sharma Shashank Arora, Shweta Tripathi, Rahul Kumar, ... India 2018-07-01 2018 TV-MA 94 Dramas, Independent Movies, International Movies A drug dealer starts having doubts about his t... 2018 7
7783 s7784 Movie Zubaan Mozez Singh Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan... India 2019-03-02 2015 TV-14 111 Dramas, International Movies, Music & Musicals A scrappy but poor boy worms his way into a ty... 2019 3

5661 rows × 14 columns

In [4]:
# Create the pygwalker explorer for the cleaned frame.
# The chart definitions are presumably read from / saved to "0.json" — confirm
# against the pygwalker version in use.
walker0 = pyg.walk(
    df,
    spec="0.json",
    use_preview=True,
    store_chart_data=True,
)
In [5]:
# Show the chart stored under the name "Chart 1" with a heading.
walker0.display_chart(
    "Chart 1",
    title="Content Type On Netflix",
)

Content Type On Netflix

In [6]:
# Time-based views: additions per year, releases per year, additions per month.
walker0.display_chart(
    "Chart 2",
    title="Content Added Over Year",
    desc="The number of movies on Netflix is growing much faster than TV shows, movie content has grown substantially after 2016.",
)
walker0.display_chart(
    "Chart 3",
    title="Content Release Over Year",
)
walker0.display_chart(
    "Chart 4",
    title="Content Added Over Month",
    desc="",
)

Content Added Over Year

The number of movies on Netflix is growing much faster than TV shows, movie content has grown substantially after 2016.

Content Release Over Year

Content Added Over Month

In [7]:
# Show the chart stored as "Chart 5" together with its written takeaway.
walker0.display_chart(
    "Chart 5",
    title="Content Added Over Year Diff By Rating",
    desc="TV-MA, TV-14 are the ratings for most of Netflix's content, and R content is also increasing year by year",
)

Content Added Over Year Diff By Rating

TV-MA, TV-14 are the ratings for most of Netflix's content, and R content is also increasing year by year

In [8]:
# Show the chart stored as "Chart 6" (movie durations) with its takeaway.
walker0.display_chart(
    "Chart 6",
    title="movie time distribution",
    desc="Mainly concentrated between 90 and 110 minutes",
)

movie time distribution

Mainly concentrated between 90 and 110 minutes

In [9]:
# Show the chart stored as "Chart 7" (TV-show season counts).
walker0.display_chart(
    "Chart 7",
    title="tv-show season distribution",
)

tv-show season distribution

In [10]:
# Build a long-format frame with one row per (title, country) pair:
# split the comma-separated country text into columns, stack them into a
# single column (dropping the empty padding), drop the helper index level so
# only the original row index is left, and trim whitespace around each name.
country_df = (
    df["country"]
    .str.split(",", expand=True)
    .stack()
    .reset_index(level=1, drop=True)
    .str.strip()
    .to_frame("country")
)

# Separate pygwalker explorer for the per-country frame, using spec "1.json".
walker1 = pyg.walk(
    country_df,
    spec="1.json",
    use_preview=True,
    store_chart_data=True,
)
In [11]:
# Show the chart stored as "Chart 1" in the per-country explorer.
walker1.display_chart(
    "Chart 1",
    title="Countries Of Most Content",
)

Countries Of Most Content

In [12]:
# Build a long-format frame with one row per (title, category) pair, keeping
# the rating and type of each title alongside.
#
# The category names are stripped after exploding: "listed_in" is split on ","
# only, so without the strip every non-first entry would keep a leading space
# and be counted as a different category from the same name in first position.
# Using `assign` in a chain (instead of assigning a column onto a derived
# frame) also avoids SettingWithCopyWarning.
category_df = (
    df.assign(category=df["listed_in"].str.split(","))
    .loc[:, ["category", "rating", "type"]]
    .explode("category")
    .assign(category=lambda exploded: exploded["category"].str.strip())
    .reset_index(drop=True)
)

# Separate pygwalker explorer for the per-category frame, using spec "2.json".
walker2 = pyg.walk(category_df, spec="2.json", use_preview=True, store_chart_data=True)
In [13]:
# Show the chart stored as "TV category" in the per-category explorer.
walker2.display_chart(
    "TV category",
    title="tv-show category distribution",
)

tv-show category distribution

In [14]:
# Show the chart stored as "Movie category" in the per-category explorer.
walker2.display_chart(
    "Movie category",
    title="movie category distribution",
)

movie category distribution

In [15]:
# Show the chart stored as "rating category(tv)".
# The displayed title previously read "heatamp"; the spelling is corrected here.
walker2.display_chart(
    "rating category(tv)",
    title="rating category heatmap(TV-Show)",
)

rating category heatamp(TV-Show)

In [16]:
# Show the chart stored as "rating category(movie)".
# The displayed title previously read "heatamp"; the spelling is corrected here.
walker2.display_chart(
    "rating category(movie)",
    title="rating category heatmap(movie)",
)

rating category heatamp(movie)

In [ ]: