Source code for dtale.datasets

import pandas as pd
import requests
import zipfile

from six import BytesIO


def covid():
    """
    Load the New York Times per-state COVID-19 CSV and tag each row with a
    state abbreviation.

    Two CSV files are fetched over HTTP through D-Tale's CSV loader: the case
    data (its ``date`` column is parsed as dates) and a lookup table with
    ``State`` and ``Abbreviation`` columns.  The lookup is used to add a
    ``state_code`` column derived from the ``state`` column; states missing
    from the lookup get a null code.

    :return: tuple of (case data with the extra ``state_code`` column, ``None``)
    :rtype: tuple
    """
    from dtale.cli.loaders.csv_loader import loader_func as load_csv

    cases_url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
    lookup_url = "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"

    cases = load_csv(path=cases_url, parse_dates=["date"])
    state_table = load_csv(path=lookup_url)

    # {full state name: abbreviation}
    lookup_by_column = state_table.set_index("State").to_dict()
    abbreviations = lookup_by_column["Abbreviation"]

    cases["state_code"] = cases["state"].map(abbreviations)
    return cases, None
def seinfeld():
    """
    Load the Seinfeld episode metadata and script lines and join them.

    Both CSV files are fetched over HTTP through D-Tale's CSV loader.  The
    ``Unnamed: 0`` column is dropped from each; ``Season`` and ``EpisodeNo``
    are additionally dropped from the scripts before the two frames are
    inner-joined on ``SEID``.

    :return: tuple of (merged episode/script data, ``None``)
    :rtype: tuple
    """
    from dtale.cli.loaders.csv_loader import loader_func as load_csv

    episode_info = load_csv(
        path="https://github.com/4m4n5/the-seinfeld-chronicles/raw/master/episode_info.csv"
    )
    episode_columns = [
        name for name in episode_info.columns if name != "Unnamed: 0"
    ]
    episode_info = episode_info[episode_columns]

    script_lines = load_csv(
        path="https://github.com/4m4n5/the-seinfeld-chronicles/raw/master/scripts.csv"
    )
    unwanted = ("Unnamed: 0", "Season", "EpisodeNo")
    script_columns = [
        name for name in script_lines.columns if name not in unwanted
    ]
    script_lines = script_lines[script_columns]

    merged = episode_info.merge(script_lines, how="inner", on="SEID")
    return merged, None
def load_zip(url):
    """
    Download a zip archive and lazily yield each of its members.

    This is a generator: the download happens on the first ``next()`` call and
    the archive is held in memory (it is never written to disk).

    :param url: location of the zip archive to download
    :type url: str
    :return: generator of ``(member filename, readable file handle)`` tuples,
             in the order the members are listed in the archive
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    response = requests.get(url)
    # Fail with the real HTTP error instead of handing an error page to
    # zipfile, which would surface as a misleading BadZipFile.
    response.raise_for_status()

    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        for member in archive.infolist():
            yield member.filename, archive.open(member.filename)
def simpsons():
    """
    Load The Simpsons episode metadata joined with its script lines.

    Side effect: updates D-Tale's global app settings so that
    ``max_column_width`` and ``max_row_height`` are both 100.

    The episode ``id`` column is renamed to ``episode_id`` and every
    ``image_url`` value is wrapped in an ``<img>`` tag.  The script lines come
    from the first member of a downloaded zip archive and are inner-joined to
    the episodes on ``episode_id``.

    :return: tuple of (merged data, options dict whose ``columnFormats`` entry
             flags ``image_url`` as HTML)
    :rtype: tuple
    """
    from dtale.cli.loaders.csv_loader import loader_func as load_csv
    import dtale.global_state as global_state

    global_state.set_app_settings({"max_column_width": 100, "max_row_height": 100})

    def as_img_tag(url):
        # Wrap an image URL in the markup used for the image column.
        return "<img src='{}' style='height: auto; width: 100px;' />".format(url)

    episode_info = load_csv(
        path="https://github.com/aschonfeld/dtale-media/raw/master/datasets/simpsons_episodes.csv"
    )
    episode_info = episode_info.rename(columns={"id": "episode_id"})
    episode_info.loc[:, "image_url"] = episode_info["image_url"].apply(as_img_tag)

    # Only the first member of the archive is read.
    first_member = next(
        load_zip(
            "https://github.com/aschonfeld/dtale-media/raw/master/datasets/simpsons_script_lines.csv.zip"
        )
    )
    script_lines = pd.read_csv(first_member[1])

    merged = episode_info.merge(script_lines, how="inner", on="episode_id")
    column_formats = {"image_url": {"fmt": {"html": True}}}
    return merged, {"columnFormats": column_formats}
def video_games():
    """
    Load the video game sales dataset.

    The CSV is read from the first member of a zip archive downloaded over
    HTTP.

    :return: tuple of (video game sales data, ``None``)
    :rtype: tuple
    """
    # Only the first member of the archive is read.
    first_member = next(
        load_zip(
            "https://github.com/aschonfeld/dtale-media/raw/master/datasets/vgsales.csv.zip"
        )
    )
    csv_handle = first_member[1]
    sales = pd.read_csv(csv_handle)
    return sales, None
def movies():
    """
    Load the IMDb movies dataset with ``year`` normalized to integers.

    The CSV is read from the first member of a zip archive downloaded over
    HTTP.  The literal value ``"TV Movie 2019"`` in the ``year`` column is
    replaced by ``"2019"`` before the whole column is cast to integers.

    :return: tuple of (movie data, ``None``)
    :rtype: tuple
    """
    # Only the first member of the archive is read.
    _, csv_handle = next(
        load_zip(
            "https://github.com/aschonfeld/dtale-media/raw/master/datasets/IMDb_movies.csv.zip"
        )
    )
    # Named ``df`` rather than ``movies`` so the function name is not shadowed.
    df = pd.read_csv(csv_handle)

    is_bad_year = df["year"] == "TV Movie 2019"
    # Assign the column directly: ``df.loc[:, "year"] = ...`` writes in place on
    # pandas >= 2.0, which keeps the old object dtype and loses the int cast.
    df["year"] = df["year"].mask(is_bad_year, "2019").astype("int")
    return df, None
def time_dataframe():
    """
    Build a small random time-indexed DataFrame.

    pandas' own ``makeTimeDataFrame`` test helper is used when it can be
    imported from ``pandas._testing`` or ``pandas.util.testing``.  When
    neither location provides it, an equivalent frame is built here: 30
    business days starting from 2000-01-01, with four columns ``A``-``D`` of
    standard-normal random values.

    :return: tuple of (random time-indexed data, ``None``)
    :rtype: tuple
    """
    try:
        from pandas._testing import makeTimeDataFrame
    except ImportError:
        try:
            from pandas.util.testing import makeTimeDataFrame
        except ImportError:
            # Helper is private to pandas and not available in this install.
            makeTimeDataFrame = None

    if makeTimeDataFrame is not None:
        return makeTimeDataFrame(), None

    # Local import: numpy is only needed on this fallback path.
    import numpy as np

    periods, columns = 30, list("ABCD")
    index = pd.date_range("2000-01-01", periods=periods, freq="B")
    values = np.random.randn(periods, len(columns))
    return pd.DataFrame(values, columns=columns, index=index), None