import pandas as pd
import numpy as np

def answer_one():
    """Build the top-15 country table from the energy, GDP and Scimago files.

    Reads 'Energy Indicators.xls', 'world_bank.csv' and 'scimagojr-3.xlsx'
    from the current working directory, normalises the country names so the
    three sources agree, and inner-joins them on the country name.

    Returns:
        pandas.DataFrame indexed by 'Country' with the columns
        ['Rank', 'Documents', 'Citable documents', 'Citations',
        'Self-citations', 'Citations per document', 'H index',
        'Energy Supply', 'Energy Supply per Capita', '% Renewable',
        '2006', ..., '2015'].
    """
    # skiprows / skipfooter are row *counts*: drop the first 17 rows and the
    # last 38 rows of the sheet.
    energy = pd.read_excel('Energy Indicators.xls', skiprows=17, skipfooter=38)

    # Keep only the four data columns (drops the two leading columns).
    energy = energy[['Unnamed: 2', 'Petajoules', 'Gigajoules', '%']]
    energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']

    # Missing data is written as "..." in the sheet; turn it into NaN.
    # infer_objects() makes sure the numeric columns end up with a numeric
    # dtype even when replace() leaves them as object columns.
    energy = energy.replace('...', np.nan).infer_objects()

    # Convert Energy Supply to gigajoules (1,000,000 gigajoules per petajoule).
    energy['Energy Supply'] = energy['Energy Supply'] * 1000000

    # Remove the digits appended to some country names. regex=True is
    # required: newer pandas treats the pattern as a literal string by default,
    # which would silently leave the names untouched.
    energy['Country'] = energy['Country'].str.replace(r"[0-9]", "", regex=True)

    # These renames must run before the parenthetical is stripped below,
    # because two of the keys contain the parenthesised text.
    energy['Country'] = energy['Country'].replace({
        'China, Hong Kong Special Administrative Region': 'Hong Kong',
        'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
        'Republic of Korea': 'South Korea',
        'United States of America': 'United States',
        'Iran (Islamic Republic of)': 'Iran',
        'Bolivia (Plurinational State of)': 'Bolivia'})

    # Strip any remaining " (…)" suffix from the country names.
    energy['Country'] = energy['Country'].str.replace(r" \(.*\)", "", regex=True)

    GDP = pd.read_csv("world_bank.csv", skiprows=4)
    GDP['Country Name'] = GDP['Country Name'].replace({
        'Korea, Rep.': 'South Korea',
        'Iran, Islamic Rep.': 'Iran',
        'Hong Kong SAR, China': 'Hong Kong'})

    ScimEn = pd.read_excel('scimagojr-3.xlsx')

    # Use only the last 10 years (2006-2015) of GDP data; rename the key column
    # so all three frames share the 'Country' join key.
    years = [str(year) for year in range(2006, 2016)]
    GDP_merge = GDP[['Country Name'] + years].rename(columns={'Country Name': 'Country'})

    # Top 15 countries by Scimagojr rank.
    # NOTE(review): this takes the first 15 rows, so it assumes the file is
    # already ordered by 'Rank' - confirm against the data file.
    ScimEn_merge = ScimEn[:15]

    # Intersection of country names across the three datasets.
    df = pd.merge(ScimEn_merge, energy, how='inner', on='Country')
    df = pd.merge(df, GDP_merge, how='inner', on='Country')

    df = df.set_index('Country')
    columns = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',
               'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita',
               '% Renewable'] + years
    return df[columns]

answer_one()


%%HTML
<svg width="800" height="300">
  <circle cx="150" cy="180" r="80" fill-opacity="0.2" stroke="black" stroke-width="2" fill="blue" />
  <circle cx="200" cy="100" r="80" fill-opacity="0.2" stroke="black" stroke-width="2" fill="red" />
  <circle cx="100" cy="100" r="80" fill-opacity="0.2" stroke="black" stroke-width="2" fill="green" />
  <line x1="150" y1="125" x2="300" y2="150" stroke="black" stroke-width="2" fill="black" stroke-dasharray="5,3"/>
  <text  x="300" y="165" font-family="Verdana" font-size="35">Everything but this!</text>
</svg>


def answer_two():
    """Count the rows lost by inner-joining instead of outer-joining the data.

    Loads and cleans the same three files as ``answer_one`` (without limiting
    Scimago to 15 rows or GDP to ten years), merges them once with
    ``how='outer'`` and once with ``how='inner'``, and compares the lengths.

    Returns:
        int: ``len(outer merge) - len(inner merge)``.
    """
    # skiprows / skipfooter are row *counts*: drop the first 17 rows and the
    # last 38 rows of the sheet.
    energy = pd.read_excel('Energy Indicators.xls', skiprows=17, skipfooter=38)

    # Keep only the four data columns (drops the two leading columns).
    energy = energy[['Unnamed: 2', 'Petajoules', 'Gigajoules', '%']]
    energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']

    # Missing data is written as "..." in the sheet; turn it into NaN and make
    # sure the numeric columns carry a numeric dtype afterwards.
    energy = energy.replace('...', np.nan).infer_objects()

    # Convert Energy Supply to gigajoules (1,000,000 gigajoules per petajoule).
    energy['Energy Supply'] = energy['Energy Supply'] * 1000000

    # regex=True is required: newer pandas treats the pattern as a literal
    # string by default, which would leave the names (and so the join keys)
    # untouched and change the row counts below.
    energy['Country'] = energy['Country'].str.replace(r"[0-9]", "", regex=True)

    # These renames must run before the parenthetical is stripped below,
    # because two of the keys contain the parenthesised text.
    energy['Country'] = energy['Country'].replace({
        'China, Hong Kong Special Administrative Region': 'Hong Kong',
        'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
        'Republic of Korea': 'South Korea',
        'United States of America': 'United States',
        'Iran (Islamic Republic of)': 'Iran',
        'Bolivia (Plurinational State of)': 'Bolivia'})

    # Strip any remaining " (…)" suffix from the country names.
    energy['Country'] = energy['Country'].str.replace(r" \(.*\)", "", regex=True)

    GDP = pd.read_csv("world_bank.csv", skiprows=4)
    GDP['Country Name'] = GDP['Country Name'].replace({
        'Korea, Rep.': 'South Korea',
        'Iran, Islamic Rep.': 'Iran',
        'Hong Kong SAR, China': 'Hong Kong'})

    ScimEn = pd.read_excel('scimagojr-3.xlsx')

    def merged_length(how):
        # Merge Scimago with energy, then with GDP, using the given join type.
        step = pd.merge(ScimEn, energy, how=how, left_on='Country', right_on='Country')
        full = pd.merge(step, GDP, how=how, left_on='Country', right_on='Country Name')
        return len(full)

    len_outer = merged_length('outer')
    len_inner = merged_length('inner')

    return len_outer - len_inner

answer_two()

156


import numpy as np
def mean_top15(row):
    """Return the mean of a row's 2006-2015 values as a one-element Series.

    Args:
        row: a Series that has the labels '2006' through '2015'.

    Returns:
        pandas.Series with the single label 'mean'.
    """
    gdp_years = [str(year) for year in range(2006, 2016)]
    average = np.mean(row[gdp_years])
    return pd.Series([average], index=['mean'])

def answer_three():
    """Return the per-country average of the 2006-2015 columns, largest first.

    Returns:
        pandas.DataFrame indexed by country with a single 'mean' column,
        sorted in descending order of that column.
    """
    per_country = answer_one().apply(mean_top15, axis=1)
    return per_country.sort_values('mean', ascending=False)

answer_three()


def answer_three_alter():
    """Return the per-country mean of the 2006-2015 columns as a sorted Series.

    Returns:
        pandas.Series named 'Mean', indexed by country, in descending order.
    """
    gdp_years = [str(year) for year in range(2006, 2016)]
    Top15 = answer_one()
    Top15['Mean'] = Top15[gdp_years].mean(axis=1)
    return Top15['Mean'].sort_values(ascending=False)
answer_three_alter()

Country
United States         1.536434e+13
China                 6.348609e+12
Japan                 5.542208e+12
Germany               3.493025e+12
France                2.681725e+12
United Kingdom        2.487907e+12
Brazil                2.189794e+12
Italy                 2.120175e+12
India                 1.769297e+12
Canada                1.660647e+12
Russian Federation    1.565459e+12
Spain                 1.418078e+12
Australia             1.164043e+12
South Korea           1.106715e+12
Iran                  4.441558e+11
Name: Mean, dtype: float64


def answer_four():
    """Return the 2015-minus-2006 change for the sixth-ranked country.

    The ranking is the descending order produced by ``answer_three``; the
    sixth entry is position 5 of its index.
    """
    table = answer_one()
    ranking = answer_three()

    # An equivalent approach is to reset the index and filter the rows whose
    # 'Country' equals the sixth country, then take the single value.
    sixth_country = ranking.index[5]
    record = table.loc[sixth_country]
    return record['2015'] - record['2006']
answer_four()

246702696075.3999


def answer_four_alter():
    """Return the 2015-minus-2006 change for the country with the sixth-largest mean.

    The mean is taken over the 2006-2015 columns of the ``answer_one`` table.
    """
    table = answer_one()
    decade = [str(year) for year in range(2006, 2016)]
    table['Mean'] = table[decade].mean(axis=1)

    # Position 5 of the descending ranking is the sixth country.
    sixth_country = table['Mean'].sort_values(ascending=False).index[5]
    record = table.loc[sixth_country]
    return record['2015'] - record['2006']
answer_four_alter()

246702696075.3999


def answer_five():
    """Return the mean of the 'Energy Supply per Capita' column of the top-15 table."""
    per_capita = answer_one()['Energy Supply per Capita']
    return per_capita.mean()

answer_five()

157.6


def answer_six():
    """Return ``(country, value)`` for the largest '% Renewable' in the top-15 table.

    When several rows share the maximum, the first one in table order is used.
    """
    renewable = answer_one()['% Renewable']
    peak = renewable.max()
    leader = renewable[renewable == peak].index[0]
    return leader, peak

answer_six()

('Brazil', 69.64803)


def answer_seven():
    """Return ``(country, ratio)`` for the highest 'Self-citations' / 'Citations' ratio.

    When several rows share the maximum, the first one in table order is used.
    """
    table = answer_one()
    share = table['Self-citations'] / table['Citations']
    highest = share.max()
    return (share[share == highest].index[0], highest)

answer_seven()

('China', 0.6893126179389422)


def answer_eight():
    """Return the country with the third-largest estimated population.

    Population is estimated as 'Energy Supply' divided by
    'Energy Supply per Capita'.
    """
    table = answer_one()
    estimate = table['Energy Supply'] / table['Energy Supply per Capita']
    third_largest = estimate.sort_values(ascending=False).iloc[2]
    # Look the value back up in table order and take the first match.
    return estimate[estimate == third_largest].index[0]

answer_eight()

'United States'


def answer_eight_alter():
    """Return the country with the third-largest estimated population.

    Population is estimated as 'Energy Supply' divided by
    'Energy Supply per Capita'.

    Returns:
        The index label (country name) of the third row after sorting the
        estimate in descending order.
    """
    Top15 = answer_one()
    # Work on an explicit copy: adding a column to a bare column selection
    # triggers pandas' SettingWithCopyWarning.
    target = Top15[['Energy Supply', 'Energy Supply per Capita']].copy()
    target['Population'] = target['Energy Supply'] / target['Energy Supply per Capita']

    return target.sort_values(by='Population', ascending=False).iloc[2].name
answer_eight_alter()

C:\Users\asus\Anaconda3\lib\site-packages\ipykernel_launcher.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """

'United States'


def answer_nine():
    """Return the correlation between citable documents per person and energy per capita.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita';
    the correlation is computed with ``Series.corr`` using its defaults.
    """
    table = answer_one()
    energy_per_capita = table['Energy Supply per Capita']
    population = table['Energy Supply'] / energy_per_capita
    docs_per_person = table['Citable documents'] / population
    return docs_per_person.corr(energy_per_capita)
answer_nine()

0.7940010435442946


def plot9():
    """Scatter-plot energy supply per capita against citable documents per capita.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita'.
    The x axis is limited to [0, 0.0006]. Nothing is returned; the figure is
    produced through ``DataFrame.plot`` (which requires matplotlib).
    """
    # Equivalent of the notebook line magic ``%matplotlib inline``, written as
    # plain Python so the function is also valid outside IPython.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass  # not running under IPython; use the default backend

    Top15 = answer_one()
    Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']
    Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst']
    Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita',
               kind='scatter', xlim=[0, 0.0006])


plot9() # Be sure to comment out plot9() before submitting the assignment!


import numpy as np
def isAboveMedian(row, median=None):
    """Flag whether a row's '% Renewable' is at or above the median.

    Args:
        row: a Series with a '% Renewable' entry. It is not modified.
        median: the threshold to compare against. When omitted, it is computed
            with ``np.nanmedian`` from the '% Renewable' column of
            ``answer_one()`` (this reloads the data, so callers that process
            many rows should pass the median in).

    Returns:
        pandas.Series holding a single value: 1 if the row's value is >= the
        median, otherwise 0 (a NaN value compares as 0).
    """
    if median is None:
        median = np.nanmedian(answer_one()['% Renewable'])
    flag = 1 if row['% Renewable'] >= median else 0
    return pd.Series(flag)


def answer_ten():
    """Return the at-or-above-median '% Renewable' flag for each top-15 country.

    Returns:
        pandas.DataFrame with one column of 1/0 flags, sorted by country name.
    """
    Top15 = answer_one()
    # Compute the median once instead of reloading the data for every row.
    median = np.nanmedian(Top15['% Renewable'])
    return Top15.apply(isAboveMedian, axis=1, args=(median,)).sort_index()

answer_ten()


def answer_ten_alter():
    """Return a 1/0 Series marking countries at or above the median '% Renewable'.

    Returns:
        pandas.Series named 'HighRenew', indexed by country and sorted by
        country name; 1 where the value is >= the median, else 0.
    """
    renewable = answer_one()['% Renewable']
    threshold = renewable.median()
    flags = (renewable >= threshold).astype('int64')
    return flags.rename('HighRenew').sort_index()
answer_ten_alter()

Country
Australia             0
Brazil                1
Canada                1
China                 1
France                1
Germany               1
India                 0
Iran                  0
Italy                 1
Japan                 0
Russian Federation    1
South Korea           0
Spain                 1
United Kingdom        0
United States         0
Name: HighRenew, dtype: int64


def answer_eleven():
    """Summarise the estimated population of the top-15 countries per continent.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita'.

    Returns:
        pandas.DataFrame indexed by 'Continent' with the columns
        ['sample size', 'sum', 'average', 'standard deviation'].

    Raises:
        KeyError: if a country in the table is missing from the continent map.
    """
    ContinentDict = {'China': 'Asia',
                     'United States': 'North America',
                     'Japan': 'Asia',
                     'United Kingdom': 'Europe',
                     'Russian Federation': 'Europe',
                     'Canada': 'North America',
                     'Germany': 'Europe',
                     'India': 'Asia',
                     'France': 'Europe',
                     'South Korea': 'Asia',
                     'Italy': 'Europe',
                     'Spain': 'Europe',
                     'Iran': 'Asia',
                     'Australia': 'Australia',
                     'Brazil': 'South America'}

    Top15 = answer_one().reset_index()
    Top15['Estimated Population'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']
    Top15['Continent'] = [ContinentDict[country] for country in Top15['Country']]

    grouped = Top15.set_index('Continent').groupby(level=0)['Estimated Population']
    # Passing a renaming dict to SeriesGroupBy.agg is deprecated; aggregate
    # with a list of function names and label the columns afterwards.
    summary = grouped.agg(['size', 'sum', 'mean', 'std'])
    summary.columns = ['sample size', 'sum', 'average', 'standard deviation']

    return summary

answer_eleven()

C:\Users\asus\Anaconda3\lib\site-packages\ipykernel_launcher.py:27: FutureWarning: using a dict on a Series for aggregation
is deprecated and will be removed in a future version. Use                 named aggregation instead.

    >>> grouper.agg(name_1=func_1, name_2=func_2)


def answer_eleven_alter():
    """Summarise the estimated population of the top-15 countries per continent.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita'.

    Returns:
        pandas.DataFrame indexed by 'Continent' with the columns
        ['size', 'sum', 'mean', 'std'].

    Raises:
        KeyError: if a country in the table is missing from the continent map.
    """
    ContinentDict = {'China': 'Asia',
                     'United States': 'North America',
                     'Japan': 'Asia',
                     'United Kingdom': 'Europe',
                     'Russian Federation': 'Europe',
                     'Canada': 'North America',
                     'Germany': 'Europe',
                     'India': 'Asia',
                     'France': 'Europe',
                     'South Korea': 'Asia',
                     'Italy': 'Europe',
                     'Spain': 'Europe',
                     'Iran': 'Asia',
                     'Australia': 'Australia',
                     'Brazil': 'South America'}

    Top15 = answer_one()
    Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']

    Top15 = Top15.reset_index()
    Top15['Continent'] = [ContinentDict[country] for country in Top15['Country']]

    grouped = Top15.set_index('Continent').groupby(level=0)['PopEst']
    # Passing a renaming dict to SeriesGroupBy.agg is deprecated; a list of
    # function names yields the same four columns, already correctly named.
    target = grouped.agg(['size', 'sum', 'mean', 'std'])
    return target[['size', 'sum', 'mean', 'std']]

answer_eleven_alter()

C:\Users\asus\Anaconda3\lib\site-packages\ipykernel_launcher.py:33: FutureWarning: using a dict on a Series for aggregation
is deprecated and will be removed in a future version. Use                 named aggregation instead.

    >>> grouper.agg(name_1=func_1, name_2=func_2)


import pandas as pd
def answer_twelve():
    """Count top-15 countries per continent and '% Renewable' bin.

    '% Renewable' is cut into 5 equal-width bins with ``pd.cut``.

    Returns:
        pandas.Series with a (Continent, bins) MultiIndex holding the number
        of countries in each group; combinations with no countries are
        not included.

    Raises:
        KeyError: if a country in the table is missing from the continent map.
    """
    ContinentDict = {'China': 'Asia',
                     'United States': 'North America',
                     'Japan': 'Asia',
                     'United Kingdom': 'Europe',
                     'Russian Federation': 'Europe',
                     'Canada': 'North America',
                     'Germany': 'Europe',
                     'India': 'Asia',
                     'France': 'Europe',
                     'South Korea': 'Asia',
                     'Italy': 'Europe',
                     'Spain': 'Europe',
                     'Iran': 'Asia',
                     'Australia': 'Australia',
                     'Brazil': 'South America'}
    Top15 = answer_one().reset_index()
    Top15['Continent'] = [ContinentDict[country] for country in Top15['Country']]
    Top15['bins'] = pd.cut(Top15['% Renewable'], 5)
    # 'bins' is categorical: observed=True keeps only the combinations that
    # actually occur, independent of the pandas version's default.
    groups = Top15.groupby(['Continent', 'bins'], observed=True)
    return groups.size()

answer_twelve()

Continent      bins            
Asia           (2.212, 15.753]     4
               (15.753, 29.227]    1
Australia      (2.212, 15.753]     1
Europe         (2.212, 15.753]     1
               (15.753, 29.227]    3
               (29.227, 42.701]    2
North America  (2.212, 15.753]     1
               (56.174, 69.648]    1
South America  (56.174, 69.648]    1
dtype: int64


import pandas as pd
def answer_thirteen():
    """Return the estimated population per country as comma-grouped strings.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita'
    and formatted with the "{:,}" format spec.

    Returns:
        pandas.Series named 'PopEst', indexed by country, of strings.
    """
    table = answer_one()
    estimate = table['Energy Supply'] / table['Energy Supply per Capita']
    with_commas = estimate.map("{:,}".format)
    return with_commas.rename('PopEst')

answer_thirteen()

Country
China                 1,367,645,161.2903225
United States          317,615,384.61538464
Japan                  127,409,395.97315437
United Kingdom         63,870,967.741935484
Russian Federation            143,500,000.0
Canada                  35,239,864.86486486
Germany                 80,369,696.96969697
India                 1,276,730,769.2307692
France                  63,837,349.39759036
South Korea            49,805,429.864253394
Italy                  59,908,256.880733944
Spain                    46,443,396.2264151
Iran                    77,075,630.25210084
Australia              23,316,017.316017315
Brazil                 205,915,254.23728815
Name: PopEst, dtype: object


def plot_optional():
    """Draw a bubble chart of '% Renewable' against 'Rank' for the top-15 table.

    Bubble size is derived from the '2014' column, each bubble gets one of the
    hard-coded colours in row order, and every point is labelled with its
    country name. A description of the chart is printed afterwards. Nothing is
    returned; plotting goes through ``DataFrame.plot`` (requires matplotlib).
    """
    # Equivalent of the notebook line magic ``%matplotlib inline``, written as
    # plain Python so the function is also valid outside IPython.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass  # not running under IPython; use the default backend

    Top15 = answer_one()
    ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter',
                    c=['#e41a1c', '#377eb8', '#e41a1c', '#4daf4a', '#4daf4a', '#377eb8', '#4daf4a', '#e41a1c',
                       '#4daf4a', '#e41a1c', '#4daf4a', '#4daf4a', '#e41a1c', '#dede00', '#ff7f00'],
                    xticks=range(1, 16), s=6 * Top15['2014'] / 10**10, alpha=.75, figsize=[16, 6])

    # Iterate the values directly: integer keys on a label-indexed Series
    # (Top15['Rank'][i]) rely on deprecated positional fallback.
    for country, rank, renewable in zip(Top15.index, Top15['Rank'], Top15['% Renewable']):
        ax.annotate(country, [rank, renewable], ha='center')

    print("This is an example of a visualization that can be created to help understand the data. "
          "This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' "
          "2014 GDP, and the color corresponds to the continent.")


plot_optional() # Be sure to comment out plot_optional() before submitting the assignment!

This is an example of a visualization that can be created to help understand the data. This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' 2014 GDP, and the color corresponds to the continent.

	Rank	Documents	Citable documents	Citations	Self-citations	Citations per document	H index	Energy Supply	Energy Supply per Capita	% Renewable	2006	2007	2008	2009	2010	2011	2012	2013	2014	2015
Country
China	1	127050	126767	597237	411683	4.70	138	1.271910e+11	93.0	19.754910	3.992331e+12	4.559041e+12	4.997775e+12	5.459247e+12	6.039659e+12	6.612490e+12	7.124978e+12	7.672448e+12	8.230121e+12	8.797999e+12
United States	2	96661	94747	792274	265436	8.20	230	9.083800e+10	286.0	11.570980	1.479230e+13	1.505540e+13	1.501149e+13	1.459484e+13	1.496437e+13	1.520402e+13	1.554216e+13	1.577367e+13	1.615662e+13	1.654857e+13
Japan	3	30504	30287	223024	61554	7.31	134	1.898400e+10	149.0	10.232820	5.496542e+12	5.617036e+12	5.558527e+12	5.251308e+12	5.498718e+12	5.473738e+12	5.569102e+12	5.644659e+12	5.642884e+12	5.669563e+12
United Kingdom	4	20944	20357	206091	37874	9.84	139	7.920000e+09	124.0	10.600470	2.419631e+12	2.482203e+12	2.470614e+12	2.367048e+12	2.403504e+12	2.450911e+12	2.479809e+12	2.533370e+12	2.605643e+12	2.666333e+12
Russian Federation	5	18534	18301	34266	12422	1.85	57	3.070900e+10	214.0	17.288680	1.385793e+12	1.504071e+12	1.583004e+12	1.459199e+12	1.524917e+12	1.589943e+12	1.645876e+12	1.666934e+12	1.678709e+12	1.616149e+12
Canada	6	17899	17620	215003	40930	12.01	149	1.043100e+10	296.0	61.945430	1.564469e+12	1.596740e+12	1.612713e+12	1.565145e+12	1.613406e+12	1.664087e+12	1.693133e+12	1.730688e+12	1.773486e+12	1.792609e+12
Germany	7	17027	16831	140566	27426	8.26	126	1.326100e+10	165.0	17.901530	3.332891e+12	3.441561e+12	3.478809e+12	3.283340e+12	3.417298e+12	3.542371e+12	3.556724e+12	3.567317e+12	3.624386e+12	3.685556e+12
India	8	15005	14841	128763	37209	8.58	115	3.319500e+10	26.0	14.969080	1.265894e+12	1.374865e+12	1.428361e+12	1.549483e+12	1.708459e+12	1.821872e+12	1.924235e+12	2.051982e+12	2.200617e+12	2.367206e+12
France	9	13153	12973	130632	28601	9.93	114	1.059700e+10	166.0	17.020280	2.607840e+12	2.669424e+12	2.674637e+12	2.595967e+12	2.646995e+12	2.702032e+12	2.706968e+12	2.722567e+12	2.729632e+12	2.761185e+12
South Korea	10	11983	11923	114675	22595	9.57	104	1.100700e+10	221.0	2.279353	9.410199e+11	9.924316e+11	1.020510e+12	1.027730e+12	1.094499e+12	1.134796e+12	1.160809e+12	1.194429e+12	1.234340e+12	1.266580e+12
Italy	11	10964	10794	111850	26661	10.20	106	6.530000e+09	109.0	33.667230	2.202170e+12	2.234627e+12	2.211154e+12	2.089938e+12	2.125185e+12	2.137439e+12	2.077184e+12	2.040871e+12	2.033868e+12	2.049316e+12
Spain	12	9428	9330	123336	23964	13.08	115	4.923000e+09	106.0	37.968590	1.414823e+12	1.468146e+12	1.484530e+12	1.431475e+12	1.431673e+12	1.417355e+12	1.380216e+12	1.357139e+12	1.375605e+12	1.419821e+12
Iran	13	8896	8819	57470	19125	6.46	72	9.172000e+09	119.0	5.707721	3.895523e+11	4.250646e+11	4.289909e+11	4.389208e+11	4.677902e+11	4.853309e+11	4.532569e+11	4.445926e+11	4.639027e+11	NaN
Australia	14	8831	8725	90765	15606	10.28	107	5.386000e+09	231.0	11.810810	1.021939e+12	1.060340e+12	1.099644e+12	1.119654e+12	1.142251e+12	1.169431e+12	1.211913e+12	1.241484e+12	1.272520e+12	1.301251e+12
Brazil	15	8668	8596	60702	14396	7.00	86	1.214900e+10	59.0	69.648030	1.845080e+12	1.957118e+12	2.056809e+12	2.054215e+12	2.208872e+12	2.295245e+12	2.339209e+12	2.409740e+12	2.412231e+12	2.319423e+12

	sample size	sum	average	standard deviation
Continent
Asia	5.0	2.898666e+09	5.797333e+08	6.790979e+08
Australia	1.0	2.331602e+07	2.331602e+07	NaN
Europe	6.0	4.579297e+08	7.632161e+07	3.464767e+07
North America	2.0	3.528552e+08	1.764276e+08	1.996696e+08
South America	1.0	2.059153e+08	2.059153e+08	NaN

	size	sum	mean	std
Continent
Asia	5.0	2.898666e+09	5.797333e+08	6.790979e+08
Australia	1.0	2.331602e+07	2.331602e+07	NaN
Europe	6.0	4.579297e+08	7.632161e+07	3.464767e+07
North America	2.0	3.528552e+08	1.764276e+08	1.996696e+08
South America	1.0	2.059153e+08	2.059153e+08	NaN

Assignment 3 - More Pandas¶

Question 1 (20%)¶

Question 2 (6.6%)¶

Answer the following questions in the context of only the top 15 countries by Scimagojr Rank (aka the DataFrame returned by `answer_one()`)¶

Question 3 (6.6%)¶

Question 4 (6.6%)¶

Question 5 (6.6%)¶

Question 6 (6.6%)¶

Question 7 (6.6%)¶

Question 8 (6.6%)¶

Question 9 (6.6%)¶

Question 10 (6.6%)¶

Question 11 (6.6%)¶

Question 12 (6.6%)¶

Question 13 (6.6%)¶

Optional¶

	mean
Country
United States	1.536434e+13
China	6.348609e+12
Japan	5.542208e+12
Germany	3.493025e+12
France	2.681725e+12
United Kingdom	2.487907e+12
Brazil	2.189794e+12
Italy	2.120175e+12
India	1.769297e+12
Canada	1.660647e+12
Russian Federation	1.565459e+12
Spain	1.418078e+12
Australia	1.164043e+12
South Korea	1.106715e+12
Iran	4.441558e+11

Assignment 3 - More Pandas¶

Question 1 (20%)¶

Question 2 (6.6%)¶

Answer the following questions in the context of only the top 15 countries by Scimagojr Rank (aka the DataFrame returned by answer_one())¶

Question 3 (6.6%)¶

Question 4 (6.6%)¶

Question 5 (6.6%)¶

Question 6 (6.6%)¶

Question 7 (6.6%)¶

Question 8 (6.6%)¶

Question 9 (6.6%)¶

Question 10 (6.6%)¶

Question 11 (6.6%)¶

Question 12 (6.6%)¶

Question 13 (6.6%)¶

Optional¶

Answer the following questions in the context of only the top 15 countries by Scimagojr Rank (aka the DataFrame returned by `answer_one()`)¶