import os
import shlex
import sys

from hdfs.client import Client


class ImportStDataType(object):
    """Check weekly search-term-rank partitions on HDFS and import the missing ones.

    For every (site, data type) pair the expected partition directory is listed
    through the HDFS client. When the listing fails or comes back empty, the
    external import script is executed and the check is repeated, up to
    ``MAX_ATTEMPTS`` rounds per partition.
    """

    # Shell script executed to import one site / data type / year / week.
    IMPORT_SCRIPT = "/opt/module/spark/demo/py_demo/demo_sqoop/import_st_data_type.sh"
    # Maximum number of check-then-import rounds per partition.
    MAX_ATTEMPTS = 3

    def __init__(self, site_name_flag='all', data_type_flag='all', year_week='2022-1'):
        """Create the HDFS client and resolve which sites / data types to process.

        By default every site and every data type is processed.

        Args:
            site_name_flag: A single site name, or 'all' for every known site.
            data_type_flag: A single data type, or 'all' for every known type.
            year_week: Target week written as '<year>-<week>', e.g. '2022-1'.

        Raises:
            ValueError: If the first or last '-'-separated part of
                ``year_week`` is not an integer.
        """
        self.client = Client("http://hadoop1:50070/")
        if site_name_flag == 'all':
            self.site_name_list = ['us', 'uk', 'de', 'es', 'fr', 'it']
        else:
            self.site_name_list = [site_name_flag]
        if data_type_flag == 'all':
            self.data_type_list = ['zr', 'sp', 'sb', 'ac', 'bs', 'er', 'tr']
        else:
            self.data_type_list = [data_type_flag]
        self.year_week = year_week
        # Split once; the year is the first part and the week is the last part.
        parts = self.year_week.split("-")
        self.year = int(parts[0])
        self.week = int(parts[-1])
        self.path_default = "/home/ffman/ods"

    def run(self):
        """Process every configured (site, data type) combination in order."""
        for site_name in self.site_name_list:
            for data_type in self.data_type_list:
                self._ensure_partition(site_name, data_type)

    def _ensure_partition(self, site_name, data_type):
        """Import one partition until its HDFS directory lists at least one entry.

        Stops as soon as the directory is non-empty, otherwise runs the import
        script and re-checks, for at most ``MAX_ATTEMPTS`` rounds. The result of
        the final import round is not re-checked.
        """
        hdfs_path = (
            f"{self.path_default}/ods_search_term_rank_{data_type}"
            f"/site_name={site_name}/dt={self.year_week}"
        )
        for num in range(1, self.MAX_ATTEMPTS + 1):
            if num >= 2:
                print(f"第{num}次重复导入")
            try:
                file_list = self.client.list(hdfs_path=hdfs_path, status=True)
            except Exception as e:
                # Best effort: any listing failure is treated as "partition
                # missing" and answered with an import, as before.
                print(f"{site_name}, {data_type} isn't exists: {hdfs_path}", e)
            else:
                print(f"{site_name} {data_type} {self.year} {self.week}: file_list--{len(file_list)}")
                if file_list:
                    return
            self._run_import(site_name, data_type)

    def _run_import(self, site_name, data_type):
        """Run the import script for one partition and return ``os.system``'s result.

        Every argument is shell-quoted so that unusual characters in the
        command-line supplied values cannot be interpreted by the shell.
        A non-zero result is reported instead of being silently dropped.
        """
        command = " ".join(
            shlex.quote(str(arg))
            for arg in (self.IMPORT_SCRIPT, site_name, data_type, self.year, self.week)
        )
        status = os.system(command)
        if status != 0:
            print(f"{site_name} {data_type} {self.year} {self.week}: "
                  f"import script returned non-zero status {status}")
        return status


if __name__ == '__main__':
    # Expected invocation: <script> <site_name|all> <data_type|all> <year-week>
    # Fail with a usage message rather than a bare IndexError when an
    # argument is missing.
    if len(sys.argv) < 4:
        sys.exit(f"Usage: {sys.argv[0]} <site_name|all> <data_type|all> <year-week, e.g. 2022-1>")
    site_name_flag = sys.argv[1]  # argument 1: site name, or 'all' for every site
    data_type_flag = sys.argv[2]  # argument 2: data type, or 'all' for every type
    year_week = sys.argv[3]  # argument 3: year-week, e.g. 2022-1
    handle_obj = ImportStDataType(site_name_flag=site_name_flag,
                                  data_type_flag=data_type_flag,
                                  year_week=year_week)
    handle_obj.run()

