Pythonで漢字・カタカナ・ひらがなの文字データをCSVファイルに出力する。
以下がjapanese_characters.pyの内容です。
import csv
import unicodedata
# Generate characters from fixed Unicode code-point ranges.
def generate_japanese_characters():
    """Return ``(character, label)`` tuples for hiragana, katakana and kanji.

    Each configured code-point range is scanned in ascending order and a
    character is kept only when its Unicode general category is one of the
    categories accepted for that range. Code points with any other category
    (for example unassigned ones) are skipped.

    Returns:
        A list of 2-tuples ``(char, label)`` where ``label`` is one of
        "ひらがな", "カタカナ" or "漢字". Entries appear in that script order,
        and by ascending code point within each script.

    Note:
        Categories come from ``unicodedata``, so the exact contents follow
        the Unicode database version bundled with the running interpreter.
    """
    # (first code point, last code point, output label, accepted categories)
    # "Lo" = Letter, other; "Mn" = Mark, nonspacing. Accepting "Mn" lets
    # combining marks that live inside a range (such as the combining voiced
    # sound marks in the hiragana block) through, as the per-range filters
    # have always done.
    script_ranges = (
        (0x3040, 0x309F, "ひらがな", ("Lo", "Mn")),
        (0x30A0, 0x30FF, "カタカナ", ("Lo", "Mn")),
        (0x4E00, 0x9FFF, "漢字", ("Lo",)),
    )

    characters = []
    for first, last, label, categories in script_ranges:
        characters.extend(
            (char, label)
            for char in map(chr, range(first, last + 1))
            if unicodedata.category(char) in categories
        )
    return characters
# Assign a provisional stroke count (real data could be obtained from an API
# or an open data set).
def assign_stroke_count(character):
    """Return a stand-in "stroke count" computed from code points.

    The value is the sum, over every code point in ``character``, of that
    code point's value modulo 10. It is a placeholder formula and is not a
    real stroke count.

    Args:
        character: The string whose code points are folded into the value.

    Returns:
        The integer sum described above; 0 for an empty string.
    """
    total = 0
    for symbol in character:
        total += ord(symbol) % 10  # provisional logic
    return total
# Save the character data to a CSV file.
def save_to_csv(characters, csv_path):
    """Write the character table to ``csv_path`` as a UTF-8 CSV file.

    The file is opened in write mode, so existing content is replaced. The
    first row is the header ``character,type,strokes``; one row follows per
    input pair, with the third column filled by ``assign_stroke_count``.

    Args:
        characters: Iterable of ``(char, char_type)`` pairs.
        csv_path: Destination path of the CSV file.
    """
    header = ["character", "type", "strokes"]
    # newline="" leaves line-ending handling to the csv writer.
    with open(csv_path, mode="w", encoding="utf-8", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(header)
        csv_writer.writerows(
            [symbol, kind, assign_stroke_count(symbol)]  # provisional strokes
            for symbol, kind in characters
        )
# Run: build the character list and export it to CSV.
csv_path = "japanese_characters.csv"  # output destination
characters = generate_japanese_characters()
save_to_csv(characters, csv_path)
print(f"CSVファイルが作成されました: {csv_path}")